]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
net: cleanly handle kernel vs user buffers for ->msg_control
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
06021292 107
e0d1095a 108#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
109unsigned int sysctl_net_busy_read __read_mostly;
110unsigned int sysctl_net_busy_poll __read_mostly;
06021292 111#endif
6b96018b 112
8ae5e030
AV
113static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
114static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 115static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
116
117static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
118static __poll_t sock_poll(struct file *file,
119 struct poll_table_struct *wait);
89bddce5 120static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
121#ifdef CONFIG_COMPAT
122static long compat_sock_ioctl(struct file *file,
89bddce5 123 unsigned int cmd, unsigned long arg);
89bbfc95 124#endif
1da177e4 125static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
126static ssize_t sock_sendpage(struct file *file, struct page *page,
127 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 128static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 129 struct pipe_inode_info *pipe, size_t len,
9c55e01c 130 unsigned int flags);
542d3065
AB
131
#ifdef CONFIG_PROC_FS
/*
 * Hand an fdinfo request for a socket file to the protocol's own
 * show_fdinfo hook; protocols without the hook print nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 143
1da177e4
LT
144/*
145 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
146 * in the operation structures but are done directly via the socketcall() multiplexor.
147 */
148
da7071d7 149static const struct file_operations socket_file_ops = {
1da177e4
LT
150 .owner = THIS_MODULE,
151 .llseek = no_llseek,
8ae5e030
AV
152 .read_iter = sock_read_iter,
153 .write_iter = sock_write_iter,
1da177e4
LT
154 .poll = sock_poll,
155 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
156#ifdef CONFIG_COMPAT
157 .compat_ioctl = compat_sock_ioctl,
158#endif
1da177e4 159 .mmap = sock_mmap,
1da177e4
LT
160 .release = sock_close,
161 .fasync = sock_fasync,
5274f052
JA
162 .sendpage = sock_sendpage,
163 .splice_write = generic_splice_sendpage,
9c55e01c 164 .splice_read = sock_splice_read,
b4653342 165 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
166};
167
168/*
169 * The protocol list. Each protocol is registered in here.
170 */
171
1da177e4 172static DEFINE_SPINLOCK(net_family_lock);
190683a9 173static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 174
1da177e4 175/*
89bddce5
SH
176 * Support routines.
177 * Move socket addresses back and forth across the kernel/user
178 * divide and look after the messy bits.
1da177e4
LT
179 */
180
1da177e4
LT
181/**
182 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space
184 * @kaddr: Address in kernel space
185 * @ulen: Length in user space
186 *
187 * The address is copied into kernel space. If the provided address is
188 * too long an error code of -EINVAL is returned. If the copy gives
189 * invalid addresses -EFAULT is returned. On a success 0 is returned.
190 */
191
43db362d 192int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 193{
230b1839 194 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 195 return -EINVAL;
89bddce5 196 if (ulen == 0)
1da177e4 197 return 0;
89bddce5 198 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 199 return -EFAULT;
3ec3b2fb 200 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
201}
202
203/**
204 * move_addr_to_user - copy an address to user space
205 * @kaddr: kernel space address
206 * @klen: length of address in kernel
207 * @uaddr: user space address
208 * @ulen: pointer to user length field
209 *
210 * The value pointed to by ulen on entry is the buffer length available.
211 * This is overwritten with the buffer space used. -EINVAL is returned
212 * if an overlong buffer is specified or a negative buffer size. -EFAULT
213 * is returned if either the buffer or the length field are not
214 * accessible.
215 * After copying the data up to the limit the user specifies, the true
216 * length of the data is written over the length limit the user
217 * specified. Zero is returned for a success.
218 */
89bddce5 219
43db362d 220static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 221 void __user *uaddr, int __user *ulen)
1da177e4
LT
222{
223 int err;
224 int len;
225
68c6beb3 226 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
227 err = get_user(len, ulen);
228 if (err)
1da177e4 229 return err;
89bddce5
SH
230 if (len > klen)
231 len = klen;
68c6beb3 232 if (len < 0)
1da177e4 233 return -EINVAL;
89bddce5 234 if (len) {
d6fe3945
SG
235 if (audit_sockaddr(klen, kaddr))
236 return -ENOMEM;
89bddce5 237 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
238 return -EFAULT;
239 }
240 /*
89bddce5
SH
241 * "fromlen shall refer to the value before truncation.."
242 * 1003.1g
1da177e4
LT
243 */
244 return __put_user(klen, ulen);
245}
246
08009a76 247static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
248
249static struct inode *sock_alloc_inode(struct super_block *sb)
250{
251 struct socket_alloc *ei;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
333f7909
AV
256 init_waitqueue_head(&ei->socket.wq.wait);
257 ei->socket.wq.fasync_list = NULL;
258 ei->socket.wq.flags = 0;
89bddce5 259
1da177e4
LT
260 ei->socket.state = SS_UNCONNECTED;
261 ei->socket.flags = 0;
262 ei->socket.ops = NULL;
263 ei->socket.sk = NULL;
264 ei->socket.file = NULL;
1da177e4
LT
265
266 return &ei->vfs_inode;
267}
268
6d7855c5 269static void sock_free_inode(struct inode *inode)
1da177e4 270{
43815482
ED
271 struct socket_alloc *ei;
272
273 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 274 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
275}
276
51cc5068 277static void init_once(void *foo)
1da177e4 278{
89bddce5 279 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 280
a35afb83 281 inode_init_once(&ei->vfs_inode);
1da177e4 282}
89bddce5 283
1e911632 284static void init_inodecache(void)
1da177e4
LT
285{
286 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
287 sizeof(struct socket_alloc),
288 0,
289 (SLAB_HWCACHE_ALIGN |
290 SLAB_RECLAIM_ACCOUNT |
5d097056 291 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 292 init_once);
1e911632 293 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
294}
295
b87221de 296static const struct super_operations sockfs_ops = {
c6d409cf 297 .alloc_inode = sock_alloc_inode,
6d7855c5 298 .free_inode = sock_free_inode,
c6d409cf 299 .statfs = simple_statfs,
1da177e4
LT
300};
301
c23fbb6b
ED
302/*
303 * sockfs_dname() is called from d_path().
304 */
305static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
306{
307 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 308 d_inode(dentry)->i_ino);
c23fbb6b
ED
309}
310
3ba13d17 311static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 312 .d_dname = sockfs_dname,
1da177e4
LT
313};
314
bba0bd31
AG
315static int sockfs_xattr_get(const struct xattr_handler *handler,
316 struct dentry *dentry, struct inode *inode,
317 const char *suffix, void *value, size_t size)
318{
319 if (value) {
320 if (dentry->d_name.len + 1 > size)
321 return -ERANGE;
322 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
323 }
324 return dentry->d_name.len + 1;
325}
326
327#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
328#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
329#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
330
331static const struct xattr_handler sockfs_xattr_handler = {
332 .name = XATTR_NAME_SOCKPROTONAME,
333 .get = sockfs_xattr_get,
334};
335
4a590153
AG
336static int sockfs_security_xattr_set(const struct xattr_handler *handler,
337 struct dentry *dentry, struct inode *inode,
338 const char *suffix, const void *value,
339 size_t size, int flags)
340{
341 /* Handled by LSM. */
342 return -EAGAIN;
343}
344
345static const struct xattr_handler sockfs_security_xattr_handler = {
346 .prefix = XATTR_SECURITY_PREFIX,
347 .set = sockfs_security_xattr_set,
348};
349
bba0bd31
AG
350static const struct xattr_handler *sockfs_xattr_handlers[] = {
351 &sockfs_xattr_handler,
4a590153 352 &sockfs_security_xattr_handler,
bba0bd31
AG
353 NULL
354};
355
fba9be49 356static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 357{
fba9be49
DH
358 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
359 if (!ctx)
360 return -ENOMEM;
361 ctx->ops = &sockfs_ops;
362 ctx->dops = &sockfs_dentry_operations;
363 ctx->xattr = sockfs_xattr_handlers;
364 return 0;
c74a1cbb
AV
365}
366
367static struct vfsmount *sock_mnt __read_mostly;
368
369static struct file_system_type sock_fs_type = {
370 .name = "sockfs",
fba9be49 371 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
372 .kill_sb = kill_anon_super,
373};
374
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create a file structure and map it into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
391
8a3c245c
PT
392/**
393 * sock_alloc_file - Bind a &socket to a &file
394 * @sock: socket
395 * @flags: file status flags
396 * @dname: protocol name
397 *
398 * Returns the &file bound with @sock, implicitly storing it
399 * in sock->file. If dname is %NULL, sets to "".
400 * On failure the return is a ERR pointer (see linux/err.h).
401 * This function uses GFP_KERNEL internally.
402 */
403
aab174f0 404struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 405{
7cbe66b6 406 struct file *file;
1da177e4 407
d93aa9d8
AV
408 if (!dname)
409 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 410
d93aa9d8
AV
411 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
412 O_RDWR | (flags & O_NONBLOCK),
413 &socket_file_ops);
b5ffe634 414 if (IS_ERR(file)) {
8e1611e2 415 sock_release(sock);
39b65252 416 return file;
cc3808f8
AV
417 }
418
419 sock->file = file;
39d8c1b6 420 file->private_data = sock;
d8e464ec 421 stream_open(SOCK_INODE(sock), file);
28407630 422 return file;
39d8c1b6 423}
56b31d1c 424EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 425
56b31d1c 426static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
427{
428 struct file *newfile;
28407630 429 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
430 if (unlikely(fd < 0)) {
431 sock_release(sock);
28407630 432 return fd;
ce4bb04c 433 }
39d8c1b6 434
aab174f0 435 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 436 if (!IS_ERR(newfile)) {
39d8c1b6 437 fd_install(fd, newfile);
28407630
AV
438 return fd;
439 }
7cbe66b6 440
28407630
AV
441 put_unused_fd(fd);
442 return PTR_ERR(newfile);
1da177e4
LT
443}
444
8a3c245c
PT
445/**
446 * sock_from_file - Return the &socket bounded to @file.
447 * @file: file
448 * @err: pointer to an error code return
449 *
450 * On failure returns %NULL and assigns -ENOTSOCK to @err.
451 */
452
406a3c63 453struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 454{
6cb153ca
BL
455 if (file->f_op == &socket_file_ops)
456 return file->private_data; /* set in sock_map_fd */
457
23bb80d2
ED
458 *err = -ENOTSOCK;
459 return NULL;
6cb153ca 460}
406a3c63 461EXPORT_SYMBOL(sock_from_file);
6cb153ca 462
1da177e4 463/**
c6d409cf 464 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
465 * @fd: file handle
466 * @err: pointer to an error code return
467 *
468 * The file handle passed in is locked and the socket it is bound
241c4667 469 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
470 * with a negative errno code and NULL is returned. The function checks
471 * for both invalid handles and passing a handle which is not a socket.
472 *
473 * On a success the socket object pointer is returned.
474 */
475
476struct socket *sockfd_lookup(int fd, int *err)
477{
478 struct file *file;
1da177e4
LT
479 struct socket *sock;
480
89bddce5
SH
481 file = fget(fd);
482 if (!file) {
1da177e4
LT
483 *err = -EBADF;
484 return NULL;
485 }
89bddce5 486
6cb153ca
BL
487 sock = sock_from_file(file, err);
488 if (!sock)
1da177e4 489 fput(file);
6cb153ca
BL
490 return sock;
491}
c6d409cf 492EXPORT_SYMBOL(sockfd_lookup);
1da177e4 493
6cb153ca
BL
494static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
495{
00e188ef 496 struct fd f = fdget(fd);
6cb153ca
BL
497 struct socket *sock;
498
3672558c 499 *err = -EBADF;
00e188ef
AV
500 if (f.file) {
501 sock = sock_from_file(f.file, err);
502 if (likely(sock)) {
503 *fput_needed = f.flags;
6cb153ca 504 return sock;
00e188ef
AV
505 }
506 fdput(f);
1da177e4 507 }
6cb153ca 508 return NULL;
1da177e4
LT
509}
510
600e1779
MY
511static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
512 size_t size)
513{
514 ssize_t len;
515 ssize_t used = 0;
516
c5ef6035 517 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
518 if (len < 0)
519 return len;
520 used += len;
521 if (buffer) {
522 if (size < used)
523 return -ERANGE;
524 buffer += len;
525 }
526
527 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
528 used += len;
529 if (buffer) {
530 if (size < used)
531 return -ERANGE;
532 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
533 buffer += len;
534 }
535
536 return used;
537}
538
dc647ec8 539static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
540{
541 int err = simple_setattr(dentry, iattr);
542
e1a3a60a 543 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
544 struct socket *sock = SOCKET_I(d_inode(dentry));
545
6d8c50dc
CW
546 if (sock->sk)
547 sock->sk->sk_uid = iattr->ia_uid;
548 else
549 err = -ENOENT;
86741ec2
LC
550 }
551
552 return err;
553}
554
600e1779 555static const struct inode_operations sockfs_inode_ops = {
600e1779 556 .listxattr = sockfs_listxattr,
86741ec2 557 .setattr = sockfs_setattr,
600e1779
MY
558};
559
1da177e4 560/**
8a3c245c 561 * sock_alloc - allocate a socket
89bddce5 562 *
1da177e4
LT
563 * Allocate a new inode and socket object. The two are bound together
564 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 565 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
566 */
567
f4a00aac 568struct socket *sock_alloc(void)
1da177e4 569{
89bddce5
SH
570 struct inode *inode;
571 struct socket *sock;
1da177e4 572
a209dfc7 573 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
574 if (!inode)
575 return NULL;
576
577 sock = SOCKET_I(inode);
578
85fe4025 579 inode->i_ino = get_next_ino();
89bddce5 580 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
581 inode->i_uid = current_fsuid();
582 inode->i_gid = current_fsgid();
600e1779 583 inode->i_op = &sockfs_inode_ops;
1da177e4 584
1da177e4
LT
585 return sock;
586}
f4a00aac 587EXPORT_SYMBOL(sock_alloc);
1da177e4 588
1da177e4 589/**
8a3c245c 590 * sock_release - close a socket
1da177e4
LT
591 * @sock: socket to close
592 *
593 * The socket is released from the protocol stack if it has a release
594 * callback, and the inode is then released if the socket is bound to
89bddce5 595 * an inode not a file.
1da177e4 596 */
89bddce5 597
6d8c50dc 598static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
599{
600 if (sock->ops) {
601 struct module *owner = sock->ops->owner;
602
6d8c50dc
CW
603 if (inode)
604 inode_lock(inode);
1da177e4 605 sock->ops->release(sock);
ff7b11aa 606 sock->sk = NULL;
6d8c50dc
CW
607 if (inode)
608 inode_unlock(inode);
1da177e4
LT
609 sock->ops = NULL;
610 module_put(owner);
611 }
612
333f7909 613 if (sock->wq.fasync_list)
3410f22e 614 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 615
1da177e4
LT
616 if (!sock->file) {
617 iput(SOCK_INODE(sock));
618 return;
619 }
89bddce5 620 sock->file = NULL;
1da177e4 621}
6d8c50dc
CW
622
623void sock_release(struct socket *sock)
624{
625 __sock_release(sock, NULL);
626}
c6d409cf 627EXPORT_SYMBOL(sock_release);
1da177e4 628
c14ac945 629void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 630{
140c55d4
ED
631 u8 flags = *tx_flags;
632
c14ac945 633 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
634 flags |= SKBTX_HW_TSTAMP;
635
c14ac945 636 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
637 flags |= SKBTX_SW_TSTAMP;
638
c14ac945 639 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
640 flags |= SKBTX_SCHED_TSTAMP;
641
140c55d4 642 *tx_flags = flags;
20d49473 643}
67cc0d40 644EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 645
8c3c447b
PA
646INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
647 size_t));
a648a592
PA
648INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
649 size_t));
d8725c86 650static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 651{
a648a592
PA
652 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
653 inet_sendmsg, sock, msg,
654 msg_data_left(msg));
d8725c86
AV
655 BUG_ON(ret == -EIOCBQUEUED);
656 return ret;
1da177e4
LT
657}
658
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Asks the LSM for permission first; if it refuses, its error is
 *	returned and nothing is sent.  Otherwise sends @msg through @sock.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 674EXPORT_SYMBOL(sock_sendmsg);
1da177e4 675
8a3c245c
PT
676/**
677 * kernel_sendmsg - send a message through @sock (kernel-space)
678 * @sock: socket
679 * @msg: message header
680 * @vec: kernel vec
681 * @num: vec array length
682 * @size: total message data size
683 *
684 * Builds the message data with @vec and sends it through @sock.
685 * Returns the number of bytes sent, or an error code.
686 */
687
1da177e4
LT
688int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
689 struct kvec *vec, size_t num, size_t size)
690{
aa563d7b 691 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 692 return sock_sendmsg(sock, msg);
1da177e4 693}
c6d409cf 694EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 695
8a3c245c
PT
696/**
697 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
698 * @sk: sock
699 * @msg: message header
700 * @vec: output s/g array
701 * @num: output s/g array length
702 * @size: total message data size
703 *
704 * Builds the message data with @vec and sends it through @sock.
705 * Returns the number of bytes sent, or an error code.
706 * Caller must hold @sk.
707 */
708
306b13eb
TH
709int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
710 struct kvec *vec, size_t num, size_t size)
711{
712 struct socket *sock = sk->sk_socket;
713
714 if (!sock->ops->sendmsg_locked)
db5980d8 715 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 716
aa563d7b 717 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
718
719 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
720}
721EXPORT_SYMBOL(kernel_sendmsg_locked);
722
8605330a
SHY
723static bool skb_is_err_queue(const struct sk_buff *skb)
724{
725 /* pkt_type of skbs enqueued on the error queue are set to
726 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
727 * in recvmsg, since skbs received on a local socket will never
728 * have a pkt_type of PACKET_OUTGOING.
729 */
730 return skb->pkt_type == PACKET_OUTGOING;
731}
732
b50a5c70
ML
733/* On transmit, software and hardware timestamps are returned independently.
734 * As the two skb clones share the hardware timestamp, which may be updated
735 * before the software timestamp is received, a hardware TX timestamp may be
736 * returned only if there is no software TX timestamp. Ignore false software
737 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 738 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
739 * hardware timestamp.
740 */
741static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
742{
743 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
744}
745
aad9c8c4
ML
746static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
747{
748 struct scm_ts_pktinfo ts_pktinfo;
749 struct net_device *orig_dev;
750
751 if (!skb_mac_header_was_set(skb))
752 return;
753
754 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
755
756 rcu_read_lock();
757 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
758 if (orig_dev)
759 ts_pktinfo.if_index = orig_dev->ifindex;
760 rcu_read_unlock();
761
762 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
763 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
764 sizeof(ts_pktinfo), &ts_pktinfo);
765}
766
92f37fd2
ED
767/*
768 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
769 */
770void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
771 struct sk_buff *skb)
772{
20d49473 773 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 774 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
775 struct scm_timestamping_internal tss;
776
b50a5c70 777 int empty = 1, false_tstamp = 0;
20d49473
PO
778 struct skb_shared_hwtstamps *shhwtstamps =
779 skb_hwtstamps(skb);
780
781 /* Race occurred between timestamp enabling and packet
782 receiving. Fill in the current time for now. */
b50a5c70 783 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 784 __net_timestamp(skb);
b50a5c70
ML
785 false_tstamp = 1;
786 }
20d49473
PO
787
788 if (need_software_tstamp) {
789 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
790 if (new_tstamp) {
791 struct __kernel_sock_timeval tv;
792
793 skb_get_new_timestamp(skb, &tv);
794 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
795 sizeof(tv), &tv);
796 } else {
797 struct __kernel_old_timeval tv;
798
799 skb_get_timestamp(skb, &tv);
800 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
801 sizeof(tv), &tv);
802 }
20d49473 803 } else {
887feae3
DD
804 if (new_tstamp) {
805 struct __kernel_timespec ts;
806
807 skb_get_new_timestampns(skb, &ts);
808 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
809 sizeof(ts), &ts);
810 } else {
df1b4ba9 811 struct __kernel_old_timespec ts;
887feae3
DD
812
813 skb_get_timestampns(skb, &ts);
814 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
815 sizeof(ts), &ts);
816 }
20d49473
PO
817 }
818 }
819
f24b9be5 820 memset(&tss, 0, sizeof(tss));
c199105d 821 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 822 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 823 empty = 0;
4d276eb6 824 if (shhwtstamps &&
b9f40e21 825 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 826 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 827 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 828 empty = 0;
aad9c8c4
ML
829 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
830 !skb_is_err_queue(skb))
831 put_ts_pktinfo(msg, skb);
832 }
1c885808 833 if (!empty) {
9718475e
DD
834 if (sock_flag(sk, SOCK_TSTAMP_NEW))
835 put_cmsg_scm_timestamping64(msg, &tss);
836 else
837 put_cmsg_scm_timestamping(msg, &tss);
1c885808 838
8605330a 839 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 840 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
841 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
842 skb->len, skb->data);
843 }
92f37fd2 844}
7c81fd8b
ACM
845EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
846
6e3e939f
JB
847void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
848 struct sk_buff *skb)
849{
850 int ack;
851
852 if (!sock_flag(sk, SOCK_WIFI_STATUS))
853 return;
854 if (!skb->wifi_acked_valid)
855 return;
856
857 ack = skb->wifi_acked;
858
859 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
860}
861EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
862
11165f14 863static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
864 struct sk_buff *skb)
3b885787 865{
744d5a3e 866 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 867 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 868 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
869}
870
/*
 * Add both the timestamp and the drop-count control messages for @skb
 * to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	/* Timestamps first ... */
	sock_recv_timestamp(msg, sk, skb);

	/* ... then the receive-queue overflow counter. */
	sock_recv_drops(msg, sk, skb);
}
767dd033 877EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 878
8c3c447b 879INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
880 size_t, int));
881INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
882 size_t, int));
1b784140 883static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 884 int flags)
1da177e4 885{
a648a592
PA
886 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
887 inet_recvmsg, sock, msg, msg_data_left(msg),
888 flags);
1da177e4
LT
889}
890
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Asks the LSM for permission first; if it refuses, its error is
 *	returned and nothing is received.  Otherwise receives @msg from
 *	@sock.  Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 906EXPORT_SYMBOL(sock_recvmsg);
1da177e4 907
c1249c0a 908/**
8a3c245c
PT
909 * kernel_recvmsg - Receive a message from a socket (kernel space)
910 * @sock: The socket to receive the message from
911 * @msg: Received message
912 * @vec: Input s/g array for message data
913 * @num: Size of input s/g array
914 * @size: Number of bytes to read
915 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 916 *
8a3c245c
PT
917 * On return the msg structure contains the scatter/gather array passed in the
918 * vec argument. The array is modified so that it consists of the unfilled
919 * portion of the original array.
c1249c0a 920 *
8a3c245c 921 * The returned value is the total number of bytes received, or an error.
c1249c0a 922 */
8a3c245c 923
89bddce5
SH
924int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
925 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 926{
1f466e1f 927 msg->msg_control_is_user = false;
aa563d7b 928 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 929 return sock_recvmsg(sock, msg, flags);
1da177e4 930}
c6d409cf 931EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 932
ce1d4d3e
CH
933static ssize_t sock_sendpage(struct file *file, struct page *page,
934 int offset, size_t size, loff_t *ppos, int more)
1da177e4 935{
1da177e4
LT
936 struct socket *sock;
937 int flags;
938
ce1d4d3e
CH
939 sock = file->private_data;
940
35f9c09f
ED
941 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
942 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
943 flags |= more;
ce1d4d3e 944
e6949583 945 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 946}
1da177e4 947
9c55e01c 948static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 949 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
950 unsigned int flags)
951{
952 struct socket *sock = file->private_data;
953
997b37da 954 if (unlikely(!sock->ops->splice_read))
95506588 955 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 956
9c55e01c
JA
957 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
958}
959
8ae5e030 960static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 961{
6d652330
AV
962 struct file *file = iocb->ki_filp;
963 struct socket *sock = file->private_data;
0345f931 964 struct msghdr msg = {.msg_iter = *to,
965 .msg_iocb = iocb};
8ae5e030 966 ssize_t res;
ce1d4d3e 967
ebfcd895 968 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
969 msg.msg_flags = MSG_DONTWAIT;
970
971 if (iocb->ki_pos != 0)
1da177e4 972 return -ESPIPE;
027445c3 973
66ee59af 974 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
975 return 0;
976
2da62906 977 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
978 *to = msg.msg_iter;
979 return res;
1da177e4
LT
980}
981
8ae5e030 982static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 983{
6d652330
AV
984 struct file *file = iocb->ki_filp;
985 struct socket *sock = file->private_data;
0345f931 986 struct msghdr msg = {.msg_iter = *from,
987 .msg_iocb = iocb};
8ae5e030 988 ssize_t res;
1da177e4 989
8ae5e030 990 if (iocb->ki_pos != 0)
ce1d4d3e 991 return -ESPIPE;
027445c3 992
ebfcd895 993 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
994 msg.msg_flags = MSG_DONTWAIT;
995
6d652330
AV
996 if (sock->type == SOCK_SEQPACKET)
997 msg.msg_flags |= MSG_EOR;
998
d8725c86 999 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1000 *from = msg.msg_iter;
1001 return res;
1da177e4
LT
1002}
1003
1da177e4
LT
1004/*
1005 * Atomic setting of ioctl hooks to avoid race
1006 * with module unload.
1007 */
1008
4a3e2f71 1009static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1010static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1011
881d966b 1012void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1013{
4a3e2f71 1014 mutex_lock(&br_ioctl_mutex);
1da177e4 1015 br_ioctl_hook = hook;
4a3e2f71 1016 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1017}
1018EXPORT_SYMBOL(brioctl_set);
1019
4a3e2f71 1020static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1021static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1022
881d966b 1023void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1024{
4a3e2f71 1025 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1026 vlan_ioctl_hook = hook;
4a3e2f71 1027 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1028}
1029EXPORT_SYMBOL(vlan_ioctl_set);
1030
4a3e2f71 1031static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1032static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1033
89bddce5 1034void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1035{
4a3e2f71 1036 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1037 dlci_ioctl_hook = hook;
4a3e2f71 1038 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1039}
1040EXPORT_SYMBOL(dlci_ioctl_set);
1041
6b96018b 1042static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1043 unsigned int cmd, unsigned long arg)
6b96018b
AB
1044{
1045 int err;
1046 void __user *argp = (void __user *)arg;
1047
1048 err = sock->ops->ioctl(sock, cmd, arg);
1049
1050 /*
1051 * If this ioctl is unknown try to hand it down
1052 * to the NIC driver.
1053 */
36fd633e
AV
1054 if (err != -ENOIOCTLCMD)
1055 return err;
6b96018b 1056
36fd633e
AV
1057 if (cmd == SIOCGIFCONF) {
1058 struct ifconf ifc;
1059 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1060 return -EFAULT;
1061 rtnl_lock();
1062 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1063 rtnl_unlock();
1064 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1065 err = -EFAULT;
44c02a2c
AV
1066 } else {
1067 struct ifreq ifr;
1068 bool need_copyout;
63ff03ab 1069 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1070 return -EFAULT;
1071 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1072 if (!err && need_copyout)
63ff03ab 1073 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1074 return -EFAULT;
36fd633e 1075 }
6b96018b
AB
1076 return err;
1077}
1078
1da177e4
LT
1079/*
1080 * With an ioctl, arg may well be a user mode pointer, but we don't know
1081 * what to do with it - that's up to the protocol still.
1082 */
1083
8a3c245c
PT
1084/**
1085 * get_net_ns - increment the refcount of the network namespace
1086 * @ns: common namespace (net)
1087 *
1088 * Returns the net's common namespace.
1089 */
1090
d8d211a2 1091struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1092{
1093 return &get_net(container_of(ns, struct net, ns))->ns;
1094}
d8d211a2 1095EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1096
1da177e4
LT
1097static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1098{
1099 struct socket *sock;
881d966b 1100 struct sock *sk;
1da177e4
LT
1101 void __user *argp = (void __user *)arg;
1102 int pid, err;
881d966b 1103 struct net *net;
1da177e4 1104
b69aee04 1105 sock = file->private_data;
881d966b 1106 sk = sock->sk;
3b1e0a65 1107 net = sock_net(sk);
44c02a2c
AV
1108 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1109 struct ifreq ifr;
1110 bool need_copyout;
1111 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1112 return -EFAULT;
1113 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1114 if (!err && need_copyout)
1115 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1116 return -EFAULT;
1da177e4 1117 } else
3d23e349 1118#ifdef CONFIG_WEXT_CORE
1da177e4 1119 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1120 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1121 } else
3d23e349 1122#endif
89bddce5 1123 switch (cmd) {
1da177e4
LT
1124 case FIOSETOWN:
1125 case SIOCSPGRP:
1126 err = -EFAULT;
1127 if (get_user(pid, (int __user *)argp))
1128 break;
393cc3f5 1129 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1130 break;
1131 case FIOGETOWN:
1132 case SIOCGPGRP:
609d7fa9 1133 err = put_user(f_getown(sock->file),
89bddce5 1134 (int __user *)argp);
1da177e4
LT
1135 break;
1136 case SIOCGIFBR:
1137 case SIOCSIFBR:
1138 case SIOCBRADDBR:
1139 case SIOCBRDELBR:
1140 err = -ENOPKG;
1141 if (!br_ioctl_hook)
1142 request_module("bridge");
1143
4a3e2f71 1144 mutex_lock(&br_ioctl_mutex);
89bddce5 1145 if (br_ioctl_hook)
881d966b 1146 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1147 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1148 break;
1149 case SIOCGIFVLAN:
1150 case SIOCSIFVLAN:
1151 err = -ENOPKG;
1152 if (!vlan_ioctl_hook)
1153 request_module("8021q");
1154
4a3e2f71 1155 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1156 if (vlan_ioctl_hook)
881d966b 1157 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1158 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1159 break;
1da177e4
LT
1160 case SIOCADDDLCI:
1161 case SIOCDELDLCI:
1162 err = -ENOPKG;
1163 if (!dlci_ioctl_hook)
1164 request_module("dlci");
1165
7512cbf6
PE
1166 mutex_lock(&dlci_ioctl_mutex);
1167 if (dlci_ioctl_hook)
1da177e4 1168 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1169 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1170 break;
c62cce2c
AV
1171 case SIOCGSKNS:
1172 err = -EPERM;
1173 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1174 break;
1175
1176 err = open_related_ns(&net->ns, get_net_ns);
1177 break;
0768e170
AB
1178 case SIOCGSTAMP_OLD:
1179 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1180 if (!sock->ops->gettstamp) {
1181 err = -ENOIOCTLCMD;
1182 break;
1183 }
1184 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1185 cmd == SIOCGSTAMP_OLD,
1186 !IS_ENABLED(CONFIG_64BIT));
60747828 1187 break;
0768e170
AB
1188 case SIOCGSTAMP_NEW:
1189 case SIOCGSTAMPNS_NEW:
1190 if (!sock->ops->gettstamp) {
1191 err = -ENOIOCTLCMD;
1192 break;
1193 }
1194 err = sock->ops->gettstamp(sock, argp,
1195 cmd == SIOCGSTAMP_NEW,
1196 false);
c7cbdbf2 1197 break;
1da177e4 1198 default:
63ff03ab 1199 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1200 break;
89bddce5 1201 }
1da177e4
LT
1202 return err;
1203}
1204
8a3c245c
PT
1205/**
1206 * sock_create_lite - creates a socket
1207 * @family: protocol family (AF_INET, ...)
1208 * @type: communication type (SOCK_STREAM, ...)
1209 * @protocol: protocol (0, ...)
1210 * @res: new socket
1211 *
1212 * Creates a new socket and assigns it to @res, passing through LSM.
1213 * The new socket initialization is not complete, see kernel_accept().
1214 * Returns 0 or an error. On failure @res is set to %NULL.
1215 * This function internally uses GFP_KERNEL.
1216 */
1217
1da177e4
LT
1218int sock_create_lite(int family, int type, int protocol, struct socket **res)
1219{
1220 int err;
1221 struct socket *sock = NULL;
89bddce5 1222
1da177e4
LT
1223 err = security_socket_create(family, type, protocol, 1);
1224 if (err)
1225 goto out;
1226
1227 sock = sock_alloc();
1228 if (!sock) {
1229 err = -ENOMEM;
1230 goto out;
1231 }
1232
1da177e4 1233 sock->type = type;
7420ed23
VY
1234 err = security_socket_post_create(sock, family, type, protocol, 1);
1235 if (err)
1236 goto out_release;
1237
1da177e4
LT
1238out:
1239 *res = sock;
1240 return err;
7420ed23
VY
1241out_release:
1242 sock_release(sock);
1243 sock = NULL;
1244 goto out;
1da177e4 1245}
c6d409cf 1246EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1247
1248/* No kernel lock held - perfect */
ade994f4 1249static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1250{
3cafb376 1251 struct socket *sock = file->private_data;
a331de3b 1252 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1253
e88958e6
CH
1254 if (!sock->ops->poll)
1255 return 0;
f641f13b 1256
a331de3b
CH
1257 if (sk_can_busy_loop(sock->sk)) {
1258 /* poll once if requested by the syscall */
1259 if (events & POLL_BUSY_LOOP)
1260 sk_busy_loop(sock->sk, 1);
1261
1262 /* if this socket can poll_ll, tell the system call */
1263 flag = POLL_BUSY_LOOP;
1264 }
1265
1266 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1267}
1268
89bddce5 1269static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1270{
b69aee04 1271 struct socket *sock = file->private_data;
1da177e4
LT
1272
1273 return sock->ops->mmap(file, sock, vma);
1274}
1275
/* ->release() for socket files: release the socket embedded in @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1281
1282/*
1283 * Update the socket async list
1284 *
1285 * Fasync_list locking strategy.
1286 *
1287 * 1. fasync_list is modified only under process context socket lock
1288 * i.e. under semaphore.
1289 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1290 * or under socket lock
1da177e4
LT
1291 */
1292
1293static int sock_fasync(int fd, struct file *filp, int on)
1294{
989a2979
ED
1295 struct socket *sock = filp->private_data;
1296 struct sock *sk = sock->sk;
333f7909 1297 struct socket_wq *wq = &sock->wq;
1da177e4 1298
989a2979 1299 if (sk == NULL)
1da177e4 1300 return -EINVAL;
1da177e4
LT
1301
1302 lock_sock(sk);
eaefd110 1303 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1304
eaefd110 1305 if (!wq->fasync_list)
989a2979
ED
1306 sock_reset_flag(sk, SOCK_FASYNC);
1307 else
bcdce719 1308 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1309
989a2979 1310 release_sock(sk);
1da177e4
LT
1311 return 0;
1312}
1313
ceb5d58b 1314/* This function may be called only under rcu_lock */
1da177e4 1315
ceb5d58b 1316int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1317{
ceb5d58b 1318 if (!wq || !wq->fasync_list)
1da177e4 1319 return -1;
ceb5d58b 1320
89bddce5 1321 switch (how) {
8d8ad9d7 1322 case SOCK_WAKE_WAITD:
ceb5d58b 1323 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1324 break;
1325 goto call_kill;
8d8ad9d7 1326 case SOCK_WAKE_SPACE:
ceb5d58b 1327 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1328 break;
1329 /* fall through */
8d8ad9d7 1330 case SOCK_WAKE_IO:
89bddce5 1331call_kill:
43815482 1332 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1333 break;
8d8ad9d7 1334 case SOCK_WAKE_URG:
43815482 1335 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1336 }
ceb5d58b 1337
1da177e4
LT
1338 return 0;
1339}
c6d409cf 1340EXPORT_SYMBOL(sock_wake_async);
1da177e4 1341
8a3c245c
PT
1342/**
1343 * __sock_create - creates a socket
1344 * @net: net namespace
1345 * @family: protocol family (AF_INET, ...)
1346 * @type: communication type (SOCK_STREAM, ...)
1347 * @protocol: protocol (0, ...)
1348 * @res: new socket
1349 * @kern: boolean for kernel space sockets
1350 *
1351 * Creates a new socket and assigns it to @res, passing through LSM.
1352 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1353 * be set to true if the socket resides in kernel space.
1354 * This function internally uses GFP_KERNEL.
1355 */
1356
721db93a 1357int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1358 struct socket **res, int kern)
1da177e4
LT
1359{
1360 int err;
1361 struct socket *sock;
55737fda 1362 const struct net_proto_family *pf;
1da177e4
LT
1363
1364 /*
89bddce5 1365 * Check protocol is in range
1da177e4
LT
1366 */
1367 if (family < 0 || family >= NPROTO)
1368 return -EAFNOSUPPORT;
1369 if (type < 0 || type >= SOCK_MAX)
1370 return -EINVAL;
1371
1372 /* Compatibility.
1373
1374 This uglymoron is moved from INET layer to here to avoid
1375 deadlock in module load.
1376 */
1377 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1378 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1379 current->comm);
1da177e4
LT
1380 family = PF_PACKET;
1381 }
1382
1383 err = security_socket_create(family, type, protocol, kern);
1384 if (err)
1385 return err;
89bddce5 1386
55737fda
SH
1387 /*
1388 * Allocate the socket and allow the family to set things up. if
1389 * the protocol is 0, the family is instructed to select an appropriate
1390 * default.
1391 */
1392 sock = sock_alloc();
1393 if (!sock) {
e87cc472 1394 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1395 return -ENFILE; /* Not exactly a match, but its the
1396 closest posix thing */
1397 }
1398
1399 sock->type = type;
1400
95a5afca 1401#ifdef CONFIG_MODULES
89bddce5
SH
1402 /* Attempt to load a protocol module if the find failed.
1403 *
1404 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1405 * requested real, full-featured networking support upon configuration.
1406 * Otherwise module support will break!
1407 */
190683a9 1408 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1409 request_module("net-pf-%d", family);
1da177e4
LT
1410#endif
1411
55737fda
SH
1412 rcu_read_lock();
1413 pf = rcu_dereference(net_families[family]);
1414 err = -EAFNOSUPPORT;
1415 if (!pf)
1416 goto out_release;
1da177e4
LT
1417
1418 /*
1419 * We will call the ->create function, that possibly is in a loadable
1420 * module, so we have to bump that loadable module refcnt first.
1421 */
55737fda 1422 if (!try_module_get(pf->owner))
1da177e4
LT
1423 goto out_release;
1424
55737fda
SH
1425 /* Now protected by module ref count */
1426 rcu_read_unlock();
1427
3f378b68 1428 err = pf->create(net, sock, protocol, kern);
55737fda 1429 if (err < 0)
1da177e4 1430 goto out_module_put;
a79af59e 1431
1da177e4
LT
1432 /*
1433 * Now to bump the refcnt of the [loadable] module that owns this
1434 * socket at sock_release time we decrement its refcnt.
1435 */
55737fda
SH
1436 if (!try_module_get(sock->ops->owner))
1437 goto out_module_busy;
1438
1da177e4
LT
1439 /*
1440 * Now that we're done with the ->create function, the [loadable]
1441 * module can have its refcnt decremented
1442 */
55737fda 1443 module_put(pf->owner);
7420ed23
VY
1444 err = security_socket_post_create(sock, family, type, protocol, kern);
1445 if (err)
3b185525 1446 goto out_sock_release;
55737fda 1447 *res = sock;
1da177e4 1448
55737fda
SH
1449 return 0;
1450
1451out_module_busy:
1452 err = -EAFNOSUPPORT;
1da177e4 1453out_module_put:
55737fda
SH
1454 sock->ops = NULL;
1455 module_put(pf->owner);
1456out_sock_release:
1da177e4 1457 sock_release(sock);
55737fda
SH
1458 return err;
1459
1460out_release:
1461 rcu_read_unlock();
1462 goto out_sock_release;
1da177e4 1463}
721db93a 1464EXPORT_SYMBOL(__sock_create);
1da177e4 1465
8a3c245c
PT
1466/**
1467 * sock_create - creates a socket
1468 * @family: protocol family (AF_INET, ...)
1469 * @type: communication type (SOCK_STREAM, ...)
1470 * @protocol: protocol (0, ...)
1471 * @res: new socket
1472 *
1473 * A wrapper around __sock_create().
1474 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1475 */
1476
1da177e4
LT
1477int sock_create(int family, int type, int protocol, struct socket **res)
1478{
1b8d7ae4 1479 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1480}
c6d409cf 1481EXPORT_SYMBOL(sock_create);
1da177e4 1482
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1500
9d6a15c3 1501int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1502{
1503 int retval;
1504 struct socket *sock;
a677a039
UD
1505 int flags;
1506
e38b36f3
UD
1507 /* Check the SOCK_* constants for consistency. */
1508 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1509 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1510 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1511 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1512
a677a039 1513 flags = type & ~SOCK_TYPE_MASK;
77d27200 1514 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1515 return -EINVAL;
1516 type &= SOCK_TYPE_MASK;
1da177e4 1517
aaca0bdc
UD
1518 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1519 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1520
1da177e4
LT
1521 retval = sock_create(family, type, protocol, &sock);
1522 if (retval < 0)
8e1611e2 1523 return retval;
1da177e4 1524
8e1611e2 1525 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1526}
1527
9d6a15c3
DB
1528SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1529{
1530 return __sys_socket(family, type, protocol);
1531}
1532
1da177e4
LT
1533/*
1534 * Create a pair of connected sockets.
1535 */
1536
6debc8d8 1537int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1538{
1539 struct socket *sock1, *sock2;
1540 int fd1, fd2, err;
db349509 1541 struct file *newfile1, *newfile2;
a677a039
UD
1542 int flags;
1543
1544 flags = type & ~SOCK_TYPE_MASK;
77d27200 1545 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1546 return -EINVAL;
1547 type &= SOCK_TYPE_MASK;
1da177e4 1548
aaca0bdc
UD
1549 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1550 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1551
016a266b
AV
1552 /*
1553 * reserve descriptors and make sure we won't fail
1554 * to return them to userland.
1555 */
1556 fd1 = get_unused_fd_flags(flags);
1557 if (unlikely(fd1 < 0))
1558 return fd1;
1559
1560 fd2 = get_unused_fd_flags(flags);
1561 if (unlikely(fd2 < 0)) {
1562 put_unused_fd(fd1);
1563 return fd2;
1564 }
1565
1566 err = put_user(fd1, &usockvec[0]);
1567 if (err)
1568 goto out;
1569
1570 err = put_user(fd2, &usockvec[1]);
1571 if (err)
1572 goto out;
1573
1da177e4
LT
1574 /*
1575 * Obtain the first socket and check if the underlying protocol
1576 * supports the socketpair call.
1577 */
1578
1579 err = sock_create(family, type, protocol, &sock1);
016a266b 1580 if (unlikely(err < 0))
1da177e4
LT
1581 goto out;
1582
1583 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1584 if (unlikely(err < 0)) {
1585 sock_release(sock1);
1586 goto out;
bf3c23d1 1587 }
d73aa286 1588
d47cd945
DH
1589 err = security_socket_socketpair(sock1, sock2);
1590 if (unlikely(err)) {
1591 sock_release(sock2);
1592 sock_release(sock1);
1593 goto out;
1594 }
1595
016a266b
AV
1596 err = sock1->ops->socketpair(sock1, sock2);
1597 if (unlikely(err < 0)) {
1598 sock_release(sock2);
1599 sock_release(sock1);
1600 goto out;
28407630
AV
1601 }
1602
aab174f0 1603 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1604 if (IS_ERR(newfile1)) {
28407630 1605 err = PTR_ERR(newfile1);
016a266b
AV
1606 sock_release(sock2);
1607 goto out;
28407630
AV
1608 }
1609
aab174f0 1610 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1611 if (IS_ERR(newfile2)) {
1612 err = PTR_ERR(newfile2);
016a266b
AV
1613 fput(newfile1);
1614 goto out;
db349509
AV
1615 }
1616
157cf649 1617 audit_fd_pair(fd1, fd2);
d73aa286 1618
db349509
AV
1619 fd_install(fd1, newfile1);
1620 fd_install(fd2, newfile2);
d73aa286 1621 return 0;
1da177e4 1622
016a266b 1623out:
d73aa286 1624 put_unused_fd(fd2);
d73aa286 1625 put_unused_fd(fd1);
1da177e4
LT
1626 return err;
1627}
1628
6debc8d8
DB
1629SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1630 int __user *, usockvec)
1631{
1632 return __sys_socketpair(family, type, protocol, usockvec);
1633}
1634
1da177e4
LT
1635/*
1636 * Bind a name to a socket. Nothing much to do here since it's
1637 * the protocol's responsibility to handle the local address.
1638 *
1639 * We move the socket address to kernel space before we call
1640 * the protocol layer (having also checked the address is ok).
1641 */
1642
a87d35d8 1643int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1644{
1645 struct socket *sock;
230b1839 1646 struct sockaddr_storage address;
6cb153ca 1647 int err, fput_needed;
1da177e4 1648
89bddce5 1649 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1650 if (sock) {
43db362d 1651 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1652 if (!err) {
89bddce5 1653 err = security_socket_bind(sock,
230b1839 1654 (struct sockaddr *)&address,
89bddce5 1655 addrlen);
6cb153ca
BL
1656 if (!err)
1657 err = sock->ops->bind(sock,
89bddce5 1658 (struct sockaddr *)
230b1839 1659 &address, addrlen);
1da177e4 1660 }
6cb153ca 1661 fput_light(sock->file, fput_needed);
89bddce5 1662 }
1da177e4
LT
1663 return err;
1664}
1665
a87d35d8
DB
1666SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1667{
1668 return __sys_bind(fd, umyaddr, addrlen);
1669}
1670
1da177e4
LT
1671/*
1672 * Perform a listen. Basically, we allow the protocol to do anything
1673 * necessary for a listen, and if that works, we mark the socket as
1674 * ready for listening.
1675 */
1676
25e290ee 1677int __sys_listen(int fd, int backlog)
1da177e4
LT
1678{
1679 struct socket *sock;
6cb153ca 1680 int err, fput_needed;
b8e1f9b5 1681 int somaxconn;
89bddce5
SH
1682
1683 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1684 if (sock) {
8efa6e93 1685 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1686 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1687 backlog = somaxconn;
1da177e4
LT
1688
1689 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1690 if (!err)
1691 err = sock->ops->listen(sock, backlog);
1da177e4 1692
6cb153ca 1693 fput_light(sock->file, fput_needed);
1da177e4
LT
1694 }
1695 return err;
1696}
1697
25e290ee
DB
1698SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1699{
1700 return __sys_listen(fd, backlog);
1701}
1702
de2ea4b6
JA
1703int __sys_accept4_file(struct file *file, unsigned file_flags,
1704 struct sockaddr __user *upeer_sockaddr,
09952e3e
JA
1705 int __user *upeer_addrlen, int flags,
1706 unsigned long nofile)
1da177e4
LT
1707{
1708 struct socket *sock, *newsock;
39d8c1b6 1709 struct file *newfile;
de2ea4b6 1710 int err, len, newfd;
230b1839 1711 struct sockaddr_storage address;
1da177e4 1712
77d27200 1713 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1714 return -EINVAL;
1715
1716 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1717 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1718
de2ea4b6 1719 sock = sock_from_file(file, &err);
1da177e4
LT
1720 if (!sock)
1721 goto out;
1722
1723 err = -ENFILE;
c6d409cf
ED
1724 newsock = sock_alloc();
1725 if (!newsock)
de2ea4b6 1726 goto out;
1da177e4
LT
1727
1728 newsock->type = sock->type;
1729 newsock->ops = sock->ops;
1730
1da177e4
LT
1731 /*
1732 * We don't need try_module_get here, as the listening socket (sock)
1733 * has the protocol module (sock->ops->owner) held.
1734 */
1735 __module_get(newsock->ops->owner);
1736
09952e3e 1737 newfd = __get_unused_fd_flags(flags, nofile);
39d8c1b6
DM
1738 if (unlikely(newfd < 0)) {
1739 err = newfd;
9a1875e6 1740 sock_release(newsock);
de2ea4b6 1741 goto out;
39d8c1b6 1742 }
aab174f0 1743 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1744 if (IS_ERR(newfile)) {
28407630
AV
1745 err = PTR_ERR(newfile);
1746 put_unused_fd(newfd);
de2ea4b6 1747 goto out;
28407630 1748 }
39d8c1b6 1749
a79af59e
FF
1750 err = security_socket_accept(sock, newsock);
1751 if (err)
39d8c1b6 1752 goto out_fd;
a79af59e 1753
de2ea4b6
JA
1754 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1755 false);
1da177e4 1756 if (err < 0)
39d8c1b6 1757 goto out_fd;
1da177e4
LT
1758
1759 if (upeer_sockaddr) {
9b2c45d4
DV
1760 len = newsock->ops->getname(newsock,
1761 (struct sockaddr *)&address, 2);
1762 if (len < 0) {
1da177e4 1763 err = -ECONNABORTED;
39d8c1b6 1764 goto out_fd;
1da177e4 1765 }
43db362d 1766 err = move_addr_to_user(&address,
230b1839 1767 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1768 if (err < 0)
39d8c1b6 1769 goto out_fd;
1da177e4
LT
1770 }
1771
1772 /* File flags are not inherited via accept() unlike another OSes. */
1773
39d8c1b6
DM
1774 fd_install(newfd, newfile);
1775 err = newfd;
1da177e4
LT
1776out:
1777 return err;
39d8c1b6 1778out_fd:
9606a216 1779 fput(newfile);
39d8c1b6 1780 put_unused_fd(newfd);
de2ea4b6
JA
1781 goto out;
1782
1783}
1784
1785/*
1786 * For accept, we attempt to create a new socket, set up the link
1787 * with the client, wake up the client, then return the new
1788 * connected fd. We collect the address of the connector in kernel
1789 * space and move it to user at the very end. This is unclean because
1790 * we open the socket then return an error.
1791 *
1792 * 1003.1g adds the ability to recvmsg() to query connection pending
1793 * status to recvmsg. We need to add that support in a way thats
1794 * clean when we restructure accept also.
1795 */
1796
1797int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1798 int __user *upeer_addrlen, int flags)
1799{
1800 int ret = -EBADF;
1801 struct fd f;
1802
1803 f = fdget(fd);
1804 if (f.file) {
1805 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1806 upeer_addrlen, flags,
1807 rlimit(RLIMIT_NOFILE));
de2ea4b6
JA
1808 if (f.flags)
1809 fput(f.file);
1810 }
1811
1812 return ret;
1da177e4
LT
1813}
1814
4541e805
DB
1815SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1816 int __user *, upeer_addrlen, int, flags)
1817{
1818 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1819}
1820
20f37034
HC
1821SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1822 int __user *, upeer_addrlen)
aaca0bdc 1823{
4541e805 1824 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1825}
1826
1da177e4
LT
1827/*
1828 * Attempt to connect to a socket with the server address. The address
1829 * is in user space so we verify it is OK and move it to kernel space.
1830 *
1831 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1832 * break bindings
1833 *
1834 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1835 * other SEQPACKET protocols that take time to connect() as it doesn't
1836 * include the -EINPROGRESS status for such sockets.
1837 */
1838
f499a021 1839int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1840 int addrlen, int file_flags)
1da177e4
LT
1841{
1842 struct socket *sock;
bd3ded31 1843 int err;
1da177e4 1844
bd3ded31 1845 sock = sock_from_file(file, &err);
1da177e4
LT
1846 if (!sock)
1847 goto out;
1da177e4 1848
89bddce5 1849 err =
f499a021 1850 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1851 if (err)
bd3ded31 1852 goto out;
1da177e4 1853
f499a021 1854 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1855 sock->file->f_flags | file_flags);
1da177e4
LT
1856out:
1857 return err;
1858}
1859
bd3ded31
JA
1860int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1861{
1862 int ret = -EBADF;
1863 struct fd f;
1864
1865 f = fdget(fd);
1866 if (f.file) {
f499a021
JA
1867 struct sockaddr_storage address;
1868
1869 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1870 if (!ret)
1871 ret = __sys_connect_file(f.file, &address, addrlen, 0);
bd3ded31
JA
1872 if (f.flags)
1873 fput(f.file);
1874 }
1875
1876 return ret;
1877}
1878
1387c2c2
DB
1879SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1880 int, addrlen)
1881{
1882 return __sys_connect(fd, uservaddr, addrlen);
1883}
1884
1da177e4
LT
1885/*
1886 * Get the local address ('name') of a socket object. Move the obtained
1887 * name to user space.
1888 */
1889
8882a107
DB
1890int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1891 int __user *usockaddr_len)
1da177e4
LT
1892{
1893 struct socket *sock;
230b1839 1894 struct sockaddr_storage address;
9b2c45d4 1895 int err, fput_needed;
89bddce5 1896
6cb153ca 1897 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1898 if (!sock)
1899 goto out;
1900
1901 err = security_socket_getsockname(sock);
1902 if (err)
1903 goto out_put;
1904
9b2c45d4
DV
1905 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1906 if (err < 0)
1da177e4 1907 goto out_put;
9b2c45d4
DV
1908 /* "err" is actually length in this case */
1909 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1910
1911out_put:
6cb153ca 1912 fput_light(sock->file, fput_needed);
1da177e4
LT
1913out:
1914 return err;
1915}
1916
8882a107
DB
1917SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1918 int __user *, usockaddr_len)
1919{
1920 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1921}
1922
1da177e4
LT
1923/*
1924 * Get the remote address ('name') of a socket object. Move the obtained
1925 * name to user space.
1926 */
1927
b21c8f83
DB
1928int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1929 int __user *usockaddr_len)
1da177e4
LT
1930{
1931 struct socket *sock;
230b1839 1932 struct sockaddr_storage address;
9b2c45d4 1933 int err, fput_needed;
1da177e4 1934
89bddce5
SH
1935 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1936 if (sock != NULL) {
1da177e4
LT
1937 err = security_socket_getpeername(sock);
1938 if (err) {
6cb153ca 1939 fput_light(sock->file, fput_needed);
1da177e4
LT
1940 return err;
1941 }
1942
9b2c45d4
DV
1943 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1944 if (err >= 0)
1945 /* "err" is actually length in this case */
1946 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1947 usockaddr_len);
6cb153ca 1948 fput_light(sock->file, fput_needed);
1da177e4
LT
1949 }
1950 return err;
1951}
1952
b21c8f83
DB
1953SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1954 int __user *, usockaddr_len)
1955{
1956 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1957}
1958
1da177e4
LT
1959/*
1960 * Send a datagram to a given address. We move the address into kernel
1961 * space and check the user space data area is readable before invoking
1962 * the protocol.
1963 */
211b634b
DB
1964int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1965 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1966{
1967 struct socket *sock;
230b1839 1968 struct sockaddr_storage address;
1da177e4
LT
1969 int err;
1970 struct msghdr msg;
1971 struct iovec iov;
6cb153ca 1972 int fput_needed;
6cb153ca 1973
602bd0e9
AV
1974 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1975 if (unlikely(err))
1976 return err;
de0fa95c
PE
1977 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1978 if (!sock)
4387ff75 1979 goto out;
6cb153ca 1980
89bddce5 1981 msg.msg_name = NULL;
89bddce5
SH
1982 msg.msg_control = NULL;
1983 msg.msg_controllen = 0;
1984 msg.msg_namelen = 0;
6cb153ca 1985 if (addr) {
43db362d 1986 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1987 if (err < 0)
1988 goto out_put;
230b1839 1989 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1990 msg.msg_namelen = addr_len;
1da177e4
LT
1991 }
1992 if (sock->file->f_flags & O_NONBLOCK)
1993 flags |= MSG_DONTWAIT;
1994 msg.msg_flags = flags;
d8725c86 1995 err = sock_sendmsg(sock, &msg);
1da177e4 1996
89bddce5 1997out_put:
de0fa95c 1998 fput_light(sock->file, fput_needed);
4387ff75 1999out:
1da177e4
LT
2000 return err;
2001}
2002
211b634b
DB
2003SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2004 unsigned int, flags, struct sockaddr __user *, addr,
2005 int, addr_len)
2006{
2007 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2008}
2009
1da177e4 2010/*
89bddce5 2011 * Send a datagram down a socket.
1da177e4
LT
2012 */
2013
3e0fa65f 2014SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2015 unsigned int, flags)
1da177e4 2016{
211b634b 2017 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2018}
2019
2020/*
89bddce5 2021 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2022 * sender. We verify the buffers are writable and if needed move the
2023 * sender address from kernel to user space.
2024 */
7a09e1eb
DB
2025int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2026 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2027{
2028 struct socket *sock;
2029 struct iovec iov;
2030 struct msghdr msg;
230b1839 2031 struct sockaddr_storage address;
89bddce5 2032 int err, err2;
6cb153ca
BL
2033 int fput_needed;
2034
602bd0e9
AV
2035 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2036 if (unlikely(err))
2037 return err;
de0fa95c 2038 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2039 if (!sock)
de0fa95c 2040 goto out;
1da177e4 2041
89bddce5
SH
2042 msg.msg_control = NULL;
2043 msg.msg_controllen = 0;
f3d33426
HFS
2044 /* Save some cycles and don't copy the address if not needed */
2045 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2046 /* We assume all kernel code knows the size of sockaddr_storage */
2047 msg.msg_namelen = 0;
130ed5d1 2048 msg.msg_iocb = NULL;
9f138fa6 2049 msg.msg_flags = 0;
1da177e4
LT
2050 if (sock->file->f_flags & O_NONBLOCK)
2051 flags |= MSG_DONTWAIT;
2da62906 2052 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2053
89bddce5 2054 if (err >= 0 && addr != NULL) {
43db362d 2055 err2 = move_addr_to_user(&address,
230b1839 2056 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2057 if (err2 < 0)
2058 err = err2;
1da177e4 2059 }
de0fa95c
PE
2060
2061 fput_light(sock->file, fput_needed);
4387ff75 2062out:
1da177e4
LT
2063 return err;
2064}
2065
7a09e1eb
DB
2066SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2067 unsigned int, flags, struct sockaddr __user *, addr,
2068 int __user *, addr_len)
2069{
2070 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2071}
2072
1da177e4 2073/*
89bddce5 2074 * Receive a datagram from a socket.
1da177e4
LT
2075 */
2076
b7c0ddf5
JG
2077SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2078 unsigned int, flags)
1da177e4 2079{
7a09e1eb 2080 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2081}
2082
2083/*
2084 * Set a socket option. Because we don't know the option lengths we have
2085 * to pass the user mode parameter for the protocols to sort out.
2086 */
2087
cc36dca0
DB
2088static int __sys_setsockopt(int fd, int level, int optname,
2089 char __user *optval, int optlen)
1da177e4 2090{
0d01da6a
SF
2091 mm_segment_t oldfs = get_fs();
2092 char *kernel_optval = NULL;
6cb153ca 2093 int err, fput_needed;
1da177e4
LT
2094 struct socket *sock;
2095
2096 if (optlen < 0)
2097 return -EINVAL;
89bddce5
SH
2098
2099 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2100 if (sock != NULL) {
2101 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2102 if (err)
2103 goto out_put;
1da177e4 2104
0d01da6a
SF
2105 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2106 &optname, optval, &optlen,
2107 &kernel_optval);
2108
2109 if (err < 0) {
2110 goto out_put;
2111 } else if (err > 0) {
2112 err = 0;
2113 goto out_put;
2114 }
2115
2116 if (kernel_optval) {
2117 set_fs(KERNEL_DS);
2118 optval = (char __user __force *)kernel_optval;
2119 }
2120
1da177e4 2121 if (level == SOL_SOCKET)
89bddce5
SH
2122 err =
2123 sock_setsockopt(sock, level, optname, optval,
2124 optlen);
1da177e4 2125 else
89bddce5
SH
2126 err =
2127 sock->ops->setsockopt(sock, level, optname, optval,
2128 optlen);
0d01da6a
SF
2129
2130 if (kernel_optval) {
2131 set_fs(oldfs);
2132 kfree(kernel_optval);
2133 }
6cb153ca
BL
2134out_put:
2135 fput_light(sock->file, fput_needed);
1da177e4
LT
2136 }
2137 return err;
2138}
2139
cc36dca0
DB
2140SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2141 char __user *, optval, int, optlen)
2142{
2143 return __sys_setsockopt(fd, level, optname, optval, optlen);
2144}
2145
1da177e4
LT
2146/*
2147 * Get a socket option. Because we don't know the option lengths we have
2148 * to pass a user mode parameter for the protocols to sort out.
2149 */
2150
13a2d70e
DB
2151static int __sys_getsockopt(int fd, int level, int optname,
2152 char __user *optval, int __user *optlen)
1da177e4 2153{
6cb153ca 2154 int err, fput_needed;
1da177e4 2155 struct socket *sock;
0d01da6a 2156 int max_optlen;
1da177e4 2157
89bddce5
SH
2158 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2159 if (sock != NULL) {
6cb153ca
BL
2160 err = security_socket_getsockopt(sock, level, optname);
2161 if (err)
2162 goto out_put;
1da177e4 2163
0d01da6a
SF
2164 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2165
1da177e4 2166 if (level == SOL_SOCKET)
89bddce5
SH
2167 err =
2168 sock_getsockopt(sock, level, optname, optval,
2169 optlen);
1da177e4 2170 else
89bddce5
SH
2171 err =
2172 sock->ops->getsockopt(sock, level, optname, optval,
2173 optlen);
0d01da6a
SF
2174
2175 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2176 optval, optlen,
2177 max_optlen, err);
6cb153ca
BL
2178out_put:
2179 fput_light(sock->file, fput_needed);
1da177e4
LT
2180 }
2181 return err;
2182}
2183
13a2d70e
DB
2184SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2185 char __user *, optval, int __user *, optlen)
2186{
2187 return __sys_getsockopt(fd, level, optname, optval, optlen);
2188}
2189
1da177e4
LT
2190/*
2191 * Shutdown a socket.
2192 */
2193
005a1aea 2194int __sys_shutdown(int fd, int how)
1da177e4 2195{
6cb153ca 2196 int err, fput_needed;
1da177e4
LT
2197 struct socket *sock;
2198
89bddce5
SH
2199 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2200 if (sock != NULL) {
1da177e4 2201 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2202 if (!err)
2203 err = sock->ops->shutdown(sock, how);
2204 fput_light(sock->file, fput_needed);
1da177e4
LT
2205 }
2206 return err;
2207}
2208
005a1aea
DB
2209SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2210{
2211 return __sys_shutdown(fd, how);
2212}
2213
/* Helpers yielding the address of a field in either the native or the
 * compat user message header.  They must be expanded in a scope that has
 * "flags" plus both a <msg> and a <msg>_compat pointer: MSG_CMSG_COMPAT in
 * flags selects the compat header.  The fields accessed this way have the
 * same type (int / unsigned) in both layouts on our platforms.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2220
c71d8ebe
TH
2221struct used_address {
2222 struct sockaddr_storage name;
2223 unsigned int name_len;
2224};
2225
0a384abf
JA
2226int __copy_msghdr_from_user(struct msghdr *kmsg,
2227 struct user_msghdr __user *umsg,
2228 struct sockaddr __user **save_addr,
2229 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2230{
ffb07550 2231 struct user_msghdr msg;
08adb7da
AV
2232 ssize_t err;
2233
ffb07550 2234 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2235 return -EFAULT;
dbb490b9 2236
1f466e1f
CH
2237 kmsg->msg_control_is_user = true;
2238 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2239 kmsg->msg_controllen = msg.msg_controllen;
2240 kmsg->msg_flags = msg.msg_flags;
2241
2242 kmsg->msg_namelen = msg.msg_namelen;
2243 if (!msg.msg_name)
6a2a2b3a
AS
2244 kmsg->msg_namelen = 0;
2245
dbb490b9
ML
2246 if (kmsg->msg_namelen < 0)
2247 return -EINVAL;
2248
1661bf36 2249 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2250 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2251
2252 if (save_addr)
ffb07550 2253 *save_addr = msg.msg_name;
08adb7da 2254
ffb07550 2255 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2256 if (!save_addr) {
864d9664
PA
2257 err = move_addr_to_kernel(msg.msg_name,
2258 kmsg->msg_namelen,
08adb7da
AV
2259 kmsg->msg_name);
2260 if (err < 0)
2261 return err;
2262 }
2263 } else {
2264 kmsg->msg_name = NULL;
2265 kmsg->msg_namelen = 0;
2266 }
2267
ffb07550 2268 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2269 return -EMSGSIZE;
2270
0345f931 2271 kmsg->msg_iocb = NULL;
0a384abf
JA
2272 *uiov = msg.msg_iov;
2273 *nsegs = msg.msg_iovlen;
2274 return 0;
2275}
2276
2277static int copy_msghdr_from_user(struct msghdr *kmsg,
2278 struct user_msghdr __user *umsg,
2279 struct sockaddr __user **save_addr,
2280 struct iovec **iov)
2281{
2282 struct user_msghdr msg;
2283 ssize_t err;
2284
2285 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2286 &msg.msg_iovlen);
2287 if (err)
2288 return err;
0345f931 2289
87e5e6da 2290 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2291 msg.msg_iov, msg.msg_iovlen,
da184284 2292 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2293 return err < 0 ? err : 0;
1661bf36
DC
2294}
2295
4257c8ca
JA
2296static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2297 unsigned int flags, struct used_address *used_address,
2298 unsigned int allowed_msghdr_flags)
1da177e4 2299{
b9d717a7 2300 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2301 __aligned(sizeof(__kernel_size_t));
89bddce5 2302 /* 20 is size of ipv6_pktinfo */
1da177e4 2303 unsigned char *ctl_buf = ctl;
d8725c86 2304 int ctl_len;
08adb7da 2305 ssize_t err;
89bddce5 2306
1da177e4
LT
2307 err = -ENOBUFS;
2308
228e548e 2309 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2310 goto out;
28a94d8f 2311 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2312 ctl_len = msg_sys->msg_controllen;
1da177e4 2313 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2314 err =
228e548e 2315 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2316 sizeof(ctl));
1da177e4 2317 if (err)
4257c8ca 2318 goto out;
228e548e
AB
2319 ctl_buf = msg_sys->msg_control;
2320 ctl_len = msg_sys->msg_controllen;
1da177e4 2321 } else if (ctl_len) {
ac4340fc
DM
2322 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2323 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2324 if (ctl_len > sizeof(ctl)) {
1da177e4 2325 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2326 if (ctl_buf == NULL)
4257c8ca 2327 goto out;
1da177e4
LT
2328 }
2329 err = -EFAULT;
1f466e1f 2330 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2331 goto out_freectl;
228e548e 2332 msg_sys->msg_control = ctl_buf;
1f466e1f 2333 msg_sys->msg_control_is_user = false;
1da177e4 2334 }
228e548e 2335 msg_sys->msg_flags = flags;
1da177e4
LT
2336
2337 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2338 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2339 /*
2340 * If this is sendmmsg() and current destination address is same as
2341 * previously succeeded address, omit asking LSM's decision.
2342 * used_address->name_len is initialized to UINT_MAX so that the first
2343 * destination address never matches.
2344 */
bc909d9d
MD
2345 if (used_address && msg_sys->msg_name &&
2346 used_address->name_len == msg_sys->msg_namelen &&
2347 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2348 used_address->name_len)) {
d8725c86 2349 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2350 goto out_freectl;
2351 }
d8725c86 2352 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2353 /*
2354 * If this is sendmmsg() and sending to current destination address was
2355 * successful, remember it.
2356 */
2357 if (used_address && err >= 0) {
2358 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2359 if (msg_sys->msg_name)
2360 memcpy(&used_address->name, msg_sys->msg_name,
2361 used_address->name_len);
c71d8ebe 2362 }
1da177e4
LT
2363
2364out_freectl:
89bddce5 2365 if (ctl_buf != ctl)
1da177e4 2366 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2367out:
2368 return err;
2369}
2370
03b1230c
JA
2371int sendmsg_copy_msghdr(struct msghdr *msg,
2372 struct user_msghdr __user *umsg, unsigned flags,
2373 struct iovec **iov)
4257c8ca
JA
2374{
2375 int err;
2376
2377 if (flags & MSG_CMSG_COMPAT) {
2378 struct compat_msghdr __user *msg_compat;
2379
2380 msg_compat = (struct compat_msghdr __user *) umsg;
2381 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2382 } else {
2383 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2384 }
2385 if (err < 0)
2386 return err;
2387
2388 return 0;
2389}
2390
2391static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2392 struct msghdr *msg_sys, unsigned int flags,
2393 struct used_address *used_address,
2394 unsigned int allowed_msghdr_flags)
2395{
2396 struct sockaddr_storage address;
2397 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2398 ssize_t err;
2399
2400 msg_sys->msg_name = &address;
2401
2402 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2403 if (err < 0)
2404 return err;
2405
2406 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2407 allowed_msghdr_flags);
da184284 2408 kfree(iov);
228e548e
AB
2409 return err;
2410}
2411
2412/*
2413 * BSD sendmsg interface
2414 */
03b1230c 2415long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2416 unsigned int flags)
2417{
d69e0779 2418 /* disallow ancillary data requests from this path */
03b1230c
JA
2419 if (msg->msg_control || msg->msg_controllen)
2420 return -EINVAL;
d69e0779 2421
03b1230c 2422 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2423}
228e548e 2424
e1834a32
DB
2425long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2426 bool forbid_cmsg_compat)
228e548e
AB
2427{
2428 int fput_needed, err;
2429 struct msghdr msg_sys;
1be374a0
AL
2430 struct socket *sock;
2431
e1834a32
DB
2432 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2433 return -EINVAL;
2434
1be374a0 2435 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2436 if (!sock)
2437 goto out;
2438
28a94d8f 2439 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2440
6cb153ca 2441 fput_light(sock->file, fput_needed);
89bddce5 2442out:
1da177e4
LT
2443 return err;
2444}
2445
666547ff 2446SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2447{
e1834a32 2448 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2449}
2450
228e548e
AB
2451/*
2452 * Linux sendmmsg interface
2453 */
2454
2455int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2456 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2457{
2458 int fput_needed, err, datagrams;
2459 struct socket *sock;
2460 struct mmsghdr __user *entry;
2461 struct compat_mmsghdr __user *compat_entry;
2462 struct msghdr msg_sys;
c71d8ebe 2463 struct used_address used_address;
f092276d 2464 unsigned int oflags = flags;
228e548e 2465
e1834a32
DB
2466 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2467 return -EINVAL;
2468
98382f41
AB
2469 if (vlen > UIO_MAXIOV)
2470 vlen = UIO_MAXIOV;
228e548e
AB
2471
2472 datagrams = 0;
2473
2474 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2475 if (!sock)
2476 return err;
2477
c71d8ebe 2478 used_address.name_len = UINT_MAX;
228e548e
AB
2479 entry = mmsg;
2480 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2481 err = 0;
f092276d 2482 flags |= MSG_BATCH;
228e548e
AB
2483
2484 while (datagrams < vlen) {
f092276d
TH
2485 if (datagrams == vlen - 1)
2486 flags = oflags;
2487
228e548e 2488 if (MSG_CMSG_COMPAT & flags) {
666547ff 2489 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2490 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2491 if (err < 0)
2492 break;
2493 err = __put_user(err, &compat_entry->msg_len);
2494 ++compat_entry;
2495 } else {
a7526eb5 2496 err = ___sys_sendmsg(sock,
666547ff 2497 (struct user_msghdr __user *)entry,
28a94d8f 2498 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2499 if (err < 0)
2500 break;
2501 err = put_user(err, &entry->msg_len);
2502 ++entry;
2503 }
2504
2505 if (err)
2506 break;
2507 ++datagrams;
3023898b
SHY
2508 if (msg_data_left(&msg_sys))
2509 break;
a78cb84c 2510 cond_resched();
228e548e
AB
2511 }
2512
228e548e
AB
2513 fput_light(sock->file, fput_needed);
2514
728ffb86
AB
2515 /* We only return an error if no datagrams were able to be sent */
2516 if (datagrams != 0)
228e548e
AB
2517 return datagrams;
2518
228e548e
AB
2519 return err;
2520}
2521
2522SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2523 unsigned int, vlen, unsigned int, flags)
2524{
e1834a32 2525 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2526}
2527
03b1230c
JA
2528int recvmsg_copy_msghdr(struct msghdr *msg,
2529 struct user_msghdr __user *umsg, unsigned flags,
2530 struct sockaddr __user **uaddr,
2531 struct iovec **iov)
1da177e4 2532{
08adb7da 2533 ssize_t err;
1da177e4 2534
4257c8ca
JA
2535 if (MSG_CMSG_COMPAT & flags) {
2536 struct compat_msghdr __user *msg_compat;
1da177e4 2537
4257c8ca
JA
2538 msg_compat = (struct compat_msghdr __user *) umsg;
2539 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2540 } else {
2541 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2542 }
1da177e4 2543 if (err < 0)
da184284 2544 return err;
1da177e4 2545
4257c8ca
JA
2546 return 0;
2547}
2548
2549static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2550 struct user_msghdr __user *msg,
2551 struct sockaddr __user *uaddr,
2552 unsigned int flags, int nosec)
2553{
2554 struct compat_msghdr __user *msg_compat =
2555 (struct compat_msghdr __user *) msg;
2556 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2557 struct sockaddr_storage addr;
2558 unsigned long cmsg_ptr;
2559 int len;
2560 ssize_t err;
2561
2562 msg_sys->msg_name = &addr;
a2e27255
ACM
2563 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2564 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2565
f3d33426
HFS
2566 /* We assume all kernel code knows the size of sockaddr_storage */
2567 msg_sys->msg_namelen = 0;
2568
1da177e4
LT
2569 if (sock->file->f_flags & O_NONBLOCK)
2570 flags |= MSG_DONTWAIT;
1af66221
ED
2571
2572 if (unlikely(nosec))
2573 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2574 else
2575 err = sock_recvmsg(sock, msg_sys, flags);
2576
1da177e4 2577 if (err < 0)
4257c8ca 2578 goto out;
1da177e4
LT
2579 len = err;
2580
2581 if (uaddr != NULL) {
43db362d 2582 err = move_addr_to_user(&addr,
a2e27255 2583 msg_sys->msg_namelen, uaddr,
89bddce5 2584 uaddr_len);
1da177e4 2585 if (err < 0)
4257c8ca 2586 goto out;
1da177e4 2587 }
a2e27255 2588 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2589 COMPAT_FLAGS(msg));
1da177e4 2590 if (err)
4257c8ca 2591 goto out;
1da177e4 2592 if (MSG_CMSG_COMPAT & flags)
a2e27255 2593 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2594 &msg_compat->msg_controllen);
2595 else
a2e27255 2596 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2597 &msg->msg_controllen);
2598 if (err)
4257c8ca 2599 goto out;
1da177e4 2600 err = len;
4257c8ca
JA
2601out:
2602 return err;
2603}
2604
2605static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2606 struct msghdr *msg_sys, unsigned int flags, int nosec)
2607{
2608 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2609 /* user mode address pointers */
2610 struct sockaddr __user *uaddr;
2611 ssize_t err;
2612
2613 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2614 if (err < 0)
2615 return err;
1da177e4 2616
4257c8ca 2617 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2618 kfree(iov);
a2e27255
ACM
2619 return err;
2620}
2621
2622/*
2623 * BSD recvmsg interface
2624 */
2625
03b1230c
JA
2626long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2627 struct user_msghdr __user *umsg,
2628 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2629{
d69e0779 2630 /* disallow ancillary data requests from this path */
03b1230c
JA
2631 if (msg->msg_control || msg->msg_controllen)
2632 return -EINVAL;
aa1fa28f 2633
03b1230c 2634 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2635}
2636
e1834a32
DB
2637long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2638 bool forbid_cmsg_compat)
a2e27255
ACM
2639{
2640 int fput_needed, err;
2641 struct msghdr msg_sys;
1be374a0
AL
2642 struct socket *sock;
2643
e1834a32
DB
2644 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2645 return -EINVAL;
2646
1be374a0 2647 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2648 if (!sock)
2649 goto out;
2650
a7526eb5 2651 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2652
6cb153ca 2653 fput_light(sock->file, fput_needed);
1da177e4
LT
2654out:
2655 return err;
2656}
2657
666547ff 2658SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2659 unsigned int, flags)
2660{
e1834a32 2661 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2662}
2663
a2e27255
ACM
2664/*
2665 * Linux recvmmsg interface
2666 */
2667
e11d4284
AB
2668static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2669 unsigned int vlen, unsigned int flags,
2670 struct timespec64 *timeout)
a2e27255
ACM
2671{
2672 int fput_needed, err, datagrams;
2673 struct socket *sock;
2674 struct mmsghdr __user *entry;
d7256d0e 2675 struct compat_mmsghdr __user *compat_entry;
a2e27255 2676 struct msghdr msg_sys;
766b9f92
DD
2677 struct timespec64 end_time;
2678 struct timespec64 timeout64;
a2e27255
ACM
2679
2680 if (timeout &&
2681 poll_select_set_timeout(&end_time, timeout->tv_sec,
2682 timeout->tv_nsec))
2683 return -EINVAL;
2684
2685 datagrams = 0;
2686
2687 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2688 if (!sock)
2689 return err;
2690
7797dc41
SHY
2691 if (likely(!(flags & MSG_ERRQUEUE))) {
2692 err = sock_error(sock->sk);
2693 if (err) {
2694 datagrams = err;
2695 goto out_put;
2696 }
e623a9e9 2697 }
a2e27255
ACM
2698
2699 entry = mmsg;
d7256d0e 2700 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2701
2702 while (datagrams < vlen) {
2703 /*
2704 * No need to ask LSM for more than the first datagram.
2705 */
d7256d0e 2706 if (MSG_CMSG_COMPAT & flags) {
666547ff 2707 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2708 &msg_sys, flags & ~MSG_WAITFORONE,
2709 datagrams);
d7256d0e
JMG
2710 if (err < 0)
2711 break;
2712 err = __put_user(err, &compat_entry->msg_len);
2713 ++compat_entry;
2714 } else {
a7526eb5 2715 err = ___sys_recvmsg(sock,
666547ff 2716 (struct user_msghdr __user *)entry,
a7526eb5
AL
2717 &msg_sys, flags & ~MSG_WAITFORONE,
2718 datagrams);
d7256d0e
JMG
2719 if (err < 0)
2720 break;
2721 err = put_user(err, &entry->msg_len);
2722 ++entry;
2723 }
2724
a2e27255
ACM
2725 if (err)
2726 break;
a2e27255
ACM
2727 ++datagrams;
2728
71c5c159
BB
2729 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2730 if (flags & MSG_WAITFORONE)
2731 flags |= MSG_DONTWAIT;
2732
a2e27255 2733 if (timeout) {
766b9f92 2734 ktime_get_ts64(&timeout64);
c2e6c856 2735 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2736 if (timeout->tv_sec < 0) {
2737 timeout->tv_sec = timeout->tv_nsec = 0;
2738 break;
2739 }
2740
2741 /* Timeout, return less than vlen datagrams */
2742 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2743 break;
2744 }
2745
2746 /* Out of band data, return right away */
2747 if (msg_sys.msg_flags & MSG_OOB)
2748 break;
a78cb84c 2749 cond_resched();
a2e27255
ACM
2750 }
2751
a2e27255 2752 if (err == 0)
34b88a68
ACM
2753 goto out_put;
2754
2755 if (datagrams == 0) {
2756 datagrams = err;
2757 goto out_put;
2758 }
a2e27255 2759
34b88a68
ACM
2760 /*
2761 * We may return less entries than requested (vlen) if the
2762 * sock is non block and there aren't enough datagrams...
2763 */
2764 if (err != -EAGAIN) {
a2e27255 2765 /*
34b88a68
ACM
2766 * ... or if recvmsg returns an error after we
2767 * received some datagrams, where we record the
2768 * error to return on the next call or if the
2769 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2770 */
34b88a68 2771 sock->sk->sk_err = -err;
a2e27255 2772 }
34b88a68
ACM
2773out_put:
2774 fput_light(sock->file, fput_needed);
a2e27255 2775
34b88a68 2776 return datagrams;
a2e27255
ACM
2777}
2778
e11d4284
AB
2779int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2780 unsigned int vlen, unsigned int flags,
2781 struct __kernel_timespec __user *timeout,
2782 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2783{
2784 int datagrams;
c2e6c856 2785 struct timespec64 timeout_sys;
a2e27255 2786
e11d4284
AB
2787 if (timeout && get_timespec64(&timeout_sys, timeout))
2788 return -EFAULT;
a2e27255 2789
e11d4284 2790 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2791 return -EFAULT;
2792
e11d4284
AB
2793 if (!timeout && !timeout32)
2794 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2795
2796 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2797
e11d4284
AB
2798 if (datagrams <= 0)
2799 return datagrams;
2800
2801 if (timeout && put_timespec64(&timeout_sys, timeout))
2802 datagrams = -EFAULT;
2803
2804 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2805 datagrams = -EFAULT;
2806
2807 return datagrams;
2808}
2809
1255e269
DB
2810SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2811 unsigned int, vlen, unsigned int, flags,
c2e6c856 2812 struct __kernel_timespec __user *, timeout)
1255e269 2813{
e11d4284
AB
2814 if (flags & MSG_CMSG_COMPAT)
2815 return -EINVAL;
2816
2817 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2818}
2819
#ifdef CONFIG_COMPAT_32BIT_TIME
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	/* MSG_CMSG_COMPAT is not accepted from this entry point */
	if ((flags & MSG_CMSG_COMPAT) != 0)
		return -EINVAL;

	/* old 32-bit timespec layout; no 64-bit timeout */
	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2831
a2e27255 2832#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of unsigned long arguments to copy in for that call number.
 * Entry 0 is never used because call numbers below 1 are rejected.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),
	AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4),
	AL(6), AL(6), AL(2), AL(5), AL(5),
	AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2843
2844/*
89bddce5 2845 * System call vectors.
1da177e4
LT
2846 *
2847 * Argument checking cleaned up. Saved 20% in size.
2848 * This function doesn't need to set the kernel lock because
89bddce5 2849 * it is set by the callees.
1da177e4
LT
2850 */
2851
3e0fa65f 2852SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2853{
2950fa9d 2854 unsigned long a[AUDITSC_ARGS];
89bddce5 2855 unsigned long a0, a1;
1da177e4 2856 int err;
47379052 2857 unsigned int len;
1da177e4 2858
228e548e 2859 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2860 return -EINVAL;
c8e8cd57 2861 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2862
47379052
AV
2863 len = nargs[call];
2864 if (len > sizeof(a))
2865 return -EINVAL;
2866
1da177e4 2867 /* copy_from_user should be SMP safe. */
47379052 2868 if (copy_from_user(a, args, len))
1da177e4 2869 return -EFAULT;
3ec3b2fb 2870
2950fa9d
CG
2871 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2872 if (err)
2873 return err;
3ec3b2fb 2874
89bddce5
SH
2875 a0 = a[0];
2876 a1 = a[1];
2877
2878 switch (call) {
2879 case SYS_SOCKET:
9d6a15c3 2880 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2881 break;
2882 case SYS_BIND:
a87d35d8 2883 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2884 break;
2885 case SYS_CONNECT:
1387c2c2 2886 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2887 break;
2888 case SYS_LISTEN:
25e290ee 2889 err = __sys_listen(a0, a1);
89bddce5
SH
2890 break;
2891 case SYS_ACCEPT:
4541e805
DB
2892 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2893 (int __user *)a[2], 0);
89bddce5
SH
2894 break;
2895 case SYS_GETSOCKNAME:
2896 err =
8882a107
DB
2897 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2898 (int __user *)a[2]);
89bddce5
SH
2899 break;
2900 case SYS_GETPEERNAME:
2901 err =
b21c8f83
DB
2902 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2903 (int __user *)a[2]);
89bddce5
SH
2904 break;
2905 case SYS_SOCKETPAIR:
6debc8d8 2906 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2907 break;
2908 case SYS_SEND:
f3bf896b
DB
2909 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2910 NULL, 0);
89bddce5
SH
2911 break;
2912 case SYS_SENDTO:
211b634b
DB
2913 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2914 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2915 break;
2916 case SYS_RECV:
d27e9afc
DB
2917 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2918 NULL, NULL);
89bddce5
SH
2919 break;
2920 case SYS_RECVFROM:
7a09e1eb
DB
2921 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2922 (struct sockaddr __user *)a[4],
2923 (int __user *)a[5]);
89bddce5
SH
2924 break;
2925 case SYS_SHUTDOWN:
005a1aea 2926 err = __sys_shutdown(a0, a1);
89bddce5
SH
2927 break;
2928 case SYS_SETSOCKOPT:
cc36dca0
DB
2929 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2930 a[4]);
89bddce5
SH
2931 break;
2932 case SYS_GETSOCKOPT:
2933 err =
13a2d70e
DB
2934 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2935 (int __user *)a[4]);
89bddce5
SH
2936 break;
2937 case SYS_SENDMSG:
e1834a32
DB
2938 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2939 a[2], true);
89bddce5 2940 break;
228e548e 2941 case SYS_SENDMMSG:
e1834a32
DB
2942 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2943 a[3], true);
228e548e 2944 break;
89bddce5 2945 case SYS_RECVMSG:
e1834a32
DB
2946 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2947 a[2], true);
89bddce5 2948 break;
a2e27255 2949 case SYS_RECVMMSG:
3ca47e95 2950 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2951 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2952 a[2], a[3],
2953 (struct __kernel_timespec __user *)a[4],
2954 NULL);
2955 else
2956 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2957 a[2], a[3], NULL,
2958 (struct old_timespec32 __user *)a[4]);
a2e27255 2959 break;
de11defe 2960 case SYS_ACCEPT4:
4541e805
DB
2961 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2962 (int __user *)a[2], a[3]);
aaca0bdc 2963 break;
89bddce5
SH
2964 default:
2965 err = -EINVAL;
2966 break;
1da177e4
LT
2967 }
2968 return err;
2969}
2970
89bddce5 2971#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2972
55737fda
SH
2973/**
2974 * sock_register - add a socket protocol handler
2975 * @ops: description of protocol
2976 *
1da177e4
LT
2977 * This function is called by a protocol handler that wants to
2978 * advertise its address family, and have it linked into the
e793c0f7 2979 * socket interface. The value ops->family corresponds to the
55737fda 2980 * socket system call protocol family.
1da177e4 2981 */
f0fd27d4 2982int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2983{
2984 int err;
2985
2986 if (ops->family >= NPROTO) {
3410f22e 2987 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2988 return -ENOBUFS;
2989 }
55737fda
SH
2990
2991 spin_lock(&net_family_lock);
190683a9
ED
2992 if (rcu_dereference_protected(net_families[ops->family],
2993 lockdep_is_held(&net_family_lock)))
55737fda
SH
2994 err = -EEXIST;
2995 else {
cf778b00 2996 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2997 err = 0;
2998 }
55737fda
SH
2999 spin_unlock(&net_family_lock);
3000
3410f22e 3001 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
3002 return err;
3003}
c6d409cf 3004EXPORT_SYMBOL(sock_register);
1da177e4 3005
55737fda
SH
3006/**
3007 * sock_unregister - remove a protocol handler
3008 * @family: protocol family to remove
3009 *
1da177e4
LT
3010 * This function is called by a protocol handler that wants to
3011 * remove its address family, and have it unlinked from the
55737fda
SH
3012 * new socket creation.
3013 *
3014 * If protocol handler is a module, then it can use module reference
3015 * counts to protect against new references. If protocol handler is not
3016 * a module then it needs to provide its own protection in
3017 * the ops->create routine.
1da177e4 3018 */
f0fd27d4 3019void sock_unregister(int family)
1da177e4 3020{
f0fd27d4 3021 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3022
55737fda 3023 spin_lock(&net_family_lock);
a9b3cd7f 3024 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3025 spin_unlock(&net_family_lock);
3026
3027 synchronize_rcu();
3028
3410f22e 3029 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 3030}
c6d409cf 3031EXPORT_SYMBOL(sock_unregister);
1da177e4 3032
bf2ae2e4
XL
3033bool sock_is_registered(int family)
3034{
66b51b0a 3035 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3036}
3037
77d76ea3 3038static int __init sock_init(void)
1da177e4 3039{
b3e19d92 3040 int err;
2ca794e5
EB
3041 /*
3042 * Initialize the network sysctl infrastructure.
3043 */
3044 err = net_sysctl_init();
3045 if (err)
3046 goto out;
b3e19d92 3047
1da177e4 3048 /*
89bddce5 3049 * Initialize skbuff SLAB cache
1da177e4
LT
3050 */
3051 skb_init();
1da177e4
LT
3052
3053 /*
89bddce5 3054 * Initialize the protocols module.
1da177e4
LT
3055 */
3056
3057 init_inodecache();
b3e19d92
NP
3058
3059 err = register_filesystem(&sock_fs_type);
3060 if (err)
3061 goto out_fs;
1da177e4 3062 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3063 if (IS_ERR(sock_mnt)) {
3064 err = PTR_ERR(sock_mnt);
3065 goto out_mount;
3066 }
77d76ea3
AK
3067
3068 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3069 */
3070
3071#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3072 err = netfilter_init();
3073 if (err)
3074 goto out;
1da177e4 3075#endif
cbeb321a 3076
408eccce 3077 ptp_classifier_init();
c1f19b51 3078
b3e19d92
NP
3079out:
3080 return err;
3081
3082out_mount:
3083 unregister_filesystem(&sock_fs_type);
3084out_fs:
3085 goto out;
1da177e4
LT
3086}
3087
77d76ea3
AK
3088core_initcall(sock_init); /* early initcall */
3089
1da177e4
LT
3090#ifdef CONFIG_PROC_FS
3091void socket_seq_show(struct seq_file *seq)
3092{
648845ab
TZ
3093 seq_printf(seq, "sockets: used %d\n",
3094 sock_inuse_get(seq->private));
1da177e4 3095}
89bddce5 3096#endif /* CONFIG_PROC_FS */
1da177e4 3097
89bbfc95 3098#ifdef CONFIG_COMPAT
36fd633e 3099static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3100{
6b96018b 3101 struct compat_ifconf ifc32;
7a229387 3102 struct ifconf ifc;
7a229387
AB
3103 int err;
3104
6b96018b 3105 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3106 return -EFAULT;
3107
36fd633e
AV
3108 ifc.ifc_len = ifc32.ifc_len;
3109 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3110
36fd633e
AV
3111 rtnl_lock();
3112 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3113 rtnl_unlock();
7a229387
AB
3114 if (err)
3115 return err;
3116
36fd633e 3117 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3118 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3119 return -EFAULT;
3120
3121 return 0;
3122}
3123
6b96018b 3124static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3125{
3a7da39d
BH
3126 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3127 bool convert_in = false, convert_out = false;
44c02a2c
AV
3128 size_t buf_size = 0;
3129 struct ethtool_rxnfc __user *rxnfc = NULL;
3130 struct ifreq ifr;
3a7da39d
BH
3131 u32 rule_cnt = 0, actual_rule_cnt;
3132 u32 ethcmd;
7a229387 3133 u32 data;
3a7da39d 3134 int ret;
7a229387 3135
3a7da39d
BH
3136 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3137 return -EFAULT;
7a229387 3138
3a7da39d
BH
3139 compat_rxnfc = compat_ptr(data);
3140
3141 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3142 return -EFAULT;
3143
3a7da39d
BH
3144 /* Most ethtool structures are defined without padding.
3145 * Unfortunately struct ethtool_rxnfc is an exception.
3146 */
3147 switch (ethcmd) {
3148 default:
3149 break;
3150 case ETHTOOL_GRXCLSRLALL:
3151 /* Buffer size is variable */
3152 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3153 return -EFAULT;
3154 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3155 return -ENOMEM;
3156 buf_size += rule_cnt * sizeof(u32);
3157 /* fall through */
3158 case ETHTOOL_GRXRINGS:
3159 case ETHTOOL_GRXCLSRLCNT:
3160 case ETHTOOL_GRXCLSRULE:
55664f32 3161 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3162 convert_out = true;
3163 /* fall through */
3164 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3165 buf_size += sizeof(struct ethtool_rxnfc);
3166 convert_in = true;
44c02a2c 3167 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3168 break;
3169 }
3170
44c02a2c 3171 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3172 return -EFAULT;
3173
44c02a2c 3174 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3175
3a7da39d 3176 if (convert_in) {
127fe533 3177 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3178 * fs.ring_cookie and at the end of fs, but nowhere else.
3179 */
127fe533
AD
3180 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3181 sizeof(compat_rxnfc->fs.m_ext) !=
3182 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3183 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3184 BUILD_BUG_ON(
3185 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3186 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3187 offsetof(struct ethtool_rxnfc, fs.location) -
3188 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3189
3190 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3191 (void __user *)(&rxnfc->fs.m_ext + 1) -
3192 (void __user *)rxnfc) ||
3a7da39d
BH
3193 copy_in_user(&rxnfc->fs.ring_cookie,
3194 &compat_rxnfc->fs.ring_cookie,
954b1244 3195 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3196 (void __user *)&rxnfc->fs.ring_cookie))
3197 return -EFAULT;
3198 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3199 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3200 return -EFAULT;
3201 } else if (copy_in_user(&rxnfc->rule_cnt,
3202 &compat_rxnfc->rule_cnt,
3203 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3204 return -EFAULT;
3205 }
3206
44c02a2c 3207 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3208 if (ret)
3209 return ret;
3210
3211 if (convert_out) {
3212 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3213 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3214 (const void __user *)rxnfc) ||
3a7da39d
BH
3215 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3216 &rxnfc->fs.ring_cookie,
954b1244
SH
3217 (const void __user *)(&rxnfc->fs.location + 1) -
3218 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3219 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3220 sizeof(rxnfc->rule_cnt)))
3221 return -EFAULT;
3222
3223 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3224 /* As an optimisation, we only copy the actual
3225 * number of rules that the underlying
3226 * function returned. Since Mallory might
3227 * change the rule count in user memory, we
3228 * check that it is less than the rule count
3229 * originally given (as the user buffer size),
3230 * which has been range-checked.
3231 */
3232 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3233 return -EFAULT;
3234 if (actual_rule_cnt < rule_cnt)
3235 rule_cnt = actual_rule_cnt;
3236 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3237 &rxnfc->rule_locs[0],
3238 rule_cnt * sizeof(u32)))
3239 return -EFAULT;
3240 }
3241 }
3242
3243 return 0;
7a229387
AB
3244}
3245
7a50a240
AB
3246static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3247{
7a50a240 3248 compat_uptr_t uptr32;
44c02a2c
AV
3249 struct ifreq ifr;
3250 void __user *saved;
3251 int err;
7a50a240 3252
44c02a2c 3253 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3254 return -EFAULT;
3255
3256 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3257 return -EFAULT;
3258
44c02a2c
AV
3259 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3260 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3261
44c02a2c
AV
3262 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3263 if (!err) {
3264 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3265 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3266 err = -EFAULT;
ccbd6a5a 3267 }
44c02a2c 3268 return err;
7a229387
AB
3269}
3270
590d4693
BH
3271/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3272static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3273 struct compat_ifreq __user *u_ifreq32)
7a229387 3274{
44c02a2c 3275 struct ifreq ifreq;
7a229387
AB
3276 u32 data32;
3277
44c02a2c 3278 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3279 return -EFAULT;
44c02a2c 3280 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3281 return -EFAULT;
44c02a2c 3282 ifreq.ifr_data = compat_ptr(data32);
7a229387 3283
44c02a2c 3284 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3285}
3286
37ac39bd
JB
3287static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3288 unsigned int cmd,
3289 struct compat_ifreq __user *uifr32)
3290{
3291 struct ifreq __user *uifr;
3292 int err;
3293
3294 /* Handle the fact that while struct ifreq has the same *layout* on
3295 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3296 * which are handled elsewhere, it still has different *size* due to
3297 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3298 * resulting in struct ifreq being 32 and 40 bytes respectively).
3299 * As a result, if the struct happens to be at the end of a page and
3300 * the next page isn't readable/writable, we get a fault. To prevent
3301 * that, copy back and forth to the full size.
3302 */
3303
3304 uifr = compat_alloc_user_space(sizeof(*uifr));
3305 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3306 return -EFAULT;
3307
3308 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3309
3310 if (!err) {
3311 switch (cmd) {
3312 case SIOCGIFFLAGS:
3313 case SIOCGIFMETRIC:
3314 case SIOCGIFMTU:
3315 case SIOCGIFMEM:
3316 case SIOCGIFHWADDR:
3317 case SIOCGIFINDEX:
3318 case SIOCGIFADDR:
3319 case SIOCGIFBRDADDR:
3320 case SIOCGIFDSTADDR:
3321 case SIOCGIFNETMASK:
3322 case SIOCGIFPFLAGS:
3323 case SIOCGIFTXQLEN:
3324 case SIOCGMIIPHY:
3325 case SIOCGMIIREG:
c6c9fee3 3326 case SIOCGIFNAME:
37ac39bd
JB
3327 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3328 err = -EFAULT;
3329 break;
3330 }
3331 }
3332 return err;
3333}
3334
a2116ed2
AB
3335static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3336 struct compat_ifreq __user *uifr32)
3337{
3338 struct ifreq ifr;
3339 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3340 int err;
3341
3342 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3343 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3344 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3345 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3346 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3347 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3348 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3349 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3350 if (err)
3351 return -EFAULT;
3352
44c02a2c 3353 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3354
3355 if (cmd == SIOCGIFMAP && !err) {
3356 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3357 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3358 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3359 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3360 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3361 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3362 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3363 if (err)
3364 err = -EFAULT;
3365 }
3366 return err;
3367}
3368
7a229387 3369struct rtentry32 {
c6d409cf 3370 u32 rt_pad1;
7a229387
AB
3371 struct sockaddr rt_dst; /* target address */
3372 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3373 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3374 unsigned short rt_flags;
3375 short rt_pad2;
3376 u32 rt_pad3;
3377 unsigned char rt_tos;
3378 unsigned char rt_class;
3379 short rt_pad4;
3380 short rt_metric; /* +1 for binary compatibility! */
7a229387 3381 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3382 u32 rt_mtu; /* per route MTU/Window */
3383 u32 rt_window; /* Window clamping */
7a229387
AB
3384 unsigned short rt_irtt; /* Initial RTT */
3385};
3386
3387struct in6_rtmsg32 {
3388 struct in6_addr rtmsg_dst;
3389 struct in6_addr rtmsg_src;
3390 struct in6_addr rtmsg_gateway;
3391 u32 rtmsg_type;
3392 u16 rtmsg_dst_len;
3393 u16 rtmsg_src_len;
3394 u32 rtmsg_metric;
3395 u32 rtmsg_info;
3396 u32 rtmsg_flags;
3397 s32 rtmsg_ifindex;
3398};
3399
6b96018b
AB
3400static int routing_ioctl(struct net *net, struct socket *sock,
3401 unsigned int cmd, void __user *argp)
7a229387
AB
3402{
3403 int ret;
3404 void *r = NULL;
3405 struct in6_rtmsg r6;
3406 struct rtentry r4;
3407 char devname[16];
3408 u32 rtdev;
3409 mm_segment_t old_fs = get_fs();
3410
6b96018b
AB
3411 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3412 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3413 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3414 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3415 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3416 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3417 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3418 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3419 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3420 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3421 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3422
3423 r = (void *) &r6;
3424 } else { /* ipv4 */
6b96018b 3425 struct rtentry32 __user *ur4 = argp;
c6d409cf 3426 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3427 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3428 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3429 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3430 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3431 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3432 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3433 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3434 if (rtdev) {
c6d409cf 3435 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3436 r4.rt_dev = (char __user __force *)devname;
3437 devname[15] = 0;
7a229387
AB
3438 } else
3439 r4.rt_dev = NULL;
3440
3441 r = (void *) &r4;
3442 }
3443
3444 if (ret) {
3445 ret = -EFAULT;
3446 goto out;
3447 }
3448
c6d409cf 3449 set_fs(KERNEL_DS);
63ff03ab 3450 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3451 set_fs(old_fs);
7a229387
AB
3452
3453out:
7a229387
AB
3454 return ret;
3455}
3456
3457/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3458 * for some operations; this forces use of the newer bridge-utils that
25985edc 3459 * use compatible ioctls
7a229387 3460 */
6b96018b 3461static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3462{
6b96018b 3463 compat_ulong_t tmp;
7a229387 3464
6b96018b 3465 if (get_user(tmp, argp))
7a229387
AB
3466 return -EFAULT;
3467 if (tmp == BRCTL_GET_VERSION)
3468 return BRCTL_VERSION + 1;
3469 return -EINVAL;
3470}
3471
6b96018b
AB
3472static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3473 unsigned int cmd, unsigned long arg)
3474{
3475 void __user *argp = compat_ptr(arg);
3476 struct sock *sk = sock->sk;
3477 struct net *net = sock_net(sk);
7a229387 3478
6b96018b 3479 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3480 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3481
3482 switch (cmd) {
3483 case SIOCSIFBR:
3484 case SIOCGIFBR:
3485 return old_bridge_ioctl(argp);
6b96018b 3486 case SIOCGIFCONF:
36fd633e 3487 return compat_dev_ifconf(net, argp);
6b96018b
AB
3488 case SIOCETHTOOL:
3489 return ethtool_ioctl(net, argp);
7a50a240
AB
3490 case SIOCWANDEV:
3491 return compat_siocwandev(net, argp);
a2116ed2
AB
3492 case SIOCGIFMAP:
3493 case SIOCSIFMAP:
3494 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3495 case SIOCADDRT:
3496 case SIOCDELRT:
3497 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3498 case SIOCGSTAMP_OLD:
3499 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3500 if (!sock->ops->gettstamp)
3501 return -ENOIOCTLCMD;
0768e170 3502 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3503 !COMPAT_USE_64BIT_TIME);
3504
590d4693
BH
3505 case SIOCBONDSLAVEINFOQUERY:
3506 case SIOCBONDINFOQUERY:
a2116ed2 3507 case SIOCSHWTSTAMP:
fd468c74 3508 case SIOCGHWTSTAMP:
590d4693 3509 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3510
3511 case FIOSETOWN:
3512 case SIOCSPGRP:
3513 case FIOGETOWN:
3514 case SIOCGPGRP:
3515 case SIOCBRADDBR:
3516 case SIOCBRDELBR:
3517 case SIOCGIFVLAN:
3518 case SIOCSIFVLAN:
3519 case SIOCADDDLCI:
3520 case SIOCDELDLCI:
c62cce2c 3521 case SIOCGSKNS:
0768e170
AB
3522 case SIOCGSTAMP_NEW:
3523 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3524 return sock_ioctl(file, cmd, arg);
3525
3526 case SIOCGIFFLAGS:
3527 case SIOCSIFFLAGS:
3528 case SIOCGIFMETRIC:
3529 case SIOCSIFMETRIC:
3530 case SIOCGIFMTU:
3531 case SIOCSIFMTU:
3532 case SIOCGIFMEM:
3533 case SIOCSIFMEM:
3534 case SIOCGIFHWADDR:
3535 case SIOCSIFHWADDR:
3536 case SIOCADDMULTI:
3537 case SIOCDELMULTI:
3538 case SIOCGIFINDEX:
6b96018b
AB
3539 case SIOCGIFADDR:
3540 case SIOCSIFADDR:
3541 case SIOCSIFHWBROADCAST:
6b96018b 3542 case SIOCDIFADDR:
6b96018b
AB
3543 case SIOCGIFBRDADDR:
3544 case SIOCSIFBRDADDR:
3545 case SIOCGIFDSTADDR:
3546 case SIOCSIFDSTADDR:
3547 case SIOCGIFNETMASK:
3548 case SIOCSIFNETMASK:
3549 case SIOCSIFPFLAGS:
3550 case SIOCGIFPFLAGS:
3551 case SIOCGIFTXQLEN:
3552 case SIOCSIFTXQLEN:
3553 case SIOCBRADDIF:
3554 case SIOCBRDELIF:
c6c9fee3 3555 case SIOCGIFNAME:
9177efd3
AB
3556 case SIOCSIFNAME:
3557 case SIOCGMIIPHY:
3558 case SIOCGMIIREG:
3559 case SIOCSMIIREG:
f92d4fc9
AV
3560 case SIOCBONDENSLAVE:
3561 case SIOCBONDRELEASE:
3562 case SIOCBONDSETHWADDR:
3563 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3564 return compat_ifreq_ioctl(net, sock, cmd, argp);
3565
6b96018b
AB
3566 case SIOCSARP:
3567 case SIOCGARP:
3568 case SIOCDARP:
c7dc504e 3569 case SIOCOUTQ:
9d7bf41f 3570 case SIOCOUTQNSD:
6b96018b 3571 case SIOCATMARK:
63ff03ab 3572 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3573 }
3574
6b96018b
AB
3575 return -ENOIOCTLCMD;
3576}
7a229387 3577
95c96174 3578static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3579 unsigned long arg)
89bbfc95
SP
3580{
3581 struct socket *sock = file->private_data;
3582 int ret = -ENOIOCTLCMD;
87de87d5
DM
3583 struct sock *sk;
3584 struct net *net;
3585
3586 sk = sock->sk;
3587 net = sock_net(sk);
89bbfc95
SP
3588
3589 if (sock->ops->compat_ioctl)
3590 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3591
87de87d5
DM
3592 if (ret == -ENOIOCTLCMD &&
3593 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3594 ret = compat_wext_handle_ioctl(net, cmd, arg);
3595
6b96018b
AB
3596 if (ret == -ENOIOCTLCMD)
3597 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3598
89bbfc95
SP
3599 return ret;
3600}
3601#endif
3602
8a3c245c
PT
3603/**
3604 * kernel_bind - bind an address to a socket (kernel space)
3605 * @sock: socket
3606 * @addr: address
3607 * @addrlen: length of address
3608 *
3609 * Returns 0 or an error.
3610 */
3611
ac5a488e
SS
3612int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3613{
3614 return sock->ops->bind(sock, addr, addrlen);
3615}
c6d409cf 3616EXPORT_SYMBOL(kernel_bind);
ac5a488e 3617
8a3c245c
PT
3618/**
3619 * kernel_listen - move socket to listening state (kernel space)
3620 * @sock: socket
3621 * @backlog: pending connections queue size
3622 *
3623 * Returns 0 or an error.
3624 */
3625
ac5a488e
SS
3626int kernel_listen(struct socket *sock, int backlog)
3627{
3628 return sock->ops->listen(sock, backlog);
3629}
c6d409cf 3630EXPORT_SYMBOL(kernel_listen);
ac5a488e 3631
8a3c245c
PT
3632/**
3633 * kernel_accept - accept a connection (kernel space)
3634 * @sock: listening socket
3635 * @newsock: new connected socket
3636 * @flags: flags
3637 *
3638 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3639 * If it fails, @newsock is guaranteed to be %NULL.
3640 * Returns 0 or an error.
3641 */
3642
ac5a488e
SS
3643int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3644{
3645 struct sock *sk = sock->sk;
3646 int err;
3647
3648 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3649 newsock);
3650 if (err < 0)
3651 goto done;
3652
cdfbabfb 3653 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3654 if (err < 0) {
3655 sock_release(*newsock);
fa8705b0 3656 *newsock = NULL;
ac5a488e
SS
3657 goto done;
3658 }
3659
3660 (*newsock)->ops = sock->ops;
1b08534e 3661 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3662
3663done:
3664 return err;
3665}
c6d409cf 3666EXPORT_SYMBOL(kernel_accept);
ac5a488e 3667
8a3c245c
PT
3668/**
3669 * kernel_connect - connect a socket (kernel space)
3670 * @sock: socket
3671 * @addr: address
3672 * @addrlen: address length
3673 * @flags: flags (O_NONBLOCK, ...)
3674 *
3675 * For datagram sockets, @addr is the addres to which datagrams are sent
3676 * by default, and the only address from which datagrams are received.
3677 * For stream sockets, attempts to connect to @addr.
3678 * Returns 0 or an error code.
3679 */
3680
ac5a488e 3681int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3682 int flags)
ac5a488e
SS
3683{
3684 return sock->ops->connect(sock, addr, addrlen, flags);
3685}
c6d409cf 3686EXPORT_SYMBOL(kernel_connect);
ac5a488e 3687
8a3c245c
PT
3688/**
3689 * kernel_getsockname - get the address which the socket is bound (kernel space)
3690 * @sock: socket
3691 * @addr: address holder
3692 *
3693 * Fills the @addr pointer with the address which the socket is bound.
3694 * Returns 0 or an error code.
3695 */
3696
9b2c45d4 3697int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3698{
9b2c45d4 3699 return sock->ops->getname(sock, addr, 0);
ac5a488e 3700}
c6d409cf 3701EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3702
8a3c245c
PT
3703/**
3704 * kernel_peername - get the address which the socket is connected (kernel space)
3705 * @sock: socket
3706 * @addr: address holder
3707 *
3708 * Fills the @addr pointer with the address which the socket is connected.
3709 * Returns 0 or an error code.
3710 */
3711
9b2c45d4 3712int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3713{
9b2c45d4 3714 return sock->ops->getname(sock, addr, 1);
ac5a488e 3715}
c6d409cf 3716EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3717
8a3c245c
PT
3718/**
3719 * kernel_getsockopt - get a socket option (kernel space)
3720 * @sock: socket
3721 * @level: API level (SOL_SOCKET, ...)
3722 * @optname: option tag
3723 * @optval: option value
3724 * @optlen: option length
3725 *
3726 * Assigns the option length to @optlen.
3727 * Returns 0 or an error.
3728 */
3729
ac5a488e
SS
3730int kernel_getsockopt(struct socket *sock, int level, int optname,
3731 char *optval, int *optlen)
3732{
3733 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3734 char __user *uoptval;
3735 int __user *uoptlen;
ac5a488e
SS
3736 int err;
3737
fb8621bb
NK
3738 uoptval = (char __user __force *) optval;
3739 uoptlen = (int __user __force *) optlen;
3740
ac5a488e
SS
3741 set_fs(KERNEL_DS);
3742 if (level == SOL_SOCKET)
fb8621bb 3743 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3744 else
fb8621bb
NK
3745 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3746 uoptlen);
ac5a488e
SS
3747 set_fs(oldfs);
3748 return err;
3749}
c6d409cf 3750EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3751
8a3c245c
PT
3752/**
3753 * kernel_setsockopt - set a socket option (kernel space)
3754 * @sock: socket
3755 * @level: API level (SOL_SOCKET, ...)
3756 * @optname: option tag
3757 * @optval: option value
3758 * @optlen: option length
3759 *
3760 * Returns 0 or an error.
3761 */
3762
ac5a488e 3763int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3764 char *optval, unsigned int optlen)
ac5a488e
SS
3765{
3766 mm_segment_t oldfs = get_fs();
fb8621bb 3767 char __user *uoptval;
ac5a488e
SS
3768 int err;
3769
fb8621bb
NK
3770 uoptval = (char __user __force *) optval;
3771
ac5a488e
SS
3772 set_fs(KERNEL_DS);
3773 if (level == SOL_SOCKET)
fb8621bb 3774 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3775 else
fb8621bb 3776 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3777 optlen);
3778 set_fs(oldfs);
3779 return err;
3780}
c6d409cf 3781EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3782
8a3c245c
PT
3783/**
3784 * kernel_sendpage - send a &page through a socket (kernel space)
3785 * @sock: socket
3786 * @page: page
3787 * @offset: page offset
3788 * @size: total size in bytes
3789 * @flags: flags (MSG_DONTWAIT, ...)
3790 *
3791 * Returns the total amount sent in bytes or an error.
3792 */
3793
ac5a488e
SS
3794int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3795 size_t size, int flags)
3796{
3797 if (sock->ops->sendpage)
3798 return sock->ops->sendpage(sock, page, offset, size, flags);
3799
3800 return sock_no_sendpage(sock, page, offset, size, flags);
3801}
c6d409cf 3802EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3803
8a3c245c
PT
3804/**
3805 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3806 * @sk: sock
3807 * @page: page
3808 * @offset: page offset
3809 * @size: total size in bytes
3810 * @flags: flags (MSG_DONTWAIT, ...)
3811 *
3812 * Returns the total amount sent in bytes or an error.
3813 * Caller must hold @sk.
3814 */
3815
306b13eb
TH
3816int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3817 size_t size, int flags)
3818{
3819 struct socket *sock = sk->sk_socket;
3820
3821 if (sock->ops->sendpage_locked)
3822 return sock->ops->sendpage_locked(sk, page, offset, size,
3823 flags);
3824
3825 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3826}
3827EXPORT_SYMBOL(kernel_sendpage_locked);
3828
8a3c245c
PT
3829/**
3830 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3831 * @sock: socket
3832 * @how: connection part
3833 *
3834 * Returns 0 or an error.
3835 */
3836
91cf45f0
TM
3837int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3838{
3839 return sock->ops->shutdown(sock, how);
3840}
91cf45f0 3841EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3842
8a3c245c
PT
3843/**
3844 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3845 * @sk: socket
3846 *
3847 * This routine returns the IP overhead imposed by a socket i.e.
3848 * the length of the underlying IP header, depending on whether
3849 * this is an IPv4 or IPv6 socket and the length from IP options turned
3850 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3851 */
8a3c245c 3852
113c3075
P
3853u32 kernel_sock_ip_overhead(struct sock *sk)
3854{
3855 struct inet_sock *inet;
3856 struct ip_options_rcu *opt;
3857 u32 overhead = 0;
113c3075
P
3858#if IS_ENABLED(CONFIG_IPV6)
3859 struct ipv6_pinfo *np;
3860 struct ipv6_txoptions *optv6 = NULL;
3861#endif /* IS_ENABLED(CONFIG_IPV6) */
3862
3863 if (!sk)
3864 return overhead;
3865
113c3075
P
3866 switch (sk->sk_family) {
3867 case AF_INET:
3868 inet = inet_sk(sk);
3869 overhead += sizeof(struct iphdr);
3870 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3871 sock_owned_by_user(sk));
113c3075
P
3872 if (opt)
3873 overhead += opt->opt.optlen;
3874 return overhead;
3875#if IS_ENABLED(CONFIG_IPV6)
3876 case AF_INET6:
3877 np = inet6_sk(sk);
3878 overhead += sizeof(struct ipv6hdr);
3879 if (np)
3880 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3881 sock_owned_by_user(sk));
113c3075
P
3882 if (optv6)
3883 overhead += (optv6->opt_flen + optv6->opt_nflen);
3884 return overhead;
3885#endif /* IS_ENABLED(CONFIG_IPV6) */
3886 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3887 return overhead;
3888 }
3889}
3890EXPORT_SYMBOL(kernel_sock_ip_overhead);