git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - net/socket.c
io_uring: only hash regular files for async work execution
[mirror_ubuntu-hirsute-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
06021292 107
e0d1095a 108#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
109unsigned int sysctl_net_busy_read __read_mostly;
110unsigned int sysctl_net_busy_poll __read_mostly;
06021292 111#endif
6b96018b 112
8ae5e030
AV
113static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
114static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 115static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
116
117static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
118static __poll_t sock_poll(struct file *file,
119 struct poll_table_struct *wait);
89bddce5 120static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
121#ifdef CONFIG_COMPAT
122static long compat_sock_ioctl(struct file *file,
89bddce5 123 unsigned int cmd, unsigned long arg);
89bbfc95 124#endif
1da177e4 125static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
126static ssize_t sock_sendpage(struct file *file, struct page *page,
127 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 128static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 129 struct pipe_inode_info *pipe, size_t len,
9c55e01c 130 unsigned int flags);
1da177e4 131
1da177e4
LT
132/*
133 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
134 * in the operation structures but are done directly via the socketcall() multiplexor.
135 */
136
da7071d7 137static const struct file_operations socket_file_ops = {
1da177e4
LT
138 .owner = THIS_MODULE,
139 .llseek = no_llseek,
8ae5e030
AV
140 .read_iter = sock_read_iter,
141 .write_iter = sock_write_iter,
1da177e4
LT
142 .poll = sock_poll,
143 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
144#ifdef CONFIG_COMPAT
145 .compat_ioctl = compat_sock_ioctl,
146#endif
1da177e4 147 .mmap = sock_mmap,
1da177e4
LT
148 .release = sock_close,
149 .fasync = sock_fasync,
5274f052
JA
150 .sendpage = sock_sendpage,
151 .splice_write = generic_splice_sendpage,
9c55e01c 152 .splice_read = sock_splice_read,
1da177e4
LT
153};
154
155/*
156 * The protocol list. Each protocol is registered in here.
157 */
158
1da177e4 159static DEFINE_SPINLOCK(net_family_lock);
190683a9 160static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 161
1da177e4 162/*
89bddce5
SH
163 * Support routines.
164 * Move socket addresses back and forth across the kernel/user
165 * divide and look after the messy bits.
1da177e4
LT
166 */
167
1da177e4
LT
168/**
169 * move_addr_to_kernel - copy a socket address into kernel space
170 * @uaddr: Address in user space
171 * @kaddr: Address in kernel space
172 * @ulen: Length in user space
173 *
174 * The address is copied into kernel space. If the provided length is
175 * negative or too long, -EINVAL is returned. If the copy from user space
176 * faults, -EFAULT is returned. On success 0 is returned.
177 */
178
43db362d 179int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 180{
230b1839 181 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 182 return -EINVAL;
89bddce5 183 if (ulen == 0)
1da177e4 184 return 0;
89bddce5 185 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 186 return -EFAULT;
3ec3b2fb 187 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
188}
189
190/**
191 * move_addr_to_user - copy an address to user space
192 * @kaddr: kernel space address
193 * @klen: length of address in kernel
194 * @uaddr: user space address
195 * @ulen: pointer to user length field
196 *
197 * The value pointed to by ulen on entry is the buffer length available.
198 * On return it is overwritten with the true address length. -EINVAL is
199 * returned if a negative buffer size is specified; a buffer larger than
200 * the address is not an error. -EFAULT is returned if either the buffer
201 * or the length field is not accessible.
202 * After copying the data up to the limit the user specifies, the true
203 * length of the data is written over the length limit the user
204 * specified. Zero is returned for a success.
205 */
89bddce5 206
43db362d 207static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 208 void __user *uaddr, int __user *ulen)
1da177e4
LT
209{
210 int err;
211 int len;
212
68c6beb3 213 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
214 err = get_user(len, ulen);
215 if (err)
1da177e4 216 return err;
89bddce5
SH
217 if (len > klen)
218 len = klen;
68c6beb3 219 if (len < 0)
1da177e4 220 return -EINVAL;
89bddce5 221 if (len) {
d6fe3945
SG
222 if (audit_sockaddr(klen, kaddr))
223 return -ENOMEM;
89bddce5 224 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
225 return -EFAULT;
226 }
227 /*
89bddce5
SH
228 * "fromlen shall refer to the value before truncation.."
229 * 1003.1g
1da177e4
LT
230 */
231 return __put_user(klen, ulen);
232}
233
08009a76 234static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
235
236static struct inode *sock_alloc_inode(struct super_block *sb)
237{
238 struct socket_alloc *ei;
89bddce5 239
e94b1766 240 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
241 if (!ei)
242 return NULL;
333f7909
AV
243 init_waitqueue_head(&ei->socket.wq.wait);
244 ei->socket.wq.fasync_list = NULL;
245 ei->socket.wq.flags = 0;
89bddce5 246
1da177e4
LT
247 ei->socket.state = SS_UNCONNECTED;
248 ei->socket.flags = 0;
249 ei->socket.ops = NULL;
250 ei->socket.sk = NULL;
251 ei->socket.file = NULL;
1da177e4
LT
252
253 return &ei->vfs_inode;
254}
255
6d7855c5 256static void sock_free_inode(struct inode *inode)
1da177e4 257{
43815482
ED
258 struct socket_alloc *ei;
259
260 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 261 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
262}
263
51cc5068 264static void init_once(void *foo)
1da177e4 265{
89bddce5 266 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 267
a35afb83 268 inode_init_once(&ei->vfs_inode);
1da177e4 269}
89bddce5 270
1e911632 271static void init_inodecache(void)
1da177e4
LT
272{
273 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
274 sizeof(struct socket_alloc),
275 0,
276 (SLAB_HWCACHE_ALIGN |
277 SLAB_RECLAIM_ACCOUNT |
5d097056 278 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 279 init_once);
1e911632 280 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
281}
282
b87221de 283static const struct super_operations sockfs_ops = {
c6d409cf 284 .alloc_inode = sock_alloc_inode,
6d7855c5 285 .free_inode = sock_free_inode,
c6d409cf 286 .statfs = simple_statfs,
1da177e4
LT
287};
288
c23fbb6b
ED
289/*
290 * sockfs_dname() is called from d_path().
291 */
292static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
293{
294 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 295 d_inode(dentry)->i_ino);
c23fbb6b
ED
296}
297
3ba13d17 298static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 299 .d_dname = sockfs_dname,
1da177e4
LT
300};
301
bba0bd31
AG
302static int sockfs_xattr_get(const struct xattr_handler *handler,
303 struct dentry *dentry, struct inode *inode,
304 const char *suffix, void *value, size_t size)
305{
306 if (value) {
307 if (dentry->d_name.len + 1 > size)
308 return -ERANGE;
309 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
310 }
311 return dentry->d_name.len + 1;
312}
313
314#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
315#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
316#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
317
318static const struct xattr_handler sockfs_xattr_handler = {
319 .name = XATTR_NAME_SOCKPROTONAME,
320 .get = sockfs_xattr_get,
321};
322
4a590153
AG
323static int sockfs_security_xattr_set(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, const void *value,
326 size_t size, int flags)
327{
328 /* Handled by LSM. */
329 return -EAGAIN;
330}
331
332static const struct xattr_handler sockfs_security_xattr_handler = {
333 .prefix = XATTR_SECURITY_PREFIX,
334 .set = sockfs_security_xattr_set,
335};
336
bba0bd31
AG
337static const struct xattr_handler *sockfs_xattr_handlers[] = {
338 &sockfs_xattr_handler,
4a590153 339 &sockfs_security_xattr_handler,
bba0bd31
AG
340 NULL
341};
342
fba9be49 343static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 344{
fba9be49
DH
345 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
346 if (!ctx)
347 return -ENOMEM;
348 ctx->ops = &sockfs_ops;
349 ctx->dops = &sockfs_dentry_operations;
350 ctx->xattr = sockfs_xattr_handlers;
351 return 0;
c74a1cbb
AV
352}
353
354static struct vfsmount *sock_mnt __read_mostly;
355
356static struct file_system_type sock_fs_type = {
357 .name = "sockfs",
fba9be49 358 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
359 .kill_sb = kill_anon_super,
360};
361
1da177e4
LT
362/*
363 * Obtains the first available file descriptor and sets it up for use.
364 *
39d8c1b6
DM
365 * These functions create file structures and map them to fd space
366 * of the current process. On success it returns file descriptor
1da177e4
LT
367 * and file struct implicitly stored in sock->file.
368 * Note that another thread may close file descriptor before we return
369 * from this function. We use the fact that now we do not refer
370 * to socket after mapping. If one day we will need it, this
371 * function will increment ref. count on file by 1.
372 *
373 * In any case the returned fd MAY NOT BE valid!
374 * This race condition is unavoidable
375 * with shared fd spaces, we cannot solve it inside kernel,
376 * but we take care of internal coherence yet.
377 */
378
8a3c245c
PT
379/**
380 * sock_alloc_file - Bind a &socket to a &file
381 * @sock: socket
382 * @flags: file status flags
383 * @dname: protocol name
384 *
385 * Returns the &file bound with @sock, implicitly storing it
386 * in sock->file. If dname is %NULL, sets to "".
387 * On failure the return is an ERR pointer (see linux/err.h).
388 * This function uses GFP_KERNEL internally.
389 */
390
aab174f0 391struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 392{
7cbe66b6 393 struct file *file;
1da177e4 394
d93aa9d8
AV
395 if (!dname)
396 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 397
d93aa9d8
AV
398 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
399 O_RDWR | (flags & O_NONBLOCK),
400 &socket_file_ops);
b5ffe634 401 if (IS_ERR(file)) {
8e1611e2 402 sock_release(sock);
39b65252 403 return file;
cc3808f8
AV
404 }
405
406 sock->file = file;
39d8c1b6 407 file->private_data = sock;
d8e464ec 408 stream_open(SOCK_INODE(sock), file);
28407630 409 return file;
39d8c1b6 410}
56b31d1c 411EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 412
56b31d1c 413static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
414{
415 struct file *newfile;
28407630 416 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
417 if (unlikely(fd < 0)) {
418 sock_release(sock);
28407630 419 return fd;
ce4bb04c 420 }
39d8c1b6 421
aab174f0 422 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 423 if (!IS_ERR(newfile)) {
39d8c1b6 424 fd_install(fd, newfile);
28407630
AV
425 return fd;
426 }
7cbe66b6 427
28407630
AV
428 put_unused_fd(fd);
429 return PTR_ERR(newfile);
1da177e4
LT
430}
431
8a3c245c
PT
432/**
433 * sock_from_file - Return the &socket bound to @file.
434 * @file: file
435 * @err: pointer to an error code return
436 *
437 * On failure returns %NULL and assigns -ENOTSOCK to @err.
438 */
439
406a3c63 440struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 441{
6cb153ca
BL
442 if (file->f_op == &socket_file_ops)
443 return file->private_data; /* set in sock_map_fd */
444
23bb80d2
ED
445 *err = -ENOTSOCK;
446 return NULL;
6cb153ca 447}
406a3c63 448EXPORT_SYMBOL(sock_from_file);
6cb153ca 449
1da177e4 450/**
c6d409cf 451 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
452 * @fd: file handle
453 * @err: pointer to an error code return
454 *
455 * The file handle passed in is locked and the socket it is bound
241c4667 456 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
457 * with a negative errno code and NULL is returned. The function checks
458 * for both invalid handles and passing a handle which is not a socket.
459 *
460 * On a success the socket object pointer is returned.
461 */
462
463struct socket *sockfd_lookup(int fd, int *err)
464{
465 struct file *file;
1da177e4
LT
466 struct socket *sock;
467
89bddce5
SH
468 file = fget(fd);
469 if (!file) {
1da177e4
LT
470 *err = -EBADF;
471 return NULL;
472 }
89bddce5 473
6cb153ca
BL
474 sock = sock_from_file(file, err);
475 if (!sock)
1da177e4 476 fput(file);
6cb153ca
BL
477 return sock;
478}
c6d409cf 479EXPORT_SYMBOL(sockfd_lookup);
1da177e4 480
6cb153ca
BL
481static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
482{
00e188ef 483 struct fd f = fdget(fd);
6cb153ca
BL
484 struct socket *sock;
485
3672558c 486 *err = -EBADF;
00e188ef
AV
487 if (f.file) {
488 sock = sock_from_file(f.file, err);
489 if (likely(sock)) {
490 *fput_needed = f.flags;
6cb153ca 491 return sock;
00e188ef
AV
492 }
493 fdput(f);
1da177e4 494 }
6cb153ca 495 return NULL;
1da177e4
LT
496}
497
600e1779
MY
498static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
499 size_t size)
500{
501 ssize_t len;
502 ssize_t used = 0;
503
c5ef6035 504 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
505 if (len < 0)
506 return len;
507 used += len;
508 if (buffer) {
509 if (size < used)
510 return -ERANGE;
511 buffer += len;
512 }
513
514 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
520 buffer += len;
521 }
522
523 return used;
524}
525
dc647ec8 526static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
527{
528 int err = simple_setattr(dentry, iattr);
529
e1a3a60a 530 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
531 struct socket *sock = SOCKET_I(d_inode(dentry));
532
6d8c50dc
CW
533 if (sock->sk)
534 sock->sk->sk_uid = iattr->ia_uid;
535 else
536 err = -ENOENT;
86741ec2
LC
537 }
538
539 return err;
540}
541
600e1779 542static const struct inode_operations sockfs_inode_ops = {
600e1779 543 .listxattr = sockfs_listxattr,
86741ec2 544 .setattr = sockfs_setattr,
600e1779
MY
545};
546
1da177e4 547/**
8a3c245c 548 * sock_alloc - allocate a socket
89bddce5 549 *
1da177e4
LT
550 * Allocate a new inode and socket object. The two are bound together
551 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 552 * NULL is returned. This function uses GFP_KERNEL internally.
1da177e4
LT
553 */
554
f4a00aac 555struct socket *sock_alloc(void)
1da177e4 556{
89bddce5
SH
557 struct inode *inode;
558 struct socket *sock;
1da177e4 559
a209dfc7 560 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
561 if (!inode)
562 return NULL;
563
564 sock = SOCKET_I(inode);
565
85fe4025 566 inode->i_ino = get_next_ino();
89bddce5 567 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
568 inode->i_uid = current_fsuid();
569 inode->i_gid = current_fsgid();
600e1779 570 inode->i_op = &sockfs_inode_ops;
1da177e4 571
1da177e4
LT
572 return sock;
573}
f4a00aac 574EXPORT_SYMBOL(sock_alloc);
1da177e4 575
1da177e4 576/**
8a3c245c 577 * sock_release - close a socket
1da177e4
LT
578 * @sock: socket to close
579 *
580 * The socket is released from the protocol stack if it has a release
581 * callback, and the inode is then released if the socket is bound to
89bddce5 582 * an inode not a file.
1da177e4 583 */
89bddce5 584
6d8c50dc 585static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
586{
587 if (sock->ops) {
588 struct module *owner = sock->ops->owner;
589
6d8c50dc
CW
590 if (inode)
591 inode_lock(inode);
1da177e4 592 sock->ops->release(sock);
ff7b11aa 593 sock->sk = NULL;
6d8c50dc
CW
594 if (inode)
595 inode_unlock(inode);
1da177e4
LT
596 sock->ops = NULL;
597 module_put(owner);
598 }
599
333f7909 600 if (sock->wq.fasync_list)
3410f22e 601 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 602
1da177e4
LT
603 if (!sock->file) {
604 iput(SOCK_INODE(sock));
605 return;
606 }
89bddce5 607 sock->file = NULL;
1da177e4 608}
6d8c50dc
CW
609
610void sock_release(struct socket *sock)
611{
612 __sock_release(sock, NULL);
613}
c6d409cf 614EXPORT_SYMBOL(sock_release);
1da177e4 615
c14ac945 616void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 617{
140c55d4
ED
618 u8 flags = *tx_flags;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
621 flags |= SKBTX_HW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
624 flags |= SKBTX_SW_TSTAMP;
625
c14ac945 626 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
627 flags |= SKBTX_SCHED_TSTAMP;
628
140c55d4 629 *tx_flags = flags;
20d49473 630}
67cc0d40 631EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 632
8c3c447b
PA
633INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
634 size_t));
a648a592
PA
635INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
636 size_t));
d8725c86 637static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 638{
a648a592
PA
639 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
640 inet_sendmsg, sock, msg,
641 msg_data_left(msg));
d8725c86
AV
642 BUG_ON(ret == -EIOCBQUEUED);
643 return ret;
1da177e4
LT
644}
645
85806af0
RD
646/**
647 * sock_sendmsg - send a message through @sock
648 * @sock: socket
649 * @msg: message to send
650 *
651 * Sends @msg through @sock, passing through LSM.
652 * Returns the number of bytes sent, or an error code.
653 */
d8725c86 654int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 655{
d8725c86 656 int err = security_socket_sendmsg(sock, msg,
01e97e65 657 msg_data_left(msg));
228e548e 658
d8725c86 659 return err ?: sock_sendmsg_nosec(sock, msg);
0cf00c6f 660}
c6d409cf 661EXPORT_SYMBOL(sock_sendmsg);
1da177e4 662
8a3c245c
PT
663/**
664 * kernel_sendmsg - send a message through @sock (kernel-space)
665 * @sock: socket
666 * @msg: message header
667 * @vec: kernel vec
668 * @num: vec array length
669 * @size: total message data size
670 *
671 * Builds the message data with @vec and sends it through @sock.
672 * Returns the number of bytes sent, or an error code.
673 */
674
1da177e4
LT
675int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
676 struct kvec *vec, size_t num, size_t size)
677{
aa563d7b 678 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 679 return sock_sendmsg(sock, msg);
1da177e4 680}
c6d409cf 681EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 682
8a3c245c
PT
683/**
684 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
685 * @sk: sock
686 * @msg: message header
687 * @vec: output s/g array
688 * @num: output s/g array length
689 * @size: total message data size
690 *
691 * Builds the message data with @vec and sends it through @sk->sk_socket.
692 * Returns the number of bytes sent, or an error code.
693 * Caller must hold @sk.
694 */
695
306b13eb
TH
696int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
697 struct kvec *vec, size_t num, size_t size)
698{
699 struct socket *sock = sk->sk_socket;
700
701 if (!sock->ops->sendmsg_locked)
db5980d8 702 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 703
aa563d7b 704 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
705
706 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
707}
708EXPORT_SYMBOL(kernel_sendmsg_locked);
709
8605330a
SHY
710static bool skb_is_err_queue(const struct sk_buff *skb)
711{
712 /* pkt_type of skbs enqueued on the error queue is set to
713 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
714 * in recvmsg, since skbs received on a local socket will never
715 * have a pkt_type of PACKET_OUTGOING.
716 */
717 return skb->pkt_type == PACKET_OUTGOING;
718}
719
b50a5c70
ML
720/* On transmit, software and hardware timestamps are returned independently.
721 * As the two skb clones share the hardware timestamp, which may be updated
722 * before the software timestamp is received, a hardware TX timestamp may be
723 * returned only if there is no software TX timestamp. Ignore false software
724 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 725 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
726 * hardware timestamp.
727 */
728static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
729{
730 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
731}
732
aad9c8c4
ML
733static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
734{
735 struct scm_ts_pktinfo ts_pktinfo;
736 struct net_device *orig_dev;
737
738 if (!skb_mac_header_was_set(skb))
739 return;
740
741 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
742
743 rcu_read_lock();
744 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
745 if (orig_dev)
746 ts_pktinfo.if_index = orig_dev->ifindex;
747 rcu_read_unlock();
748
749 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
750 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
751 sizeof(ts_pktinfo), &ts_pktinfo);
752}
753
92f37fd2
ED
754/*
755 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
756 */
757void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
758 struct sk_buff *skb)
759{
20d49473 760 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 761 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
762 struct scm_timestamping_internal tss;
763
b50a5c70 764 int empty = 1, false_tstamp = 0;
20d49473
PO
765 struct skb_shared_hwtstamps *shhwtstamps =
766 skb_hwtstamps(skb);
767
768 /* Race occurred between timestamp enabling and packet
769 receiving. Fill in the current time for now. */
b50a5c70 770 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 771 __net_timestamp(skb);
b50a5c70
ML
772 false_tstamp = 1;
773 }
20d49473
PO
774
775 if (need_software_tstamp) {
776 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
777 if (new_tstamp) {
778 struct __kernel_sock_timeval tv;
779
780 skb_get_new_timestamp(skb, &tv);
781 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
782 sizeof(tv), &tv);
783 } else {
784 struct __kernel_old_timeval tv;
785
786 skb_get_timestamp(skb, &tv);
787 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
788 sizeof(tv), &tv);
789 }
20d49473 790 } else {
887feae3
DD
791 if (new_tstamp) {
792 struct __kernel_timespec ts;
793
794 skb_get_new_timestampns(skb, &ts);
795 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
796 sizeof(ts), &ts);
797 } else {
df1b4ba9 798 struct __kernel_old_timespec ts;
887feae3
DD
799
800 skb_get_timestampns(skb, &ts);
801 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
802 sizeof(ts), &ts);
803 }
20d49473
PO
804 }
805 }
806
f24b9be5 807 memset(&tss, 0, sizeof(tss));
c199105d 808 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 809 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 810 empty = 0;
4d276eb6 811 if (shhwtstamps &&
b9f40e21 812 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 813 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 814 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 815 empty = 0;
aad9c8c4
ML
816 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
817 !skb_is_err_queue(skb))
818 put_ts_pktinfo(msg, skb);
819 }
1c885808 820 if (!empty) {
9718475e
DD
821 if (sock_flag(sk, SOCK_TSTAMP_NEW))
822 put_cmsg_scm_timestamping64(msg, &tss);
823 else
824 put_cmsg_scm_timestamping(msg, &tss);
1c885808 825
8605330a 826 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 827 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
828 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
829 skb->len, skb->data);
830 }
92f37fd2 831}
7c81fd8b
ACM
832EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
833
6e3e939f
JB
834void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
835 struct sk_buff *skb)
836{
837 int ack;
838
839 if (!sock_flag(sk, SOCK_WIFI_STATUS))
840 return;
841 if (!skb->wifi_acked_valid)
842 return;
843
844 ack = skb->wifi_acked;
845
846 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
847}
848EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
849
11165f14 850static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
851 struct sk_buff *skb)
3b885787 852{
744d5a3e 853 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 854 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 855 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
856}
857
767dd033 858void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
3b885787
NH
859 struct sk_buff *skb)
860{
861 sock_recv_timestamp(msg, sk, skb);
862 sock_recv_drops(msg, sk, skb);
863}
767dd033 864EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 865
8c3c447b 866INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
867 size_t, int));
868INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
869 size_t, int));
1b784140 870static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 871 int flags)
1da177e4 872{
a648a592
PA
873 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
874 inet_recvmsg, sock, msg, msg_data_left(msg),
875 flags);
1da177e4
LT
876}
877
85806af0
RD
878/**
879 * sock_recvmsg - receive a message from @sock
880 * @sock: socket
881 * @msg: message to receive
882 * @flags: message flags
883 *
884 * Receives @msg from @sock, passing through LSM. Returns the total number
885 * of bytes received, or an error.
886 */
2da62906 887int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
a2e27255 888{
2da62906 889 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
a2e27255 890
2da62906 891 return err ?: sock_recvmsg_nosec(sock, msg, flags);
1da177e4 892}
c6d409cf 893EXPORT_SYMBOL(sock_recvmsg);
1da177e4 894
c1249c0a 895/**
8a3c245c
PT
896 * kernel_recvmsg - Receive a message from a socket (kernel space)
897 * @sock: The socket to receive the message from
898 * @msg: Received message
899 * @vec: Input s/g array for message data
900 * @num: Size of input s/g array
901 * @size: Number of bytes to read
902 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 903 *
8a3c245c
PT
904 * On return the msg structure contains the scatter/gather array passed in the
905 * vec argument. The array is modified so that it consists of the unfilled
906 * portion of the original array.
c1249c0a 907 *
8a3c245c 908 * The returned value is the total number of bytes received, or an error.
c1249c0a 909 */
8a3c245c 910
89bddce5
SH
911int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
912 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
913{
914 mm_segment_t oldfs = get_fs();
915 int result;
916
aa563d7b 917 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 918 set_fs(KERNEL_DS);
2da62906 919 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
920 set_fs(oldfs);
921 return result;
922}
c6d409cf 923EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 924
ce1d4d3e
CH
925static ssize_t sock_sendpage(struct file *file, struct page *page,
926 int offset, size_t size, loff_t *ppos, int more)
1da177e4 927{
1da177e4
LT
928 struct socket *sock;
929 int flags;
930
ce1d4d3e
CH
931 sock = file->private_data;
932
35f9c09f
ED
933 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
934 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
935 flags |= more;
ce1d4d3e 936
e6949583 937 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 938}
1da177e4 939
9c55e01c 940static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 941 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
942 unsigned int flags)
943{
944 struct socket *sock = file->private_data;
945
997b37da 946 if (unlikely(!sock->ops->splice_read))
95506588 947 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 948
9c55e01c
JA
949 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
950}
951
8ae5e030 952static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 953{
6d652330
AV
954 struct file *file = iocb->ki_filp;
955 struct socket *sock = file->private_data;
0345f931 956 struct msghdr msg = {.msg_iter = *to,
957 .msg_iocb = iocb};
8ae5e030 958 ssize_t res;
ce1d4d3e 959
8ae5e030
AV
960 if (file->f_flags & O_NONBLOCK)
961 msg.msg_flags = MSG_DONTWAIT;
962
963 if (iocb->ki_pos != 0)
1da177e4 964 return -ESPIPE;
027445c3 965
66ee59af 966 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
967 return 0;
968
2da62906 969 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
970 *to = msg.msg_iter;
971 return res;
1da177e4
LT
972}
973
8ae5e030 974static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 975{
6d652330
AV
976 struct file *file = iocb->ki_filp;
977 struct socket *sock = file->private_data;
0345f931 978 struct msghdr msg = {.msg_iter = *from,
979 .msg_iocb = iocb};
8ae5e030 980 ssize_t res;
1da177e4 981
8ae5e030 982 if (iocb->ki_pos != 0)
ce1d4d3e 983 return -ESPIPE;
027445c3 984
8ae5e030
AV
985 if (file->f_flags & O_NONBLOCK)
986 msg.msg_flags = MSG_DONTWAIT;
987
6d652330
AV
988 if (sock->type == SOCK_SEQPACKET)
989 msg.msg_flags |= MSG_EOR;
990
d8725c86 991 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
992 *from = msg.msg_iter;
993 return res;
1da177e4
LT
994}
995
1da177e4
LT
996/*
997 * Atomic setting of ioctl hooks to avoid race
998 * with module unload.
999 */
1000
4a3e2f71 1001static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1002static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1003
/*
 * Install the bridge ioctl handler, or clear it when @hook is NULL.  The
 * pointer is replaced while holding br_ioctl_mutex, the same mutex that
 * sock_ioctl() holds around its call through br_ioctl_hook.
 */
881d966b 1004void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1005{
4a3e2f71 1006 mutex_lock(&br_ioctl_mutex);
1da177e4 1007 br_ioctl_hook = hook;
4a3e2f71 1008 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1009}
1010EXPORT_SYMBOL(brioctl_set);
1011
4a3e2f71 1012static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1013static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1014
/*
 * Install the VLAN ioctl handler, or clear it when @hook is NULL.  The
 * pointer is replaced while holding vlan_ioctl_mutex, the same mutex that
 * sock_ioctl() holds around its call through vlan_ioctl_hook.
 */
881d966b 1015void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1016{
4a3e2f71 1017 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1018 vlan_ioctl_hook = hook;
4a3e2f71 1019 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1020}
1021EXPORT_SYMBOL(vlan_ioctl_set);
1022
4a3e2f71 1023static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1024static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1025
/*
 * Install the DLCI ioctl handler, or clear it when @hook is NULL.  The
 * pointer is replaced while holding dlci_ioctl_mutex, the same mutex that
 * sock_ioctl() holds around its call through dlci_ioctl_hook.
 */
89bddce5 1026void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1027{
4a3e2f71 1028 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1029 dlci_ioctl_hook = hook;
4a3e2f71 1030 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1031}
1032EXPORT_SYMBOL(dlci_ioctl_set);
1033
6b96018b 1034static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1035 unsigned int cmd, unsigned long arg)
6b96018b
AB
1036{
1037 int err;
1038 void __user *argp = (void __user *)arg;
1039
1040 err = sock->ops->ioctl(sock, cmd, arg);
1041
1042 /*
1043 * If this ioctl is unknown try to hand it down
1044 * to the NIC driver.
1045 */
36fd633e
AV
1046 if (err != -ENOIOCTLCMD)
1047 return err;
6b96018b 1048
36fd633e
AV
1049 if (cmd == SIOCGIFCONF) {
1050 struct ifconf ifc;
1051 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1052 return -EFAULT;
1053 rtnl_lock();
1054 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1055 rtnl_unlock();
1056 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1057 err = -EFAULT;
44c02a2c
AV
1058 } else {
1059 struct ifreq ifr;
1060 bool need_copyout;
63ff03ab 1061 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1062 return -EFAULT;
1063 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1064 if (!err && need_copyout)
63ff03ab 1065 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1066 return -EFAULT;
36fd633e 1067 }
6b96018b
AB
1068 return err;
1069}
1070
1da177e4
LT
1071/*
1072 * With an ioctl, arg may well be a user mode pointer, but we don't know
1073 * what to do with it - that's up to the protocol still.
1074 */
1075
8a3c245c
PT
1076/**
1077 * get_net_ns - increment the refcount of the network namespace
1078 * @ns: common namespace (net)
1079 *
1080 * Returns the net's common namespace.
1081 */
1082
d8d211a2 1083struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1084{
1085 return &get_net(container_of(ns, struct net, ns))->ns;
1086}
d8d211a2 1087EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1088
1da177e4
LT
1089static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1090{
1091 struct socket *sock;
881d966b 1092 struct sock *sk;
1da177e4
LT
1093 void __user *argp = (void __user *)arg;
1094 int pid, err;
881d966b 1095 struct net *net;
1da177e4 1096
b69aee04 1097 sock = file->private_data;
881d966b 1098 sk = sock->sk;
3b1e0a65 1099 net = sock_net(sk);
44c02a2c
AV
1100 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1101 struct ifreq ifr;
1102 bool need_copyout;
1103 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1104 return -EFAULT;
1105 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1106 if (!err && need_copyout)
1107 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1108 return -EFAULT;
1da177e4 1109 } else
3d23e349 1110#ifdef CONFIG_WEXT_CORE
1da177e4 1111 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1112 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1113 } else
3d23e349 1114#endif
89bddce5 1115 switch (cmd) {
1da177e4
LT
1116 case FIOSETOWN:
1117 case SIOCSPGRP:
1118 err = -EFAULT;
1119 if (get_user(pid, (int __user *)argp))
1120 break;
393cc3f5 1121 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1122 break;
1123 case FIOGETOWN:
1124 case SIOCGPGRP:
609d7fa9 1125 err = put_user(f_getown(sock->file),
89bddce5 1126 (int __user *)argp);
1da177e4
LT
1127 break;
1128 case SIOCGIFBR:
1129 case SIOCSIFBR:
1130 case SIOCBRADDBR:
1131 case SIOCBRDELBR:
1132 err = -ENOPKG;
1133 if (!br_ioctl_hook)
1134 request_module("bridge");
1135
4a3e2f71 1136 mutex_lock(&br_ioctl_mutex);
89bddce5 1137 if (br_ioctl_hook)
881d966b 1138 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1139 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1140 break;
1141 case SIOCGIFVLAN:
1142 case SIOCSIFVLAN:
1143 err = -ENOPKG;
1144 if (!vlan_ioctl_hook)
1145 request_module("8021q");
1146
4a3e2f71 1147 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1148 if (vlan_ioctl_hook)
881d966b 1149 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1150 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1151 break;
1da177e4
LT
1152 case SIOCADDDLCI:
1153 case SIOCDELDLCI:
1154 err = -ENOPKG;
1155 if (!dlci_ioctl_hook)
1156 request_module("dlci");
1157
7512cbf6
PE
1158 mutex_lock(&dlci_ioctl_mutex);
1159 if (dlci_ioctl_hook)
1da177e4 1160 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1161 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1162 break;
c62cce2c
AV
1163 case SIOCGSKNS:
1164 err = -EPERM;
1165 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1166 break;
1167
1168 err = open_related_ns(&net->ns, get_net_ns);
1169 break;
0768e170
AB
1170 case SIOCGSTAMP_OLD:
1171 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1172 if (!sock->ops->gettstamp) {
1173 err = -ENOIOCTLCMD;
1174 break;
1175 }
1176 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1177 cmd == SIOCGSTAMP_OLD,
1178 !IS_ENABLED(CONFIG_64BIT));
60747828 1179 break;
0768e170
AB
1180 case SIOCGSTAMP_NEW:
1181 case SIOCGSTAMPNS_NEW:
1182 if (!sock->ops->gettstamp) {
1183 err = -ENOIOCTLCMD;
1184 break;
1185 }
1186 err = sock->ops->gettstamp(sock, argp,
1187 cmd == SIOCGSTAMP_NEW,
1188 false);
c7cbdbf2 1189 break;
1da177e4 1190 default:
63ff03ab 1191 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1192 break;
89bddce5 1193 }
1da177e4
LT
1194 return err;
1195}
1196
8a3c245c
PT
1197/**
1198 * sock_create_lite - creates a socket
1199 * @family: protocol family (AF_INET, ...)
1200 * @type: communication type (SOCK_STREAM, ...)
1201 * @protocol: protocol (0, ...)
1202 * @res: new socket
1203 *
1204 * Creates a new socket and assigns it to @res, passing through LSM.
1205 * The new socket initialization is not complete, see kernel_accept().
1206 * Returns 0 or an error. On failure @res is set to %NULL.
1207 * This function internally uses GFP_KERNEL.
1208 */
1209
1da177e4
LT
1210int sock_create_lite(int family, int type, int protocol, struct socket **res)
1211{
1212 int err;
1213 struct socket *sock = NULL;
89bddce5 1214
1da177e4
LT
1215 err = security_socket_create(family, type, protocol, 1);
1216 if (err)
1217 goto out;
1218
1219 sock = sock_alloc();
1220 if (!sock) {
1221 err = -ENOMEM;
1222 goto out;
1223 }
1224
1da177e4 1225 sock->type = type;
7420ed23
VY
1226 err = security_socket_post_create(sock, family, type, protocol, 1);
1227 if (err)
1228 goto out_release;
1229
1da177e4
LT
1230out:
1231 *res = sock;
1232 return err;
7420ed23
VY
1233out_release:
1234 sock_release(sock);
1235 sock = NULL;
1236 goto out;
1da177e4 1237}
c6d409cf 1238EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1239
1240/* No kernel lock held - perfect */
ade994f4 1241static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1242{
3cafb376 1243 struct socket *sock = file->private_data;
a331de3b 1244 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1245
e88958e6
CH
1246 if (!sock->ops->poll)
1247 return 0;
f641f13b 1248
a331de3b
CH
1249 if (sk_can_busy_loop(sock->sk)) {
1250 /* poll once if requested by the syscall */
1251 if (events & POLL_BUSY_LOOP)
1252 sk_busy_loop(sock->sk, 1);
1253
1254 /* if this socket can poll_ll, tell the system call */
1255 flag = POLL_BUSY_LOOP;
1256 }
1257
1258 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1259}
1260
/*
 * mmap file operation for sockets: forwards the request to the protocol's
 * mmap operation and returns its result.
 * NOTE(review): ops->mmap is called without a NULL check; presumably every
 * proto_ops supplies one - confirm.
 */
89bddce5 1261static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1262{
b69aee04 1263 struct socket *sock = file->private_data;
1da177e4
LT
1264
1265 return sock->ops->mmap(file, sock, vma);
1266}
1267
/*
 * release file operation for sockets: passes the socket that SOCKET_I()
 * derives from @inode to __sock_release().  Always returns 0; @filp is
 * not used.
 */
20380731 1268static int sock_close(struct inode *inode, struct file *filp)
1da177e4 1269{
6d8c50dc 1270 __sock_release(SOCKET_I(inode), inode);
1da177e4
LT
1271 return 0;
1272}
1273
1274/*
1275 * Update the socket async list
1276 *
1277 * Fasync_list locking strategy.
1278 *
1279 * 1. fasync_list is modified only under process context socket lock
1280 * i.e. under semaphore.
1281 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1282 * or under socket lock
1da177e4
LT
1283 */
1284
1285static int sock_fasync(int fd, struct file *filp, int on)
1286{
989a2979
ED
1287 struct socket *sock = filp->private_data;
1288 struct sock *sk = sock->sk;
333f7909 1289 struct socket_wq *wq = &sock->wq;
1da177e4 1290
989a2979 1291 if (sk == NULL)
1da177e4 1292 return -EINVAL;
1da177e4
LT
1293
1294 lock_sock(sk);
eaefd110 1295 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1296
eaefd110 1297 if (!wq->fasync_list)
989a2979
ED
1298 sock_reset_flag(sk, SOCK_FASYNC);
1299 else
bcdce719 1300 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1301
989a2979 1302 release_sock(sk);
1da177e4
LT
1303 return 0;
1304}
1305
ceb5d58b 1306/* This function may be called only under rcu_lock */
1da177e4 1307
ceb5d58b 1308int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1309{
ceb5d58b 1310 if (!wq || !wq->fasync_list)
1da177e4 1311 return -1;
ceb5d58b 1312
89bddce5 1313 switch (how) {
8d8ad9d7 1314 case SOCK_WAKE_WAITD:
ceb5d58b 1315 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1316 break;
1317 goto call_kill;
8d8ad9d7 1318 case SOCK_WAKE_SPACE:
ceb5d58b 1319 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1320 break;
1321 /* fall through */
8d8ad9d7 1322 case SOCK_WAKE_IO:
89bddce5 1323call_kill:
43815482 1324 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1325 break;
8d8ad9d7 1326 case SOCK_WAKE_URG:
43815482 1327 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1328 }
ceb5d58b 1329
1da177e4
LT
1330 return 0;
1331}
c6d409cf 1332EXPORT_SYMBOL(sock_wake_async);
1da177e4 1333
8a3c245c
PT
1334/**
1335 * __sock_create - creates a socket
1336 * @net: net namespace
1337 * @family: protocol family (AF_INET, ...)
1338 * @type: communication type (SOCK_STREAM, ...)
1339 * @protocol: protocol (0, ...)
1340 * @res: new socket
1341 * @kern: boolean for kernel space sockets
1342 *
1343 * Creates a new socket and assigns it to @res, passing through LSM.
1344 * Returns 0 or an error. On failure @res is not written. @kern must
1345 * be set to true if the socket resides in kernel space.
1346 * This function internally uses GFP_KERNEL.
1347 */
1348
/*
 * Order of operations: validate family/type, run the LSM create hook,
 * allocate the socket, optionally load the family's module, look the
 * family up under RCU and pin its module, call the family's ->create,
 * pin the module owning sock->ops, then run the LSM post-create hook.
 * *res is written only on success; the failure paths leave it untouched.
 */
721db93a 1349int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1350 struct socket **res, int kern)
1da177e4
LT
1351{
1352 int err;
1353 struct socket *sock;
55737fda 1354 const struct net_proto_family *pf;
1da177e4
LT
1355
1356 /*
89bddce5 1357 * Check protocol is in range
1da177e4
LT
1358 */
1359 if (family < 0 || family >= NPROTO)
1360 return -EAFNOSUPPORT;
1361 if (type < 0 || type >= SOCK_MAX)
1362 return -EINVAL;
1363
1364 /* Compatibility.
1365
1366 This uglymoron is moved from INET layer to here to avoid
1367 deadlock in module load.
1368 */
1369 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1370 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1371 current->comm);
1da177e4
LT
1372 family = PF_PACKET;
1373 }
1374
/* The LSM check runs before anything is allocated, so a denial needs no cleanup. */
1375 err = security_socket_create(family, type, protocol, kern);
1376 if (err)
1377 return err;
89bddce5 1378
55737fda
SH
1379 /*
1380 * Allocate the socket and allow the family to set things up. if
1381 * the protocol is 0, the family is instructed to select an appropriate
1382 * default.
1383 */
1384 sock = sock_alloc();
1385 if (!sock) {
e87cc472 1386 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1387 return -ENFILE; /* Not exactly a match, but it's the
1388 closest POSIX thing */
1389 }
1390
1391 sock->type = type;
1392
95a5afca 1393#ifdef CONFIG_MODULES
89bddce5
SH
1394 /* Attempt to load a protocol module if the find failed.
1395 *
1396 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1397 * requested real, full-featured networking support upon configuration.
1398 * Otherwise module support will break!
1399 */
190683a9 1400 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1401 request_module("net-pf-%d", family);
1da177e4
LT
1402#endif
1403
/*
 * Look the family up inside an RCU read-side section.  The section is
 * left only after the module reference below has been taken.
 */
55737fda
SH
1404 rcu_read_lock();
1405 pf = rcu_dereference(net_families[family]);
1406 err = -EAFNOSUPPORT;
1407 if (!pf)
1408 goto out_release;
1da177e4
LT
1409
1410 /*
1411 * We will call the ->create function, that possibly is in a loadable
1412 * module, so we have to bump that loadable module refcnt first.
1413 */
55737fda 1414 if (!try_module_get(pf->owner))
1da177e4
LT
1415 goto out_release;
1416
55737fda
SH
1417 /* Now protected by module ref count */
1418 rcu_read_unlock();
1419
3f378b68 1420 err = pf->create(net, sock, protocol, kern);
55737fda 1421 if (err < 0)
1da177e4 1422 goto out_module_put;
a79af59e 1423
1da177e4
LT
1424 /*
1425 * Now to bump the refcnt of the [loadable] module that owns this
1426 * socket at sock_release time we decrement its refcnt.
1427 */
55737fda
SH
1428 if (!try_module_get(sock->ops->owner))
1429 goto out_module_busy;
1430
1da177e4
LT
1431 /*
1432 * Now that we're done with the ->create function, the [loadable]
1433 * module can have its refcnt decremented
1434 */
55737fda 1435 module_put(pf->owner);
7420ed23
VY
1436 err = security_socket_post_create(sock, family, type, protocol, kern);
1437 if (err)
3b185525 1438 goto out_sock_release;
55737fda 1439 *res = sock;
1da177e4 1440
55737fda
SH
1441 return 0;
1442
/* Could not pin the module that owns sock->ops. */
1443out_module_busy:
1444 err = -EAFNOSUPPORT;
/*
 * sock->ops is cleared before the release below.
 * NOTE(review): presumably so that sock_release() does not call into, or
 * drop a reference on, a module that was never pinned - confirm.
 */
1da177e4 1445out_module_put:
55737fda
SH
1446 sock->ops = NULL;
1447 module_put(pf->owner);
1448out_sock_release:
1da177e4 1449 sock_release(sock);
55737fda
SH
1450 return err;
1451
/* Failure while still inside the RCU read-side section: leave it, then share the release path. */
1452out_release:
1453 rcu_read_unlock();
1454 goto out_sock_release;
1da177e4 1455}
721db93a 1456EXPORT_SYMBOL(__sock_create);
1da177e4 1457
8a3c245c
PT
1458/**
1459 * sock_create - creates a socket
1460 * @family: protocol family (AF_INET, ...)
1461 * @type: communication type (SOCK_STREAM, ...)
1462 * @protocol: protocol (0, ...)
1463 * @res: new socket
1464 *
1465 * A wrapper around __sock_create().
1466 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1467 */
1468
1da177e4
LT
1469int sock_create(int family, int type, int protocol, struct socket **res)
1470{
/* Uses the calling task's network namespace and passes kern = 0. */
1b8d7ae4 1471 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1472}
c6d409cf 1473EXPORT_SYMBOL(sock_create);
1da177e4 1474
8a3c245c
PT
1475/**
1476 * sock_create_kern - creates a socket (kernel space)
1477 * @net: net namespace
1478 * @family: protocol family (AF_INET, ...)
1479 * @type: communication type (SOCK_STREAM, ...)
1480 * @protocol: protocol (0, ...)
1481 * @res: new socket
1482 *
1483 * A wrapper around __sock_create().
1484 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1485 */
1486
eeb1bd5c 1487int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
1da177e4 1488{
/* Same as sock_create() but with a caller-chosen namespace and kern = 1. */
eeb1bd5c 1489 return __sock_create(net, family, type, protocol, res, 1);
1da177e4 1490}
c6d409cf 1491EXPORT_SYMBOL(sock_create_kern);
1da177e4 1492
9d6a15c3 1493int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1494{
1495 int retval;
1496 struct socket *sock;
a677a039
UD
1497 int flags;
1498
e38b36f3
UD
1499 /* Check the SOCK_* constants for consistency. */
1500 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1501 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1502 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1503 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1504
a677a039 1505 flags = type & ~SOCK_TYPE_MASK;
77d27200 1506 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1507 return -EINVAL;
1508 type &= SOCK_TYPE_MASK;
1da177e4 1509
aaca0bdc
UD
1510 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1511 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1512
1da177e4
LT
1513 retval = sock_create(family, type, protocol, &sock);
1514 if (retval < 0)
8e1611e2 1515 return retval;
1da177e4 1516
8e1611e2 1517 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1518}
1519
9d6a15c3
DB
/* socket system call: forwards its arguments unchanged to __sys_socket(). */
1520SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1521{
1522 return __sys_socket(family, type, protocol);
1523}
1524
1da177e4
LT
1525/*
1526 * Create a pair of connected sockets.
1527 */
1528
6debc8d8 1529int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1530{
1531 struct socket *sock1, *sock2;
1532 int fd1, fd2, err;
db349509 1533 struct file *newfile1, *newfile2;
a677a039
UD
1534 int flags;
1535
1536 flags = type & ~SOCK_TYPE_MASK;
77d27200 1537 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1538 return -EINVAL;
1539 type &= SOCK_TYPE_MASK;
1da177e4 1540
aaca0bdc
UD
1541 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1542 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1543
016a266b
AV
1544 /*
1545 * reserve descriptors and make sure we won't fail
1546 * to return them to userland.
1547 */
1548 fd1 = get_unused_fd_flags(flags);
1549 if (unlikely(fd1 < 0))
1550 return fd1;
1551
1552 fd2 = get_unused_fd_flags(flags);
1553 if (unlikely(fd2 < 0)) {
1554 put_unused_fd(fd1);
1555 return fd2;
1556 }
1557
1558 err = put_user(fd1, &usockvec[0]);
1559 if (err)
1560 goto out;
1561
1562 err = put_user(fd2, &usockvec[1]);
1563 if (err)
1564 goto out;
1565
1da177e4
LT
1566 /*
1567 * Obtain the first socket and check if the underlying protocol
1568 * supports the socketpair call.
1569 */
1570
1571 err = sock_create(family, type, protocol, &sock1);
016a266b 1572 if (unlikely(err < 0))
1da177e4
LT
1573 goto out;
1574
1575 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1576 if (unlikely(err < 0)) {
1577 sock_release(sock1);
1578 goto out;
bf3c23d1 1579 }
d73aa286 1580
d47cd945
DH
1581 err = security_socket_socketpair(sock1, sock2);
1582 if (unlikely(err)) {
1583 sock_release(sock2);
1584 sock_release(sock1);
1585 goto out;
1586 }
1587
016a266b
AV
1588 err = sock1->ops->socketpair(sock1, sock2);
1589 if (unlikely(err < 0)) {
1590 sock_release(sock2);
1591 sock_release(sock1);
1592 goto out;
28407630
AV
1593 }
1594
aab174f0 1595 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1596 if (IS_ERR(newfile1)) {
28407630 1597 err = PTR_ERR(newfile1);
016a266b
AV
1598 sock_release(sock2);
1599 goto out;
28407630
AV
1600 }
1601
aab174f0 1602 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1603 if (IS_ERR(newfile2)) {
1604 err = PTR_ERR(newfile2);
016a266b
AV
1605 fput(newfile1);
1606 goto out;
db349509
AV
1607 }
1608
157cf649 1609 audit_fd_pair(fd1, fd2);
d73aa286 1610
db349509
AV
1611 fd_install(fd1, newfile1);
1612 fd_install(fd2, newfile2);
d73aa286 1613 return 0;
1da177e4 1614
016a266b 1615out:
d73aa286 1616 put_unused_fd(fd2);
d73aa286 1617 put_unused_fd(fd1);
1da177e4
LT
1618 return err;
1619}
1620
6debc8d8
DB
/* socketpair system call: forwards its arguments unchanged to __sys_socketpair(). */
1621SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1622 int __user *, usockvec)
1623{
1624 return __sys_socketpair(family, type, protocol, usockvec);
1625}
1626
1da177e4
LT
1627/*
1628 * Bind a name to a socket. Nothing much to do here since it's
1629 * the protocol's responsibility to handle the local address.
1630 *
1631 * We move the socket address to kernel space before we call
1632 * the protocol layer (having also checked the address is ok).
1633 */
1634
a87d35d8 1635int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1636{
1637 struct socket *sock;
230b1839 1638 struct sockaddr_storage address;
6cb153ca 1639 int err, fput_needed;
1da177e4 1640
89bddce5 1641 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1642 if (sock) {
43db362d 1643 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1644 if (!err) {
89bddce5 1645 err = security_socket_bind(sock,
230b1839 1646 (struct sockaddr *)&address,
89bddce5 1647 addrlen);
6cb153ca
BL
1648 if (!err)
1649 err = sock->ops->bind(sock,
89bddce5 1650 (struct sockaddr *)
230b1839 1651 &address, addrlen);
1da177e4 1652 }
6cb153ca 1653 fput_light(sock->file, fput_needed);
89bddce5 1654 }
1da177e4
LT
1655 return err;
1656}
1657
a87d35d8
DB
/* bind system call: forwards its arguments unchanged to __sys_bind(). */
1658SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1659{
1660 return __sys_bind(fd, umyaddr, addrlen);
1661}
1662
1da177e4
LT
1663/*
1664 * Perform a listen. Basically, we allow the protocol to do anything
1665 * necessary for a listen, and if that works, we mark the socket as
1666 * ready for listening.
1667 */
1668
25e290ee 1669int __sys_listen(int fd, int backlog)
1da177e4
LT
1670{
1671 struct socket *sock;
6cb153ca 1672 int err, fput_needed;
b8e1f9b5 1673 int somaxconn;
89bddce5
SH
1674
1675 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1676 if (sock) {
8efa6e93 1677 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1678 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1679 backlog = somaxconn;
1da177e4
LT
1680
1681 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1682 if (!err)
1683 err = sock->ops->listen(sock, backlog);
1da177e4 1684
6cb153ca 1685 fput_light(sock->file, fput_needed);
1da177e4
LT
1686 }
1687 return err;
1688}
1689
25e290ee
DB
/* listen system call: forwards its arguments unchanged to __sys_listen(). */
1690SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1691{
1692 return __sys_listen(fd, backlog);
1693}
1694
de2ea4b6
JA
1695int __sys_accept4_file(struct file *file, unsigned file_flags,
1696 struct sockaddr __user *upeer_sockaddr,
1697 int __user *upeer_addrlen, int flags)
1da177e4
LT
1698{
1699 struct socket *sock, *newsock;
39d8c1b6 1700 struct file *newfile;
de2ea4b6 1701 int err, len, newfd;
230b1839 1702 struct sockaddr_storage address;
1da177e4 1703
77d27200 1704 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1705 return -EINVAL;
1706
1707 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1708 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1709
de2ea4b6 1710 sock = sock_from_file(file, &err);
1da177e4
LT
1711 if (!sock)
1712 goto out;
1713
1714 err = -ENFILE;
c6d409cf
ED
1715 newsock = sock_alloc();
1716 if (!newsock)
de2ea4b6 1717 goto out;
1da177e4
LT
1718
1719 newsock->type = sock->type;
1720 newsock->ops = sock->ops;
1721
1da177e4
LT
1722 /*
1723 * We don't need try_module_get here, as the listening socket (sock)
1724 * has the protocol module (sock->ops->owner) held.
1725 */
1726 __module_get(newsock->ops->owner);
1727
28407630 1728 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1729 if (unlikely(newfd < 0)) {
1730 err = newfd;
9a1875e6 1731 sock_release(newsock);
de2ea4b6 1732 goto out;
39d8c1b6 1733 }
aab174f0 1734 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1735 if (IS_ERR(newfile)) {
28407630
AV
1736 err = PTR_ERR(newfile);
1737 put_unused_fd(newfd);
de2ea4b6 1738 goto out;
28407630 1739 }
39d8c1b6 1740
a79af59e
FF
1741 err = security_socket_accept(sock, newsock);
1742 if (err)
39d8c1b6 1743 goto out_fd;
a79af59e 1744
de2ea4b6
JA
1745 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1746 false);
1da177e4 1747 if (err < 0)
39d8c1b6 1748 goto out_fd;
1da177e4
LT
1749
1750 if (upeer_sockaddr) {
9b2c45d4
DV
1751 len = newsock->ops->getname(newsock,
1752 (struct sockaddr *)&address, 2);
1753 if (len < 0) {
1da177e4 1754 err = -ECONNABORTED;
39d8c1b6 1755 goto out_fd;
1da177e4 1756 }
43db362d 1757 err = move_addr_to_user(&address,
230b1839 1758 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1759 if (err < 0)
39d8c1b6 1760 goto out_fd;
1da177e4
LT
1761 }
1762
1763 /* File flags are not inherited via accept() unlike another OSes. */
1764
39d8c1b6
DM
1765 fd_install(newfd, newfile);
1766 err = newfd;
1da177e4
LT
1767out:
1768 return err;
39d8c1b6 1769out_fd:
9606a216 1770 fput(newfile);
39d8c1b6 1771 put_unused_fd(newfd);
de2ea4b6
JA
1772 goto out;
1773
1774}
1775
1776/*
1777 * For accept, we attempt to create a new socket, set up the link
1778 * with the client, wake up the client, then return the new
1779 * connected fd. We collect the address of the connector in kernel
1780 * space and move it to user at the very end. This is unclean because
1781 * we open the socket then return an error.
1782 *
1783 * 1003.1g adds the ability to recvmsg() to query connection pending
1784 * status to recvmsg. We need to add that support in a way that's
1785 * clean when we restructure accept also.
1786 */
1787
1788int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1789 int __user *upeer_addrlen, int flags)
1790{
1791 int ret = -EBADF;
1792 struct fd f;
1793
1794 f = fdget(fd);
1795 if (f.file) {
1796 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
1797 upeer_addrlen, flags);
1798 if (f.flags)
1799 fput(f.file);
1800 }
1801
1802 return ret;
1da177e4
LT
1803}
1804
4541e805
DB
/* accept4 system call: forwards its arguments unchanged to __sys_accept4(). */
1805SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1806 int __user *, upeer_addrlen, int, flags)
1807{
1808 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1809}
1810
20f37034
HC
/* accept system call: the same as accept4 with flags fixed to 0. */
1811SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1812 int __user *, upeer_addrlen)
aaca0bdc 1813{
4541e805 1814 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1815}
1816
1da177e4
LT
1817/*
1818 * Attempt to connect to a socket with the server address. The address
1819 * is in user space so we verify it is OK and move it to kernel space.
1820 *
1821 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1822 * break bindings
1823 *
1824 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1825 * other SEQPACKET protocols that take time to connect() as it doesn't
1826 * include the -EINPROGRESS status for such sockets.
1827 */
1828
f499a021 1829int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1830 int addrlen, int file_flags)
1da177e4
LT
1831{
1832 struct socket *sock;
bd3ded31 1833 int err;
1da177e4 1834
bd3ded31 1835 sock = sock_from_file(file, &err);
1da177e4
LT
1836 if (!sock)
1837 goto out;
1da177e4 1838
89bddce5 1839 err =
f499a021 1840 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1841 if (err)
bd3ded31 1842 goto out;
1da177e4 1843
f499a021 1844 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1845 sock->file->f_flags | file_flags);
1da177e4
LT
1846out:
1847 return err;
1848}
1849
bd3ded31
JA
1850int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1851{
1852 int ret = -EBADF;
1853 struct fd f;
1854
1855 f = fdget(fd);
1856 if (f.file) {
f499a021
JA
1857 struct sockaddr_storage address;
1858
1859 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1860 if (!ret)
1861 ret = __sys_connect_file(f.file, &address, addrlen, 0);
bd3ded31
JA
1862 if (f.flags)
1863 fput(f.file);
1864 }
1865
1866 return ret;
1867}
1868
1387c2c2
DB
/* connect system call: forwards its arguments unchanged to __sys_connect(). */
1869SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1870 int, addrlen)
1871{
1872 return __sys_connect(fd, uservaddr, addrlen);
1873}
1874
1da177e4
LT
1875/*
1876 * Get the local address ('name') of a socket object. Move the obtained
1877 * name to user space.
1878 */
1879
8882a107
DB
1880int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1881 int __user *usockaddr_len)
1da177e4
LT
1882{
1883 struct socket *sock;
230b1839 1884 struct sockaddr_storage address;
9b2c45d4 1885 int err, fput_needed;
89bddce5 1886
6cb153ca 1887 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1888 if (!sock)
1889 goto out;
1890
1891 err = security_socket_getsockname(sock);
1892 if (err)
1893 goto out_put;
1894
9b2c45d4
DV
1895 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1896 if (err < 0)
1da177e4 1897 goto out_put;
9b2c45d4
DV
1898 /* "err" is actually length in this case */
1899 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1900
1901out_put:
6cb153ca 1902 fput_light(sock->file, fput_needed);
1da177e4
LT
1903out:
1904 return err;
1905}
1906
8882a107
DB
/* getsockname system call: forwards its arguments unchanged to __sys_getsockname(). */
1907SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1908 int __user *, usockaddr_len)
1909{
1910 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1911}
1912
1da177e4
LT
1913/*
1914 * Get the remote address ('name') of a socket object. Move the obtained
1915 * name to user space.
1916 */
1917
b21c8f83
DB
1918int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1919 int __user *usockaddr_len)
1da177e4
LT
1920{
1921 struct socket *sock;
230b1839 1922 struct sockaddr_storage address;
9b2c45d4 1923 int err, fput_needed;
1da177e4 1924
89bddce5
SH
1925 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1926 if (sock != NULL) {
1da177e4
LT
1927 err = security_socket_getpeername(sock);
1928 if (err) {
6cb153ca 1929 fput_light(sock->file, fput_needed);
1da177e4
LT
1930 return err;
1931 }
1932
9b2c45d4
DV
1933 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1934 if (err >= 0)
1935 /* "err" is actually length in this case */
1936 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1937 usockaddr_len);
6cb153ca 1938 fput_light(sock->file, fput_needed);
1da177e4
LT
1939 }
1940 return err;
1941}
1942
b21c8f83
DB
/* getpeername system call: forwards its arguments unchanged to __sys_getpeername(). */
1943SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1944 int __user *, usockaddr_len)
1945{
1946 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1947}
1948
1da177e4
LT
1949/*
1950 * Send a datagram to a given address. We move the address into kernel
1951 * space and check the user space data area is readable before invoking
1952 * the protocol.
1953 */
211b634b
DB
1954int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1955 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1956{
1957 struct socket *sock;
230b1839 1958 struct sockaddr_storage address;
1da177e4
LT
1959 int err;
1960 struct msghdr msg;
1961 struct iovec iov;
6cb153ca 1962 int fput_needed;
6cb153ca 1963
602bd0e9
AV
1964 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1965 if (unlikely(err))
1966 return err;
de0fa95c
PE
1967 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1968 if (!sock)
4387ff75 1969 goto out;
6cb153ca 1970
89bddce5 1971 msg.msg_name = NULL;
89bddce5
SH
1972 msg.msg_control = NULL;
1973 msg.msg_controllen = 0;
1974 msg.msg_namelen = 0;
6cb153ca 1975 if (addr) {
43db362d 1976 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1977 if (err < 0)
1978 goto out_put;
230b1839 1979 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1980 msg.msg_namelen = addr_len;
1da177e4
LT
1981 }
1982 if (sock->file->f_flags & O_NONBLOCK)
1983 flags |= MSG_DONTWAIT;
1984 msg.msg_flags = flags;
d8725c86 1985 err = sock_sendmsg(sock, &msg);
1da177e4 1986
89bddce5 1987out_put:
de0fa95c 1988 fput_light(sock->file, fput_needed);
4387ff75 1989out:
1da177e4
LT
1990 return err;
1991}
1992
211b634b
DB
1993SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1994 unsigned int, flags, struct sockaddr __user *, addr,
1995 int, addr_len)
1996{
1997 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1998}
1999
1da177e4 2000/*
89bddce5 2001 * Send a datagram down a socket.
1da177e4
LT
2002 */
2003
3e0fa65f 2004SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2005 unsigned int, flags)
1da177e4 2006{
211b634b 2007 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2008}
2009
2010/*
89bddce5 2011 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2012 * sender. We verify the buffers are writable and if needed move the
2013 * sender address from kernel to user space.
2014 */
7a09e1eb
DB
2015int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2016 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2017{
2018 struct socket *sock;
2019 struct iovec iov;
2020 struct msghdr msg;
230b1839 2021 struct sockaddr_storage address;
89bddce5 2022 int err, err2;
6cb153ca
BL
2023 int fput_needed;
2024
602bd0e9
AV
2025 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2026 if (unlikely(err))
2027 return err;
de0fa95c 2028 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2029 if (!sock)
de0fa95c 2030 goto out;
1da177e4 2031
89bddce5
SH
2032 msg.msg_control = NULL;
2033 msg.msg_controllen = 0;
f3d33426
HFS
2034 /* Save some cycles and don't copy the address if not needed */
2035 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2036 /* We assume all kernel code knows the size of sockaddr_storage */
2037 msg.msg_namelen = 0;
130ed5d1 2038 msg.msg_iocb = NULL;
9f138fa6 2039 msg.msg_flags = 0;
1da177e4
LT
2040 if (sock->file->f_flags & O_NONBLOCK)
2041 flags |= MSG_DONTWAIT;
2da62906 2042 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2043
89bddce5 2044 if (err >= 0 && addr != NULL) {
43db362d 2045 err2 = move_addr_to_user(&address,
230b1839 2046 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2047 if (err2 < 0)
2048 err = err2;
1da177e4 2049 }
de0fa95c
PE
2050
2051 fput_light(sock->file, fput_needed);
4387ff75 2052out:
1da177e4
LT
2053 return err;
2054}
2055
7a09e1eb
DB
2056SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2057 unsigned int, flags, struct sockaddr __user *, addr,
2058 int __user *, addr_len)
2059{
2060 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2061}
2062
1da177e4 2063/*
89bddce5 2064 * Receive a datagram from a socket.
1da177e4
LT
2065 */
2066
b7c0ddf5
JG
2067SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2068 unsigned int, flags)
1da177e4 2069{
7a09e1eb 2070 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2071}
2072
2073/*
2074 * Set a socket option. Because we don't know the option lengths we have
2075 * to pass the user mode parameter for the protocols to sort out.
2076 */
2077
cc36dca0
DB
2078static int __sys_setsockopt(int fd, int level, int optname,
2079 char __user *optval, int optlen)
1da177e4 2080{
0d01da6a
SF
2081 mm_segment_t oldfs = get_fs();
2082 char *kernel_optval = NULL;
6cb153ca 2083 int err, fput_needed;
1da177e4
LT
2084 struct socket *sock;
2085
2086 if (optlen < 0)
2087 return -EINVAL;
89bddce5
SH
2088
2089 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2090 if (sock != NULL) {
2091 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2092 if (err)
2093 goto out_put;
1da177e4 2094
0d01da6a
SF
2095 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2096 &optname, optval, &optlen,
2097 &kernel_optval);
2098
2099 if (err < 0) {
2100 goto out_put;
2101 } else if (err > 0) {
2102 err = 0;
2103 goto out_put;
2104 }
2105
2106 if (kernel_optval) {
2107 set_fs(KERNEL_DS);
2108 optval = (char __user __force *)kernel_optval;
2109 }
2110
1da177e4 2111 if (level == SOL_SOCKET)
89bddce5
SH
2112 err =
2113 sock_setsockopt(sock, level, optname, optval,
2114 optlen);
1da177e4 2115 else
89bddce5
SH
2116 err =
2117 sock->ops->setsockopt(sock, level, optname, optval,
2118 optlen);
0d01da6a
SF
2119
2120 if (kernel_optval) {
2121 set_fs(oldfs);
2122 kfree(kernel_optval);
2123 }
6cb153ca
BL
2124out_put:
2125 fput_light(sock->file, fput_needed);
1da177e4
LT
2126 }
2127 return err;
2128}
2129
cc36dca0
DB
2130SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2131 char __user *, optval, int, optlen)
2132{
2133 return __sys_setsockopt(fd, level, optname, optval, optlen);
2134}
2135
1da177e4
LT
2136/*
2137 * Get a socket option. Because we don't know the option lengths we have
2138 * to pass a user mode parameter for the protocols to sort out.
2139 */
2140
13a2d70e
DB
2141static int __sys_getsockopt(int fd, int level, int optname,
2142 char __user *optval, int __user *optlen)
1da177e4 2143{
6cb153ca 2144 int err, fput_needed;
1da177e4 2145 struct socket *sock;
0d01da6a 2146 int max_optlen;
1da177e4 2147
89bddce5
SH
2148 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2149 if (sock != NULL) {
6cb153ca
BL
2150 err = security_socket_getsockopt(sock, level, optname);
2151 if (err)
2152 goto out_put;
1da177e4 2153
0d01da6a
SF
2154 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2155
1da177e4 2156 if (level == SOL_SOCKET)
89bddce5
SH
2157 err =
2158 sock_getsockopt(sock, level, optname, optval,
2159 optlen);
1da177e4 2160 else
89bddce5
SH
2161 err =
2162 sock->ops->getsockopt(sock, level, optname, optval,
2163 optlen);
0d01da6a
SF
2164
2165 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2166 optval, optlen,
2167 max_optlen, err);
6cb153ca
BL
2168out_put:
2169 fput_light(sock->file, fput_needed);
1da177e4
LT
2170 }
2171 return err;
2172}
2173
13a2d70e
DB
2174SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2175 char __user *, optval, int __user *, optlen)
2176{
2177 return __sys_getsockopt(fd, level, optname, optval, optlen);
2178}
2179
1da177e4
LT
2180/*
2181 * Shutdown a socket.
2182 */
2183
005a1aea 2184int __sys_shutdown(int fd, int how)
1da177e4 2185{
6cb153ca 2186 int err, fput_needed;
1da177e4
LT
2187 struct socket *sock;
2188
89bddce5
SH
2189 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2190 if (sock != NULL) {
1da177e4 2191 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2192 if (!err)
2193 err = sock->ops->shutdown(sock, how);
2194 fput_light(sock->file, fput_needed);
1da177e4
LT
2195 }
2196 return err;
2197}
2198
005a1aea
DB
2199SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2200{
2201 return __sys_shutdown(fd, how);
2202}
2203
/*
 * Helper macros that yield the address of a message header field in
 * either the compat or the native layout. The fields concerned are the
 * same type (int / unsigned) on our platforms.
 *
 * The expansion refers to a variable named 'flags' and to '<msg>_compat',
 * so both must be in scope wherever these macros are used.
 */
#define COMPAT_MSG(msg, member)						\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2210
c71d8ebe
TH
/*
 * Destination address remembered between the messages of one sendmmsg()
 * call. ____sys_sendmsg() compares the next destination against it and
 * uses sock_sendmsg_nosec() when they match.
 */
struct used_address {
	struct sockaddr_storage name;	/* copy of the last destination address */
	unsigned int name_len;		/* its length; set to UINT_MAX by __sys_sendmmsg() before the first send */
};
2215
da184284
AV
2216static int copy_msghdr_from_user(struct msghdr *kmsg,
2217 struct user_msghdr __user *umsg,
2218 struct sockaddr __user **save_addr,
2219 struct iovec **iov)
1661bf36 2220{
ffb07550 2221 struct user_msghdr msg;
08adb7da
AV
2222 ssize_t err;
2223
ffb07550 2224 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2225 return -EFAULT;
dbb490b9 2226
864d9664 2227 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2228 kmsg->msg_controllen = msg.msg_controllen;
2229 kmsg->msg_flags = msg.msg_flags;
2230
2231 kmsg->msg_namelen = msg.msg_namelen;
2232 if (!msg.msg_name)
6a2a2b3a
AS
2233 kmsg->msg_namelen = 0;
2234
dbb490b9
ML
2235 if (kmsg->msg_namelen < 0)
2236 return -EINVAL;
2237
1661bf36 2238 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2239 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2240
2241 if (save_addr)
ffb07550 2242 *save_addr = msg.msg_name;
08adb7da 2243
ffb07550 2244 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2245 if (!save_addr) {
864d9664
PA
2246 err = move_addr_to_kernel(msg.msg_name,
2247 kmsg->msg_namelen,
08adb7da
AV
2248 kmsg->msg_name);
2249 if (err < 0)
2250 return err;
2251 }
2252 } else {
2253 kmsg->msg_name = NULL;
2254 kmsg->msg_namelen = 0;
2255 }
2256
ffb07550 2257 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2258 return -EMSGSIZE;
2259
0345f931 2260 kmsg->msg_iocb = NULL;
2261
87e5e6da 2262 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2263 msg.msg_iov, msg.msg_iovlen,
da184284 2264 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2265 return err < 0 ? err : 0;
1661bf36
DC
2266}
2267
4257c8ca
JA
2268static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2269 unsigned int flags, struct used_address *used_address,
2270 unsigned int allowed_msghdr_flags)
1da177e4 2271{
b9d717a7 2272 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2273 __aligned(sizeof(__kernel_size_t));
89bddce5 2274 /* 20 is size of ipv6_pktinfo */
1da177e4 2275 unsigned char *ctl_buf = ctl;
d8725c86 2276 int ctl_len;
08adb7da 2277 ssize_t err;
89bddce5 2278
1da177e4
LT
2279 err = -ENOBUFS;
2280
228e548e 2281 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2282 goto out;
28a94d8f 2283 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2284 ctl_len = msg_sys->msg_controllen;
1da177e4 2285 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2286 err =
228e548e 2287 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2288 sizeof(ctl));
1da177e4 2289 if (err)
4257c8ca 2290 goto out;
228e548e
AB
2291 ctl_buf = msg_sys->msg_control;
2292 ctl_len = msg_sys->msg_controllen;
1da177e4 2293 } else if (ctl_len) {
ac4340fc
DM
2294 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2295 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2296 if (ctl_len > sizeof(ctl)) {
1da177e4 2297 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2298 if (ctl_buf == NULL)
4257c8ca 2299 goto out;
1da177e4
LT
2300 }
2301 err = -EFAULT;
2302 /*
228e548e 2303 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2304 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2305 * checking falls down on this.
2306 */
fb8621bb 2307 if (copy_from_user(ctl_buf,
228e548e 2308 (void __user __force *)msg_sys->msg_control,
89bddce5 2309 ctl_len))
1da177e4 2310 goto out_freectl;
228e548e 2311 msg_sys->msg_control = ctl_buf;
1da177e4 2312 }
228e548e 2313 msg_sys->msg_flags = flags;
1da177e4
LT
2314
2315 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2316 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2317 /*
2318 * If this is sendmmsg() and current destination address is same as
2319 * previously succeeded address, omit asking LSM's decision.
2320 * used_address->name_len is initialized to UINT_MAX so that the first
2321 * destination address never matches.
2322 */
bc909d9d
MD
2323 if (used_address && msg_sys->msg_name &&
2324 used_address->name_len == msg_sys->msg_namelen &&
2325 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2326 used_address->name_len)) {
d8725c86 2327 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2328 goto out_freectl;
2329 }
d8725c86 2330 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2331 /*
2332 * If this is sendmmsg() and sending to current destination address was
2333 * successful, remember it.
2334 */
2335 if (used_address && err >= 0) {
2336 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2337 if (msg_sys->msg_name)
2338 memcpy(&used_address->name, msg_sys->msg_name,
2339 used_address->name_len);
c71d8ebe 2340 }
1da177e4
LT
2341
2342out_freectl:
89bddce5 2343 if (ctl_buf != ctl)
1da177e4 2344 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2345out:
2346 return err;
2347}
2348
03b1230c
JA
2349int sendmsg_copy_msghdr(struct msghdr *msg,
2350 struct user_msghdr __user *umsg, unsigned flags,
2351 struct iovec **iov)
4257c8ca
JA
2352{
2353 int err;
2354
2355 if (flags & MSG_CMSG_COMPAT) {
2356 struct compat_msghdr __user *msg_compat;
2357
2358 msg_compat = (struct compat_msghdr __user *) umsg;
2359 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2360 } else {
2361 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2362 }
2363 if (err < 0)
2364 return err;
2365
2366 return 0;
2367}
2368
2369static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2370 struct msghdr *msg_sys, unsigned int flags,
2371 struct used_address *used_address,
2372 unsigned int allowed_msghdr_flags)
2373{
2374 struct sockaddr_storage address;
2375 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2376 ssize_t err;
2377
2378 msg_sys->msg_name = &address;
2379
2380 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2381 if (err < 0)
2382 return err;
2383
2384 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2385 allowed_msghdr_flags);
da184284 2386 kfree(iov);
228e548e
AB
2387 return err;
2388}
2389
2390/*
2391 * BSD sendmsg interface
2392 */
03b1230c 2393long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2394 unsigned int flags)
2395{
d69e0779 2396 /* disallow ancillary data requests from this path */
03b1230c
JA
2397 if (msg->msg_control || msg->msg_controllen)
2398 return -EINVAL;
d69e0779 2399
03b1230c 2400 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2401}
228e548e 2402
e1834a32
DB
2403long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2404 bool forbid_cmsg_compat)
228e548e
AB
2405{
2406 int fput_needed, err;
2407 struct msghdr msg_sys;
1be374a0
AL
2408 struct socket *sock;
2409
e1834a32
DB
2410 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2411 return -EINVAL;
2412
1be374a0 2413 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2414 if (!sock)
2415 goto out;
2416
28a94d8f 2417 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2418
6cb153ca 2419 fput_light(sock->file, fput_needed);
89bddce5 2420out:
1da177e4
LT
2421 return err;
2422}
2423
666547ff 2424SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2425{
e1834a32 2426 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2427}
2428
228e548e
AB
2429/*
2430 * Linux sendmmsg interface
2431 */
2432
2433int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2434 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2435{
2436 int fput_needed, err, datagrams;
2437 struct socket *sock;
2438 struct mmsghdr __user *entry;
2439 struct compat_mmsghdr __user *compat_entry;
2440 struct msghdr msg_sys;
c71d8ebe 2441 struct used_address used_address;
f092276d 2442 unsigned int oflags = flags;
228e548e 2443
e1834a32
DB
2444 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2445 return -EINVAL;
2446
98382f41
AB
2447 if (vlen > UIO_MAXIOV)
2448 vlen = UIO_MAXIOV;
228e548e
AB
2449
2450 datagrams = 0;
2451
2452 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2453 if (!sock)
2454 return err;
2455
c71d8ebe 2456 used_address.name_len = UINT_MAX;
228e548e
AB
2457 entry = mmsg;
2458 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2459 err = 0;
f092276d 2460 flags |= MSG_BATCH;
228e548e
AB
2461
2462 while (datagrams < vlen) {
f092276d
TH
2463 if (datagrams == vlen - 1)
2464 flags = oflags;
2465
228e548e 2466 if (MSG_CMSG_COMPAT & flags) {
666547ff 2467 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2468 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2469 if (err < 0)
2470 break;
2471 err = __put_user(err, &compat_entry->msg_len);
2472 ++compat_entry;
2473 } else {
a7526eb5 2474 err = ___sys_sendmsg(sock,
666547ff 2475 (struct user_msghdr __user *)entry,
28a94d8f 2476 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2477 if (err < 0)
2478 break;
2479 err = put_user(err, &entry->msg_len);
2480 ++entry;
2481 }
2482
2483 if (err)
2484 break;
2485 ++datagrams;
3023898b
SHY
2486 if (msg_data_left(&msg_sys))
2487 break;
a78cb84c 2488 cond_resched();
228e548e
AB
2489 }
2490
228e548e
AB
2491 fput_light(sock->file, fput_needed);
2492
728ffb86
AB
2493 /* We only return an error if no datagrams were able to be sent */
2494 if (datagrams != 0)
228e548e
AB
2495 return datagrams;
2496
228e548e
AB
2497 return err;
2498}
2499
2500SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2501 unsigned int, vlen, unsigned int, flags)
2502{
e1834a32 2503 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2504}
2505
03b1230c
JA
2506int recvmsg_copy_msghdr(struct msghdr *msg,
2507 struct user_msghdr __user *umsg, unsigned flags,
2508 struct sockaddr __user **uaddr,
2509 struct iovec **iov)
1da177e4 2510{
08adb7da 2511 ssize_t err;
1da177e4 2512
4257c8ca
JA
2513 if (MSG_CMSG_COMPAT & flags) {
2514 struct compat_msghdr __user *msg_compat;
1da177e4 2515
4257c8ca
JA
2516 msg_compat = (struct compat_msghdr __user *) umsg;
2517 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2518 } else {
2519 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2520 }
1da177e4 2521 if (err < 0)
da184284 2522 return err;
1da177e4 2523
4257c8ca
JA
2524 return 0;
2525}
2526
2527static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2528 struct user_msghdr __user *msg,
2529 struct sockaddr __user *uaddr,
2530 unsigned int flags, int nosec)
2531{
2532 struct compat_msghdr __user *msg_compat =
2533 (struct compat_msghdr __user *) msg;
2534 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2535 struct sockaddr_storage addr;
2536 unsigned long cmsg_ptr;
2537 int len;
2538 ssize_t err;
2539
2540 msg_sys->msg_name = &addr;
a2e27255
ACM
2541 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2542 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2543
f3d33426
HFS
2544 /* We assume all kernel code knows the size of sockaddr_storage */
2545 msg_sys->msg_namelen = 0;
2546
1da177e4
LT
2547 if (sock->file->f_flags & O_NONBLOCK)
2548 flags |= MSG_DONTWAIT;
2da62906 2549 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4 2550 if (err < 0)
4257c8ca 2551 goto out;
1da177e4
LT
2552 len = err;
2553
2554 if (uaddr != NULL) {
43db362d 2555 err = move_addr_to_user(&addr,
a2e27255 2556 msg_sys->msg_namelen, uaddr,
89bddce5 2557 uaddr_len);
1da177e4 2558 if (err < 0)
4257c8ca 2559 goto out;
1da177e4 2560 }
a2e27255 2561 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2562 COMPAT_FLAGS(msg));
1da177e4 2563 if (err)
4257c8ca 2564 goto out;
1da177e4 2565 if (MSG_CMSG_COMPAT & flags)
a2e27255 2566 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2567 &msg_compat->msg_controllen);
2568 else
a2e27255 2569 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2570 &msg->msg_controllen);
2571 if (err)
4257c8ca 2572 goto out;
1da177e4 2573 err = len;
4257c8ca
JA
2574out:
2575 return err;
2576}
2577
2578static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2579 struct msghdr *msg_sys, unsigned int flags, int nosec)
2580{
2581 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2582 /* user mode address pointers */
2583 struct sockaddr __user *uaddr;
2584 ssize_t err;
2585
2586 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2587 if (err < 0)
2588 return err;
1da177e4 2589
4257c8ca 2590 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2591 kfree(iov);
a2e27255
ACM
2592 return err;
2593}
2594
2595/*
2596 * BSD recvmsg interface
2597 */
2598
03b1230c
JA
2599long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2600 struct user_msghdr __user *umsg,
2601 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2602{
d69e0779 2603 /* disallow ancillary data requests from this path */
03b1230c
JA
2604 if (msg->msg_control || msg->msg_controllen)
2605 return -EINVAL;
aa1fa28f 2606
03b1230c 2607 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2608}
2609
e1834a32
DB
2610long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2611 bool forbid_cmsg_compat)
a2e27255
ACM
2612{
2613 int fput_needed, err;
2614 struct msghdr msg_sys;
1be374a0
AL
2615 struct socket *sock;
2616
e1834a32
DB
2617 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2618 return -EINVAL;
2619
1be374a0 2620 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2621 if (!sock)
2622 goto out;
2623
a7526eb5 2624 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2625
6cb153ca 2626 fput_light(sock->file, fput_needed);
1da177e4
LT
2627out:
2628 return err;
2629}
2630
666547ff 2631SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2632 unsigned int, flags)
2633{
e1834a32 2634 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2635}
2636
a2e27255
ACM
2637/*
2638 * Linux recvmmsg interface
2639 */
2640
e11d4284
AB
2641static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2642 unsigned int vlen, unsigned int flags,
2643 struct timespec64 *timeout)
a2e27255
ACM
2644{
2645 int fput_needed, err, datagrams;
2646 struct socket *sock;
2647 struct mmsghdr __user *entry;
d7256d0e 2648 struct compat_mmsghdr __user *compat_entry;
a2e27255 2649 struct msghdr msg_sys;
766b9f92
DD
2650 struct timespec64 end_time;
2651 struct timespec64 timeout64;
a2e27255
ACM
2652
2653 if (timeout &&
2654 poll_select_set_timeout(&end_time, timeout->tv_sec,
2655 timeout->tv_nsec))
2656 return -EINVAL;
2657
2658 datagrams = 0;
2659
2660 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2661 if (!sock)
2662 return err;
2663
7797dc41
SHY
2664 if (likely(!(flags & MSG_ERRQUEUE))) {
2665 err = sock_error(sock->sk);
2666 if (err) {
2667 datagrams = err;
2668 goto out_put;
2669 }
e623a9e9 2670 }
a2e27255
ACM
2671
2672 entry = mmsg;
d7256d0e 2673 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2674
2675 while (datagrams < vlen) {
2676 /*
2677 * No need to ask LSM for more than the first datagram.
2678 */
d7256d0e 2679 if (MSG_CMSG_COMPAT & flags) {
666547ff 2680 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2681 &msg_sys, flags & ~MSG_WAITFORONE,
2682 datagrams);
d7256d0e
JMG
2683 if (err < 0)
2684 break;
2685 err = __put_user(err, &compat_entry->msg_len);
2686 ++compat_entry;
2687 } else {
a7526eb5 2688 err = ___sys_recvmsg(sock,
666547ff 2689 (struct user_msghdr __user *)entry,
a7526eb5
AL
2690 &msg_sys, flags & ~MSG_WAITFORONE,
2691 datagrams);
d7256d0e
JMG
2692 if (err < 0)
2693 break;
2694 err = put_user(err, &entry->msg_len);
2695 ++entry;
2696 }
2697
a2e27255
ACM
2698 if (err)
2699 break;
a2e27255
ACM
2700 ++datagrams;
2701
71c5c159
BB
2702 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2703 if (flags & MSG_WAITFORONE)
2704 flags |= MSG_DONTWAIT;
2705
a2e27255 2706 if (timeout) {
766b9f92 2707 ktime_get_ts64(&timeout64);
c2e6c856 2708 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2709 if (timeout->tv_sec < 0) {
2710 timeout->tv_sec = timeout->tv_nsec = 0;
2711 break;
2712 }
2713
2714 /* Timeout, return less than vlen datagrams */
2715 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2716 break;
2717 }
2718
2719 /* Out of band data, return right away */
2720 if (msg_sys.msg_flags & MSG_OOB)
2721 break;
a78cb84c 2722 cond_resched();
a2e27255
ACM
2723 }
2724
a2e27255 2725 if (err == 0)
34b88a68
ACM
2726 goto out_put;
2727
2728 if (datagrams == 0) {
2729 datagrams = err;
2730 goto out_put;
2731 }
a2e27255 2732
34b88a68
ACM
2733 /*
2734 * We may return less entries than requested (vlen) if the
2735 * sock is non block and there aren't enough datagrams...
2736 */
2737 if (err != -EAGAIN) {
a2e27255 2738 /*
34b88a68
ACM
2739 * ... or if recvmsg returns an error after we
2740 * received some datagrams, where we record the
2741 * error to return on the next call or if the
2742 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2743 */
34b88a68 2744 sock->sk->sk_err = -err;
a2e27255 2745 }
34b88a68
ACM
2746out_put:
2747 fput_light(sock->file, fput_needed);
a2e27255 2748
34b88a68 2749 return datagrams;
a2e27255
ACM
2750}
2751
e11d4284
AB
2752int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2753 unsigned int vlen, unsigned int flags,
2754 struct __kernel_timespec __user *timeout,
2755 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2756{
2757 int datagrams;
c2e6c856 2758 struct timespec64 timeout_sys;
a2e27255 2759
e11d4284
AB
2760 if (timeout && get_timespec64(&timeout_sys, timeout))
2761 return -EFAULT;
a2e27255 2762
e11d4284 2763 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2764 return -EFAULT;
2765
e11d4284
AB
2766 if (!timeout && !timeout32)
2767 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2768
2769 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2770
e11d4284
AB
2771 if (datagrams <= 0)
2772 return datagrams;
2773
2774 if (timeout && put_timespec64(&timeout_sys, timeout))
2775 datagrams = -EFAULT;
2776
2777 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2778 datagrams = -EFAULT;
2779
2780 return datagrams;
2781}
2782
1255e269
DB
2783SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2784 unsigned int, vlen, unsigned int, flags,
c2e6c856 2785 struct __kernel_timespec __user *, timeout)
1255e269 2786{
e11d4284
AB
2787 if (flags & MSG_CMSG_COMPAT)
2788 return -EINVAL;
2789
2790 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2791}
2792
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 *	recvmmsg system call entry point taking an old_timespec32
 *	timeout. MSG_CMSG_COMPAT in @flags is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	return (flags & MSG_CMSG_COMPAT) ?
		-EINVAL :
		__sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2804
a2e27255 2805#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number. AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3),	/* calls  0 ..  3 */
	AL(2), AL(3), AL(3), AL(3),	/* calls  4 ..  7 */
	AL(4), AL(4), AL(4), AL(6),	/* calls  8 .. 11 */
	AL(6), AL(2), AL(5), AL(5),	/* calls 12 .. 15 */
	AL(3), AL(3), AL(4), AL(5),	/* calls 16 .. 19 */
	AL(4)				/* call  20 */
};

#undef AL
2816
2817/*
89bddce5 2818 * System call vectors.
1da177e4
LT
2819 *
2820 * Argument checking cleaned up. Saved 20% in size.
2821 * This function doesn't need to set the kernel lock because
89bddce5 2822 * it is set by the callees.
1da177e4
LT
2823 */
2824
3e0fa65f 2825SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2826{
2950fa9d 2827 unsigned long a[AUDITSC_ARGS];
89bddce5 2828 unsigned long a0, a1;
1da177e4 2829 int err;
47379052 2830 unsigned int len;
1da177e4 2831
228e548e 2832 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2833 return -EINVAL;
c8e8cd57 2834 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2835
47379052
AV
2836 len = nargs[call];
2837 if (len > sizeof(a))
2838 return -EINVAL;
2839
1da177e4 2840 /* copy_from_user should be SMP safe. */
47379052 2841 if (copy_from_user(a, args, len))
1da177e4 2842 return -EFAULT;
3ec3b2fb 2843
2950fa9d
CG
2844 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2845 if (err)
2846 return err;
3ec3b2fb 2847
89bddce5
SH
2848 a0 = a[0];
2849 a1 = a[1];
2850
2851 switch (call) {
2852 case SYS_SOCKET:
9d6a15c3 2853 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2854 break;
2855 case SYS_BIND:
a87d35d8 2856 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2857 break;
2858 case SYS_CONNECT:
1387c2c2 2859 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2860 break;
2861 case SYS_LISTEN:
25e290ee 2862 err = __sys_listen(a0, a1);
89bddce5
SH
2863 break;
2864 case SYS_ACCEPT:
4541e805
DB
2865 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2866 (int __user *)a[2], 0);
89bddce5
SH
2867 break;
2868 case SYS_GETSOCKNAME:
2869 err =
8882a107
DB
2870 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2871 (int __user *)a[2]);
89bddce5
SH
2872 break;
2873 case SYS_GETPEERNAME:
2874 err =
b21c8f83
DB
2875 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2876 (int __user *)a[2]);
89bddce5
SH
2877 break;
2878 case SYS_SOCKETPAIR:
6debc8d8 2879 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2880 break;
2881 case SYS_SEND:
f3bf896b
DB
2882 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2883 NULL, 0);
89bddce5
SH
2884 break;
2885 case SYS_SENDTO:
211b634b
DB
2886 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2887 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2888 break;
2889 case SYS_RECV:
d27e9afc
DB
2890 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2891 NULL, NULL);
89bddce5
SH
2892 break;
2893 case SYS_RECVFROM:
7a09e1eb
DB
2894 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2895 (struct sockaddr __user *)a[4],
2896 (int __user *)a[5]);
89bddce5
SH
2897 break;
2898 case SYS_SHUTDOWN:
005a1aea 2899 err = __sys_shutdown(a0, a1);
89bddce5
SH
2900 break;
2901 case SYS_SETSOCKOPT:
cc36dca0
DB
2902 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2903 a[4]);
89bddce5
SH
2904 break;
2905 case SYS_GETSOCKOPT:
2906 err =
13a2d70e
DB
2907 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2908 (int __user *)a[4]);
89bddce5
SH
2909 break;
2910 case SYS_SENDMSG:
e1834a32
DB
2911 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2912 a[2], true);
89bddce5 2913 break;
228e548e 2914 case SYS_SENDMMSG:
e1834a32
DB
2915 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2916 a[3], true);
228e548e 2917 break;
89bddce5 2918 case SYS_RECVMSG:
e1834a32
DB
2919 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2920 a[2], true);
89bddce5 2921 break;
a2e27255 2922 case SYS_RECVMMSG:
3ca47e95 2923 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2924 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2925 a[2], a[3],
2926 (struct __kernel_timespec __user *)a[4],
2927 NULL);
2928 else
2929 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2930 a[2], a[3], NULL,
2931 (struct old_timespec32 __user *)a[4]);
a2e27255 2932 break;
de11defe 2933 case SYS_ACCEPT4:
4541e805
DB
2934 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2935 (int __user *)a[2], a[3]);
aaca0bdc 2936 break;
89bddce5
SH
2937 default:
2938 err = -EINVAL;
2939 break;
1da177e4
LT
2940 }
2941 return err;
2942}
2943
89bddce5 2944#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2945
55737fda
SH
2946/**
2947 * sock_register - add a socket protocol handler
2948 * @ops: description of protocol
2949 *
1da177e4
LT
2950 * This function is called by a protocol handler that wants to
2951 * advertise its address family, and have it linked into the
e793c0f7 2952 * socket interface. The value ops->family corresponds to the
55737fda 2953 * socket system call protocol family.
1da177e4 2954 */
f0fd27d4 2955int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2956{
2957 int err;
2958
2959 if (ops->family >= NPROTO) {
3410f22e 2960 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2961 return -ENOBUFS;
2962 }
55737fda
SH
2963
2964 spin_lock(&net_family_lock);
190683a9
ED
2965 if (rcu_dereference_protected(net_families[ops->family],
2966 lockdep_is_held(&net_family_lock)))
55737fda
SH
2967 err = -EEXIST;
2968 else {
cf778b00 2969 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2970 err = 0;
2971 }
55737fda
SH
2972 spin_unlock(&net_family_lock);
2973
3410f22e 2974 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2975 return err;
2976}
c6d409cf 2977EXPORT_SYMBOL(sock_register);
1da177e4 2978
55737fda
SH
2979/**
2980 * sock_unregister - remove a protocol handler
2981 * @family: protocol family to remove
2982 *
1da177e4
LT
2983 * This function is called by a protocol handler that wants to
2984 * remove its address family, and have it unlinked from the
55737fda
SH
2985 * new socket creation.
2986 *
2987 * If protocol handler is a module, then it can use module reference
2988 * counts to protect against new references. If protocol handler is not
2989 * a module then it needs to provide its own protection in
2990 * the ops->create routine.
1da177e4 2991 */
f0fd27d4 2992void sock_unregister(int family)
1da177e4 2993{
f0fd27d4 2994 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2995
55737fda 2996 spin_lock(&net_family_lock);
a9b3cd7f 2997 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2998 spin_unlock(&net_family_lock);
2999
3000 synchronize_rcu();
3001
3410f22e 3002 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 3003}
c6d409cf 3004EXPORT_SYMBOL(sock_unregister);
1da177e4 3005
bf2ae2e4
XL
3006bool sock_is_registered(int family)
3007{
66b51b0a 3008 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3009}
3010
77d76ea3 3011static int __init sock_init(void)
1da177e4 3012{
b3e19d92 3013 int err;
2ca794e5
EB
3014 /*
3015 * Initialize the network sysctl infrastructure.
3016 */
3017 err = net_sysctl_init();
3018 if (err)
3019 goto out;
b3e19d92 3020
1da177e4 3021 /*
89bddce5 3022 * Initialize skbuff SLAB cache
1da177e4
LT
3023 */
3024 skb_init();
1da177e4
LT
3025
3026 /*
89bddce5 3027 * Initialize the protocols module.
1da177e4
LT
3028 */
3029
3030 init_inodecache();
b3e19d92
NP
3031
3032 err = register_filesystem(&sock_fs_type);
3033 if (err)
3034 goto out_fs;
1da177e4 3035 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3036 if (IS_ERR(sock_mnt)) {
3037 err = PTR_ERR(sock_mnt);
3038 goto out_mount;
3039 }
77d76ea3
AK
3040
3041 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3042 */
3043
3044#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3045 err = netfilter_init();
3046 if (err)
3047 goto out;
1da177e4 3048#endif
cbeb321a 3049
408eccce 3050 ptp_classifier_init();
c1f19b51 3051
b3e19d92
NP
3052out:
3053 return err;
3054
3055out_mount:
3056 unregister_filesystem(&sock_fs_type);
3057out_fs:
3058 goto out;
1da177e4
LT
3059}
3060
77d76ea3
AK
3061core_initcall(sock_init); /* early initcall */
3062
1da177e4
LT
3063#ifdef CONFIG_PROC_FS
3064void socket_seq_show(struct seq_file *seq)
3065{
648845ab
TZ
3066 seq_printf(seq, "sockets: used %d\n",
3067 sock_inuse_get(seq->private));
1da177e4 3068}
89bddce5 3069#endif /* CONFIG_PROC_FS */
1da177e4 3070
89bbfc95 3071#ifdef CONFIG_COMPAT
36fd633e 3072static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3073{
6b96018b 3074 struct compat_ifconf ifc32;
7a229387 3075 struct ifconf ifc;
7a229387
AB
3076 int err;
3077
6b96018b 3078 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3079 return -EFAULT;
3080
36fd633e
AV
3081 ifc.ifc_len = ifc32.ifc_len;
3082 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3083
36fd633e
AV
3084 rtnl_lock();
3085 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3086 rtnl_unlock();
7a229387
AB
3087 if (err)
3088 return err;
3089
36fd633e 3090 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3091 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3092 return -EFAULT;
3093
3094 return 0;
3095}
3096
6b96018b 3097static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3098{
3a7da39d
BH
3099 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3100 bool convert_in = false, convert_out = false;
44c02a2c
AV
3101 size_t buf_size = 0;
3102 struct ethtool_rxnfc __user *rxnfc = NULL;
3103 struct ifreq ifr;
3a7da39d
BH
3104 u32 rule_cnt = 0, actual_rule_cnt;
3105 u32 ethcmd;
7a229387 3106 u32 data;
3a7da39d 3107 int ret;
7a229387 3108
3a7da39d
BH
3109 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3110 return -EFAULT;
7a229387 3111
3a7da39d
BH
3112 compat_rxnfc = compat_ptr(data);
3113
3114 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3115 return -EFAULT;
3116
3a7da39d
BH
3117 /* Most ethtool structures are defined without padding.
3118 * Unfortunately struct ethtool_rxnfc is an exception.
3119 */
3120 switch (ethcmd) {
3121 default:
3122 break;
3123 case ETHTOOL_GRXCLSRLALL:
3124 /* Buffer size is variable */
3125 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3126 return -EFAULT;
3127 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3128 return -ENOMEM;
3129 buf_size += rule_cnt * sizeof(u32);
3130 /* fall through */
3131 case ETHTOOL_GRXRINGS:
3132 case ETHTOOL_GRXCLSRLCNT:
3133 case ETHTOOL_GRXCLSRULE:
55664f32 3134 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3135 convert_out = true;
3136 /* fall through */
3137 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3138 buf_size += sizeof(struct ethtool_rxnfc);
3139 convert_in = true;
44c02a2c 3140 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3141 break;
3142 }
3143
44c02a2c 3144 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3145 return -EFAULT;
3146
44c02a2c 3147 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3148
3a7da39d 3149 if (convert_in) {
127fe533 3150 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3151 * fs.ring_cookie and at the end of fs, but nowhere else.
3152 */
127fe533
AD
3153 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3154 sizeof(compat_rxnfc->fs.m_ext) !=
3155 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3156 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3157 BUILD_BUG_ON(
3158 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3159 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3160 offsetof(struct ethtool_rxnfc, fs.location) -
3161 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3162
3163 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3164 (void __user *)(&rxnfc->fs.m_ext + 1) -
3165 (void __user *)rxnfc) ||
3a7da39d
BH
3166 copy_in_user(&rxnfc->fs.ring_cookie,
3167 &compat_rxnfc->fs.ring_cookie,
954b1244 3168 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3169 (void __user *)&rxnfc->fs.ring_cookie))
3170 return -EFAULT;
3171 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3172 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3173 return -EFAULT;
3174 } else if (copy_in_user(&rxnfc->rule_cnt,
3175 &compat_rxnfc->rule_cnt,
3176 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3177 return -EFAULT;
3178 }
3179
44c02a2c 3180 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3181 if (ret)
3182 return ret;
3183
3184 if (convert_out) {
3185 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3186 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3187 (const void __user *)rxnfc) ||
3a7da39d
BH
3188 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3189 &rxnfc->fs.ring_cookie,
954b1244
SH
3190 (const void __user *)(&rxnfc->fs.location + 1) -
3191 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3192 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3193 sizeof(rxnfc->rule_cnt)))
3194 return -EFAULT;
3195
3196 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3197 /* As an optimisation, we only copy the actual
3198 * number of rules that the underlying
3199 * function returned. Since Mallory might
3200 * change the rule count in user memory, we
3201 * check that it is less than the rule count
3202 * originally given (as the user buffer size),
3203 * which has been range-checked.
3204 */
3205 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3206 return -EFAULT;
3207 if (actual_rule_cnt < rule_cnt)
3208 rule_cnt = actual_rule_cnt;
3209 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3210 &rxnfc->rule_locs[0],
3211 rule_cnt * sizeof(u32)))
3212 return -EFAULT;
3213 }
3214 }
3215
3216 return 0;
7a229387
AB
3217}
3218
7a50a240
AB
3219static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3220{
7a50a240 3221 compat_uptr_t uptr32;
44c02a2c
AV
3222 struct ifreq ifr;
3223 void __user *saved;
3224 int err;
7a50a240 3225
44c02a2c 3226 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3227 return -EFAULT;
3228
3229 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3230 return -EFAULT;
3231
44c02a2c
AV
3232 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3233 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3234
44c02a2c
AV
3235 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3236 if (!err) {
3237 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3238 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3239 err = -EFAULT;
ccbd6a5a 3240 }
44c02a2c 3241 return err;
7a229387
AB
3242}
3243
590d4693
BH
3244/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3245static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3246 struct compat_ifreq __user *u_ifreq32)
7a229387 3247{
44c02a2c 3248 struct ifreq ifreq;
7a229387
AB
3249 u32 data32;
3250
44c02a2c 3251 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3252 return -EFAULT;
44c02a2c 3253 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3254 return -EFAULT;
44c02a2c 3255 ifreq.ifr_data = compat_ptr(data32);
7a229387 3256
44c02a2c 3257 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3258}
3259
37ac39bd
JB
3260static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3261 unsigned int cmd,
3262 struct compat_ifreq __user *uifr32)
3263{
3264 struct ifreq __user *uifr;
3265 int err;
3266
3267 /* Handle the fact that while struct ifreq has the same *layout* on
3268 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3269 * which are handled elsewhere, it still has different *size* due to
3270 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3271 * resulting in struct ifreq being 32 and 40 bytes respectively).
3272 * As a result, if the struct happens to be at the end of a page and
3273 * the next page isn't readable/writable, we get a fault. To prevent
3274 * that, copy back and forth to the full size.
3275 */
3276
3277 uifr = compat_alloc_user_space(sizeof(*uifr));
3278 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3279 return -EFAULT;
3280
3281 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3282
3283 if (!err) {
3284 switch (cmd) {
3285 case SIOCGIFFLAGS:
3286 case SIOCGIFMETRIC:
3287 case SIOCGIFMTU:
3288 case SIOCGIFMEM:
3289 case SIOCGIFHWADDR:
3290 case SIOCGIFINDEX:
3291 case SIOCGIFADDR:
3292 case SIOCGIFBRDADDR:
3293 case SIOCGIFDSTADDR:
3294 case SIOCGIFNETMASK:
3295 case SIOCGIFPFLAGS:
3296 case SIOCGIFTXQLEN:
3297 case SIOCGMIIPHY:
3298 case SIOCGMIIREG:
c6c9fee3 3299 case SIOCGIFNAME:
37ac39bd
JB
3300 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3301 err = -EFAULT;
3302 break;
3303 }
3304 }
3305 return err;
3306}
3307
a2116ed2
AB
3308static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3309 struct compat_ifreq __user *uifr32)
3310{
3311 struct ifreq ifr;
3312 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3313 int err;
3314
3315 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3316 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3317 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3318 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3319 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3320 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3321 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3322 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3323 if (err)
3324 return -EFAULT;
3325
44c02a2c 3326 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3327
3328 if (cmd == SIOCGIFMAP && !err) {
3329 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3330 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3331 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3332 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3333 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3334 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3335 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3336 if (err)
3337 err = -EFAULT;
3338 }
3339 return err;
3340}
3341
/*
 * Layout of the route entry as passed by a compat caller. routing_ioctl()
 * reads it member by member into a native struct rtentry; rt_dev holds a
 * 32-bit user pointer that is widened with compat_ptr().
 */
7a229387 3342struct rtentry32 {
c6d409cf 3343 u32 rt_pad1;
7a229387
AB
3344 struct sockaddr rt_dst; /* target address */
3345 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3346 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3347 unsigned short rt_flags;
3348 short rt_pad2;
3349 u32 rt_pad3;
3350 unsigned char rt_tos;
3351 unsigned char rt_class;
3352 short rt_pad4;
3353 short rt_metric; /* +1 for binary compatibility! */
7a229387 3354 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3355 u32 rt_mtu; /* per route MTU/Window */
3356 u32 rt_window; /* Window clamping */
7a229387
AB
3357 unsigned short rt_irtt; /* Initial RTT */
3358};
3359
/*
 * Layout of the IPv6 route message as passed by a compat caller.
 * routing_ioctl() reads it into a native struct in6_rtmsg: the three
 * addresses in one copy, the remaining members one at a time.
 */
3360struct in6_rtmsg32 {
3361 struct in6_addr rtmsg_dst;
3362 struct in6_addr rtmsg_src;
3363 struct in6_addr rtmsg_gateway;
3364 u32 rtmsg_type;
3365 u16 rtmsg_dst_len;
3366 u16 rtmsg_src_len;
3367 u32 rtmsg_metric;
3368 u32 rtmsg_info;
3369 u32 rtmsg_flags;
3370 s32 rtmsg_ifindex;
3371};
3372
6b96018b
AB
3373static int routing_ioctl(struct net *net, struct socket *sock,
3374 unsigned int cmd, void __user *argp)
7a229387
AB
3375{
3376 int ret;
3377 void *r = NULL;
3378 struct in6_rtmsg r6;
3379 struct rtentry r4;
3380 char devname[16];
3381 u32 rtdev;
3382 mm_segment_t old_fs = get_fs();
3383
6b96018b
AB
3384 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3385 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3386 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3387 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3388 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3389 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3390 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3391 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3392 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3393 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3394 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3395
3396 r = (void *) &r6;
3397 } else { /* ipv4 */
6b96018b 3398 struct rtentry32 __user *ur4 = argp;
c6d409cf 3399 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3400 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3401 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3402 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3403 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3404 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3405 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3406 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3407 if (rtdev) {
c6d409cf 3408 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3409 r4.rt_dev = (char __user __force *)devname;
3410 devname[15] = 0;
7a229387
AB
3411 } else
3412 r4.rt_dev = NULL;
3413
3414 r = (void *) &r4;
3415 }
3416
3417 if (ret) {
3418 ret = -EFAULT;
3419 goto out;
3420 }
3421
c6d409cf 3422 set_fs(KERNEL_DS);
63ff03ab 3423 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3424 set_fs(old_fs);
7a229387
AB
3425
3426out:
7a229387
AB
3427 return ret;
3428}
3429
3430/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3431 * for some operations; this forces use of the newer bridge-utils that
25985edc 3432 * use compatible ioctls
7a229387 3433 */
6b96018b 3434static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3435{
6b96018b 3436 compat_ulong_t tmp;
7a229387 3437
6b96018b 3438 if (get_user(tmp, argp))
7a229387
AB
3439 return -EFAULT;
3440 if (tmp == BRCTL_GET_VERSION)
3441 return BRCTL_VERSION + 1;
3442 return -EINVAL;
3443}
3444
6b96018b
AB
3445static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3446 unsigned int cmd, unsigned long arg)
3447{
3448 void __user *argp = compat_ptr(arg);
3449 struct sock *sk = sock->sk;
3450 struct net *net = sock_net(sk);
7a229387 3451
6b96018b 3452 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3453 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3454
3455 switch (cmd) {
3456 case SIOCSIFBR:
3457 case SIOCGIFBR:
3458 return old_bridge_ioctl(argp);
6b96018b 3459 case SIOCGIFCONF:
36fd633e 3460 return compat_dev_ifconf(net, argp);
6b96018b
AB
3461 case SIOCETHTOOL:
3462 return ethtool_ioctl(net, argp);
7a50a240
AB
3463 case SIOCWANDEV:
3464 return compat_siocwandev(net, argp);
a2116ed2
AB
3465 case SIOCGIFMAP:
3466 case SIOCSIFMAP:
3467 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3468 case SIOCADDRT:
3469 case SIOCDELRT:
3470 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3471 case SIOCGSTAMP_OLD:
3472 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3473 if (!sock->ops->gettstamp)
3474 return -ENOIOCTLCMD;
0768e170 3475 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3476 !COMPAT_USE_64BIT_TIME);
3477
590d4693
BH
3478 case SIOCBONDSLAVEINFOQUERY:
3479 case SIOCBONDINFOQUERY:
a2116ed2 3480 case SIOCSHWTSTAMP:
fd468c74 3481 case SIOCGHWTSTAMP:
590d4693 3482 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3483
3484 case FIOSETOWN:
3485 case SIOCSPGRP:
3486 case FIOGETOWN:
3487 case SIOCGPGRP:
3488 case SIOCBRADDBR:
3489 case SIOCBRDELBR:
3490 case SIOCGIFVLAN:
3491 case SIOCSIFVLAN:
3492 case SIOCADDDLCI:
3493 case SIOCDELDLCI:
c62cce2c 3494 case SIOCGSKNS:
0768e170
AB
3495 case SIOCGSTAMP_NEW:
3496 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3497 return sock_ioctl(file, cmd, arg);
3498
3499 case SIOCGIFFLAGS:
3500 case SIOCSIFFLAGS:
3501 case SIOCGIFMETRIC:
3502 case SIOCSIFMETRIC:
3503 case SIOCGIFMTU:
3504 case SIOCSIFMTU:
3505 case SIOCGIFMEM:
3506 case SIOCSIFMEM:
3507 case SIOCGIFHWADDR:
3508 case SIOCSIFHWADDR:
3509 case SIOCADDMULTI:
3510 case SIOCDELMULTI:
3511 case SIOCGIFINDEX:
6b96018b
AB
3512 case SIOCGIFADDR:
3513 case SIOCSIFADDR:
3514 case SIOCSIFHWBROADCAST:
6b96018b 3515 case SIOCDIFADDR:
6b96018b
AB
3516 case SIOCGIFBRDADDR:
3517 case SIOCSIFBRDADDR:
3518 case SIOCGIFDSTADDR:
3519 case SIOCSIFDSTADDR:
3520 case SIOCGIFNETMASK:
3521 case SIOCSIFNETMASK:
3522 case SIOCSIFPFLAGS:
3523 case SIOCGIFPFLAGS:
3524 case SIOCGIFTXQLEN:
3525 case SIOCSIFTXQLEN:
3526 case SIOCBRADDIF:
3527 case SIOCBRDELIF:
c6c9fee3 3528 case SIOCGIFNAME:
9177efd3
AB
3529 case SIOCSIFNAME:
3530 case SIOCGMIIPHY:
3531 case SIOCGMIIREG:
3532 case SIOCSMIIREG:
f92d4fc9
AV
3533 case SIOCBONDENSLAVE:
3534 case SIOCBONDRELEASE:
3535 case SIOCBONDSETHWADDR:
3536 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3537 return compat_ifreq_ioctl(net, sock, cmd, argp);
3538
6b96018b
AB
3539 case SIOCSARP:
3540 case SIOCGARP:
3541 case SIOCDARP:
c7dc504e 3542 case SIOCOUTQ:
9d7bf41f 3543 case SIOCOUTQNSD:
6b96018b 3544 case SIOCATMARK:
63ff03ab 3545 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3546 }
3547
6b96018b
AB
3548 return -ENOIOCTLCMD;
3549}
7a229387 3550
95c96174 3551static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3552 unsigned long arg)
89bbfc95
SP
3553{
3554 struct socket *sock = file->private_data;
3555 int ret = -ENOIOCTLCMD;
87de87d5
DM
3556 struct sock *sk;
3557 struct net *net;
3558
3559 sk = sock->sk;
3560 net = sock_net(sk);
89bbfc95
SP
3561
3562 if (sock->ops->compat_ioctl)
3563 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3564
87de87d5
DM
3565 if (ret == -ENOIOCTLCMD &&
3566 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3567 ret = compat_wext_handle_ioctl(net, cmd, arg);
3568
6b96018b
AB
3569 if (ret == -ENOIOCTLCMD)
3570 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3571
89bbfc95
SP
3572 return ret;
3573}
3574#endif
3575
8a3c245c
PT
3576/**
3577 * kernel_bind - bind an address to a socket (kernel space)
3578 * @sock: socket
3579 * @addr: address
3580 * @addrlen: length of address
3581 *
3582 * Returns 0 or an error.
3583 */
3584
ac5a488e
SS
3585int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3586{
3587 return sock->ops->bind(sock, addr, addrlen);
3588}
c6d409cf 3589EXPORT_SYMBOL(kernel_bind);
ac5a488e 3590
8a3c245c
PT
3591/**
3592 * kernel_listen - move socket to listening state (kernel space)
3593 * @sock: socket
3594 * @backlog: pending connections queue size
3595 *
3596 * Returns 0 or an error.
3597 */
3598
ac5a488e
SS
3599int kernel_listen(struct socket *sock, int backlog)
3600{
3601 return sock->ops->listen(sock, backlog);
3602}
c6d409cf 3603EXPORT_SYMBOL(kernel_listen);
ac5a488e 3604
8a3c245c
PT
3605/**
3606 * kernel_accept - accept a connection (kernel space)
3607 * @sock: listening socket
3608 * @newsock: new connected socket
3609 * @flags: flags
3610 *
3611 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3612 * If it fails, @newsock is guaranteed to be %NULL.
3613 * Returns 0 or an error.
3614 */
3615
ac5a488e
SS
3616int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3617{
3618 struct sock *sk = sock->sk;
3619 int err;
3620
3621 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3622 newsock);
3623 if (err < 0)
3624 goto done;
3625
cdfbabfb 3626 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3627 if (err < 0) {
3628 sock_release(*newsock);
fa8705b0 3629 *newsock = NULL;
ac5a488e
SS
3630 goto done;
3631 }
3632
3633 (*newsock)->ops = sock->ops;
1b08534e 3634 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3635
3636done:
3637 return err;
3638}
c6d409cf 3639EXPORT_SYMBOL(kernel_accept);
ac5a488e 3640
8a3c245c
PT
3641/**
3642 * kernel_connect - connect a socket (kernel space)
3643 * @sock: socket
3644 * @addr: address
3645 * @addrlen: address length
3646 * @flags: flags (O_NONBLOCK, ...)
3647 *
3648 * For datagram sockets, @addr is the addres to which datagrams are sent
3649 * by default, and the only address from which datagrams are received.
3650 * For stream sockets, attempts to connect to @addr.
3651 * Returns 0 or an error code.
3652 */
3653
ac5a488e 3654int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3655 int flags)
ac5a488e
SS
3656{
3657 return sock->ops->connect(sock, addr, addrlen, flags);
3658}
c6d409cf 3659EXPORT_SYMBOL(kernel_connect);
ac5a488e 3660
8a3c245c
PT
3661/**
3662 * kernel_getsockname - get the address which the socket is bound (kernel space)
3663 * @sock: socket
3664 * @addr: address holder
3665 *
3666 * Fills the @addr pointer with the address which the socket is bound.
3667 * Returns 0 or an error code.
3668 */
3669
9b2c45d4 3670int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3671{
9b2c45d4 3672 return sock->ops->getname(sock, addr, 0);
ac5a488e 3673}
c6d409cf 3674EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3675
8a3c245c
PT
3676/**
3677 * kernel_peername - get the address which the socket is connected (kernel space)
3678 * @sock: socket
3679 * @addr: address holder
3680 *
3681 * Fills the @addr pointer with the address which the socket is connected.
3682 * Returns 0 or an error code.
3683 */
3684
9b2c45d4 3685int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3686{
9b2c45d4 3687 return sock->ops->getname(sock, addr, 1);
ac5a488e 3688}
c6d409cf 3689EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3690
8a3c245c
PT
3691/**
3692 * kernel_getsockopt - get a socket option (kernel space)
3693 * @sock: socket
3694 * @level: API level (SOL_SOCKET, ...)
3695 * @optname: option tag
3696 * @optval: option value
3697 * @optlen: option length
3698 *
3699 * Assigns the option length to @optlen.
3700 * Returns 0 or an error.
3701 */
3702
ac5a488e
SS
3703int kernel_getsockopt(struct socket *sock, int level, int optname,
3704 char *optval, int *optlen)
3705{
3706 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3707 char __user *uoptval;
3708 int __user *uoptlen;
ac5a488e
SS
3709 int err;
3710
fb8621bb
NK
3711 uoptval = (char __user __force *) optval;
3712 uoptlen = (int __user __force *) optlen;
3713
ac5a488e
SS
3714 set_fs(KERNEL_DS);
3715 if (level == SOL_SOCKET)
fb8621bb 3716 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3717 else
fb8621bb
NK
3718 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3719 uoptlen);
ac5a488e
SS
3720 set_fs(oldfs);
3721 return err;
3722}
c6d409cf 3723EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3724
8a3c245c
PT
3725/**
3726 * kernel_setsockopt - set a socket option (kernel space)
3727 * @sock: socket
3728 * @level: API level (SOL_SOCKET, ...)
3729 * @optname: option tag
3730 * @optval: option value
3731 * @optlen: option length
3732 *
3733 * Returns 0 or an error.
3734 */
3735
ac5a488e 3736int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3737 char *optval, unsigned int optlen)
ac5a488e
SS
3738{
3739 mm_segment_t oldfs = get_fs();
fb8621bb 3740 char __user *uoptval;
ac5a488e
SS
3741 int err;
3742
fb8621bb
NK
3743 uoptval = (char __user __force *) optval;
3744
ac5a488e
SS
3745 set_fs(KERNEL_DS);
3746 if (level == SOL_SOCKET)
fb8621bb 3747 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3748 else
fb8621bb 3749 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3750 optlen);
3751 set_fs(oldfs);
3752 return err;
3753}
c6d409cf 3754EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3755
8a3c245c
PT
3756/**
3757 * kernel_sendpage - send a &page through a socket (kernel space)
3758 * @sock: socket
3759 * @page: page
3760 * @offset: page offset
3761 * @size: total size in bytes
3762 * @flags: flags (MSG_DONTWAIT, ...)
3763 *
3764 * Returns the total amount sent in bytes or an error.
3765 */
3766
ac5a488e
SS
3767int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3768 size_t size, int flags)
3769{
3770 if (sock->ops->sendpage)
3771 return sock->ops->sendpage(sock, page, offset, size, flags);
3772
3773 return sock_no_sendpage(sock, page, offset, size, flags);
3774}
c6d409cf 3775EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3776
8a3c245c
PT
3777/**
3778 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3779 * @sk: sock
3780 * @page: page
3781 * @offset: page offset
3782 * @size: total size in bytes
3783 * @flags: flags (MSG_DONTWAIT, ...)
3784 *
3785 * Returns the total amount sent in bytes or an error.
3786 * Caller must hold @sk.
3787 */
3788
306b13eb
TH
3789int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3790 size_t size, int flags)
3791{
3792 struct socket *sock = sk->sk_socket;
3793
3794 if (sock->ops->sendpage_locked)
3795 return sock->ops->sendpage_locked(sk, page, offset, size,
3796 flags);
3797
3798 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3799}
3800EXPORT_SYMBOL(kernel_sendpage_locked);
3801
8a3c245c
PT
3802/**
3803 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3804 * @sock: socket
3805 * @how: connection part
3806 *
3807 * Returns 0 or an error.
3808 */
3809
91cf45f0
TM
3810int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3811{
3812 return sock->ops->shutdown(sock, how);
3813}
91cf45f0 3814EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3815
8a3c245c
PT
3816/**
3817 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3818 * @sk: socket
3819 *
3820 * This routine returns the IP overhead imposed by a socket i.e.
3821 * the length of the underlying IP header, depending on whether
3822 * this is an IPv4 or IPv6 socket and the length from IP options turned
3823 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3824 */
8a3c245c 3825
113c3075
P
3826u32 kernel_sock_ip_overhead(struct sock *sk)
3827{
3828 struct inet_sock *inet;
3829 struct ip_options_rcu *opt;
3830 u32 overhead = 0;
113c3075
P
3831#if IS_ENABLED(CONFIG_IPV6)
3832 struct ipv6_pinfo *np;
3833 struct ipv6_txoptions *optv6 = NULL;
3834#endif /* IS_ENABLED(CONFIG_IPV6) */
3835
3836 if (!sk)
3837 return overhead;
3838
113c3075
P
3839 switch (sk->sk_family) {
3840 case AF_INET:
3841 inet = inet_sk(sk);
3842 overhead += sizeof(struct iphdr);
3843 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3844 sock_owned_by_user(sk));
113c3075
P
3845 if (opt)
3846 overhead += opt->opt.optlen;
3847 return overhead;
3848#if IS_ENABLED(CONFIG_IPV6)
3849 case AF_INET6:
3850 np = inet6_sk(sk);
3851 overhead += sizeof(struct ipv6hdr);
3852 if (np)
3853 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3854 sock_owned_by_user(sk));
113c3075
P
3855 if (optv6)
3856 overhead += (optv6->opt_flen + optv6->opt_nflen);
3857 return overhead;
3858#endif /* IS_ENABLED(CONFIG_IPV6) */
3859 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3860 return overhead;
3861 }
3862}
3863EXPORT_SYMBOL(kernel_sock_ip_overhead);