]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - net/socket.c
proc/sysctl: add shared variables for range check
[mirror_ubuntu-hirsute-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4
LT
75#include <linux/mount.h>
76#include <linux/security.h>
77#include <linux/syscalls.h>
78#include <linux/compat.h>
79#include <linux/kmod.h>
3ec3b2fb 80#include <linux/audit.h>
d86b5e0e 81#include <linux/wireless.h>
1b8d7ae4 82#include <linux/nsproxy.h>
1fd7317d 83#include <linux/magic.h>
5a0e3ad6 84#include <linux/slab.h>
600e1779 85#include <linux/xattr.h>
c8e8cd57 86#include <linux/nospec.h>
8c3c447b 87#include <linux/indirect_call_wrapper.h>
1da177e4 88
7c0f6ba6 89#include <linux/uaccess.h>
1da177e4
LT
90#include <asm/unistd.h>
91
92#include <net/compat.h>
87de87d5 93#include <net/wext.h>
f8451725 94#include <net/cls_cgroup.h>
1da177e4
LT
95
96#include <net/sock.h>
97#include <linux/netfilter.h>
98
6b96018b
AB
99#include <linux/if_tun.h>
100#include <linux/ipv6_route.h>
101#include <linux/route.h>
6b96018b 102#include <linux/sockios.h>
076bb0c8 103#include <net/busy_poll.h>
f24b9be5 104#include <linux/errqueue.h>
06021292 105
e0d1095a 106#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
107unsigned int sysctl_net_busy_read __read_mostly;
108unsigned int sysctl_net_busy_poll __read_mostly;
06021292 109#endif
6b96018b 110
8ae5e030
AV
111static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
112static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
116static __poll_t sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
8ae5e030
AV
138 .read_iter = sock_read_iter,
139 .write_iter = sock_write_iter,
1da177e4
LT
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4 145 .mmap = sock_mmap,
1da177e4
LT
146 .release = sock_close,
147 .fasync = sock_fasync,
5274f052
JA
148 .sendpage = sock_sendpage,
149 .splice_write = generic_splice_sendpage,
9c55e01c 150 .splice_read = sock_splice_read,
1da177e4
LT
151};
152
153/*
154 * The protocol list. Each protocol is registered in here.
155 */
156
1da177e4 157static DEFINE_SPINLOCK(net_family_lock);
190683a9 158static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 159
1da177e4 160/*
89bddce5
SH
161 * Support routines.
162 * Move socket addresses back and forth across the kernel/user
163 * divide and look after the messy bits.
1da177e4
LT
164 */
165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
43db362d 177int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 178{
230b1839 179 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5 204
43db362d 205static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 206 void __user *uaddr, int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
68c6beb3 211 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
212 err = get_user(len, ulen);
213 if (err)
1da177e4 214 return err;
89bddce5
SH
215 if (len > klen)
216 len = klen;
68c6beb3 217 if (len < 0)
1da177e4 218 return -EINVAL;
89bddce5 219 if (len) {
d6fe3945
SG
220 if (audit_sockaddr(klen, kaddr))
221 return -ENOMEM;
89bddce5 222 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
223 return -EFAULT;
224 }
225 /*
89bddce5
SH
226 * "fromlen shall refer to the value before truncation.."
227 * 1003.1g
1da177e4
LT
228 */
229 return __put_user(klen, ulen);
230}
231
08009a76 232static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
233
234static struct inode *sock_alloc_inode(struct super_block *sb)
235{
236 struct socket_alloc *ei;
89bddce5 237
e94b1766 238 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
239 if (!ei)
240 return NULL;
333f7909
AV
241 init_waitqueue_head(&ei->socket.wq.wait);
242 ei->socket.wq.fasync_list = NULL;
243 ei->socket.wq.flags = 0;
89bddce5 244
1da177e4
LT
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
6d7855c5 254static void sock_free_inode(struct inode *inode)
1da177e4 255{
43815482
ED
256 struct socket_alloc *ei;
257
258 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 259 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
260}
261
51cc5068 262static void init_once(void *foo)
1da177e4 263{
89bddce5 264 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 265
a35afb83 266 inode_init_once(&ei->vfs_inode);
1da177e4 267}
89bddce5 268
1e911632 269static void init_inodecache(void)
1da177e4
LT
270{
271 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
272 sizeof(struct socket_alloc),
273 0,
274 (SLAB_HWCACHE_ALIGN |
275 SLAB_RECLAIM_ACCOUNT |
5d097056 276 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 277 init_once);
1e911632 278 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
279}
280
b87221de 281static const struct super_operations sockfs_ops = {
c6d409cf 282 .alloc_inode = sock_alloc_inode,
6d7855c5 283 .free_inode = sock_free_inode,
c6d409cf 284 .statfs = simple_statfs,
1da177e4
LT
285};
286
c23fbb6b
ED
287/*
288 * sockfs_dname() is called from d_path().
289 */
290static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
291{
292 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 293 d_inode(dentry)->i_ino);
c23fbb6b
ED
294}
295
3ba13d17 296static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 297 .d_dname = sockfs_dname,
1da177e4
LT
298};
299
bba0bd31
AG
300static int sockfs_xattr_get(const struct xattr_handler *handler,
301 struct dentry *dentry, struct inode *inode,
302 const char *suffix, void *value, size_t size)
303{
304 if (value) {
305 if (dentry->d_name.len + 1 > size)
306 return -ERANGE;
307 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
308 }
309 return dentry->d_name.len + 1;
310}
311
312#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
313#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
314#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
315
316static const struct xattr_handler sockfs_xattr_handler = {
317 .name = XATTR_NAME_SOCKPROTONAME,
318 .get = sockfs_xattr_get,
319};
320
4a590153
AG
321static int sockfs_security_xattr_set(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, const void *value,
324 size_t size, int flags)
325{
326 /* Handled by LSM. */
327 return -EAGAIN;
328}
329
330static const struct xattr_handler sockfs_security_xattr_handler = {
331 .prefix = XATTR_SECURITY_PREFIX,
332 .set = sockfs_security_xattr_set,
333};
334
bba0bd31
AG
335static const struct xattr_handler *sockfs_xattr_handlers[] = {
336 &sockfs_xattr_handler,
4a590153 337 &sockfs_security_xattr_handler,
bba0bd31
AG
338 NULL
339};
340
c74a1cbb
AV
341static struct dentry *sockfs_mount(struct file_system_type *fs_type,
342 int flags, const char *dev_name, void *data)
343{
bba0bd31
AG
344 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
345 sockfs_xattr_handlers,
346 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
347}
348
349static struct vfsmount *sock_mnt __read_mostly;
350
351static struct file_system_type sock_fs_type = {
352 .name = "sockfs",
353 .mount = sockfs_mount,
354 .kill_sb = kill_anon_super,
355};
356
1da177e4
LT
357/*
358 * Obtains the first available file descriptor and sets it up for use.
359 *
39d8c1b6
DM
360 * These functions create file structures and maps them to fd space
361 * of the current process. On success it returns file descriptor
1da177e4
LT
362 * and file struct implicitly stored in sock->file.
363 * Note that another thread may close file descriptor before we return
364 * from this function. We use the fact that now we do not refer
365 * to socket after mapping. If one day we will need it, this
366 * function will increment ref. count on file by 1.
367 *
368 * In any case returned fd MAY BE not valid!
369 * This race condition is unavoidable
370 * with shared fd spaces, we cannot solve it inside kernel,
371 * but we take care of internal coherence yet.
372 */
373
8a3c245c
PT
374/**
375 * sock_alloc_file - Bind a &socket to a &file
376 * @sock: socket
377 * @flags: file status flags
378 * @dname: protocol name
379 *
380 * Returns the &file bound with @sock, implicitly storing it
381 * in sock->file. If dname is %NULL, sets to "".
382 * On failure the return is a ERR pointer (see linux/err.h).
383 * This function uses GFP_KERNEL internally.
384 */
385
aab174f0 386struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 387{
7cbe66b6 388 struct file *file;
1da177e4 389
d93aa9d8
AV
390 if (!dname)
391 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 392
d93aa9d8
AV
393 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
394 O_RDWR | (flags & O_NONBLOCK),
395 &socket_file_ops);
b5ffe634 396 if (IS_ERR(file)) {
8e1611e2 397 sock_release(sock);
39b65252 398 return file;
cc3808f8
AV
399 }
400
401 sock->file = file;
39d8c1b6 402 file->private_data = sock;
28407630 403 return file;
39d8c1b6 404}
56b31d1c 405EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 406
56b31d1c 407static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
408{
409 struct file *newfile;
28407630 410 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
411 if (unlikely(fd < 0)) {
412 sock_release(sock);
28407630 413 return fd;
ce4bb04c 414 }
39d8c1b6 415
aab174f0 416 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 417 if (!IS_ERR(newfile)) {
39d8c1b6 418 fd_install(fd, newfile);
28407630
AV
419 return fd;
420 }
7cbe66b6 421
28407630
AV
422 put_unused_fd(fd);
423 return PTR_ERR(newfile);
1da177e4
LT
424}
425
8a3c245c
PT
426/**
427 * sock_from_file - Return the &socket bounded to @file.
428 * @file: file
429 * @err: pointer to an error code return
430 *
431 * On failure returns %NULL and assigns -ENOTSOCK to @err.
432 */
433
406a3c63 434struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 435{
6cb153ca
BL
436 if (file->f_op == &socket_file_ops)
437 return file->private_data; /* set in sock_map_fd */
438
23bb80d2
ED
439 *err = -ENOTSOCK;
440 return NULL;
6cb153ca 441}
406a3c63 442EXPORT_SYMBOL(sock_from_file);
6cb153ca 443
1da177e4 444/**
c6d409cf 445 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
446 * @fd: file handle
447 * @err: pointer to an error code return
448 *
449 * The file handle passed in is locked and the socket it is bound
241c4667 450 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
451 * with a negative errno code and NULL is returned. The function checks
452 * for both invalid handles and passing a handle which is not a socket.
453 *
454 * On a success the socket object pointer is returned.
455 */
456
457struct socket *sockfd_lookup(int fd, int *err)
458{
459 struct file *file;
1da177e4
LT
460 struct socket *sock;
461
89bddce5
SH
462 file = fget(fd);
463 if (!file) {
1da177e4
LT
464 *err = -EBADF;
465 return NULL;
466 }
89bddce5 467
6cb153ca
BL
468 sock = sock_from_file(file, err);
469 if (!sock)
1da177e4 470 fput(file);
6cb153ca
BL
471 return sock;
472}
c6d409cf 473EXPORT_SYMBOL(sockfd_lookup);
1da177e4 474
6cb153ca
BL
475static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
476{
00e188ef 477 struct fd f = fdget(fd);
6cb153ca
BL
478 struct socket *sock;
479
3672558c 480 *err = -EBADF;
00e188ef
AV
481 if (f.file) {
482 sock = sock_from_file(f.file, err);
483 if (likely(sock)) {
484 *fput_needed = f.flags;
6cb153ca 485 return sock;
00e188ef
AV
486 }
487 fdput(f);
1da177e4 488 }
6cb153ca 489 return NULL;
1da177e4
LT
490}
491
600e1779
MY
492static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
493 size_t size)
494{
495 ssize_t len;
496 ssize_t used = 0;
497
c5ef6035 498 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
499 if (len < 0)
500 return len;
501 used += len;
502 if (buffer) {
503 if (size < used)
504 return -ERANGE;
505 buffer += len;
506 }
507
508 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
509 used += len;
510 if (buffer) {
511 if (size < used)
512 return -ERANGE;
513 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
514 buffer += len;
515 }
516
517 return used;
518}
519
dc647ec8 520static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
521{
522 int err = simple_setattr(dentry, iattr);
523
e1a3a60a 524 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
525 struct socket *sock = SOCKET_I(d_inode(dentry));
526
6d8c50dc
CW
527 if (sock->sk)
528 sock->sk->sk_uid = iattr->ia_uid;
529 else
530 err = -ENOENT;
86741ec2
LC
531 }
532
533 return err;
534}
535
600e1779 536static const struct inode_operations sockfs_inode_ops = {
600e1779 537 .listxattr = sockfs_listxattr,
86741ec2 538 .setattr = sockfs_setattr,
600e1779
MY
539};
540
1da177e4 541/**
8a3c245c 542 * sock_alloc - allocate a socket
89bddce5 543 *
1da177e4
LT
544 * Allocate a new inode and socket object. The two are bound together
545 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 546 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
547 */
548
f4a00aac 549struct socket *sock_alloc(void)
1da177e4 550{
89bddce5
SH
551 struct inode *inode;
552 struct socket *sock;
1da177e4 553
a209dfc7 554 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
555 if (!inode)
556 return NULL;
557
558 sock = SOCKET_I(inode);
559
85fe4025 560 inode->i_ino = get_next_ino();
89bddce5 561 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
562 inode->i_uid = current_fsuid();
563 inode->i_gid = current_fsgid();
600e1779 564 inode->i_op = &sockfs_inode_ops;
1da177e4 565
1da177e4
LT
566 return sock;
567}
f4a00aac 568EXPORT_SYMBOL(sock_alloc);
1da177e4 569
1da177e4 570/**
8a3c245c 571 * sock_release - close a socket
1da177e4
LT
572 * @sock: socket to close
573 *
574 * The socket is released from the protocol stack if it has a release
575 * callback, and the inode is then released if the socket is bound to
89bddce5 576 * an inode not a file.
1da177e4 577 */
89bddce5 578
6d8c50dc 579static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
580{
581 if (sock->ops) {
582 struct module *owner = sock->ops->owner;
583
6d8c50dc
CW
584 if (inode)
585 inode_lock(inode);
1da177e4 586 sock->ops->release(sock);
ff7b11aa 587 sock->sk = NULL;
6d8c50dc
CW
588 if (inode)
589 inode_unlock(inode);
1da177e4
LT
590 sock->ops = NULL;
591 module_put(owner);
592 }
593
333f7909 594 if (sock->wq.fasync_list)
3410f22e 595 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 596
1da177e4
LT
597 if (!sock->file) {
598 iput(SOCK_INODE(sock));
599 return;
600 }
89bddce5 601 sock->file = NULL;
1da177e4 602}
6d8c50dc
CW
603
604void sock_release(struct socket *sock)
605{
606 __sock_release(sock, NULL);
607}
c6d409cf 608EXPORT_SYMBOL(sock_release);
1da177e4 609
c14ac945 610void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 611{
140c55d4
ED
612 u8 flags = *tx_flags;
613
c14ac945 614 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
615 flags |= SKBTX_HW_TSTAMP;
616
c14ac945 617 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
618 flags |= SKBTX_SW_TSTAMP;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
621 flags |= SKBTX_SCHED_TSTAMP;
622
140c55d4 623 *tx_flags = flags;
20d49473 624}
67cc0d40 625EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 626
8c3c447b
PA
627INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
628 size_t));
a648a592
PA
629INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
630 size_t));
d8725c86 631static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 632{
a648a592
PA
633 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
634 inet_sendmsg, sock, msg,
635 msg_data_left(msg));
d8725c86
AV
636 BUG_ON(ret == -EIOCBQUEUED);
637 return ret;
1da177e4
LT
638}
639
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Runs the LSM sendmsg hook first; a non-zero result from the hook is
 *	returned as is. Otherwise the message is handed to the protocol and
 *	its return value (bytes sent, or an error code) is returned.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 656
8a3c245c
PT
657/**
658 * kernel_sendmsg - send a message through @sock (kernel-space)
659 * @sock: socket
660 * @msg: message header
661 * @vec: kernel vec
662 * @num: vec array length
663 * @size: total message data size
664 *
665 * Builds the message data with @vec and sends it through @sock.
666 * Returns the number of bytes sent, or an error code.
667 */
668
1da177e4
LT
669int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
670 struct kvec *vec, size_t num, size_t size)
671{
aa563d7b 672 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 673 return sock_sendmsg(sock, msg);
1da177e4 674}
c6d409cf 675EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 676
8a3c245c
PT
677/**
678 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
679 * @sk: sock
680 * @msg: message header
681 * @vec: output s/g array
682 * @num: output s/g array length
683 * @size: total message data size
684 *
685 * Builds the message data with @vec and sends it through @sock.
686 * Returns the number of bytes sent, or an error code.
687 * Caller must hold @sk.
688 */
689
306b13eb
TH
690int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
691 struct kvec *vec, size_t num, size_t size)
692{
693 struct socket *sock = sk->sk_socket;
694
695 if (!sock->ops->sendmsg_locked)
db5980d8 696 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 697
aa563d7b 698 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
699
700 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
701}
702EXPORT_SYMBOL(kernel_sendmsg_locked);
703
8605330a
SHY
704static bool skb_is_err_queue(const struct sk_buff *skb)
705{
706 /* pkt_type of skbs enqueued on the error queue are set to
707 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
708 * in recvmsg, since skbs received on a local socket will never
709 * have a pkt_type of PACKET_OUTGOING.
710 */
711 return skb->pkt_type == PACKET_OUTGOING;
712}
713
b50a5c70
ML
714/* On transmit, software and hardware timestamps are returned independently.
715 * As the two skb clones share the hardware timestamp, which may be updated
716 * before the software timestamp is received, a hardware TX timestamp may be
717 * returned only if there is no software TX timestamp. Ignore false software
718 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 719 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
720 * hardware timestamp.
721 */
722static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
723{
724 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
725}
726
aad9c8c4
ML
727static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
728{
729 struct scm_ts_pktinfo ts_pktinfo;
730 struct net_device *orig_dev;
731
732 if (!skb_mac_header_was_set(skb))
733 return;
734
735 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
736
737 rcu_read_lock();
738 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
739 if (orig_dev)
740 ts_pktinfo.if_index = orig_dev->ifindex;
741 rcu_read_unlock();
742
743 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
744 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
745 sizeof(ts_pktinfo), &ts_pktinfo);
746}
747
92f37fd2
ED
748/*
749 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
750 */
751void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
752 struct sk_buff *skb)
753{
20d49473 754 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 755 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
756 struct scm_timestamping_internal tss;
757
b50a5c70 758 int empty = 1, false_tstamp = 0;
20d49473
PO
759 struct skb_shared_hwtstamps *shhwtstamps =
760 skb_hwtstamps(skb);
761
762 /* Race occurred between timestamp enabling and packet
763 receiving. Fill in the current time for now. */
b50a5c70 764 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 765 __net_timestamp(skb);
b50a5c70
ML
766 false_tstamp = 1;
767 }
20d49473
PO
768
769 if (need_software_tstamp) {
770 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
771 if (new_tstamp) {
772 struct __kernel_sock_timeval tv;
773
774 skb_get_new_timestamp(skb, &tv);
775 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
776 sizeof(tv), &tv);
777 } else {
778 struct __kernel_old_timeval tv;
779
780 skb_get_timestamp(skb, &tv);
781 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
782 sizeof(tv), &tv);
783 }
20d49473 784 } else {
887feae3
DD
785 if (new_tstamp) {
786 struct __kernel_timespec ts;
787
788 skb_get_new_timestampns(skb, &ts);
789 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
790 sizeof(ts), &ts);
791 } else {
792 struct timespec ts;
793
794 skb_get_timestampns(skb, &ts);
795 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
796 sizeof(ts), &ts);
797 }
20d49473
PO
798 }
799 }
800
f24b9be5 801 memset(&tss, 0, sizeof(tss));
c199105d 802 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 803 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 804 empty = 0;
4d276eb6 805 if (shhwtstamps &&
b9f40e21 806 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 807 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 808 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 809 empty = 0;
aad9c8c4
ML
810 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
811 !skb_is_err_queue(skb))
812 put_ts_pktinfo(msg, skb);
813 }
1c885808 814 if (!empty) {
9718475e
DD
815 if (sock_flag(sk, SOCK_TSTAMP_NEW))
816 put_cmsg_scm_timestamping64(msg, &tss);
817 else
818 put_cmsg_scm_timestamping(msg, &tss);
1c885808 819
8605330a 820 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 821 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
822 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
823 skb->len, skb->data);
824 }
92f37fd2 825}
7c81fd8b
ACM
826EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
827
6e3e939f
JB
828void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
829 struct sk_buff *skb)
830{
831 int ack;
832
833 if (!sock_flag(sk, SOCK_WIFI_STATUS))
834 return;
835 if (!skb->wifi_acked_valid)
836 return;
837
838 ack = skb->wifi_acked;
839
840 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
841}
842EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
843
11165f14 844static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
845 struct sk_buff *skb)
3b885787 846{
744d5a3e 847 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 848 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 849 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
850}
851
/* Emit the timestamp control messages followed by the drop counter. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 859
8c3c447b 860INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
861 size_t, int));
862INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
863 size_t, int));
1b784140 864static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 865 int flags)
1da177e4 866{
a648a592
PA
867 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
868 inet_recvmsg, sock, msg, msg_data_left(msg),
869 flags);
1da177e4
LT
870}
871
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Runs the LSM recvmsg hook first; a non-zero result from the hook is
 *	returned as is. Otherwise the protocol's recvmsg result is returned:
 *	the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 888
c1249c0a 889/**
8a3c245c
PT
890 * kernel_recvmsg - Receive a message from a socket (kernel space)
891 * @sock: The socket to receive the message from
892 * @msg: Received message
893 * @vec: Input s/g array for message data
894 * @num: Size of input s/g array
895 * @size: Number of bytes to read
896 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 897 *
8a3c245c
PT
898 * On return the msg structure contains the scatter/gather array passed in the
899 * vec argument. The array is modified so that it consists of the unfilled
900 * portion of the original array.
c1249c0a 901 *
8a3c245c 902 * The returned value is the total number of bytes received, or an error.
c1249c0a 903 */
8a3c245c 904
89bddce5
SH
905int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
906 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
907{
908 mm_segment_t oldfs = get_fs();
909 int result;
910
aa563d7b 911 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 912 set_fs(KERNEL_DS);
2da62906 913 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
914 set_fs(oldfs);
915 return result;
916}
c6d409cf 917EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 918
ce1d4d3e
CH
919static ssize_t sock_sendpage(struct file *file, struct page *page,
920 int offset, size_t size, loff_t *ppos, int more)
1da177e4 921{
1da177e4
LT
922 struct socket *sock;
923 int flags;
924
ce1d4d3e
CH
925 sock = file->private_data;
926
35f9c09f
ED
927 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
928 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
929 flags |= more;
ce1d4d3e 930
e6949583 931 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 932}
1da177e4 933
9c55e01c 934static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 935 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
936 unsigned int flags)
937{
938 struct socket *sock = file->private_data;
939
997b37da 940 if (unlikely(!sock->ops->splice_read))
95506588 941 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 942
9c55e01c
JA
943 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
944}
945
8ae5e030 946static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 947{
6d652330
AV
948 struct file *file = iocb->ki_filp;
949 struct socket *sock = file->private_data;
0345f931 950 struct msghdr msg = {.msg_iter = *to,
951 .msg_iocb = iocb};
8ae5e030 952 ssize_t res;
ce1d4d3e 953
8ae5e030
AV
954 if (file->f_flags & O_NONBLOCK)
955 msg.msg_flags = MSG_DONTWAIT;
956
957 if (iocb->ki_pos != 0)
1da177e4 958 return -ESPIPE;
027445c3 959
66ee59af 960 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
961 return 0;
962
2da62906 963 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
964 *to = msg.msg_iter;
965 return res;
1da177e4
LT
966}
967
8ae5e030 968static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 969{
6d652330
AV
970 struct file *file = iocb->ki_filp;
971 struct socket *sock = file->private_data;
0345f931 972 struct msghdr msg = {.msg_iter = *from,
973 .msg_iocb = iocb};
8ae5e030 974 ssize_t res;
1da177e4 975
8ae5e030 976 if (iocb->ki_pos != 0)
ce1d4d3e 977 return -ESPIPE;
027445c3 978
8ae5e030
AV
979 if (file->f_flags & O_NONBLOCK)
980 msg.msg_flags = MSG_DONTWAIT;
981
6d652330
AV
982 if (sock->type == SOCK_SEQPACKET)
983 msg.msg_flags |= MSG_EOR;
984
d8725c86 985 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
986 *from = msg.msg_iter;
987 return res;
1da177e4
LT
988}
989
1da177e4
LT
990/*
991 * Atomic setting of ioctl hooks to avoid race
992 * with module unload.
993 */
994
4a3e2f71 995static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 996static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 997
881d966b 998void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 999{
4a3e2f71 1000 mutex_lock(&br_ioctl_mutex);
1da177e4 1001 br_ioctl_hook = hook;
4a3e2f71 1002 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1003}
1004EXPORT_SYMBOL(brioctl_set);
1005
4a3e2f71 1006static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1007static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1008
881d966b 1009void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1010{
4a3e2f71 1011 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1012 vlan_ioctl_hook = hook;
4a3e2f71 1013 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1014}
1015EXPORT_SYMBOL(vlan_ioctl_set);
1016
4a3e2f71 1017static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1018static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1019
89bddce5 1020void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1021{
4a3e2f71 1022 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1023 dlci_ioctl_hook = hook;
4a3e2f71 1024 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1025}
1026EXPORT_SYMBOL(dlci_ioctl_set);
1027
6b96018b 1028static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1029 unsigned int cmd, unsigned long arg)
6b96018b
AB
1030{
1031 int err;
1032 void __user *argp = (void __user *)arg;
1033
1034 err = sock->ops->ioctl(sock, cmd, arg);
1035
1036 /*
1037 * If this ioctl is unknown try to hand it down
1038 * to the NIC driver.
1039 */
36fd633e
AV
1040 if (err != -ENOIOCTLCMD)
1041 return err;
6b96018b 1042
36fd633e
AV
1043 if (cmd == SIOCGIFCONF) {
1044 struct ifconf ifc;
1045 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1046 return -EFAULT;
1047 rtnl_lock();
1048 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1049 rtnl_unlock();
1050 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1051 err = -EFAULT;
44c02a2c
AV
1052 } else {
1053 struct ifreq ifr;
1054 bool need_copyout;
63ff03ab 1055 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1056 return -EFAULT;
1057 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1058 if (!err && need_copyout)
63ff03ab 1059 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1060 return -EFAULT;
36fd633e 1061 }
6b96018b
AB
1062 return err;
1063}
1064
1da177e4
LT
1065/*
1066 * With an ioctl, arg may well be a user mode pointer, but we don't know
1067 * what to do with it - that's up to the protocol still.
1068 */
1069
8a3c245c
PT
1070/**
1071 * get_net_ns - increment the refcount of the network namespace
1072 * @ns: common namespace (net)
1073 *
1074 * Returns the net's common namespace.
1075 */
1076
d8d211a2 1077struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1078{
1079 return &get_net(container_of(ns, struct net, ns))->ns;
1080}
d8d211a2 1081EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1082
1da177e4
LT
1083static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1084{
1085 struct socket *sock;
881d966b 1086 struct sock *sk;
1da177e4
LT
1087 void __user *argp = (void __user *)arg;
1088 int pid, err;
881d966b 1089 struct net *net;
1da177e4 1090
b69aee04 1091 sock = file->private_data;
881d966b 1092 sk = sock->sk;
3b1e0a65 1093 net = sock_net(sk);
44c02a2c
AV
1094 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1095 struct ifreq ifr;
1096 bool need_copyout;
1097 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1098 return -EFAULT;
1099 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1100 if (!err && need_copyout)
1101 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1102 return -EFAULT;
1da177e4 1103 } else
3d23e349 1104#ifdef CONFIG_WEXT_CORE
1da177e4 1105 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1106 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1107 } else
3d23e349 1108#endif
89bddce5 1109 switch (cmd) {
1da177e4
LT
1110 case FIOSETOWN:
1111 case SIOCSPGRP:
1112 err = -EFAULT;
1113 if (get_user(pid, (int __user *)argp))
1114 break;
393cc3f5 1115 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1116 break;
1117 case FIOGETOWN:
1118 case SIOCGPGRP:
609d7fa9 1119 err = put_user(f_getown(sock->file),
89bddce5 1120 (int __user *)argp);
1da177e4
LT
1121 break;
1122 case SIOCGIFBR:
1123 case SIOCSIFBR:
1124 case SIOCBRADDBR:
1125 case SIOCBRDELBR:
1126 err = -ENOPKG;
1127 if (!br_ioctl_hook)
1128 request_module("bridge");
1129
4a3e2f71 1130 mutex_lock(&br_ioctl_mutex);
89bddce5 1131 if (br_ioctl_hook)
881d966b 1132 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1133 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1134 break;
1135 case SIOCGIFVLAN:
1136 case SIOCSIFVLAN:
1137 err = -ENOPKG;
1138 if (!vlan_ioctl_hook)
1139 request_module("8021q");
1140
4a3e2f71 1141 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1142 if (vlan_ioctl_hook)
881d966b 1143 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1144 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1145 break;
1da177e4
LT
1146 case SIOCADDDLCI:
1147 case SIOCDELDLCI:
1148 err = -ENOPKG;
1149 if (!dlci_ioctl_hook)
1150 request_module("dlci");
1151
7512cbf6
PE
1152 mutex_lock(&dlci_ioctl_mutex);
1153 if (dlci_ioctl_hook)
1da177e4 1154 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1155 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1156 break;
c62cce2c
AV
1157 case SIOCGSKNS:
1158 err = -EPERM;
1159 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1160 break;
1161
1162 err = open_related_ns(&net->ns, get_net_ns);
1163 break;
0768e170
AB
1164 case SIOCGSTAMP_OLD:
1165 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1166 if (!sock->ops->gettstamp) {
1167 err = -ENOIOCTLCMD;
1168 break;
1169 }
1170 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1171 cmd == SIOCGSTAMP_OLD,
1172 !IS_ENABLED(CONFIG_64BIT));
60747828 1173 break;
0768e170
AB
1174 case SIOCGSTAMP_NEW:
1175 case SIOCGSTAMPNS_NEW:
1176 if (!sock->ops->gettstamp) {
1177 err = -ENOIOCTLCMD;
1178 break;
1179 }
1180 err = sock->ops->gettstamp(sock, argp,
1181 cmd == SIOCGSTAMP_NEW,
1182 false);
c7cbdbf2 1183 break;
1da177e4 1184 default:
63ff03ab 1185 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1186 break;
89bddce5 1187 }
1da177e4
LT
1188 return err;
1189}
1190
8a3c245c
PT
1191/**
1192 * sock_create_lite - creates a socket
1193 * @family: protocol family (AF_INET, ...)
1194 * @type: communication type (SOCK_STREAM, ...)
1195 * @protocol: protocol (0, ...)
1196 * @res: new socket
1197 *
1198 * Creates a new socket and assigns it to @res, passing through LSM.
1199 * The new socket initialization is not complete, see kernel_accept().
1200 * Returns 0 or an error. On failure @res is set to %NULL.
1201 * This function internally uses GFP_KERNEL.
1202 */
1203
1da177e4
LT
1204int sock_create_lite(int family, int type, int protocol, struct socket **res)
1205{
1206 int err;
1207 struct socket *sock = NULL;
89bddce5 1208
1da177e4
LT
1209 err = security_socket_create(family, type, protocol, 1);
1210 if (err)
1211 goto out;
1212
1213 sock = sock_alloc();
1214 if (!sock) {
1215 err = -ENOMEM;
1216 goto out;
1217 }
1218
1da177e4 1219 sock->type = type;
7420ed23
VY
1220 err = security_socket_post_create(sock, family, type, protocol, 1);
1221 if (err)
1222 goto out_release;
1223
1da177e4
LT
1224out:
1225 *res = sock;
1226 return err;
7420ed23
VY
1227out_release:
1228 sock_release(sock);
1229 sock = NULL;
1230 goto out;
1da177e4 1231}
c6d409cf 1232EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1233
1234/* No kernel lock held - perfect */
ade994f4 1235static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1236{
3cafb376 1237 struct socket *sock = file->private_data;
a331de3b 1238 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1239
e88958e6
CH
1240 if (!sock->ops->poll)
1241 return 0;
f641f13b 1242
a331de3b
CH
1243 if (sk_can_busy_loop(sock->sk)) {
1244 /* poll once if requested by the syscall */
1245 if (events & POLL_BUSY_LOOP)
1246 sk_busy_loop(sock->sk, 1);
1247
1248 /* if this socket can poll_ll, tell the system call */
1249 flag = POLL_BUSY_LOOP;
1250 }
1251
1252 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1253}
1254
89bddce5 1255static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1256{
b69aee04 1257 struct socket *sock = file->private_data;
1da177e4
LT
1258
1259 return sock->ops->mmap(file, sock, vma);
1260}
1261
/*
 * release file operation for sockets: releases the socket that lives in
 * @inode. Always returns 0; @filp is not used.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1267
1268/*
1269 * Update the socket async list
1270 *
1271 * Fasync_list locking strategy.
1272 *
1273 * 1. fasync_list is modified only under process context socket lock
1274 * i.e. under semaphore.
1275 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1276 * or under socket lock
1da177e4
LT
1277 */
1278
1279static int sock_fasync(int fd, struct file *filp, int on)
1280{
989a2979
ED
1281 struct socket *sock = filp->private_data;
1282 struct sock *sk = sock->sk;
333f7909 1283 struct socket_wq *wq = &sock->wq;
1da177e4 1284
989a2979 1285 if (sk == NULL)
1da177e4 1286 return -EINVAL;
1da177e4
LT
1287
1288 lock_sock(sk);
eaefd110 1289 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1290
eaefd110 1291 if (!wq->fasync_list)
989a2979
ED
1292 sock_reset_flag(sk, SOCK_FASYNC);
1293 else
bcdce719 1294 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1295
989a2979 1296 release_sock(sk);
1da177e4
LT
1297 return 0;
1298}
1299
ceb5d58b 1300/* This function may be called only under rcu_lock */
1da177e4 1301
ceb5d58b 1302int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1303{
ceb5d58b 1304 if (!wq || !wq->fasync_list)
1da177e4 1305 return -1;
ceb5d58b 1306
89bddce5 1307 switch (how) {
8d8ad9d7 1308 case SOCK_WAKE_WAITD:
ceb5d58b 1309 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1310 break;
1311 goto call_kill;
8d8ad9d7 1312 case SOCK_WAKE_SPACE:
ceb5d58b 1313 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1314 break;
1315 /* fall through */
8d8ad9d7 1316 case SOCK_WAKE_IO:
89bddce5 1317call_kill:
43815482 1318 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1319 break;
8d8ad9d7 1320 case SOCK_WAKE_URG:
43815482 1321 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1322 }
ceb5d58b 1323
1da177e4
LT
1324 return 0;
1325}
c6d409cf 1326EXPORT_SYMBOL(sock_wake_async);
1da177e4 1327
8a3c245c
PT
1328/**
1329 * __sock_create - creates a socket
1330 * @net: net namespace
1331 * @family: protocol family (AF_INET, ...)
1332 * @type: communication type (SOCK_STREAM, ...)
1333 * @protocol: protocol (0, ...)
1334 * @res: new socket
1335 * @kern: boolean for kernel space sockets
1336 *
1337 * Creates a new socket and assigns it to @res, passing through LSM.
1338 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1339 * be set to true if the socket resides in kernel space.
1340 * This function internally uses GFP_KERNEL.
1341 */
1342
721db93a 1343int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1344 struct socket **res, int kern)
1da177e4
LT
1345{
1346 int err;
1347 struct socket *sock;
55737fda 1348 const struct net_proto_family *pf;
1da177e4
LT
1349
1350 /*
89bddce5 1351 * Check protocol is in range
1da177e4
LT
1352 */
1353 if (family < 0 || family >= NPROTO)
1354 return -EAFNOSUPPORT;
1355 if (type < 0 || type >= SOCK_MAX)
1356 return -EINVAL;
1357
1358 /* Compatibility.
1359
1360 This uglymoron is moved from INET layer to here to avoid
1361 deadlock in module load.
1362 */
1363 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1364 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1365 current->comm);
1da177e4
LT
1366 family = PF_PACKET;
1367 }
1368
1369 err = security_socket_create(family, type, protocol, kern);
1370 if (err)
1371 return err;
89bddce5 1372
55737fda
SH
1373 /*
1374 * Allocate the socket and allow the family to set things up. if
1375 * the protocol is 0, the family is instructed to select an appropriate
1376 * default.
1377 */
1378 sock = sock_alloc();
1379 if (!sock) {
e87cc472 1380 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1381 return -ENFILE; /* Not exactly a match, but its the
1382 closest posix thing */
1383 }
1384
1385 sock->type = type;
1386
95a5afca 1387#ifdef CONFIG_MODULES
89bddce5
SH
1388 /* Attempt to load a protocol module if the find failed.
1389 *
1390 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1391 * requested real, full-featured networking support upon configuration.
1392 * Otherwise module support will break!
1393 */
190683a9 1394 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1395 request_module("net-pf-%d", family);
1da177e4
LT
1396#endif
1397
55737fda
SH
1398 rcu_read_lock();
1399 pf = rcu_dereference(net_families[family]);
1400 err = -EAFNOSUPPORT;
1401 if (!pf)
1402 goto out_release;
1da177e4
LT
1403
1404 /*
1405 * We will call the ->create function, that possibly is in a loadable
1406 * module, so we have to bump that loadable module refcnt first.
1407 */
55737fda 1408 if (!try_module_get(pf->owner))
1da177e4
LT
1409 goto out_release;
1410
55737fda
SH
1411 /* Now protected by module ref count */
1412 rcu_read_unlock();
1413
3f378b68 1414 err = pf->create(net, sock, protocol, kern);
55737fda 1415 if (err < 0)
1da177e4 1416 goto out_module_put;
a79af59e 1417
1da177e4
LT
1418 /*
1419 * Now to bump the refcnt of the [loadable] module that owns this
1420 * socket at sock_release time we decrement its refcnt.
1421 */
55737fda
SH
1422 if (!try_module_get(sock->ops->owner))
1423 goto out_module_busy;
1424
1da177e4
LT
1425 /*
1426 * Now that we're done with the ->create function, the [loadable]
1427 * module can have its refcnt decremented
1428 */
55737fda 1429 module_put(pf->owner);
7420ed23
VY
1430 err = security_socket_post_create(sock, family, type, protocol, kern);
1431 if (err)
3b185525 1432 goto out_sock_release;
55737fda 1433 *res = sock;
1da177e4 1434
55737fda
SH
1435 return 0;
1436
1437out_module_busy:
1438 err = -EAFNOSUPPORT;
1da177e4 1439out_module_put:
55737fda
SH
1440 sock->ops = NULL;
1441 module_put(pf->owner);
1442out_sock_release:
1da177e4 1443 sock_release(sock);
55737fda
SH
1444 return err;
1445
1446out_release:
1447 rcu_read_unlock();
1448 goto out_sock_release;
1da177e4 1449}
721db93a 1450EXPORT_SYMBOL(__sock_create);
1da177e4 1451
8a3c245c
PT
1452/**
1453 * sock_create - creates a socket
1454 * @family: protocol family (AF_INET, ...)
1455 * @type: communication type (SOCK_STREAM, ...)
1456 * @protocol: protocol (0, ...)
1457 * @res: new socket
1458 *
1459 * A wrapper around __sock_create().
1460 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1461 */
1462
1da177e4
LT
1463int sock_create(int family, int type, int protocol, struct socket **res)
1464{
1b8d7ae4 1465 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1466}
c6d409cf 1467EXPORT_SYMBOL(sock_create);
1da177e4 1468
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	Calls __sock_create() with @kern set to 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1486
9d6a15c3 1487int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1488{
1489 int retval;
1490 struct socket *sock;
a677a039
UD
1491 int flags;
1492
e38b36f3
UD
1493 /* Check the SOCK_* constants for consistency. */
1494 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1495 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1496 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1497 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1498
a677a039 1499 flags = type & ~SOCK_TYPE_MASK;
77d27200 1500 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1501 return -EINVAL;
1502 type &= SOCK_TYPE_MASK;
1da177e4 1503
aaca0bdc
UD
1504 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1505 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1506
1da177e4
LT
1507 retval = sock_create(family, type, protocol, &sock);
1508 if (retval < 0)
8e1611e2 1509 return retval;
1da177e4 1510
8e1611e2 1511 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1512}
1513
9d6a15c3
DB
1514SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1515{
1516 return __sys_socket(family, type, protocol);
1517}
1518
1da177e4
LT
1519/*
1520 * Create a pair of connected sockets.
1521 */
1522
6debc8d8 1523int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1524{
1525 struct socket *sock1, *sock2;
1526 int fd1, fd2, err;
db349509 1527 struct file *newfile1, *newfile2;
a677a039
UD
1528 int flags;
1529
1530 flags = type & ~SOCK_TYPE_MASK;
77d27200 1531 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1532 return -EINVAL;
1533 type &= SOCK_TYPE_MASK;
1da177e4 1534
aaca0bdc
UD
1535 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1536 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1537
016a266b
AV
1538 /*
1539 * reserve descriptors and make sure we won't fail
1540 * to return them to userland.
1541 */
1542 fd1 = get_unused_fd_flags(flags);
1543 if (unlikely(fd1 < 0))
1544 return fd1;
1545
1546 fd2 = get_unused_fd_flags(flags);
1547 if (unlikely(fd2 < 0)) {
1548 put_unused_fd(fd1);
1549 return fd2;
1550 }
1551
1552 err = put_user(fd1, &usockvec[0]);
1553 if (err)
1554 goto out;
1555
1556 err = put_user(fd2, &usockvec[1]);
1557 if (err)
1558 goto out;
1559
1da177e4
LT
1560 /*
1561 * Obtain the first socket and check if the underlying protocol
1562 * supports the socketpair call.
1563 */
1564
1565 err = sock_create(family, type, protocol, &sock1);
016a266b 1566 if (unlikely(err < 0))
1da177e4
LT
1567 goto out;
1568
1569 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1570 if (unlikely(err < 0)) {
1571 sock_release(sock1);
1572 goto out;
bf3c23d1 1573 }
d73aa286 1574
d47cd945
DH
1575 err = security_socket_socketpair(sock1, sock2);
1576 if (unlikely(err)) {
1577 sock_release(sock2);
1578 sock_release(sock1);
1579 goto out;
1580 }
1581
016a266b
AV
1582 err = sock1->ops->socketpair(sock1, sock2);
1583 if (unlikely(err < 0)) {
1584 sock_release(sock2);
1585 sock_release(sock1);
1586 goto out;
28407630
AV
1587 }
1588
aab174f0 1589 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1590 if (IS_ERR(newfile1)) {
28407630 1591 err = PTR_ERR(newfile1);
016a266b
AV
1592 sock_release(sock2);
1593 goto out;
28407630
AV
1594 }
1595
aab174f0 1596 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1597 if (IS_ERR(newfile2)) {
1598 err = PTR_ERR(newfile2);
016a266b
AV
1599 fput(newfile1);
1600 goto out;
db349509
AV
1601 }
1602
157cf649 1603 audit_fd_pair(fd1, fd2);
d73aa286 1604
db349509
AV
1605 fd_install(fd1, newfile1);
1606 fd_install(fd2, newfile2);
d73aa286 1607 return 0;
1da177e4 1608
016a266b 1609out:
d73aa286 1610 put_unused_fd(fd2);
d73aa286 1611 put_unused_fd(fd1);
1da177e4
LT
1612 return err;
1613}
1614
6debc8d8
DB
1615SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1616 int __user *, usockvec)
1617{
1618 return __sys_socketpair(family, type, protocol, usockvec);
1619}
1620
1da177e4
LT
1621/*
1622 * Bind a name to a socket. Nothing much to do here since it's
1623 * the protocol's responsibility to handle the local address.
1624 *
1625 * We move the socket address to kernel space before we call
1626 * the protocol layer (having also checked the address is ok).
1627 */
1628
a87d35d8 1629int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1630{
1631 struct socket *sock;
230b1839 1632 struct sockaddr_storage address;
6cb153ca 1633 int err, fput_needed;
1da177e4 1634
89bddce5 1635 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1636 if (sock) {
43db362d 1637 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1638 if (!err) {
89bddce5 1639 err = security_socket_bind(sock,
230b1839 1640 (struct sockaddr *)&address,
89bddce5 1641 addrlen);
6cb153ca
BL
1642 if (!err)
1643 err = sock->ops->bind(sock,
89bddce5 1644 (struct sockaddr *)
230b1839 1645 &address, addrlen);
1da177e4 1646 }
6cb153ca 1647 fput_light(sock->file, fput_needed);
89bddce5 1648 }
1da177e4
LT
1649 return err;
1650}
1651
a87d35d8
DB
1652SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1653{
1654 return __sys_bind(fd, umyaddr, addrlen);
1655}
1656
1da177e4
LT
1657/*
1658 * Perform a listen. Basically, we allow the protocol to do anything
1659 * necessary for a listen, and if that works, we mark the socket as
1660 * ready for listening.
1661 */
1662
25e290ee 1663int __sys_listen(int fd, int backlog)
1da177e4
LT
1664{
1665 struct socket *sock;
6cb153ca 1666 int err, fput_needed;
b8e1f9b5 1667 int somaxconn;
89bddce5
SH
1668
1669 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1670 if (sock) {
8efa6e93 1671 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1672 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1673 backlog = somaxconn;
1da177e4
LT
1674
1675 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1676 if (!err)
1677 err = sock->ops->listen(sock, backlog);
1da177e4 1678
6cb153ca 1679 fput_light(sock->file, fput_needed);
1da177e4
LT
1680 }
1681 return err;
1682}
1683
25e290ee
DB
1684SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1685{
1686 return __sys_listen(fd, backlog);
1687}
1688
1da177e4
LT
1689/*
1690 * For accept, we attempt to create a new socket, set up the link
1691 * with the client, wake up the client, then return the new
1692 * connected fd. We collect the address of the connector in kernel
1693 * space and move it to user at the very end. This is unclean because
1694 * we open the socket then return an error.
1695 *
1696 * 1003.1g adds the ability to recvmsg() to query connection pending
1697 * status to recvmsg. We need to add that support in a way thats
b903036a 1698 * clean when we restructure accept also.
1da177e4
LT
1699 */
1700
4541e805
DB
1701int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1702 int __user *upeer_addrlen, int flags)
1da177e4
LT
1703{
1704 struct socket *sock, *newsock;
39d8c1b6 1705 struct file *newfile;
6cb153ca 1706 int err, len, newfd, fput_needed;
230b1839 1707 struct sockaddr_storage address;
1da177e4 1708
77d27200 1709 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1710 return -EINVAL;
1711
1712 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1713 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1714
6cb153ca 1715 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1716 if (!sock)
1717 goto out;
1718
1719 err = -ENFILE;
c6d409cf
ED
1720 newsock = sock_alloc();
1721 if (!newsock)
1da177e4
LT
1722 goto out_put;
1723
1724 newsock->type = sock->type;
1725 newsock->ops = sock->ops;
1726
1da177e4
LT
1727 /*
1728 * We don't need try_module_get here, as the listening socket (sock)
1729 * has the protocol module (sock->ops->owner) held.
1730 */
1731 __module_get(newsock->ops->owner);
1732
28407630 1733 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1734 if (unlikely(newfd < 0)) {
1735 err = newfd;
9a1875e6
DM
1736 sock_release(newsock);
1737 goto out_put;
39d8c1b6 1738 }
aab174f0 1739 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1740 if (IS_ERR(newfile)) {
28407630
AV
1741 err = PTR_ERR(newfile);
1742 put_unused_fd(newfd);
28407630
AV
1743 goto out_put;
1744 }
39d8c1b6 1745
a79af59e
FF
1746 err = security_socket_accept(sock, newsock);
1747 if (err)
39d8c1b6 1748 goto out_fd;
a79af59e 1749
cdfbabfb 1750 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1751 if (err < 0)
39d8c1b6 1752 goto out_fd;
1da177e4
LT
1753
1754 if (upeer_sockaddr) {
9b2c45d4
DV
1755 len = newsock->ops->getname(newsock,
1756 (struct sockaddr *)&address, 2);
1757 if (len < 0) {
1da177e4 1758 err = -ECONNABORTED;
39d8c1b6 1759 goto out_fd;
1da177e4 1760 }
43db362d 1761 err = move_addr_to_user(&address,
230b1839 1762 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1763 if (err < 0)
39d8c1b6 1764 goto out_fd;
1da177e4
LT
1765 }
1766
1767 /* File flags are not inherited via accept() unlike another OSes. */
1768
39d8c1b6
DM
1769 fd_install(newfd, newfile);
1770 err = newfd;
1da177e4 1771
1da177e4 1772out_put:
6cb153ca 1773 fput_light(sock->file, fput_needed);
1da177e4
LT
1774out:
1775 return err;
39d8c1b6 1776out_fd:
9606a216 1777 fput(newfile);
39d8c1b6 1778 put_unused_fd(newfd);
1da177e4
LT
1779 goto out_put;
1780}
1781
4541e805
DB
1782SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1783 int __user *, upeer_addrlen, int, flags)
1784{
1785 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1786}
1787
20f37034
HC
1788SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1789 int __user *, upeer_addrlen)
aaca0bdc 1790{
4541e805 1791 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1792}
1793
1da177e4
LT
1794/*
1795 * Attempt to connect to a socket with the server address. The address
1796 * is in user space so we verify it is OK and move it to kernel space.
1797 *
1798 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1799 * break bindings
1800 *
1801 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1802 * other SEQPACKET protocols that take time to connect() as it doesn't
1803 * include the -EINPROGRESS status for such sockets.
1804 */
1805
1387c2c2 1806int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1807{
1808 struct socket *sock;
230b1839 1809 struct sockaddr_storage address;
6cb153ca 1810 int err, fput_needed;
1da177e4 1811
6cb153ca 1812 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1813 if (!sock)
1814 goto out;
43db362d 1815 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1816 if (err < 0)
1817 goto out_put;
1818
89bddce5 1819 err =
230b1839 1820 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1821 if (err)
1822 goto out_put;
1823
230b1839 1824 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1825 sock->file->f_flags);
1826out_put:
6cb153ca 1827 fput_light(sock->file, fput_needed);
1da177e4
LT
1828out:
1829 return err;
1830}
1831
1387c2c2
DB
1832SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1833 int, addrlen)
1834{
1835 return __sys_connect(fd, uservaddr, addrlen);
1836}
1837
1da177e4
LT
1838/*
1839 * Get the local address ('name') of a socket object. Move the obtained
1840 * name to user space.
1841 */
1842
8882a107
DB
1843int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1844 int __user *usockaddr_len)
1da177e4
LT
1845{
1846 struct socket *sock;
230b1839 1847 struct sockaddr_storage address;
9b2c45d4 1848 int err, fput_needed;
89bddce5 1849
6cb153ca 1850 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1851 if (!sock)
1852 goto out;
1853
1854 err = security_socket_getsockname(sock);
1855 if (err)
1856 goto out_put;
1857
9b2c45d4
DV
1858 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1859 if (err < 0)
1da177e4 1860 goto out_put;
9b2c45d4
DV
1861 /* "err" is actually length in this case */
1862 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1863
1864out_put:
6cb153ca 1865 fput_light(sock->file, fput_needed);
1da177e4
LT
1866out:
1867 return err;
1868}
1869
8882a107
DB
1870SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1871 int __user *, usockaddr_len)
1872{
1873 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1874}
1875
1da177e4
LT
1876/*
1877 * Get the remote address ('name') of a socket object. Move the obtained
1878 * name to user space.
1879 */
1880
b21c8f83
DB
1881int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1882 int __user *usockaddr_len)
1da177e4
LT
1883{
1884 struct socket *sock;
230b1839 1885 struct sockaddr_storage address;
9b2c45d4 1886 int err, fput_needed;
1da177e4 1887
89bddce5
SH
1888 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1889 if (sock != NULL) {
1da177e4
LT
1890 err = security_socket_getpeername(sock);
1891 if (err) {
6cb153ca 1892 fput_light(sock->file, fput_needed);
1da177e4
LT
1893 return err;
1894 }
1895
9b2c45d4
DV
1896 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1897 if (err >= 0)
1898 /* "err" is actually length in this case */
1899 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1900 usockaddr_len);
6cb153ca 1901 fput_light(sock->file, fput_needed);
1da177e4
LT
1902 }
1903 return err;
1904}
1905
b21c8f83
DB
1906SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1907 int __user *, usockaddr_len)
1908{
1909 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1910}
1911
1da177e4
LT
1912/*
1913 * Send a datagram to a given address. We move the address into kernel
1914 * space and check the user space data area is readable before invoking
1915 * the protocol.
1916 */
211b634b
DB
1917int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1918 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1919{
1920 struct socket *sock;
230b1839 1921 struct sockaddr_storage address;
1da177e4
LT
1922 int err;
1923 struct msghdr msg;
1924 struct iovec iov;
6cb153ca 1925 int fput_needed;
6cb153ca 1926
602bd0e9
AV
1927 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1928 if (unlikely(err))
1929 return err;
de0fa95c
PE
1930 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1931 if (!sock)
4387ff75 1932 goto out;
6cb153ca 1933
89bddce5 1934 msg.msg_name = NULL;
89bddce5
SH
1935 msg.msg_control = NULL;
1936 msg.msg_controllen = 0;
1937 msg.msg_namelen = 0;
6cb153ca 1938 if (addr) {
43db362d 1939 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1940 if (err < 0)
1941 goto out_put;
230b1839 1942 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1943 msg.msg_namelen = addr_len;
1da177e4
LT
1944 }
1945 if (sock->file->f_flags & O_NONBLOCK)
1946 flags |= MSG_DONTWAIT;
1947 msg.msg_flags = flags;
d8725c86 1948 err = sock_sendmsg(sock, &msg);
1da177e4 1949
89bddce5 1950out_put:
de0fa95c 1951 fput_light(sock->file, fput_needed);
4387ff75 1952out:
1da177e4
LT
1953 return err;
1954}
1955
211b634b
DB
1956SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1957 unsigned int, flags, struct sockaddr __user *, addr,
1958 int, addr_len)
1959{
1960 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1961}
1962
1da177e4 1963/*
89bddce5 1964 * Send a datagram down a socket.
1da177e4
LT
1965 */
1966
3e0fa65f 1967SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1968 unsigned int, flags)
1da177e4 1969{
211b634b 1970 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1971}
1972
1973/*
89bddce5 1974 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1975 * sender. We verify the buffers are writable and if needed move the
1976 * sender address from kernel to user space.
1977 */
7a09e1eb
DB
1978int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1979 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1980{
1981 struct socket *sock;
1982 struct iovec iov;
1983 struct msghdr msg;
230b1839 1984 struct sockaddr_storage address;
89bddce5 1985 int err, err2;
6cb153ca
BL
1986 int fput_needed;
1987
602bd0e9
AV
1988 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1989 if (unlikely(err))
1990 return err;
de0fa95c 1991 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1992 if (!sock)
de0fa95c 1993 goto out;
1da177e4 1994
89bddce5
SH
1995 msg.msg_control = NULL;
1996 msg.msg_controllen = 0;
f3d33426
HFS
1997 /* Save some cycles and don't copy the address if not needed */
1998 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1999 /* We assume all kernel code knows the size of sockaddr_storage */
2000 msg.msg_namelen = 0;
130ed5d1 2001 msg.msg_iocb = NULL;
9f138fa6 2002 msg.msg_flags = 0;
1da177e4
LT
2003 if (sock->file->f_flags & O_NONBLOCK)
2004 flags |= MSG_DONTWAIT;
2da62906 2005 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2006
89bddce5 2007 if (err >= 0 && addr != NULL) {
43db362d 2008 err2 = move_addr_to_user(&address,
230b1839 2009 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2010 if (err2 < 0)
2011 err = err2;
1da177e4 2012 }
de0fa95c
PE
2013
2014 fput_light(sock->file, fput_needed);
4387ff75 2015out:
1da177e4
LT
2016 return err;
2017}
2018
7a09e1eb
DB
2019SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2020 unsigned int, flags, struct sockaddr __user *, addr,
2021 int __user *, addr_len)
2022{
2023 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2024}
2025
1da177e4 2026/*
89bddce5 2027 * Receive a datagram from a socket.
1da177e4
LT
2028 */
2029
b7c0ddf5
JG
2030SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2031 unsigned int, flags)
1da177e4 2032{
7a09e1eb 2033 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2034}
2035
2036/*
2037 * Set a socket option. Because we don't know the option lengths we have
2038 * to pass the user mode parameter for the protocols to sort out.
2039 */
2040
cc36dca0
DB
2041static int __sys_setsockopt(int fd, int level, int optname,
2042 char __user *optval, int optlen)
1da177e4 2043{
0d01da6a
SF
2044 mm_segment_t oldfs = get_fs();
2045 char *kernel_optval = NULL;
6cb153ca 2046 int err, fput_needed;
1da177e4
LT
2047 struct socket *sock;
2048
2049 if (optlen < 0)
2050 return -EINVAL;
89bddce5
SH
2051
2052 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2053 if (sock != NULL) {
2054 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2055 if (err)
2056 goto out_put;
1da177e4 2057
0d01da6a
SF
2058 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2059 &optname, optval, &optlen,
2060 &kernel_optval);
2061
2062 if (err < 0) {
2063 goto out_put;
2064 } else if (err > 0) {
2065 err = 0;
2066 goto out_put;
2067 }
2068
2069 if (kernel_optval) {
2070 set_fs(KERNEL_DS);
2071 optval = (char __user __force *)kernel_optval;
2072 }
2073
1da177e4 2074 if (level == SOL_SOCKET)
89bddce5
SH
2075 err =
2076 sock_setsockopt(sock, level, optname, optval,
2077 optlen);
1da177e4 2078 else
89bddce5
SH
2079 err =
2080 sock->ops->setsockopt(sock, level, optname, optval,
2081 optlen);
0d01da6a
SF
2082
2083 if (kernel_optval) {
2084 set_fs(oldfs);
2085 kfree(kernel_optval);
2086 }
6cb153ca
BL
2087out_put:
2088 fput_light(sock->file, fput_needed);
1da177e4
LT
2089 }
2090 return err;
2091}
2092
cc36dca0
DB
2093SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2094 char __user *, optval, int, optlen)
2095{
2096 return __sys_setsockopt(fd, level, optname, optval, optlen);
2097}
2098
1da177e4
LT
2099/*
2100 * Get a socket option. Because we don't know the option lengths we have
2101 * to pass a user mode parameter for the protocols to sort out.
2102 */
2103
13a2d70e
DB
2104static int __sys_getsockopt(int fd, int level, int optname,
2105 char __user *optval, int __user *optlen)
1da177e4 2106{
6cb153ca 2107 int err, fput_needed;
1da177e4 2108 struct socket *sock;
0d01da6a 2109 int max_optlen;
1da177e4 2110
89bddce5
SH
2111 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2112 if (sock != NULL) {
6cb153ca
BL
2113 err = security_socket_getsockopt(sock, level, optname);
2114 if (err)
2115 goto out_put;
1da177e4 2116
0d01da6a
SF
2117 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2118
1da177e4 2119 if (level == SOL_SOCKET)
89bddce5
SH
2120 err =
2121 sock_getsockopt(sock, level, optname, optval,
2122 optlen);
1da177e4 2123 else
89bddce5
SH
2124 err =
2125 sock->ops->getsockopt(sock, level, optname, optval,
2126 optlen);
0d01da6a
SF
2127
2128 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2129 optval, optlen,
2130 max_optlen, err);
6cb153ca
BL
2131out_put:
2132 fput_light(sock->file, fput_needed);
1da177e4
LT
2133 }
2134 return err;
2135}
2136
13a2d70e
DB
2137SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2138 char __user *, optval, int __user *, optlen)
2139{
2140 return __sys_getsockopt(fd, level, optname, optval, optlen);
2141}
2142
1da177e4
LT
2143/*
2144 * Shutdown a socket.
2145 */
2146
005a1aea 2147int __sys_shutdown(int fd, int how)
1da177e4 2148{
6cb153ca 2149 int err, fput_needed;
1da177e4
LT
2150 struct socket *sock;
2151
89bddce5
SH
2152 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2153 if (sock != NULL) {
1da177e4 2154 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2155 if (!err)
2156 err = sock->ops->shutdown(sock, how);
2157 fput_light(sock->file, fput_needed);
1da177e4
LT
2158 }
2159 return err;
2160}
2161
005a1aea
DB
2162SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2163{
2164 return __sys_shutdown(fd, how);
2165}
2166
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG(msg, member) yields &msg_compat->member when MSG_CMSG_COMPAT
 * is set and &msg->member otherwise.  It relies on the expansion site
 * having a variable named "flags" and, next to "msg", a variable named
 * "msg_compat" (the name is built by token pasting).
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2173
c71d8ebe
TH
2174struct used_address {
2175 struct sockaddr_storage name;
2176 unsigned int name_len;
2177};
2178
da184284
AV
2179static int copy_msghdr_from_user(struct msghdr *kmsg,
2180 struct user_msghdr __user *umsg,
2181 struct sockaddr __user **save_addr,
2182 struct iovec **iov)
1661bf36 2183{
ffb07550 2184 struct user_msghdr msg;
08adb7da
AV
2185 ssize_t err;
2186
ffb07550 2187 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2188 return -EFAULT;
dbb490b9 2189
864d9664 2190 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2191 kmsg->msg_controllen = msg.msg_controllen;
2192 kmsg->msg_flags = msg.msg_flags;
2193
2194 kmsg->msg_namelen = msg.msg_namelen;
2195 if (!msg.msg_name)
6a2a2b3a
AS
2196 kmsg->msg_namelen = 0;
2197
dbb490b9
ML
2198 if (kmsg->msg_namelen < 0)
2199 return -EINVAL;
2200
1661bf36 2201 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2202 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2203
2204 if (save_addr)
ffb07550 2205 *save_addr = msg.msg_name;
08adb7da 2206
ffb07550 2207 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2208 if (!save_addr) {
864d9664
PA
2209 err = move_addr_to_kernel(msg.msg_name,
2210 kmsg->msg_namelen,
08adb7da
AV
2211 kmsg->msg_name);
2212 if (err < 0)
2213 return err;
2214 }
2215 } else {
2216 kmsg->msg_name = NULL;
2217 kmsg->msg_namelen = 0;
2218 }
2219
ffb07550 2220 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2221 return -EMSGSIZE;
2222
0345f931 2223 kmsg->msg_iocb = NULL;
2224
87e5e6da 2225 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2226 msg.msg_iov, msg.msg_iovlen,
da184284 2227 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2228 return err < 0 ? err : 0;
1661bf36
DC
2229}
2230
666547ff 2231static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2232 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2233 struct used_address *used_address,
2234 unsigned int allowed_msghdr_flags)
1da177e4 2235{
89bddce5
SH
2236 struct compat_msghdr __user *msg_compat =
2237 (struct compat_msghdr __user *)msg;
230b1839 2238 struct sockaddr_storage address;
1da177e4 2239 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2240 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2241 __aligned(sizeof(__kernel_size_t));
89bddce5 2242 /* 20 is size of ipv6_pktinfo */
1da177e4 2243 unsigned char *ctl_buf = ctl;
d8725c86 2244 int ctl_len;
08adb7da 2245 ssize_t err;
89bddce5 2246
08adb7da 2247 msg_sys->msg_name = &address;
1da177e4 2248
08449320 2249 if (MSG_CMSG_COMPAT & flags)
08adb7da 2250 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2251 else
08adb7da 2252 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2253 if (err < 0)
da184284 2254 return err;
1da177e4
LT
2255
2256 err = -ENOBUFS;
2257
228e548e 2258 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2259 goto out_freeiov;
28a94d8f 2260 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2261 ctl_len = msg_sys->msg_controllen;
1da177e4 2262 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2263 err =
228e548e 2264 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2265 sizeof(ctl));
1da177e4
LT
2266 if (err)
2267 goto out_freeiov;
228e548e
AB
2268 ctl_buf = msg_sys->msg_control;
2269 ctl_len = msg_sys->msg_controllen;
1da177e4 2270 } else if (ctl_len) {
ac4340fc
DM
2271 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2272 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2273 if (ctl_len > sizeof(ctl)) {
1da177e4 2274 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2275 if (ctl_buf == NULL)
1da177e4
LT
2276 goto out_freeiov;
2277 }
2278 err = -EFAULT;
2279 /*
228e548e 2280 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2281 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2282 * checking falls down on this.
2283 */
fb8621bb 2284 if (copy_from_user(ctl_buf,
228e548e 2285 (void __user __force *)msg_sys->msg_control,
89bddce5 2286 ctl_len))
1da177e4 2287 goto out_freectl;
228e548e 2288 msg_sys->msg_control = ctl_buf;
1da177e4 2289 }
228e548e 2290 msg_sys->msg_flags = flags;
1da177e4
LT
2291
2292 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2293 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2294 /*
2295 * If this is sendmmsg() and current destination address is same as
2296 * previously succeeded address, omit asking LSM's decision.
2297 * used_address->name_len is initialized to UINT_MAX so that the first
2298 * destination address never matches.
2299 */
bc909d9d
MD
2300 if (used_address && msg_sys->msg_name &&
2301 used_address->name_len == msg_sys->msg_namelen &&
2302 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2303 used_address->name_len)) {
d8725c86 2304 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2305 goto out_freectl;
2306 }
d8725c86 2307 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2308 /*
2309 * If this is sendmmsg() and sending to current destination address was
2310 * successful, remember it.
2311 */
2312 if (used_address && err >= 0) {
2313 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2314 if (msg_sys->msg_name)
2315 memcpy(&used_address->name, msg_sys->msg_name,
2316 used_address->name_len);
c71d8ebe 2317 }
1da177e4
LT
2318
2319out_freectl:
89bddce5 2320 if (ctl_buf != ctl)
1da177e4
LT
2321 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2322out_freeiov:
da184284 2323 kfree(iov);
228e548e
AB
2324 return err;
2325}
2326
2327/*
2328 * BSD sendmsg interface
2329 */
0fa03c62
JA
2330long __sys_sendmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2331 unsigned int flags)
2332{
2333 struct msghdr msg_sys;
2334
2335 return ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2336}
228e548e 2337
e1834a32
DB
2338long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2339 bool forbid_cmsg_compat)
228e548e
AB
2340{
2341 int fput_needed, err;
2342 struct msghdr msg_sys;
1be374a0
AL
2343 struct socket *sock;
2344
e1834a32
DB
2345 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2346 return -EINVAL;
2347
1be374a0 2348 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2349 if (!sock)
2350 goto out;
2351
28a94d8f 2352 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2353
6cb153ca 2354 fput_light(sock->file, fput_needed);
89bddce5 2355out:
1da177e4
LT
2356 return err;
2357}
2358
666547ff 2359SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2360{
e1834a32 2361 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2362}
2363
228e548e
AB
2364/*
2365 * Linux sendmmsg interface
2366 */
2367
2368int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2369 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2370{
2371 int fput_needed, err, datagrams;
2372 struct socket *sock;
2373 struct mmsghdr __user *entry;
2374 struct compat_mmsghdr __user *compat_entry;
2375 struct msghdr msg_sys;
c71d8ebe 2376 struct used_address used_address;
f092276d 2377 unsigned int oflags = flags;
228e548e 2378
e1834a32
DB
2379 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2380 return -EINVAL;
2381
98382f41
AB
2382 if (vlen > UIO_MAXIOV)
2383 vlen = UIO_MAXIOV;
228e548e
AB
2384
2385 datagrams = 0;
2386
2387 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2388 if (!sock)
2389 return err;
2390
c71d8ebe 2391 used_address.name_len = UINT_MAX;
228e548e
AB
2392 entry = mmsg;
2393 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2394 err = 0;
f092276d 2395 flags |= MSG_BATCH;
228e548e
AB
2396
2397 while (datagrams < vlen) {
f092276d
TH
2398 if (datagrams == vlen - 1)
2399 flags = oflags;
2400
228e548e 2401 if (MSG_CMSG_COMPAT & flags) {
666547ff 2402 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2403 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2404 if (err < 0)
2405 break;
2406 err = __put_user(err, &compat_entry->msg_len);
2407 ++compat_entry;
2408 } else {
a7526eb5 2409 err = ___sys_sendmsg(sock,
666547ff 2410 (struct user_msghdr __user *)entry,
28a94d8f 2411 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2412 if (err < 0)
2413 break;
2414 err = put_user(err, &entry->msg_len);
2415 ++entry;
2416 }
2417
2418 if (err)
2419 break;
2420 ++datagrams;
3023898b
SHY
2421 if (msg_data_left(&msg_sys))
2422 break;
a78cb84c 2423 cond_resched();
228e548e
AB
2424 }
2425
228e548e
AB
2426 fput_light(sock->file, fput_needed);
2427
728ffb86
AB
2428 /* We only return an error if no datagrams were able to be sent */
2429 if (datagrams != 0)
228e548e
AB
2430 return datagrams;
2431
228e548e
AB
2432 return err;
2433}
2434
2435SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2436 unsigned int, vlen, unsigned int, flags)
2437{
e1834a32 2438 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2439}
2440
666547ff 2441static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2442 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2443{
89bddce5
SH
2444 struct compat_msghdr __user *msg_compat =
2445 (struct compat_msghdr __user *)msg;
1da177e4 2446 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2447 struct iovec *iov = iovstack;
1da177e4 2448 unsigned long cmsg_ptr;
2da62906 2449 int len;
08adb7da 2450 ssize_t err;
1da177e4
LT
2451
2452 /* kernel mode address */
230b1839 2453 struct sockaddr_storage addr;
1da177e4
LT
2454
2455 /* user mode address pointers */
2456 struct sockaddr __user *uaddr;
08adb7da 2457 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2458
08adb7da 2459 msg_sys->msg_name = &addr;
1da177e4 2460
f3d33426 2461 if (MSG_CMSG_COMPAT & flags)
08adb7da 2462 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2463 else
08adb7da 2464 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2465 if (err < 0)
da184284 2466 return err;
1da177e4 2467
a2e27255
ACM
2468 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2469 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2470
f3d33426
HFS
2471 /* We assume all kernel code knows the size of sockaddr_storage */
2472 msg_sys->msg_namelen = 0;
2473
1da177e4
LT
2474 if (sock->file->f_flags & O_NONBLOCK)
2475 flags |= MSG_DONTWAIT;
2da62906 2476 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2477 if (err < 0)
2478 goto out_freeiov;
2479 len = err;
2480
2481 if (uaddr != NULL) {
43db362d 2482 err = move_addr_to_user(&addr,
a2e27255 2483 msg_sys->msg_namelen, uaddr,
89bddce5 2484 uaddr_len);
1da177e4
LT
2485 if (err < 0)
2486 goto out_freeiov;
2487 }
a2e27255 2488 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2489 COMPAT_FLAGS(msg));
1da177e4
LT
2490 if (err)
2491 goto out_freeiov;
2492 if (MSG_CMSG_COMPAT & flags)
a2e27255 2493 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2494 &msg_compat->msg_controllen);
2495 else
a2e27255 2496 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2497 &msg->msg_controllen);
2498 if (err)
2499 goto out_freeiov;
2500 err = len;
2501
2502out_freeiov:
da184284 2503 kfree(iov);
a2e27255
ACM
2504 return err;
2505}
2506
2507/*
2508 * BSD recvmsg interface
2509 */
2510
aa1fa28f
JA
2511long __sys_recvmsg_sock(struct socket *sock, struct user_msghdr __user *msg,
2512 unsigned int flags)
2513{
2514 struct msghdr msg_sys;
2515
2516 return ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2517}
2518
e1834a32
DB
2519long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2520 bool forbid_cmsg_compat)
a2e27255
ACM
2521{
2522 int fput_needed, err;
2523 struct msghdr msg_sys;
1be374a0
AL
2524 struct socket *sock;
2525
e1834a32
DB
2526 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2527 return -EINVAL;
2528
1be374a0 2529 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2530 if (!sock)
2531 goto out;
2532
a7526eb5 2533 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2534
6cb153ca 2535 fput_light(sock->file, fput_needed);
1da177e4
LT
2536out:
2537 return err;
2538}
2539
666547ff 2540SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2541 unsigned int, flags)
2542{
e1834a32 2543 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2544}
2545
a2e27255
ACM
2546/*
2547 * Linux recvmmsg interface
2548 */
2549
e11d4284
AB
2550static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2551 unsigned int vlen, unsigned int flags,
2552 struct timespec64 *timeout)
a2e27255
ACM
2553{
2554 int fput_needed, err, datagrams;
2555 struct socket *sock;
2556 struct mmsghdr __user *entry;
d7256d0e 2557 struct compat_mmsghdr __user *compat_entry;
a2e27255 2558 struct msghdr msg_sys;
766b9f92
DD
2559 struct timespec64 end_time;
2560 struct timespec64 timeout64;
a2e27255
ACM
2561
2562 if (timeout &&
2563 poll_select_set_timeout(&end_time, timeout->tv_sec,
2564 timeout->tv_nsec))
2565 return -EINVAL;
2566
2567 datagrams = 0;
2568
2569 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2570 if (!sock)
2571 return err;
2572
7797dc41
SHY
2573 if (likely(!(flags & MSG_ERRQUEUE))) {
2574 err = sock_error(sock->sk);
2575 if (err) {
2576 datagrams = err;
2577 goto out_put;
2578 }
e623a9e9 2579 }
a2e27255
ACM
2580
2581 entry = mmsg;
d7256d0e 2582 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2583
2584 while (datagrams < vlen) {
2585 /*
2586 * No need to ask LSM for more than the first datagram.
2587 */
d7256d0e 2588 if (MSG_CMSG_COMPAT & flags) {
666547ff 2589 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2590 &msg_sys, flags & ~MSG_WAITFORONE,
2591 datagrams);
d7256d0e
JMG
2592 if (err < 0)
2593 break;
2594 err = __put_user(err, &compat_entry->msg_len);
2595 ++compat_entry;
2596 } else {
a7526eb5 2597 err = ___sys_recvmsg(sock,
666547ff 2598 (struct user_msghdr __user *)entry,
a7526eb5
AL
2599 &msg_sys, flags & ~MSG_WAITFORONE,
2600 datagrams);
d7256d0e
JMG
2601 if (err < 0)
2602 break;
2603 err = put_user(err, &entry->msg_len);
2604 ++entry;
2605 }
2606
a2e27255
ACM
2607 if (err)
2608 break;
a2e27255
ACM
2609 ++datagrams;
2610
71c5c159
BB
2611 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2612 if (flags & MSG_WAITFORONE)
2613 flags |= MSG_DONTWAIT;
2614
a2e27255 2615 if (timeout) {
766b9f92 2616 ktime_get_ts64(&timeout64);
c2e6c856 2617 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2618 if (timeout->tv_sec < 0) {
2619 timeout->tv_sec = timeout->tv_nsec = 0;
2620 break;
2621 }
2622
2623 /* Timeout, return less than vlen datagrams */
2624 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2625 break;
2626 }
2627
2628 /* Out of band data, return right away */
2629 if (msg_sys.msg_flags & MSG_OOB)
2630 break;
a78cb84c 2631 cond_resched();
a2e27255
ACM
2632 }
2633
a2e27255 2634 if (err == 0)
34b88a68
ACM
2635 goto out_put;
2636
2637 if (datagrams == 0) {
2638 datagrams = err;
2639 goto out_put;
2640 }
a2e27255 2641
34b88a68
ACM
2642 /*
2643 * We may return less entries than requested (vlen) if the
2644 * sock is non block and there aren't enough datagrams...
2645 */
2646 if (err != -EAGAIN) {
a2e27255 2647 /*
34b88a68
ACM
2648 * ... or if recvmsg returns an error after we
2649 * received some datagrams, where we record the
2650 * error to return on the next call or if the
2651 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2652 */
34b88a68 2653 sock->sk->sk_err = -err;
a2e27255 2654 }
34b88a68
ACM
2655out_put:
2656 fput_light(sock->file, fput_needed);
a2e27255 2657
34b88a68 2658 return datagrams;
a2e27255
ACM
2659}
2660
e11d4284
AB
2661int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2662 unsigned int vlen, unsigned int flags,
2663 struct __kernel_timespec __user *timeout,
2664 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2665{
2666 int datagrams;
c2e6c856 2667 struct timespec64 timeout_sys;
a2e27255 2668
e11d4284
AB
2669 if (timeout && get_timespec64(&timeout_sys, timeout))
2670 return -EFAULT;
a2e27255 2671
e11d4284 2672 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2673 return -EFAULT;
2674
e11d4284
AB
2675 if (!timeout && !timeout32)
2676 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2677
2678 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2679
e11d4284
AB
2680 if (datagrams <= 0)
2681 return datagrams;
2682
2683 if (timeout && put_timespec64(&timeout_sys, timeout))
2684 datagrams = -EFAULT;
2685
2686 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2687 datagrams = -EFAULT;
2688
2689 return datagrams;
2690}
2691
1255e269
DB
2692SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2693 unsigned int, vlen, unsigned int, flags,
c2e6c856 2694 struct __kernel_timespec __user *, timeout)
1255e269 2695{
e11d4284
AB
2696 if (flags & MSG_CMSG_COMPAT)
2697 return -EINVAL;
2698
2699 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2700}
2701
2702#ifdef CONFIG_COMPAT_32BIT_TIME
2703SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2704 unsigned int, vlen, unsigned int, flags,
2705 struct old_timespec32 __user *, timeout)
2706{
2707 if (flags & MSG_CMSG_COMPAT)
2708 return -EINVAL;
2709
2710 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 2711}
e11d4284 2712#endif
1255e269 2713
a2e27255 2714#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall.
 *
 * Each entry is a byte count: the number of arguments multiplied by
 * sizeof(unsigned long).  Entry 0 is zero.
 */
#define NARGS_BYTES(count) ((count) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	NARGS_BYTES(0), NARGS_BYTES(3), NARGS_BYTES(3),
	NARGS_BYTES(3), NARGS_BYTES(2), NARGS_BYTES(3),
	NARGS_BYTES(3), NARGS_BYTES(3), NARGS_BYTES(4),
	NARGS_BYTES(4), NARGS_BYTES(4), NARGS_BYTES(6),
	NARGS_BYTES(6), NARGS_BYTES(2), NARGS_BYTES(5),
	NARGS_BYTES(5), NARGS_BYTES(3), NARGS_BYTES(3),
	NARGS_BYTES(4), NARGS_BYTES(5), NARGS_BYTES(4)
};

#undef NARGS_BYTES
2725
2726/*
89bddce5 2727 * System call vectors.
1da177e4
LT
2728 *
2729 * Argument checking cleaned up. Saved 20% in size.
2730 * This function doesn't need to set the kernel lock because
89bddce5 2731 * it is set by the callees.
1da177e4
LT
2732 */
2733
3e0fa65f 2734SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2735{
2950fa9d 2736 unsigned long a[AUDITSC_ARGS];
89bddce5 2737 unsigned long a0, a1;
1da177e4 2738 int err;
47379052 2739 unsigned int len;
1da177e4 2740
228e548e 2741 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2742 return -EINVAL;
c8e8cd57 2743 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2744
47379052
AV
2745 len = nargs[call];
2746 if (len > sizeof(a))
2747 return -EINVAL;
2748
1da177e4 2749 /* copy_from_user should be SMP safe. */
47379052 2750 if (copy_from_user(a, args, len))
1da177e4 2751 return -EFAULT;
3ec3b2fb 2752
2950fa9d
CG
2753 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2754 if (err)
2755 return err;
3ec3b2fb 2756
89bddce5
SH
2757 a0 = a[0];
2758 a1 = a[1];
2759
2760 switch (call) {
2761 case SYS_SOCKET:
9d6a15c3 2762 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2763 break;
2764 case SYS_BIND:
a87d35d8 2765 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2766 break;
2767 case SYS_CONNECT:
1387c2c2 2768 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2769 break;
2770 case SYS_LISTEN:
25e290ee 2771 err = __sys_listen(a0, a1);
89bddce5
SH
2772 break;
2773 case SYS_ACCEPT:
4541e805
DB
2774 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2775 (int __user *)a[2], 0);
89bddce5
SH
2776 break;
2777 case SYS_GETSOCKNAME:
2778 err =
8882a107
DB
2779 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2780 (int __user *)a[2]);
89bddce5
SH
2781 break;
2782 case SYS_GETPEERNAME:
2783 err =
b21c8f83
DB
2784 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2785 (int __user *)a[2]);
89bddce5
SH
2786 break;
2787 case SYS_SOCKETPAIR:
6debc8d8 2788 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2789 break;
2790 case SYS_SEND:
f3bf896b
DB
2791 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2792 NULL, 0);
89bddce5
SH
2793 break;
2794 case SYS_SENDTO:
211b634b
DB
2795 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2796 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2797 break;
2798 case SYS_RECV:
d27e9afc
DB
2799 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2800 NULL, NULL);
89bddce5
SH
2801 break;
2802 case SYS_RECVFROM:
7a09e1eb
DB
2803 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2804 (struct sockaddr __user *)a[4],
2805 (int __user *)a[5]);
89bddce5
SH
2806 break;
2807 case SYS_SHUTDOWN:
005a1aea 2808 err = __sys_shutdown(a0, a1);
89bddce5
SH
2809 break;
2810 case SYS_SETSOCKOPT:
cc36dca0
DB
2811 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2812 a[4]);
89bddce5
SH
2813 break;
2814 case SYS_GETSOCKOPT:
2815 err =
13a2d70e
DB
2816 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2817 (int __user *)a[4]);
89bddce5
SH
2818 break;
2819 case SYS_SENDMSG:
e1834a32
DB
2820 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2821 a[2], true);
89bddce5 2822 break;
228e548e 2823 case SYS_SENDMMSG:
e1834a32
DB
2824 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2825 a[3], true);
228e548e 2826 break;
89bddce5 2827 case SYS_RECVMSG:
e1834a32
DB
2828 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2829 a[2], true);
89bddce5 2830 break;
a2e27255 2831 case SYS_RECVMMSG:
e11d4284
AB
2832 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2833 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2834 a[2], a[3],
2835 (struct __kernel_timespec __user *)a[4],
2836 NULL);
2837 else
2838 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2839 a[2], a[3], NULL,
2840 (struct old_timespec32 __user *)a[4]);
a2e27255 2841 break;
de11defe 2842 case SYS_ACCEPT4:
4541e805
DB
2843 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2844 (int __user *)a[2], a[3]);
aaca0bdc 2845 break;
89bddce5
SH
2846 default:
2847 err = -EINVAL;
2848 break;
1da177e4
LT
2849 }
2850 return err;
2851}
2852
89bddce5 2853#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2854
55737fda
SH
2855/**
2856 * sock_register - add a socket protocol handler
2857 * @ops: description of protocol
2858 *
1da177e4
LT
2859 * This function is called by a protocol handler that wants to
2860 * advertise its address family, and have it linked into the
e793c0f7 2861 * socket interface. The value ops->family corresponds to the
55737fda 2862 * socket system call protocol family.
1da177e4 2863 */
f0fd27d4 2864int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2865{
2866 int err;
2867
2868 if (ops->family >= NPROTO) {
3410f22e 2869 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2870 return -ENOBUFS;
2871 }
55737fda
SH
2872
2873 spin_lock(&net_family_lock);
190683a9
ED
2874 if (rcu_dereference_protected(net_families[ops->family],
2875 lockdep_is_held(&net_family_lock)))
55737fda
SH
2876 err = -EEXIST;
2877 else {
cf778b00 2878 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2879 err = 0;
2880 }
55737fda
SH
2881 spin_unlock(&net_family_lock);
2882
3410f22e 2883 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2884 return err;
2885}
c6d409cf 2886EXPORT_SYMBOL(sock_register);
1da177e4 2887
55737fda
SH
2888/**
2889 * sock_unregister - remove a protocol handler
2890 * @family: protocol family to remove
2891 *
1da177e4
LT
2892 * This function is called by a protocol handler that wants to
2893 * remove its address family, and have it unlinked from the
55737fda
SH
2894 * new socket creation.
2895 *
2896 * If protocol handler is a module, then it can use module reference
2897 * counts to protect against new references. If protocol handler is not
2898 * a module then it needs to provide its own protection in
2899 * the ops->create routine.
1da177e4 2900 */
f0fd27d4 2901void sock_unregister(int family)
1da177e4 2902{
f0fd27d4 2903 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2904
55737fda 2905 spin_lock(&net_family_lock);
a9b3cd7f 2906 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2907 spin_unlock(&net_family_lock);
2908
2909 synchronize_rcu();
2910
3410f22e 2911 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2912}
c6d409cf 2913EXPORT_SYMBOL(sock_unregister);
1da177e4 2914
bf2ae2e4
XL
2915bool sock_is_registered(int family)
2916{
66b51b0a 2917 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2918}
2919
77d76ea3 2920static int __init sock_init(void)
1da177e4 2921{
b3e19d92 2922 int err;
2ca794e5
EB
2923 /*
2924 * Initialize the network sysctl infrastructure.
2925 */
2926 err = net_sysctl_init();
2927 if (err)
2928 goto out;
b3e19d92 2929
1da177e4 2930 /*
89bddce5 2931 * Initialize skbuff SLAB cache
1da177e4
LT
2932 */
2933 skb_init();
1da177e4
LT
2934
2935 /*
89bddce5 2936 * Initialize the protocols module.
1da177e4
LT
2937 */
2938
2939 init_inodecache();
b3e19d92
NP
2940
2941 err = register_filesystem(&sock_fs_type);
2942 if (err)
2943 goto out_fs;
1da177e4 2944 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2945 if (IS_ERR(sock_mnt)) {
2946 err = PTR_ERR(sock_mnt);
2947 goto out_mount;
2948 }
77d76ea3
AK
2949
2950 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2951 */
2952
2953#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2954 err = netfilter_init();
2955 if (err)
2956 goto out;
1da177e4 2957#endif
cbeb321a 2958
408eccce 2959 ptp_classifier_init();
c1f19b51 2960
b3e19d92
NP
2961out:
2962 return err;
2963
2964out_mount:
2965 unregister_filesystem(&sock_fs_type);
2966out_fs:
2967 goto out;
1da177e4
LT
2968}
2969
77d76ea3
AK
2970core_initcall(sock_init); /* early initcall */
2971
#ifdef CONFIG_PROC_FS
/* Print the "sockets: used N" line, N coming from sock_inuse_get(). */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", in_use);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2979
89bbfc95 2980#ifdef CONFIG_COMPAT
36fd633e 2981static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2982{
6b96018b 2983 struct compat_ifconf ifc32;
7a229387 2984 struct ifconf ifc;
7a229387
AB
2985 int err;
2986
6b96018b 2987 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2988 return -EFAULT;
2989
36fd633e
AV
2990 ifc.ifc_len = ifc32.ifc_len;
2991 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2992
36fd633e
AV
2993 rtnl_lock();
2994 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2995 rtnl_unlock();
7a229387
AB
2996 if (err)
2997 return err;
2998
36fd633e 2999 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3000 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3001 return -EFAULT;
3002
3003 return 0;
3004}
3005
6b96018b 3006static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3007{
3a7da39d
BH
3008 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3009 bool convert_in = false, convert_out = false;
44c02a2c
AV
3010 size_t buf_size = 0;
3011 struct ethtool_rxnfc __user *rxnfc = NULL;
3012 struct ifreq ifr;
3a7da39d
BH
3013 u32 rule_cnt = 0, actual_rule_cnt;
3014 u32 ethcmd;
7a229387 3015 u32 data;
3a7da39d 3016 int ret;
7a229387 3017
3a7da39d
BH
3018 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3019 return -EFAULT;
7a229387 3020
3a7da39d
BH
3021 compat_rxnfc = compat_ptr(data);
3022
3023 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3024 return -EFAULT;
3025
3a7da39d
BH
3026 /* Most ethtool structures are defined without padding.
3027 * Unfortunately struct ethtool_rxnfc is an exception.
3028 */
3029 switch (ethcmd) {
3030 default:
3031 break;
3032 case ETHTOOL_GRXCLSRLALL:
3033 /* Buffer size is variable */
3034 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3035 return -EFAULT;
3036 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3037 return -ENOMEM;
3038 buf_size += rule_cnt * sizeof(u32);
3039 /* fall through */
3040 case ETHTOOL_GRXRINGS:
3041 case ETHTOOL_GRXCLSRLCNT:
3042 case ETHTOOL_GRXCLSRULE:
55664f32 3043 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3044 convert_out = true;
3045 /* fall through */
3046 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3047 buf_size += sizeof(struct ethtool_rxnfc);
3048 convert_in = true;
44c02a2c 3049 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3050 break;
3051 }
3052
44c02a2c 3053 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3054 return -EFAULT;
3055
44c02a2c 3056 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3057
3a7da39d 3058 if (convert_in) {
127fe533 3059 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3060 * fs.ring_cookie and at the end of fs, but nowhere else.
3061 */
127fe533
AD
3062 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3063 sizeof(compat_rxnfc->fs.m_ext) !=
3064 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3065 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3066 BUILD_BUG_ON(
3067 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3068 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3069 offsetof(struct ethtool_rxnfc, fs.location) -
3070 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3071
3072 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3073 (void __user *)(&rxnfc->fs.m_ext + 1) -
3074 (void __user *)rxnfc) ||
3a7da39d
BH
3075 copy_in_user(&rxnfc->fs.ring_cookie,
3076 &compat_rxnfc->fs.ring_cookie,
954b1244 3077 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3078 (void __user *)&rxnfc->fs.ring_cookie))
3079 return -EFAULT;
3080 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3081 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3082 return -EFAULT;
3083 } else if (copy_in_user(&rxnfc->rule_cnt,
3084 &compat_rxnfc->rule_cnt,
3085 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3086 return -EFAULT;
3087 }
3088
44c02a2c 3089 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3090 if (ret)
3091 return ret;
3092
3093 if (convert_out) {
3094 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3095 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3096 (const void __user *)rxnfc) ||
3a7da39d
BH
3097 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3098 &rxnfc->fs.ring_cookie,
954b1244
SH
3099 (const void __user *)(&rxnfc->fs.location + 1) -
3100 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3101 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3102 sizeof(rxnfc->rule_cnt)))
3103 return -EFAULT;
3104
3105 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3106 /* As an optimisation, we only copy the actual
3107 * number of rules that the underlying
3108 * function returned. Since Mallory might
3109 * change the rule count in user memory, we
3110 * check that it is less than the rule count
3111 * originally given (as the user buffer size),
3112 * which has been range-checked.
3113 */
3114 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3115 return -EFAULT;
3116 if (actual_rule_cnt < rule_cnt)
3117 rule_cnt = actual_rule_cnt;
3118 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3119 &rxnfc->rule_locs[0],
3120 rule_cnt * sizeof(u32)))
3121 return -EFAULT;
3122 }
3123 }
3124
3125 return 0;
7a229387
AB
3126}
3127
7a50a240
AB
3128static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3129{
7a50a240 3130 compat_uptr_t uptr32;
44c02a2c
AV
3131 struct ifreq ifr;
3132 void __user *saved;
3133 int err;
7a50a240 3134
44c02a2c 3135 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3136 return -EFAULT;
3137
3138 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3139 return -EFAULT;
3140
44c02a2c
AV
3141 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3142 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3143
44c02a2c
AV
3144 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3145 if (!err) {
3146 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3147 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3148 err = -EFAULT;
ccbd6a5a 3149 }
44c02a2c 3150 return err;
7a229387
AB
3151}
3152
590d4693
BH
3153/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3154static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3155 struct compat_ifreq __user *u_ifreq32)
7a229387 3156{
44c02a2c 3157 struct ifreq ifreq;
7a229387
AB
3158 u32 data32;
3159
44c02a2c 3160 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3161 return -EFAULT;
44c02a2c 3162 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3163 return -EFAULT;
44c02a2c 3164 ifreq.ifr_data = compat_ptr(data32);
7a229387 3165
44c02a2c 3166 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3167}
3168
37ac39bd
JB
3169static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3170 unsigned int cmd,
3171 struct compat_ifreq __user *uifr32)
3172{
3173 struct ifreq __user *uifr;
3174 int err;
3175
3176 /* Handle the fact that while struct ifreq has the same *layout* on
3177 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3178 * which are handled elsewhere, it still has different *size* due to
3179 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3180 * resulting in struct ifreq being 32 and 40 bytes respectively).
3181 * As a result, if the struct happens to be at the end of a page and
3182 * the next page isn't readable/writable, we get a fault. To prevent
3183 * that, copy back and forth to the full size.
3184 */
3185
3186 uifr = compat_alloc_user_space(sizeof(*uifr));
3187 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3188 return -EFAULT;
3189
3190 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3191
3192 if (!err) {
3193 switch (cmd) {
3194 case SIOCGIFFLAGS:
3195 case SIOCGIFMETRIC:
3196 case SIOCGIFMTU:
3197 case SIOCGIFMEM:
3198 case SIOCGIFHWADDR:
3199 case SIOCGIFINDEX:
3200 case SIOCGIFADDR:
3201 case SIOCGIFBRDADDR:
3202 case SIOCGIFDSTADDR:
3203 case SIOCGIFNETMASK:
3204 case SIOCGIFPFLAGS:
3205 case SIOCGIFTXQLEN:
3206 case SIOCGMIIPHY:
3207 case SIOCGMIIREG:
c6c9fee3 3208 case SIOCGIFNAME:
37ac39bd
JB
3209 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3210 err = -EFAULT;
3211 break;
3212 }
3213 }
3214 return err;
3215}
3216
a2116ed2
AB
3217static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3218 struct compat_ifreq __user *uifr32)
3219{
3220 struct ifreq ifr;
3221 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3222 int err;
3223
3224 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3225 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3226 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3227 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3228 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3229 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3230 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3231 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3232 if (err)
3233 return -EFAULT;
3234
44c02a2c 3235 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3236
3237 if (cmd == SIOCGIFMAP && !err) {
3238 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3239 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3240 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3241 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3242 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3243 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3244 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3245 if (err)
3246 err = -EFAULT;
3247 }
3248 return err;
3249}
3250
7a229387 3251struct rtentry32 {
c6d409cf 3252 u32 rt_pad1;
7a229387
AB
3253 struct sockaddr rt_dst; /* target address */
3254 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3255 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3256 unsigned short rt_flags;
3257 short rt_pad2;
3258 u32 rt_pad3;
3259 unsigned char rt_tos;
3260 unsigned char rt_class;
3261 short rt_pad4;
3262 short rt_metric; /* +1 for binary compatibility! */
7a229387 3263 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3264 u32 rt_mtu; /* per route MTU/Window */
3265 u32 rt_window; /* Window clamping */
7a229387
AB
3266 unsigned short rt_irtt; /* Initial RTT */
3267};
3268
3269struct in6_rtmsg32 {
3270 struct in6_addr rtmsg_dst;
3271 struct in6_addr rtmsg_src;
3272 struct in6_addr rtmsg_gateway;
3273 u32 rtmsg_type;
3274 u16 rtmsg_dst_len;
3275 u16 rtmsg_src_len;
3276 u32 rtmsg_metric;
3277 u32 rtmsg_info;
3278 u32 rtmsg_flags;
3279 s32 rtmsg_ifindex;
3280};
3281
6b96018b
AB
3282static int routing_ioctl(struct net *net, struct socket *sock,
3283 unsigned int cmd, void __user *argp)
7a229387
AB
3284{
3285 int ret;
3286 void *r = NULL;
3287 struct in6_rtmsg r6;
3288 struct rtentry r4;
3289 char devname[16];
3290 u32 rtdev;
3291 mm_segment_t old_fs = get_fs();
3292
6b96018b
AB
3293 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3294 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3295 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3296 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3297 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3298 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3299 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3300 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3301 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3302 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3303 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3304
3305 r = (void *) &r6;
3306 } else { /* ipv4 */
6b96018b 3307 struct rtentry32 __user *ur4 = argp;
c6d409cf 3308 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3309 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3310 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3311 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3312 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3313 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3314 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3315 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3316 if (rtdev) {
c6d409cf 3317 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3318 r4.rt_dev = (char __user __force *)devname;
3319 devname[15] = 0;
7a229387
AB
3320 } else
3321 r4.rt_dev = NULL;
3322
3323 r = (void *) &r4;
3324 }
3325
3326 if (ret) {
3327 ret = -EFAULT;
3328 goto out;
3329 }
3330
c6d409cf 3331 set_fs(KERNEL_DS);
63ff03ab 3332 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3333 set_fs(old_fs);
7a229387
AB
3334
3335out:
7a229387
AB
3336 return ret;
3337}
3338
3339/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3340 * for some operations; this forces use of the newer bridge-utils that
25985edc 3341 * use compatible ioctls
7a229387 3342 */
6b96018b 3343static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3344{
6b96018b 3345 compat_ulong_t tmp;
7a229387 3346
6b96018b 3347 if (get_user(tmp, argp))
7a229387
AB
3348 return -EFAULT;
3349 if (tmp == BRCTL_GET_VERSION)
3350 return BRCTL_VERSION + 1;
3351 return -EINVAL;
3352}
3353
6b96018b
AB
3354static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3355 unsigned int cmd, unsigned long arg)
3356{
3357 void __user *argp = compat_ptr(arg);
3358 struct sock *sk = sock->sk;
3359 struct net *net = sock_net(sk);
7a229387 3360
6b96018b 3361 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3362 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3363
3364 switch (cmd) {
3365 case SIOCSIFBR:
3366 case SIOCGIFBR:
3367 return old_bridge_ioctl(argp);
6b96018b 3368 case SIOCGIFCONF:
36fd633e 3369 return compat_dev_ifconf(net, argp);
6b96018b
AB
3370 case SIOCETHTOOL:
3371 return ethtool_ioctl(net, argp);
7a50a240
AB
3372 case SIOCWANDEV:
3373 return compat_siocwandev(net, argp);
a2116ed2
AB
3374 case SIOCGIFMAP:
3375 case SIOCSIFMAP:
3376 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3377 case SIOCADDRT:
3378 case SIOCDELRT:
3379 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3380 case SIOCGSTAMP_OLD:
3381 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3382 if (!sock->ops->gettstamp)
3383 return -ENOIOCTLCMD;
0768e170 3384 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3385 !COMPAT_USE_64BIT_TIME);
3386
590d4693
BH
3387 case SIOCBONDSLAVEINFOQUERY:
3388 case SIOCBONDINFOQUERY:
a2116ed2 3389 case SIOCSHWTSTAMP:
fd468c74 3390 case SIOCGHWTSTAMP:
590d4693 3391 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3392
3393 case FIOSETOWN:
3394 case SIOCSPGRP:
3395 case FIOGETOWN:
3396 case SIOCGPGRP:
3397 case SIOCBRADDBR:
3398 case SIOCBRDELBR:
3399 case SIOCGIFVLAN:
3400 case SIOCSIFVLAN:
3401 case SIOCADDDLCI:
3402 case SIOCDELDLCI:
c62cce2c 3403 case SIOCGSKNS:
0768e170
AB
3404 case SIOCGSTAMP_NEW:
3405 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3406 return sock_ioctl(file, cmd, arg);
3407
3408 case SIOCGIFFLAGS:
3409 case SIOCSIFFLAGS:
3410 case SIOCGIFMETRIC:
3411 case SIOCSIFMETRIC:
3412 case SIOCGIFMTU:
3413 case SIOCSIFMTU:
3414 case SIOCGIFMEM:
3415 case SIOCSIFMEM:
3416 case SIOCGIFHWADDR:
3417 case SIOCSIFHWADDR:
3418 case SIOCADDMULTI:
3419 case SIOCDELMULTI:
3420 case SIOCGIFINDEX:
6b96018b
AB
3421 case SIOCGIFADDR:
3422 case SIOCSIFADDR:
3423 case SIOCSIFHWBROADCAST:
6b96018b 3424 case SIOCDIFADDR:
6b96018b
AB
3425 case SIOCGIFBRDADDR:
3426 case SIOCSIFBRDADDR:
3427 case SIOCGIFDSTADDR:
3428 case SIOCSIFDSTADDR:
3429 case SIOCGIFNETMASK:
3430 case SIOCSIFNETMASK:
3431 case SIOCSIFPFLAGS:
3432 case SIOCGIFPFLAGS:
3433 case SIOCGIFTXQLEN:
3434 case SIOCSIFTXQLEN:
3435 case SIOCBRADDIF:
3436 case SIOCBRDELIF:
c6c9fee3 3437 case SIOCGIFNAME:
9177efd3
AB
3438 case SIOCSIFNAME:
3439 case SIOCGMIIPHY:
3440 case SIOCGMIIREG:
3441 case SIOCSMIIREG:
f92d4fc9
AV
3442 case SIOCBONDENSLAVE:
3443 case SIOCBONDRELEASE:
3444 case SIOCBONDSETHWADDR:
3445 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3446 return compat_ifreq_ioctl(net, sock, cmd, argp);
3447
6b96018b
AB
3448 case SIOCSARP:
3449 case SIOCGARP:
3450 case SIOCDARP:
6b96018b 3451 case SIOCATMARK:
63ff03ab 3452 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3453 }
3454
6b96018b
AB
3455 return -ENOIOCTLCMD;
3456}
7a229387 3457
95c96174 3458static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3459 unsigned long arg)
89bbfc95
SP
3460{
3461 struct socket *sock = file->private_data;
3462 int ret = -ENOIOCTLCMD;
87de87d5
DM
3463 struct sock *sk;
3464 struct net *net;
3465
3466 sk = sock->sk;
3467 net = sock_net(sk);
89bbfc95
SP
3468
3469 if (sock->ops->compat_ioctl)
3470 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3471
87de87d5
DM
3472 if (ret == -ENOIOCTLCMD &&
3473 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3474 ret = compat_wext_handle_ioctl(net, cmd, arg);
3475
6b96018b
AB
3476 if (ret == -ENOIOCTLCMD)
3477 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3478
89bbfc95
SP
3479 return ret;
3480}
3481#endif
3482
8a3c245c
PT
3483/**
3484 * kernel_bind - bind an address to a socket (kernel space)
3485 * @sock: socket
3486 * @addr: address
3487 * @addrlen: length of address
3488 *
3489 * Returns 0 or an error.
3490 */
3491
ac5a488e
SS
3492int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3493{
3494 return sock->ops->bind(sock, addr, addrlen);
3495}
c6d409cf 3496EXPORT_SYMBOL(kernel_bind);
ac5a488e 3497
8a3c245c
PT
3498/**
3499 * kernel_listen - move socket to listening state (kernel space)
3500 * @sock: socket
3501 * @backlog: pending connections queue size
3502 *
3503 * Returns 0 or an error.
3504 */
3505
ac5a488e
SS
3506int kernel_listen(struct socket *sock, int backlog)
3507{
3508 return sock->ops->listen(sock, backlog);
3509}
c6d409cf 3510EXPORT_SYMBOL(kernel_listen);
ac5a488e 3511
8a3c245c
PT
3512/**
3513 * kernel_accept - accept a connection (kernel space)
3514 * @sock: listening socket
3515 * @newsock: new connected socket
3516 * @flags: flags
3517 *
3518 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3519 * If it fails, @newsock is guaranteed to be %NULL.
3520 * Returns 0 or an error.
3521 */
3522
ac5a488e
SS
3523int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3524{
3525 struct sock *sk = sock->sk;
3526 int err;
3527
3528 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3529 newsock);
3530 if (err < 0)
3531 goto done;
3532
cdfbabfb 3533 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3534 if (err < 0) {
3535 sock_release(*newsock);
fa8705b0 3536 *newsock = NULL;
ac5a488e
SS
3537 goto done;
3538 }
3539
3540 (*newsock)->ops = sock->ops;
1b08534e 3541 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3542
3543done:
3544 return err;
3545}
c6d409cf 3546EXPORT_SYMBOL(kernel_accept);
ac5a488e 3547
8a3c245c
PT
3548/**
3549 * kernel_connect - connect a socket (kernel space)
3550 * @sock: socket
3551 * @addr: address
3552 * @addrlen: address length
3553 * @flags: flags (O_NONBLOCK, ...)
3554 *
3555 * For datagram sockets, @addr is the addres to which datagrams are sent
3556 * by default, and the only address from which datagrams are received.
3557 * For stream sockets, attempts to connect to @addr.
3558 * Returns 0 or an error code.
3559 */
3560
ac5a488e 3561int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3562 int flags)
ac5a488e
SS
3563{
3564 return sock->ops->connect(sock, addr, addrlen, flags);
3565}
c6d409cf 3566EXPORT_SYMBOL(kernel_connect);
ac5a488e 3567
8a3c245c
PT
3568/**
3569 * kernel_getsockname - get the address which the socket is bound (kernel space)
3570 * @sock: socket
3571 * @addr: address holder
3572 *
3573 * Fills the @addr pointer with the address which the socket is bound.
3574 * Returns 0 or an error code.
3575 */
3576
9b2c45d4 3577int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3578{
9b2c45d4 3579 return sock->ops->getname(sock, addr, 0);
ac5a488e 3580}
c6d409cf 3581EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3582
8a3c245c
PT
3583/**
3584 * kernel_peername - get the address which the socket is connected (kernel space)
3585 * @sock: socket
3586 * @addr: address holder
3587 *
3588 * Fills the @addr pointer with the address which the socket is connected.
3589 * Returns 0 or an error code.
3590 */
3591
9b2c45d4 3592int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3593{
9b2c45d4 3594 return sock->ops->getname(sock, addr, 1);
ac5a488e 3595}
c6d409cf 3596EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3597
8a3c245c
PT
3598/**
3599 * kernel_getsockopt - get a socket option (kernel space)
3600 * @sock: socket
3601 * @level: API level (SOL_SOCKET, ...)
3602 * @optname: option tag
3603 * @optval: option value
3604 * @optlen: option length
3605 *
3606 * Assigns the option length to @optlen.
3607 * Returns 0 or an error.
3608 */
3609
ac5a488e
SS
3610int kernel_getsockopt(struct socket *sock, int level, int optname,
3611 char *optval, int *optlen)
3612{
3613 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3614 char __user *uoptval;
3615 int __user *uoptlen;
ac5a488e
SS
3616 int err;
3617
fb8621bb
NK
3618 uoptval = (char __user __force *) optval;
3619 uoptlen = (int __user __force *) optlen;
3620
ac5a488e
SS
3621 set_fs(KERNEL_DS);
3622 if (level == SOL_SOCKET)
fb8621bb 3623 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3624 else
fb8621bb
NK
3625 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3626 uoptlen);
ac5a488e
SS
3627 set_fs(oldfs);
3628 return err;
3629}
c6d409cf 3630EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3631
8a3c245c
PT
3632/**
3633 * kernel_setsockopt - set a socket option (kernel space)
3634 * @sock: socket
3635 * @level: API level (SOL_SOCKET, ...)
3636 * @optname: option tag
3637 * @optval: option value
3638 * @optlen: option length
3639 *
3640 * Returns 0 or an error.
3641 */
3642
ac5a488e 3643int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3644 char *optval, unsigned int optlen)
ac5a488e
SS
3645{
3646 mm_segment_t oldfs = get_fs();
fb8621bb 3647 char __user *uoptval;
ac5a488e
SS
3648 int err;
3649
fb8621bb
NK
3650 uoptval = (char __user __force *) optval;
3651
ac5a488e
SS
3652 set_fs(KERNEL_DS);
3653 if (level == SOL_SOCKET)
fb8621bb 3654 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3655 else
fb8621bb 3656 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3657 optlen);
3658 set_fs(oldfs);
3659 return err;
3660}
c6d409cf 3661EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3662
8a3c245c
PT
3663/**
3664 * kernel_sendpage - send a &page through a socket (kernel space)
3665 * @sock: socket
3666 * @page: page
3667 * @offset: page offset
3668 * @size: total size in bytes
3669 * @flags: flags (MSG_DONTWAIT, ...)
3670 *
3671 * Returns the total amount sent in bytes or an error.
3672 */
3673
ac5a488e
SS
3674int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3675 size_t size, int flags)
3676{
3677 if (sock->ops->sendpage)
3678 return sock->ops->sendpage(sock, page, offset, size, flags);
3679
3680 return sock_no_sendpage(sock, page, offset, size, flags);
3681}
c6d409cf 3682EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3683
8a3c245c
PT
3684/**
3685 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3686 * @sk: sock
3687 * @page: page
3688 * @offset: page offset
3689 * @size: total size in bytes
3690 * @flags: flags (MSG_DONTWAIT, ...)
3691 *
3692 * Returns the total amount sent in bytes or an error.
3693 * Caller must hold @sk.
3694 */
3695
306b13eb
TH
3696int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3697 size_t size, int flags)
3698{
3699 struct socket *sock = sk->sk_socket;
3700
3701 if (sock->ops->sendpage_locked)
3702 return sock->ops->sendpage_locked(sk, page, offset, size,
3703 flags);
3704
3705 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3706}
3707EXPORT_SYMBOL(kernel_sendpage_locked);
3708
8a3c245c
PT
3709/**
3710 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3711 * @sock: socket
3712 * @how: connection part
3713 *
3714 * Returns 0 or an error.
3715 */
3716
91cf45f0
TM
3717int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3718{
3719 return sock->ops->shutdown(sock, how);
3720}
91cf45f0 3721EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3722
8a3c245c
PT
3723/**
3724 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3725 * @sk: socket
3726 *
3727 * This routine returns the IP overhead imposed by a socket i.e.
3728 * the length of the underlying IP header, depending on whether
3729 * this is an IPv4 or IPv6 socket and the length from IP options turned
3730 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3731 */
8a3c245c 3732
113c3075
P
3733u32 kernel_sock_ip_overhead(struct sock *sk)
3734{
3735 struct inet_sock *inet;
3736 struct ip_options_rcu *opt;
3737 u32 overhead = 0;
113c3075
P
3738#if IS_ENABLED(CONFIG_IPV6)
3739 struct ipv6_pinfo *np;
3740 struct ipv6_txoptions *optv6 = NULL;
3741#endif /* IS_ENABLED(CONFIG_IPV6) */
3742
3743 if (!sk)
3744 return overhead;
3745
113c3075
P
3746 switch (sk->sk_family) {
3747 case AF_INET:
3748 inet = inet_sk(sk);
3749 overhead += sizeof(struct iphdr);
3750 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3751 sock_owned_by_user(sk));
113c3075
P
3752 if (opt)
3753 overhead += opt->opt.optlen;
3754 return overhead;
3755#if IS_ENABLED(CONFIG_IPV6)
3756 case AF_INET6:
3757 np = inet6_sk(sk);
3758 overhead += sizeof(struct ipv6hdr);
3759 if (np)
3760 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3761 sock_owned_by_user(sk));
113c3075
P
3762 if (optv6)
3763 overhead += (optv6->opt_flen + optv6->opt_nflen);
3764 return overhead;
3765#endif /* IS_ENABLED(CONFIG_IPV6) */
3766 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3767 return overhead;
3768 }
3769}
3770EXPORT_SYMBOL(kernel_sock_ip_overhead);