]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
nfp: flower: use struct_size() helper
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
8c3c447b 93#include <linux/indirect_call_wrapper.h>
1da177e4 94
7c0f6ba6 95#include <linux/uaccess.h>
1da177e4
LT
96#include <asm/unistd.h>
97
98#include <net/compat.h>
87de87d5 99#include <net/wext.h>
f8451725 100#include <net/cls_cgroup.h>
1da177e4
LT
101
102#include <net/sock.h>
103#include <linux/netfilter.h>
104
6b96018b
AB
105#include <linux/if_tun.h>
106#include <linux/ipv6_route.h>
107#include <linux/route.h>
6b96018b 108#include <linux/sockios.h>
076bb0c8 109#include <net/busy_poll.h>
f24b9be5 110#include <linux/errqueue.h>
06021292 111
8c3c447b
PA
112/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
113#if IS_ENABLED(CONFIG_INET)
114#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
115#else
116#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
117#endif
118
e0d1095a 119#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
120unsigned int sysctl_net_busy_read __read_mostly;
121unsigned int sysctl_net_busy_poll __read_mostly;
06021292 122#endif
6b96018b 123
8ae5e030
AV
124static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
125static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 126static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
127
128static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
129static __poll_t sock_poll(struct file *file,
130 struct poll_table_struct *wait);
89bddce5 131static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
132#ifdef CONFIG_COMPAT
133static long compat_sock_ioctl(struct file *file,
89bddce5 134 unsigned int cmd, unsigned long arg);
89bbfc95 135#endif
1da177e4 136static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
137static ssize_t sock_sendpage(struct file *file, struct page *page,
138 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 139static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 140 struct pipe_inode_info *pipe, size_t len,
9c55e01c 141 unsigned int flags);
1da177e4 142
1da177e4
LT
143/*
144 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
145 * in the operation structures but are done directly via the socketcall() multiplexor.
146 */
147
da7071d7 148static const struct file_operations socket_file_ops = {
1da177e4
LT
149 .owner = THIS_MODULE,
150 .llseek = no_llseek,
8ae5e030
AV
151 .read_iter = sock_read_iter,
152 .write_iter = sock_write_iter,
1da177e4
LT
153 .poll = sock_poll,
154 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
155#ifdef CONFIG_COMPAT
156 .compat_ioctl = compat_sock_ioctl,
157#endif
1da177e4 158 .mmap = sock_mmap,
1da177e4
LT
159 .release = sock_close,
160 .fasync = sock_fasync,
5274f052
JA
161 .sendpage = sock_sendpage,
162 .splice_write = generic_splice_sendpage,
9c55e01c 163 .splice_read = sock_splice_read,
1da177e4
LT
164};
165
166/*
167 * The protocol list. Each protocol is registered in here.
168 */
169
1da177e4 170static DEFINE_SPINLOCK(net_family_lock);
190683a9 171static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 172
1da177e4 173/*
89bddce5
SH
174 * Support routines.
175 * Move socket addresses back and forth across the kernel/user
176 * divide and look after the messy bits.
1da177e4
LT
177 */
178
1da177e4
LT
179/**
180 * move_addr_to_kernel - copy a socket address into kernel space
181 * @uaddr: Address in user space
182 * @kaddr: Address in kernel space
183 * @ulen: Length in user space
184 *
185 * The address is copied into kernel space. If the provided address is
186 * too long an error code of -EINVAL is returned. If the copy gives
187 * invalid addresses -EFAULT is returned. On a success 0 is returned.
188 */
189
43db362d 190int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 191{
230b1839 192 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 193 return -EINVAL;
89bddce5 194 if (ulen == 0)
1da177e4 195 return 0;
89bddce5 196 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 197 return -EFAULT;
3ec3b2fb 198 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
199}
200
201/**
202 * move_addr_to_user - copy an address to user space
203 * @kaddr: kernel space address
204 * @klen: length of address in kernel
205 * @uaddr: user space address
206 * @ulen: pointer to user length field
207 *
208 * The value pointed to by ulen on entry is the buffer length available.
209 * This is overwritten with the buffer space used. -EINVAL is returned
210 * if an overlong buffer is specified or a negative buffer size. -EFAULT
211 * is returned if either the buffer or the length field are not
212 * accessible.
213 * After copying the data up to the limit the user specifies, the true
214 * length of the data is written over the length limit the user
215 * specified. Zero is returned for a success.
216 */
89bddce5 217
43db362d 218static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 219 void __user *uaddr, int __user *ulen)
1da177e4
LT
220{
221 int err;
222 int len;
223
68c6beb3 224 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
225 err = get_user(len, ulen);
226 if (err)
1da177e4 227 return err;
89bddce5
SH
228 if (len > klen)
229 len = klen;
68c6beb3 230 if (len < 0)
1da177e4 231 return -EINVAL;
89bddce5 232 if (len) {
d6fe3945
SG
233 if (audit_sockaddr(klen, kaddr))
234 return -ENOMEM;
89bddce5 235 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
236 return -EFAULT;
237 }
238 /*
89bddce5
SH
239 * "fromlen shall refer to the value before truncation.."
240 * 1003.1g
1da177e4
LT
241 */
242 return __put_user(klen, ulen);
243}
244
08009a76 245static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
246
247static struct inode *sock_alloc_inode(struct super_block *sb)
248{
249 struct socket_alloc *ei;
eaefd110 250 struct socket_wq *wq;
89bddce5 251
e94b1766 252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
253 if (!ei)
254 return NULL;
eaefd110
ED
255 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256 if (!wq) {
43815482
ED
257 kmem_cache_free(sock_inode_cachep, ei);
258 return NULL;
259 }
eaefd110
ED
260 init_waitqueue_head(&wq->wait);
261 wq->fasync_list = NULL;
574aab1e 262 wq->flags = 0;
e6476c21 263 ei->socket.wq = wq;
89bddce5 264
1da177e4
LT
265 ei->socket.state = SS_UNCONNECTED;
266 ei->socket.flags = 0;
267 ei->socket.ops = NULL;
268 ei->socket.sk = NULL;
269 ei->socket.file = NULL;
1da177e4
LT
270
271 return &ei->vfs_inode;
272}
273
274static void sock_destroy_inode(struct inode *inode)
275{
43815482
ED
276 struct socket_alloc *ei;
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 279 kfree_rcu(ei->socket.wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1e911632 290static void init_inodecache(void)
1da177e4
LT
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1e911632 299 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
300}
301
b87221de 302static const struct super_operations sockfs_ops = {
c6d409cf
ED
303 .alloc_inode = sock_alloc_inode,
304 .destroy_inode = sock_destroy_inode,
305 .statfs = simple_statfs,
1da177e4
LT
306};
307
c23fbb6b
ED
308/*
309 * sockfs_dname() is called from d_path().
310 */
311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
312{
313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 314 d_inode(dentry)->i_ino);
c23fbb6b
ED
315}
316
3ba13d17 317static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 318 .d_dname = sockfs_dname,
1da177e4
LT
319};
320
bba0bd31
AG
321static int sockfs_xattr_get(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, void *value, size_t size)
324{
325 if (value) {
326 if (dentry->d_name.len + 1 > size)
327 return -ERANGE;
328 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
329 }
330 return dentry->d_name.len + 1;
331}
332
333#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
334#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
335#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
336
337static const struct xattr_handler sockfs_xattr_handler = {
338 .name = XATTR_NAME_SOCKPROTONAME,
339 .get = sockfs_xattr_get,
340};
341
4a590153
AG
342static int sockfs_security_xattr_set(const struct xattr_handler *handler,
343 struct dentry *dentry, struct inode *inode,
344 const char *suffix, const void *value,
345 size_t size, int flags)
346{
347 /* Handled by LSM. */
348 return -EAGAIN;
349}
350
351static const struct xattr_handler sockfs_security_xattr_handler = {
352 .prefix = XATTR_SECURITY_PREFIX,
353 .set = sockfs_security_xattr_set,
354};
355
bba0bd31
AG
356static const struct xattr_handler *sockfs_xattr_handlers[] = {
357 &sockfs_xattr_handler,
4a590153 358 &sockfs_security_xattr_handler,
bba0bd31
AG
359 NULL
360};
361
c74a1cbb
AV
362static struct dentry *sockfs_mount(struct file_system_type *fs_type,
363 int flags, const char *dev_name, void *data)
364{
bba0bd31
AG
365 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
366 sockfs_xattr_handlers,
367 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
368}
369
370static struct vfsmount *sock_mnt __read_mostly;
371
372static struct file_system_type sock_fs_type = {
373 .name = "sockfs",
374 .mount = sockfs_mount,
375 .kill_sb = kill_anon_super,
376};
377
1da177e4
LT
378/*
379 * Obtains the first available file descriptor and sets it up for use.
380 *
39d8c1b6
DM
381 * These functions create file structures and map them to the fd space
382 * of the current process. On success it returns the file descriptor
1da177e4
LT
383 * and file struct implicitly stored in sock->file.
384 * Note that another thread may close file descriptor before we return
385 * from this function. We use the fact that now we do not refer
386 * to socket after mapping. If one day we will need it, this
387 * function will increment ref. count on file by 1.
388 *
389 * In any case returned fd MAY BE not valid!
390 * This race condition is unavoidable
391 * with shared fd spaces, we cannot solve it inside kernel,
392 * but we take care of internal coherence yet.
393 */
394
8a3c245c
PT
395/**
396 * sock_alloc_file - Bind a &socket to a &file
397 * @sock: socket
398 * @flags: file status flags
399 * @dname: protocol name
400 *
401 * Returns the &file bound with @sock, implicitly storing it
402 * in sock->file. If dname is %NULL, sets to "".
403 * On failure the return is a ERR pointer (see linux/err.h).
404 * This function uses GFP_KERNEL internally.
405 */
406
aab174f0 407struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 408{
7cbe66b6 409 struct file *file;
1da177e4 410
d93aa9d8
AV
411 if (!dname)
412 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 413
d93aa9d8
AV
414 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
415 O_RDWR | (flags & O_NONBLOCK),
416 &socket_file_ops);
b5ffe634 417 if (IS_ERR(file)) {
8e1611e2 418 sock_release(sock);
39b65252 419 return file;
cc3808f8
AV
420 }
421
422 sock->file = file;
39d8c1b6 423 file->private_data = sock;
28407630 424 return file;
39d8c1b6 425}
56b31d1c 426EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 427
/*
 * Reserve a file descriptor, create the file for @sock and install it.
 * Returns the new descriptor.  If no descriptor is available @sock is
 * released and the negative value from get_unused_fd_flags() is returned.
 * If sock_alloc_file() fails, the reserved descriptor is given back and
 * the error encoded in the returned pointer is returned.
 */
static int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = get_unused_fd_flags(flags);
	if (unlikely(fd < 0)) {
		sock_release(sock);
		return fd;
	}

	newfile = sock_alloc_file(sock, flags, NULL);
	if (unlikely(IS_ERR(newfile))) {
		put_unused_fd(fd);
		return PTR_ERR(newfile);
	}

	fd_install(fd, newfile);
	return fd;
}
446
8a3c245c
PT
447/**
448 * sock_from_file - Return the &socket bounded to @file.
449 * @file: file
450 * @err: pointer to an error code return
451 *
452 * On failure returns %NULL and assigns -ENOTSOCK to @err.
453 */
454
406a3c63 455struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 456{
6cb153ca
BL
457 if (file->f_op == &socket_file_ops)
458 return file->private_data; /* set in sock_map_fd */
459
23bb80d2
ED
460 *err = -ENOTSOCK;
461 return NULL;
6cb153ca 462}
406a3c63 463EXPORT_SYMBOL(sock_from_file);
6cb153ca 464
1da177e4 465/**
c6d409cf 466 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
467 * @fd: file handle
468 * @err: pointer to an error code return
469 *
470 * The file handle passed in is locked and the socket it is bound
241c4667 471 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
472 * with a negative errno code and NULL is returned. The function checks
473 * for both invalid handles and passing a handle which is not a socket.
474 *
475 * On a success the socket object pointer is returned.
476 */
477
478struct socket *sockfd_lookup(int fd, int *err)
479{
480 struct file *file;
1da177e4
LT
481 struct socket *sock;
482
89bddce5
SH
483 file = fget(fd);
484 if (!file) {
1da177e4
LT
485 *err = -EBADF;
486 return NULL;
487 }
89bddce5 488
6cb153ca
BL
489 sock = sock_from_file(file, err);
490 if (!sock)
1da177e4 491 fput(file);
6cb153ca
BL
492 return sock;
493}
c6d409cf 494EXPORT_SYMBOL(sockfd_lookup);
1da177e4 495
6cb153ca
BL
496static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
497{
00e188ef 498 struct fd f = fdget(fd);
6cb153ca
BL
499 struct socket *sock;
500
3672558c 501 *err = -EBADF;
00e188ef
AV
502 if (f.file) {
503 sock = sock_from_file(f.file, err);
504 if (likely(sock)) {
505 *fput_needed = f.flags;
6cb153ca 506 return sock;
00e188ef
AV
507 }
508 fdput(f);
1da177e4 509 }
6cb153ca 510 return NULL;
1da177e4
LT
511}
512
600e1779
MY
513static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
514 size_t size)
515{
516 ssize_t len;
517 ssize_t used = 0;
518
c5ef6035 519 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
520 if (len < 0)
521 return len;
522 used += len;
523 if (buffer) {
524 if (size < used)
525 return -ERANGE;
526 buffer += len;
527 }
528
529 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
530 used += len;
531 if (buffer) {
532 if (size < used)
533 return -ERANGE;
534 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
535 buffer += len;
536 }
537
538 return used;
539}
540
dc647ec8 541static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
542{
543 int err = simple_setattr(dentry, iattr);
544
e1a3a60a 545 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
546 struct socket *sock = SOCKET_I(d_inode(dentry));
547
6d8c50dc
CW
548 if (sock->sk)
549 sock->sk->sk_uid = iattr->ia_uid;
550 else
551 err = -ENOENT;
86741ec2
LC
552 }
553
554 return err;
555}
556
600e1779 557static const struct inode_operations sockfs_inode_ops = {
600e1779 558 .listxattr = sockfs_listxattr,
86741ec2 559 .setattr = sockfs_setattr,
600e1779
MY
560};
561
1da177e4 562/**
8a3c245c 563 * sock_alloc - allocate a socket
89bddce5 564 *
1da177e4
LT
565 * Allocate a new inode and socket object. The two are bound together
566 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 567 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
568 */
569
f4a00aac 570struct socket *sock_alloc(void)
1da177e4 571{
89bddce5
SH
572 struct inode *inode;
573 struct socket *sock;
1da177e4 574
a209dfc7 575 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
576 if (!inode)
577 return NULL;
578
579 sock = SOCKET_I(inode);
580
85fe4025 581 inode->i_ino = get_next_ino();
89bddce5 582 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
583 inode->i_uid = current_fsuid();
584 inode->i_gid = current_fsgid();
600e1779 585 inode->i_op = &sockfs_inode_ops;
1da177e4 586
1da177e4
LT
587 return sock;
588}
f4a00aac 589EXPORT_SYMBOL(sock_alloc);
1da177e4 590
1da177e4 591/**
8a3c245c 592 * sock_release - close a socket
1da177e4
LT
593 * @sock: socket to close
594 *
595 * The socket is released from the protocol stack if it has a release
596 * callback, and the inode is then released if the socket is bound to
89bddce5 597 * an inode not a file.
1da177e4 598 */
89bddce5 599
6d8c50dc 600static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
601{
602 if (sock->ops) {
603 struct module *owner = sock->ops->owner;
604
6d8c50dc
CW
605 if (inode)
606 inode_lock(inode);
1da177e4 607 sock->ops->release(sock);
ff7b11aa 608 sock->sk = NULL;
6d8c50dc
CW
609 if (inode)
610 inode_unlock(inode);
1da177e4
LT
611 sock->ops = NULL;
612 module_put(owner);
613 }
614
e6476c21 615 if (sock->wq->fasync_list)
3410f22e 616 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 617
1da177e4
LT
618 if (!sock->file) {
619 iput(SOCK_INODE(sock));
620 return;
621 }
89bddce5 622 sock->file = NULL;
1da177e4 623}
6d8c50dc
CW
624
625void sock_release(struct socket *sock)
626{
627 __sock_release(sock, NULL);
628}
c6d409cf 629EXPORT_SYMBOL(sock_release);
1da177e4 630
c14ac945 631void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 632{
140c55d4
ED
633 u8 flags = *tx_flags;
634
c14ac945 635 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
636 flags |= SKBTX_HW_TSTAMP;
637
c14ac945 638 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
639 flags |= SKBTX_SW_TSTAMP;
640
c14ac945 641 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
642 flags |= SKBTX_SCHED_TSTAMP;
643
140c55d4 644 *tx_flags = flags;
20d49473 645}
67cc0d40 646EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 647
8c3c447b
PA
648INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
649 size_t));
d8725c86 650static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 651{
8c3c447b
PA
652 int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
653 msg, msg_data_left(msg));
d8725c86
AV
654 BUG_ON(ret == -EIOCBQUEUED);
655 return ret;
1da177e4
LT
656}
657
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM.  If the security
 *	hook returns non-zero, that value is returned and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 674
8a3c245c
PT
675/**
676 * kernel_sendmsg - send a message through @sock (kernel-space)
677 * @sock: socket
678 * @msg: message header
679 * @vec: kernel vec
680 * @num: vec array length
681 * @size: total message data size
682 *
683 * Builds the message data with @vec and sends it through @sock.
684 * Returns the number of bytes sent, or an error code.
685 */
686
1da177e4
LT
687int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
688 struct kvec *vec, size_t num, size_t size)
689{
aa563d7b 690 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 691 return sock_sendmsg(sock, msg);
1da177e4 692}
c6d409cf 693EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 694
8a3c245c
PT
695/**
696 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
697 * @sk: sock
698 * @msg: message header
699 * @vec: output s/g array
700 * @num: output s/g array length
701 * @size: total message data size
702 *
703 * Builds the message data with @vec and sends it through @sock.
704 * Returns the number of bytes sent, or an error code.
705 * Caller must hold @sk.
706 */
707
306b13eb
TH
708int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
709 struct kvec *vec, size_t num, size_t size)
710{
711 struct socket *sock = sk->sk_socket;
712
713 if (!sock->ops->sendmsg_locked)
db5980d8 714 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 715
aa563d7b 716 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
717
718 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
719}
720EXPORT_SYMBOL(kernel_sendmsg_locked);
721
8605330a
SHY
722static bool skb_is_err_queue(const struct sk_buff *skb)
723{
724 /* pkt_type of skbs enqueued on the error queue are set to
725 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
726 * in recvmsg, since skbs received on a local socket will never
727 * have a pkt_type of PACKET_OUTGOING.
728 */
729 return skb->pkt_type == PACKET_OUTGOING;
730}
731
b50a5c70
ML
732/* On transmit, software and hardware timestamps are returned independently.
733 * As the two skb clones share the hardware timestamp, which may be updated
734 * before the software timestamp is received, a hardware TX timestamp may be
735 * returned only if there is no software TX timestamp. Ignore false software
736 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 737 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
738 * hardware timestamp.
739 */
740static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
741{
742 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
743}
744
aad9c8c4
ML
745static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
746{
747 struct scm_ts_pktinfo ts_pktinfo;
748 struct net_device *orig_dev;
749
750 if (!skb_mac_header_was_set(skb))
751 return;
752
753 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
754
755 rcu_read_lock();
756 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
757 if (orig_dev)
758 ts_pktinfo.if_index = orig_dev->ifindex;
759 rcu_read_unlock();
760
761 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
762 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
763 sizeof(ts_pktinfo), &ts_pktinfo);
764}
765
92f37fd2
ED
766/*
767 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
768 */
769void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
770 struct sk_buff *skb)
771{
20d49473 772 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 773 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
774 struct scm_timestamping_internal tss;
775
b50a5c70 776 int empty = 1, false_tstamp = 0;
20d49473
PO
777 struct skb_shared_hwtstamps *shhwtstamps =
778 skb_hwtstamps(skb);
779
780 /* Race occurred between timestamp enabling and packet
781 receiving. Fill in the current time for now. */
b50a5c70 782 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 783 __net_timestamp(skb);
b50a5c70
ML
784 false_tstamp = 1;
785 }
20d49473
PO
786
787 if (need_software_tstamp) {
788 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
789 if (new_tstamp) {
790 struct __kernel_sock_timeval tv;
791
792 skb_get_new_timestamp(skb, &tv);
793 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
794 sizeof(tv), &tv);
795 } else {
796 struct __kernel_old_timeval tv;
797
798 skb_get_timestamp(skb, &tv);
799 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
800 sizeof(tv), &tv);
801 }
20d49473 802 } else {
887feae3
DD
803 if (new_tstamp) {
804 struct __kernel_timespec ts;
805
806 skb_get_new_timestampns(skb, &ts);
807 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
808 sizeof(ts), &ts);
809 } else {
810 struct timespec ts;
811
812 skb_get_timestampns(skb, &ts);
813 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
814 sizeof(ts), &ts);
815 }
20d49473
PO
816 }
817 }
818
f24b9be5 819 memset(&tss, 0, sizeof(tss));
c199105d 820 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 821 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 822 empty = 0;
4d276eb6 823 if (shhwtstamps &&
b9f40e21 824 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 825 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 826 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 827 empty = 0;
aad9c8c4
ML
828 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
829 !skb_is_err_queue(skb))
830 put_ts_pktinfo(msg, skb);
831 }
1c885808 832 if (!empty) {
9718475e
DD
833 if (sock_flag(sk, SOCK_TSTAMP_NEW))
834 put_cmsg_scm_timestamping64(msg, &tss);
835 else
836 put_cmsg_scm_timestamping(msg, &tss);
1c885808 837
8605330a 838 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 839 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
840 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
841 skb->len, skb->data);
842 }
92f37fd2 843}
7c81fd8b
ACM
844EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
845
6e3e939f
JB
846void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
847 struct sk_buff *skb)
848{
849 int ack;
850
851 if (!sock_flag(sk, SOCK_WIFI_STATUS))
852 return;
853 if (!skb->wifi_acked_valid)
854 return;
855
856 ack = skb->wifi_acked;
857
858 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
859}
860EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
861
11165f14 862static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
863 struct sk_buff *skb)
3b885787 864{
744d5a3e 865 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 866 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 867 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
868}
869
/*
 * Emit the receive-side ancillary data for @skb: timestamps first, then
 * the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 877
8c3c447b
PA
878INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
879 size_t , int ));
1b784140 880static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 881 int flags)
1da177e4 882{
8c3c447b
PA
883 return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
884 msg_data_left(msg), flags);
1da177e4
LT
885}
886
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM.  If the security
 *	hook returns non-zero, that value is returned and nothing is
 *	received.  Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 903
c1249c0a 904/**
8a3c245c
PT
905 * kernel_recvmsg - Receive a message from a socket (kernel space)
906 * @sock: The socket to receive the message from
907 * @msg: Received message
908 * @vec: Input s/g array for message data
909 * @num: Size of input s/g array
910 * @size: Number of bytes to read
911 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 912 *
8a3c245c
PT
913 * On return the msg structure contains the scatter/gather array passed in the
914 * vec argument. The array is modified so that it consists of the unfilled
915 * portion of the original array.
c1249c0a 916 *
8a3c245c 917 * The returned value is the total number of bytes received, or an error.
c1249c0a 918 */
8a3c245c 919
89bddce5
SH
920int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
921 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
922{
923 mm_segment_t oldfs = get_fs();
924 int result;
925
aa563d7b 926 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 927 set_fs(KERNEL_DS);
2da62906 928 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
929 set_fs(oldfs);
930 return result;
931}
c6d409cf 932EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 933
ce1d4d3e
CH
934static ssize_t sock_sendpage(struct file *file, struct page *page,
935 int offset, size_t size, loff_t *ppos, int more)
1da177e4 936{
1da177e4
LT
937 struct socket *sock;
938 int flags;
939
ce1d4d3e
CH
940 sock = file->private_data;
941
35f9c09f
ED
942 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
943 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
944 flags |= more;
ce1d4d3e 945
e6949583 946 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 947}
1da177e4 948
9c55e01c 949static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 950 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
951 unsigned int flags)
952{
953 struct socket *sock = file->private_data;
954
997b37da 955 if (unlikely(!sock->ops->splice_read))
95506588 956 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 957
9c55e01c
JA
958 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
959}
960
8ae5e030 961static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 962{
6d652330
AV
963 struct file *file = iocb->ki_filp;
964 struct socket *sock = file->private_data;
0345f931 965 struct msghdr msg = {.msg_iter = *to,
966 .msg_iocb = iocb};
8ae5e030 967 ssize_t res;
ce1d4d3e 968
8ae5e030
AV
969 if (file->f_flags & O_NONBLOCK)
970 msg.msg_flags = MSG_DONTWAIT;
971
972 if (iocb->ki_pos != 0)
1da177e4 973 return -ESPIPE;
027445c3 974
66ee59af 975 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
976 return 0;
977
2da62906 978 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
979 *to = msg.msg_iter;
980 return res;
1da177e4
LT
981}
982
8ae5e030 983static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 984{
6d652330
AV
985 struct file *file = iocb->ki_filp;
986 struct socket *sock = file->private_data;
0345f931 987 struct msghdr msg = {.msg_iter = *from,
988 .msg_iocb = iocb};
8ae5e030 989 ssize_t res;
1da177e4 990
8ae5e030 991 if (iocb->ki_pos != 0)
ce1d4d3e 992 return -ESPIPE;
027445c3 993
8ae5e030
AV
994 if (file->f_flags & O_NONBLOCK)
995 msg.msg_flags = MSG_DONTWAIT;
996
6d652330
AV
997 if (sock->type == SOCK_SEQPACKET)
998 msg.msg_flags |= MSG_EOR;
999
d8725c86 1000 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1001 *from = msg.msg_iter;
1002 return res;
1da177e4
LT
1003}
1004
1da177e4
LT
1005/*
1006 * Atomic setting of ioctl hooks to avoid race
1007 * with module unload.
1008 */
1009
4a3e2f71 1010static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1011static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1012
881d966b 1013void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1014{
4a3e2f71 1015 mutex_lock(&br_ioctl_mutex);
1da177e4 1016 br_ioctl_hook = hook;
4a3e2f71 1017 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1018}
1019EXPORT_SYMBOL(brioctl_set);
1020
4a3e2f71 1021static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1022static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1023
881d966b 1024void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1025{
4a3e2f71 1026 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1027 vlan_ioctl_hook = hook;
4a3e2f71 1028 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1029}
1030EXPORT_SYMBOL(vlan_ioctl_set);
1031
4a3e2f71 1032static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1033static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1034
89bddce5 1035void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1036{
4a3e2f71 1037 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1038 dlci_ioctl_hook = hook;
4a3e2f71 1039 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1040}
1041EXPORT_SYMBOL(dlci_ioctl_set);
1042
6b96018b 1043static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1044 unsigned int cmd, unsigned long arg)
6b96018b
AB
1045{
1046 int err;
1047 void __user *argp = (void __user *)arg;
1048
1049 err = sock->ops->ioctl(sock, cmd, arg);
1050
1051 /*
1052 * If this ioctl is unknown try to hand it down
1053 * to the NIC driver.
1054 */
36fd633e
AV
1055 if (err != -ENOIOCTLCMD)
1056 return err;
6b96018b 1057
36fd633e
AV
1058 if (cmd == SIOCGIFCONF) {
1059 struct ifconf ifc;
1060 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1061 return -EFAULT;
1062 rtnl_lock();
1063 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1064 rtnl_unlock();
1065 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1066 err = -EFAULT;
44c02a2c
AV
1067 } else {
1068 struct ifreq ifr;
1069 bool need_copyout;
63ff03ab 1070 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1071 return -EFAULT;
1072 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1073 if (!err && need_copyout)
63ff03ab 1074 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1075 return -EFAULT;
36fd633e 1076 }
6b96018b
AB
1077 return err;
1078}
1079
1da177e4
LT
1080/*
1081 * With an ioctl, arg may well be a user mode pointer, but we don't know
1082 * what to do with it - that's up to the protocol still.
1083 */
1084
8a3c245c
PT
1085/**
1086 * get_net_ns - increment the refcount of the network namespace
1087 * @ns: common namespace (net)
1088 *
1089 * Returns the net's common namespace.
1090 */
1091
d8d211a2 1092struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1093{
1094 return &get_net(container_of(ns, struct net, ns))->ns;
1095}
d8d211a2 1096EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1097
1da177e4
LT
1098static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1099{
1100 struct socket *sock;
881d966b 1101 struct sock *sk;
1da177e4
LT
1102 void __user *argp = (void __user *)arg;
1103 int pid, err;
881d966b 1104 struct net *net;
1da177e4 1105
b69aee04 1106 sock = file->private_data;
881d966b 1107 sk = sock->sk;
3b1e0a65 1108 net = sock_net(sk);
44c02a2c
AV
1109 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1110 struct ifreq ifr;
1111 bool need_copyout;
1112 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1113 return -EFAULT;
1114 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1115 if (!err && need_copyout)
1116 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1117 return -EFAULT;
1da177e4 1118 } else
3d23e349 1119#ifdef CONFIG_WEXT_CORE
1da177e4 1120 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1121 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1122 } else
3d23e349 1123#endif
89bddce5 1124 switch (cmd) {
1da177e4
LT
1125 case FIOSETOWN:
1126 case SIOCSPGRP:
1127 err = -EFAULT;
1128 if (get_user(pid, (int __user *)argp))
1129 break;
393cc3f5 1130 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1131 break;
1132 case FIOGETOWN:
1133 case SIOCGPGRP:
609d7fa9 1134 err = put_user(f_getown(sock->file),
89bddce5 1135 (int __user *)argp);
1da177e4
LT
1136 break;
1137 case SIOCGIFBR:
1138 case SIOCSIFBR:
1139 case SIOCBRADDBR:
1140 case SIOCBRDELBR:
1141 err = -ENOPKG;
1142 if (!br_ioctl_hook)
1143 request_module("bridge");
1144
4a3e2f71 1145 mutex_lock(&br_ioctl_mutex);
89bddce5 1146 if (br_ioctl_hook)
881d966b 1147 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1148 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1149 break;
1150 case SIOCGIFVLAN:
1151 case SIOCSIFVLAN:
1152 err = -ENOPKG;
1153 if (!vlan_ioctl_hook)
1154 request_module("8021q");
1155
4a3e2f71 1156 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1157 if (vlan_ioctl_hook)
881d966b 1158 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1159 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1160 break;
1da177e4
LT
1161 case SIOCADDDLCI:
1162 case SIOCDELDLCI:
1163 err = -ENOPKG;
1164 if (!dlci_ioctl_hook)
1165 request_module("dlci");
1166
7512cbf6
PE
1167 mutex_lock(&dlci_ioctl_mutex);
1168 if (dlci_ioctl_hook)
1da177e4 1169 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1170 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1171 break;
c62cce2c
AV
1172 case SIOCGSKNS:
1173 err = -EPERM;
1174 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1175 break;
1176
1177 err = open_related_ns(&net->ns, get_net_ns);
1178 break;
0768e170
AB
1179 case SIOCGSTAMP_OLD:
1180 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1181 if (!sock->ops->gettstamp) {
1182 err = -ENOIOCTLCMD;
1183 break;
1184 }
1185 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1186 cmd == SIOCGSTAMP_OLD,
1187 !IS_ENABLED(CONFIG_64BIT));
60747828 1188 break;
0768e170
AB
1189 case SIOCGSTAMP_NEW:
1190 case SIOCGSTAMPNS_NEW:
1191 if (!sock->ops->gettstamp) {
1192 err = -ENOIOCTLCMD;
1193 break;
1194 }
1195 err = sock->ops->gettstamp(sock, argp,
1196 cmd == SIOCGSTAMP_NEW,
1197 false);
c7cbdbf2 1198 break;
1da177e4 1199 default:
63ff03ab 1200 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1201 break;
89bddce5 1202 }
1da177e4
LT
1203 return err;
1204}
1205
8a3c245c
PT
1206/**
1207 * sock_create_lite - creates a socket
1208 * @family: protocol family (AF_INET, ...)
1209 * @type: communication type (SOCK_STREAM, ...)
1210 * @protocol: protocol (0, ...)
1211 * @res: new socket
1212 *
1213 * Creates a new socket and assigns it to @res, passing through LSM.
1214 * The new socket initialization is not complete, see kernel_accept().
1215 * Returns 0 or an error. On failure @res is set to %NULL.
1216 * This function internally uses GFP_KERNEL.
1217 */
1218
1da177e4
LT
1219int sock_create_lite(int family, int type, int protocol, struct socket **res)
1220{
1221 int err;
1222 struct socket *sock = NULL;
89bddce5 1223
1da177e4
LT
1224 err = security_socket_create(family, type, protocol, 1);
1225 if (err)
1226 goto out;
1227
1228 sock = sock_alloc();
1229 if (!sock) {
1230 err = -ENOMEM;
1231 goto out;
1232 }
1233
1da177e4 1234 sock->type = type;
7420ed23
VY
1235 err = security_socket_post_create(sock, family, type, protocol, 1);
1236 if (err)
1237 goto out_release;
1238
1da177e4
LT
1239out:
1240 *res = sock;
1241 return err;
7420ed23
VY
1242out_release:
1243 sock_release(sock);
1244 sock = NULL;
1245 goto out;
1da177e4 1246}
c6d409cf 1247EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1248
1249/* No kernel lock held - perfect */
ade994f4 1250static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1251{
3cafb376 1252 struct socket *sock = file->private_data;
a331de3b 1253 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1254
e88958e6
CH
1255 if (!sock->ops->poll)
1256 return 0;
f641f13b 1257
a331de3b
CH
1258 if (sk_can_busy_loop(sock->sk)) {
1259 /* poll once if requested by the syscall */
1260 if (events & POLL_BUSY_LOOP)
1261 sk_busy_loop(sock->sk, 1);
1262
1263 /* if this socket can poll_ll, tell the system call */
1264 flag = POLL_BUSY_LOOP;
1265 }
1266
1267 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1268}
1269
89bddce5 1270static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1271{
b69aee04 1272 struct socket *sock = file->private_data;
1da177e4
LT
1273
1274 return sock->ops->mmap(file, sock, vma);
1275}
1276
/* Release the socket embedded in @inode; always reports success. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1282
1283/*
1284 * Update the socket async list
1285 *
1286 * Fasync_list locking strategy.
1287 *
1288 * 1. fasync_list is modified only under process context socket lock
1289 * i.e. under semaphore.
1290 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1291 * or under socket lock
1da177e4
LT
1292 */
1293
1294static int sock_fasync(int fd, struct file *filp, int on)
1295{
989a2979
ED
1296 struct socket *sock = filp->private_data;
1297 struct sock *sk = sock->sk;
eaefd110 1298 struct socket_wq *wq;
1da177e4 1299
989a2979 1300 if (sk == NULL)
1da177e4 1301 return -EINVAL;
1da177e4
LT
1302
1303 lock_sock(sk);
e6476c21 1304 wq = sock->wq;
eaefd110 1305 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1306
eaefd110 1307 if (!wq->fasync_list)
989a2979
ED
1308 sock_reset_flag(sk, SOCK_FASYNC);
1309 else
bcdce719 1310 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1311
989a2979 1312 release_sock(sk);
1da177e4
LT
1313 return 0;
1314}
1315
ceb5d58b 1316/* This function may be called only under rcu_lock */
1da177e4 1317
ceb5d58b 1318int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1319{
ceb5d58b 1320 if (!wq || !wq->fasync_list)
1da177e4 1321 return -1;
ceb5d58b 1322
89bddce5 1323 switch (how) {
8d8ad9d7 1324 case SOCK_WAKE_WAITD:
ceb5d58b 1325 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1326 break;
1327 goto call_kill;
8d8ad9d7 1328 case SOCK_WAKE_SPACE:
ceb5d58b 1329 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1330 break;
1331 /* fall through */
8d8ad9d7 1332 case SOCK_WAKE_IO:
89bddce5 1333call_kill:
43815482 1334 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1335 break;
8d8ad9d7 1336 case SOCK_WAKE_URG:
43815482 1337 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1338 }
ceb5d58b 1339
1da177e4
LT
1340 return 0;
1341}
c6d409cf 1342EXPORT_SYMBOL(sock_wake_async);
1da177e4 1343
8a3c245c
PT
1344/**
1345 * __sock_create - creates a socket
1346 * @net: net namespace
1347 * @family: protocol family (AF_INET, ...)
1348 * @type: communication type (SOCK_STREAM, ...)
1349 * @protocol: protocol (0, ...)
1350 * @res: new socket
1351 * @kern: boolean for kernel space sockets
1352 *
1353 * Creates a new socket and assigns it to @res, passing through LSM.
1354 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1355 * be set to true if the socket resides in kernel space.
1356 * This function internally uses GFP_KERNEL.
1357 */
1358
721db93a 1359int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1360 struct socket **res, int kern)
1da177e4
LT
1361{
1362 int err;
1363 struct socket *sock;
55737fda 1364 const struct net_proto_family *pf;
1da177e4
LT
1365
1366 /*
89bddce5 1367 * Check protocol is in range
1da177e4
LT
1368 */
1369 if (family < 0 || family >= NPROTO)
1370 return -EAFNOSUPPORT;
1371 if (type < 0 || type >= SOCK_MAX)
1372 return -EINVAL;
1373
1374 /* Compatibility.
1375
1376 This uglymoron is moved from INET layer to here to avoid
1377 deadlock in module load.
1378 */
1379 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1380 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1381 current->comm);
1da177e4
LT
1382 family = PF_PACKET;
1383 }
1384
1385 err = security_socket_create(family, type, protocol, kern);
1386 if (err)
1387 return err;
89bddce5 1388
55737fda
SH
1389 /*
1390 * Allocate the socket and allow the family to set things up. if
1391 * the protocol is 0, the family is instructed to select an appropriate
1392 * default.
1393 */
1394 sock = sock_alloc();
1395 if (!sock) {
e87cc472 1396 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1397 return -ENFILE; /* Not exactly a match, but its the
1398 closest posix thing */
1399 }
1400
1401 sock->type = type;
1402
95a5afca 1403#ifdef CONFIG_MODULES
89bddce5
SH
1404 /* Attempt to load a protocol module if the find failed.
1405 *
1406 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1407 * requested real, full-featured networking support upon configuration.
1408 * Otherwise module support will break!
1409 */
190683a9 1410 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1411 request_module("net-pf-%d", family);
1da177e4
LT
1412#endif
1413
55737fda
SH
1414 rcu_read_lock();
1415 pf = rcu_dereference(net_families[family]);
1416 err = -EAFNOSUPPORT;
1417 if (!pf)
1418 goto out_release;
1da177e4
LT
1419
1420 /*
1421 * We will call the ->create function, that possibly is in a loadable
1422 * module, so we have to bump that loadable module refcnt first.
1423 */
55737fda 1424 if (!try_module_get(pf->owner))
1da177e4
LT
1425 goto out_release;
1426
55737fda
SH
1427 /* Now protected by module ref count */
1428 rcu_read_unlock();
1429
3f378b68 1430 err = pf->create(net, sock, protocol, kern);
55737fda 1431 if (err < 0)
1da177e4 1432 goto out_module_put;
a79af59e 1433
1da177e4
LT
1434 /*
1435 * Now to bump the refcnt of the [loadable] module that owns this
1436 * socket at sock_release time we decrement its refcnt.
1437 */
55737fda
SH
1438 if (!try_module_get(sock->ops->owner))
1439 goto out_module_busy;
1440
1da177e4
LT
1441 /*
1442 * Now that we're done with the ->create function, the [loadable]
1443 * module can have its refcnt decremented
1444 */
55737fda 1445 module_put(pf->owner);
7420ed23
VY
1446 err = security_socket_post_create(sock, family, type, protocol, kern);
1447 if (err)
3b185525 1448 goto out_sock_release;
55737fda 1449 *res = sock;
1da177e4 1450
55737fda
SH
1451 return 0;
1452
1453out_module_busy:
1454 err = -EAFNOSUPPORT;
1da177e4 1455out_module_put:
55737fda
SH
1456 sock->ops = NULL;
1457 module_put(pf->owner);
1458out_sock_release:
1da177e4 1459 sock_release(sock);
55737fda
SH
1460 return err;
1461
1462out_release:
1463 rcu_read_unlock();
1464 goto out_sock_release;
1da177e4 1465}
721db93a 1466EXPORT_SYMBOL(__sock_create);
1da177e4 1467
8a3c245c
PT
1468/**
1469 * sock_create - creates a socket
1470 * @family: protocol family (AF_INET, ...)
1471 * @type: communication type (SOCK_STREAM, ...)
1472 * @protocol: protocol (0, ...)
1473 * @res: new socket
1474 *
1475 * A wrapper around __sock_create().
1476 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1477 */
1478
1da177e4
LT
1479int sock_create(int family, int type, int protocol, struct socket **res)
1480{
1b8d7ae4 1481 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1482}
c6d409cf 1483EXPORT_SYMBOL(sock_create);
1da177e4 1484
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern as 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1502
9d6a15c3 1503int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1504{
1505 int retval;
1506 struct socket *sock;
a677a039
UD
1507 int flags;
1508
e38b36f3
UD
1509 /* Check the SOCK_* constants for consistency. */
1510 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1511 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1512 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1513 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1514
a677a039 1515 flags = type & ~SOCK_TYPE_MASK;
77d27200 1516 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1517 return -EINVAL;
1518 type &= SOCK_TYPE_MASK;
1da177e4 1519
aaca0bdc
UD
1520 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1521 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1522
1da177e4
LT
1523 retval = sock_create(family, type, protocol, &sock);
1524 if (retval < 0)
8e1611e2 1525 return retval;
1da177e4 1526
8e1611e2 1527 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1528}
1529
9d6a15c3
DB
1530SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1531{
1532 return __sys_socket(family, type, protocol);
1533}
1534
1da177e4
LT
1535/*
1536 * Create a pair of connected sockets.
1537 */
1538
6debc8d8 1539int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1540{
1541 struct socket *sock1, *sock2;
1542 int fd1, fd2, err;
db349509 1543 struct file *newfile1, *newfile2;
a677a039
UD
1544 int flags;
1545
1546 flags = type & ~SOCK_TYPE_MASK;
77d27200 1547 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1548 return -EINVAL;
1549 type &= SOCK_TYPE_MASK;
1da177e4 1550
aaca0bdc
UD
1551 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1552 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1553
016a266b
AV
1554 /*
1555 * reserve descriptors and make sure we won't fail
1556 * to return them to userland.
1557 */
1558 fd1 = get_unused_fd_flags(flags);
1559 if (unlikely(fd1 < 0))
1560 return fd1;
1561
1562 fd2 = get_unused_fd_flags(flags);
1563 if (unlikely(fd2 < 0)) {
1564 put_unused_fd(fd1);
1565 return fd2;
1566 }
1567
1568 err = put_user(fd1, &usockvec[0]);
1569 if (err)
1570 goto out;
1571
1572 err = put_user(fd2, &usockvec[1]);
1573 if (err)
1574 goto out;
1575
1da177e4
LT
1576 /*
1577 * Obtain the first socket and check if the underlying protocol
1578 * supports the socketpair call.
1579 */
1580
1581 err = sock_create(family, type, protocol, &sock1);
016a266b 1582 if (unlikely(err < 0))
1da177e4
LT
1583 goto out;
1584
1585 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1586 if (unlikely(err < 0)) {
1587 sock_release(sock1);
1588 goto out;
bf3c23d1 1589 }
d73aa286 1590
d47cd945
DH
1591 err = security_socket_socketpair(sock1, sock2);
1592 if (unlikely(err)) {
1593 sock_release(sock2);
1594 sock_release(sock1);
1595 goto out;
1596 }
1597
016a266b
AV
1598 err = sock1->ops->socketpair(sock1, sock2);
1599 if (unlikely(err < 0)) {
1600 sock_release(sock2);
1601 sock_release(sock1);
1602 goto out;
28407630
AV
1603 }
1604
aab174f0 1605 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1606 if (IS_ERR(newfile1)) {
28407630 1607 err = PTR_ERR(newfile1);
016a266b
AV
1608 sock_release(sock2);
1609 goto out;
28407630
AV
1610 }
1611
aab174f0 1612 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1613 if (IS_ERR(newfile2)) {
1614 err = PTR_ERR(newfile2);
016a266b
AV
1615 fput(newfile1);
1616 goto out;
db349509
AV
1617 }
1618
157cf649 1619 audit_fd_pair(fd1, fd2);
d73aa286 1620
db349509
AV
1621 fd_install(fd1, newfile1);
1622 fd_install(fd2, newfile2);
d73aa286 1623 return 0;
1da177e4 1624
016a266b 1625out:
d73aa286 1626 put_unused_fd(fd2);
d73aa286 1627 put_unused_fd(fd1);
1da177e4
LT
1628 return err;
1629}
1630
6debc8d8
DB
1631SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1632 int __user *, usockvec)
1633{
1634 return __sys_socketpair(family, type, protocol, usockvec);
1635}
1636
1da177e4
LT
1637/*
1638 * Bind a name to a socket. Nothing much to do here since it's
1639 * the protocol's responsibility to handle the local address.
1640 *
1641 * We move the socket address to kernel space before we call
1642 * the protocol layer (having also checked the address is ok).
1643 */
1644
a87d35d8 1645int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1646{
1647 struct socket *sock;
230b1839 1648 struct sockaddr_storage address;
6cb153ca 1649 int err, fput_needed;
1da177e4 1650
89bddce5 1651 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1652 if (sock) {
43db362d 1653 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1654 if (!err) {
89bddce5 1655 err = security_socket_bind(sock,
230b1839 1656 (struct sockaddr *)&address,
89bddce5 1657 addrlen);
6cb153ca
BL
1658 if (!err)
1659 err = sock->ops->bind(sock,
89bddce5 1660 (struct sockaddr *)
230b1839 1661 &address, addrlen);
1da177e4 1662 }
6cb153ca 1663 fput_light(sock->file, fput_needed);
89bddce5 1664 }
1da177e4
LT
1665 return err;
1666}
1667
a87d35d8
DB
1668SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1669{
1670 return __sys_bind(fd, umyaddr, addrlen);
1671}
1672
1da177e4
LT
1673/*
1674 * Perform a listen. Basically, we allow the protocol to do anything
1675 * necessary for a listen, and if that works, we mark the socket as
1676 * ready for listening.
1677 */
1678
25e290ee 1679int __sys_listen(int fd, int backlog)
1da177e4
LT
1680{
1681 struct socket *sock;
6cb153ca 1682 int err, fput_needed;
b8e1f9b5 1683 int somaxconn;
89bddce5
SH
1684
1685 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1686 if (sock) {
8efa6e93 1687 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1688 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1689 backlog = somaxconn;
1da177e4
LT
1690
1691 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1692 if (!err)
1693 err = sock->ops->listen(sock, backlog);
1da177e4 1694
6cb153ca 1695 fput_light(sock->file, fput_needed);
1da177e4
LT
1696 }
1697 return err;
1698}
1699
25e290ee
DB
1700SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1701{
1702 return __sys_listen(fd, backlog);
1703}
1704
1da177e4
LT
1705/*
1706 * For accept, we attempt to create a new socket, set up the link
1707 * with the client, wake up the client, then return the new
1708 * connected fd. We collect the address of the connector in kernel
1709 * space and move it to user at the very end. This is unclean because
1710 * we open the socket then return an error.
1711 *
1712 * 1003.1g adds the ability to recvmsg() to query connection pending
1713 * status to recvmsg. We need to add that support in a way thats
b903036a 1714 * clean when we restructure accept also.
1da177e4
LT
1715 */
1716
4541e805
DB
1717int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1718 int __user *upeer_addrlen, int flags)
1da177e4
LT
1719{
1720 struct socket *sock, *newsock;
39d8c1b6 1721 struct file *newfile;
6cb153ca 1722 int err, len, newfd, fput_needed;
230b1839 1723 struct sockaddr_storage address;
1da177e4 1724
77d27200 1725 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1726 return -EINVAL;
1727
1728 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1729 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1730
6cb153ca 1731 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1732 if (!sock)
1733 goto out;
1734
1735 err = -ENFILE;
c6d409cf
ED
1736 newsock = sock_alloc();
1737 if (!newsock)
1da177e4
LT
1738 goto out_put;
1739
1740 newsock->type = sock->type;
1741 newsock->ops = sock->ops;
1742
1da177e4
LT
1743 /*
1744 * We don't need try_module_get here, as the listening socket (sock)
1745 * has the protocol module (sock->ops->owner) held.
1746 */
1747 __module_get(newsock->ops->owner);
1748
28407630 1749 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1750 if (unlikely(newfd < 0)) {
1751 err = newfd;
9a1875e6
DM
1752 sock_release(newsock);
1753 goto out_put;
39d8c1b6 1754 }
aab174f0 1755 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1756 if (IS_ERR(newfile)) {
28407630
AV
1757 err = PTR_ERR(newfile);
1758 put_unused_fd(newfd);
28407630
AV
1759 goto out_put;
1760 }
39d8c1b6 1761
a79af59e
FF
1762 err = security_socket_accept(sock, newsock);
1763 if (err)
39d8c1b6 1764 goto out_fd;
a79af59e 1765
cdfbabfb 1766 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1767 if (err < 0)
39d8c1b6 1768 goto out_fd;
1da177e4
LT
1769
1770 if (upeer_sockaddr) {
9b2c45d4
DV
1771 len = newsock->ops->getname(newsock,
1772 (struct sockaddr *)&address, 2);
1773 if (len < 0) {
1da177e4 1774 err = -ECONNABORTED;
39d8c1b6 1775 goto out_fd;
1da177e4 1776 }
43db362d 1777 err = move_addr_to_user(&address,
230b1839 1778 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1779 if (err < 0)
39d8c1b6 1780 goto out_fd;
1da177e4
LT
1781 }
1782
1783 /* File flags are not inherited via accept() unlike another OSes. */
1784
39d8c1b6
DM
1785 fd_install(newfd, newfile);
1786 err = newfd;
1da177e4 1787
1da177e4 1788out_put:
6cb153ca 1789 fput_light(sock->file, fput_needed);
1da177e4
LT
1790out:
1791 return err;
39d8c1b6 1792out_fd:
9606a216 1793 fput(newfile);
39d8c1b6 1794 put_unused_fd(newfd);
1da177e4
LT
1795 goto out_put;
1796}
1797
4541e805
DB
1798SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1799 int __user *, upeer_addrlen, int, flags)
1800{
1801 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1802}
1803
20f37034
HC
1804SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1805 int __user *, upeer_addrlen)
aaca0bdc 1806{
4541e805 1807 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1808}
1809
1da177e4
LT
1810/*
1811 * Attempt to connect to a socket with the server address. The address
1812 * is in user space so we verify it is OK and move it to kernel space.
1813 *
1814 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1815 * break bindings
1816 *
1817 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1818 * other SEQPACKET protocols that take time to connect() as it doesn't
1819 * include the -EINPROGRESS status for such sockets.
1820 */
1821
1387c2c2 1822int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1823{
1824 struct socket *sock;
230b1839 1825 struct sockaddr_storage address;
6cb153ca 1826 int err, fput_needed;
1da177e4 1827
6cb153ca 1828 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1829 if (!sock)
1830 goto out;
43db362d 1831 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1832 if (err < 0)
1833 goto out_put;
1834
89bddce5 1835 err =
230b1839 1836 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1837 if (err)
1838 goto out_put;
1839
230b1839 1840 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1841 sock->file->f_flags);
1842out_put:
6cb153ca 1843 fput_light(sock->file, fput_needed);
1da177e4
LT
1844out:
1845 return err;
1846}
1847
1387c2c2
DB
1848SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1849 int, addrlen)
1850{
1851 return __sys_connect(fd, uservaddr, addrlen);
1852}
1853
1da177e4
LT
1854/*
1855 * Get the local address ('name') of a socket object. Move the obtained
1856 * name to user space.
1857 */
1858
8882a107
DB
1859int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1860 int __user *usockaddr_len)
1da177e4
LT
1861{
1862 struct socket *sock;
230b1839 1863 struct sockaddr_storage address;
9b2c45d4 1864 int err, fput_needed;
89bddce5 1865
6cb153ca 1866 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1867 if (!sock)
1868 goto out;
1869
1870 err = security_socket_getsockname(sock);
1871 if (err)
1872 goto out_put;
1873
9b2c45d4
DV
1874 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1875 if (err < 0)
1da177e4 1876 goto out_put;
9b2c45d4
DV
1877 /* "err" is actually length in this case */
1878 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1879
1880out_put:
6cb153ca 1881 fput_light(sock->file, fput_needed);
1da177e4
LT
1882out:
1883 return err;
1884}
1885
8882a107
DB
1886SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1887 int __user *, usockaddr_len)
1888{
1889 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1890}
1891
1da177e4
LT
1892/*
1893 * Get the remote address ('name') of a socket object. Move the obtained
1894 * name to user space.
1895 */
1896
b21c8f83
DB
1897int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1898 int __user *usockaddr_len)
1da177e4
LT
1899{
1900 struct socket *sock;
230b1839 1901 struct sockaddr_storage address;
9b2c45d4 1902 int err, fput_needed;
1da177e4 1903
89bddce5
SH
1904 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1905 if (sock != NULL) {
1da177e4
LT
1906 err = security_socket_getpeername(sock);
1907 if (err) {
6cb153ca 1908 fput_light(sock->file, fput_needed);
1da177e4
LT
1909 return err;
1910 }
1911
9b2c45d4
DV
1912 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1913 if (err >= 0)
1914 /* "err" is actually length in this case */
1915 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1916 usockaddr_len);
6cb153ca 1917 fput_light(sock->file, fput_needed);
1da177e4
LT
1918 }
1919 return err;
1920}
1921
b21c8f83
DB
1922SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1923 int __user *, usockaddr_len)
1924{
1925 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1926}
1927
1da177e4
LT
1928/*
1929 * Send a datagram to a given address. We move the address into kernel
1930 * space and check the user space data area is readable before invoking
1931 * the protocol.
1932 */
211b634b
DB
1933int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1934 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1935{
1936 struct socket *sock;
230b1839 1937 struct sockaddr_storage address;
1da177e4
LT
1938 int err;
1939 struct msghdr msg;
1940 struct iovec iov;
6cb153ca 1941 int fput_needed;
6cb153ca 1942
602bd0e9
AV
1943 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1944 if (unlikely(err))
1945 return err;
de0fa95c
PE
1946 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1947 if (!sock)
4387ff75 1948 goto out;
6cb153ca 1949
89bddce5 1950 msg.msg_name = NULL;
89bddce5
SH
1951 msg.msg_control = NULL;
1952 msg.msg_controllen = 0;
1953 msg.msg_namelen = 0;
6cb153ca 1954 if (addr) {
43db362d 1955 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1956 if (err < 0)
1957 goto out_put;
230b1839 1958 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1959 msg.msg_namelen = addr_len;
1da177e4
LT
1960 }
1961 if (sock->file->f_flags & O_NONBLOCK)
1962 flags |= MSG_DONTWAIT;
1963 msg.msg_flags = flags;
d8725c86 1964 err = sock_sendmsg(sock, &msg);
1da177e4 1965
89bddce5 1966out_put:
de0fa95c 1967 fput_light(sock->file, fput_needed);
4387ff75 1968out:
1da177e4
LT
1969 return err;
1970}
1971
211b634b
DB
1972SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1973 unsigned int, flags, struct sockaddr __user *, addr,
1974 int, addr_len)
1975{
1976 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1977}
1978
1da177e4 1979/*
89bddce5 1980 * Send a datagram down a socket.
1da177e4
LT
1981 */
1982
3e0fa65f 1983SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1984 unsigned int, flags)
1da177e4 1985{
211b634b 1986 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1987}
1988
1989/*
89bddce5 1990 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1991 * sender. We verify the buffers are writable and if needed move the
1992 * sender address from kernel to user space.
1993 */
7a09e1eb
DB
1994int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1995 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1996{
1997 struct socket *sock;
1998 struct iovec iov;
1999 struct msghdr msg;
230b1839 2000 struct sockaddr_storage address;
89bddce5 2001 int err, err2;
6cb153ca
BL
2002 int fput_needed;
2003
602bd0e9
AV
2004 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2005 if (unlikely(err))
2006 return err;
de0fa95c 2007 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2008 if (!sock)
de0fa95c 2009 goto out;
1da177e4 2010
89bddce5
SH
2011 msg.msg_control = NULL;
2012 msg.msg_controllen = 0;
f3d33426
HFS
2013 /* Save some cycles and don't copy the address if not needed */
2014 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2015 /* We assume all kernel code knows the size of sockaddr_storage */
2016 msg.msg_namelen = 0;
130ed5d1 2017 msg.msg_iocb = NULL;
9f138fa6 2018 msg.msg_flags = 0;
1da177e4
LT
2019 if (sock->file->f_flags & O_NONBLOCK)
2020 flags |= MSG_DONTWAIT;
2da62906 2021 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2022
89bddce5 2023 if (err >= 0 && addr != NULL) {
43db362d 2024 err2 = move_addr_to_user(&address,
230b1839 2025 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2026 if (err2 < 0)
2027 err = err2;
1da177e4 2028 }
de0fa95c
PE
2029
2030 fput_light(sock->file, fput_needed);
4387ff75 2031out:
1da177e4
LT
2032 return err;
2033}
2034
7a09e1eb
DB
2035SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2036 unsigned int, flags, struct sockaddr __user *, addr,
2037 int __user *, addr_len)
2038{
2039 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2040}
2041
1da177e4 2042/*
89bddce5 2043 * Receive a datagram from a socket.
1da177e4
LT
2044 */
2045
b7c0ddf5
JG
2046SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2047 unsigned int, flags)
1da177e4 2048{
7a09e1eb 2049 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2050}
2051
2052/*
2053 * Set a socket option. Because we don't know the option lengths we have
2054 * to pass the user mode parameter for the protocols to sort out.
2055 */
2056
cc36dca0
DB
2057static int __sys_setsockopt(int fd, int level, int optname,
2058 char __user *optval, int optlen)
1da177e4 2059{
6cb153ca 2060 int err, fput_needed;
1da177e4
LT
2061 struct socket *sock;
2062
2063 if (optlen < 0)
2064 return -EINVAL;
89bddce5
SH
2065
2066 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2067 if (sock != NULL) {
2068 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2069 if (err)
2070 goto out_put;
1da177e4
LT
2071
2072 if (level == SOL_SOCKET)
89bddce5
SH
2073 err =
2074 sock_setsockopt(sock, level, optname, optval,
2075 optlen);
1da177e4 2076 else
89bddce5
SH
2077 err =
2078 sock->ops->setsockopt(sock, level, optname, optval,
2079 optlen);
6cb153ca
BL
2080out_put:
2081 fput_light(sock->file, fput_needed);
1da177e4
LT
2082 }
2083 return err;
2084}
2085
cc36dca0
DB
2086SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2087 char __user *, optval, int, optlen)
2088{
2089 return __sys_setsockopt(fd, level, optname, optval, optlen);
2090}
2091
1da177e4
LT
2092/*
2093 * Get a socket option. Because we don't know the option lengths we have
2094 * to pass a user mode parameter for the protocols to sort out.
2095 */
2096
13a2d70e
DB
2097static int __sys_getsockopt(int fd, int level, int optname,
2098 char __user *optval, int __user *optlen)
1da177e4 2099{
6cb153ca 2100 int err, fput_needed;
1da177e4
LT
2101 struct socket *sock;
2102
89bddce5
SH
2103 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2104 if (sock != NULL) {
6cb153ca
BL
2105 err = security_socket_getsockopt(sock, level, optname);
2106 if (err)
2107 goto out_put;
1da177e4
LT
2108
2109 if (level == SOL_SOCKET)
89bddce5
SH
2110 err =
2111 sock_getsockopt(sock, level, optname, optval,
2112 optlen);
1da177e4 2113 else
89bddce5
SH
2114 err =
2115 sock->ops->getsockopt(sock, level, optname, optval,
2116 optlen);
6cb153ca
BL
2117out_put:
2118 fput_light(sock->file, fput_needed);
1da177e4
LT
2119 }
2120 return err;
2121}
2122
13a2d70e
DB
2123SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2124 char __user *, optval, int __user *, optlen)
2125{
2126 return __sys_getsockopt(fd, level, optname, optval, optlen);
2127}
2128
1da177e4
LT
2129/*
2130 * Shutdown a socket.
2131 */
2132
005a1aea 2133int __sys_shutdown(int fd, int how)
1da177e4 2134{
6cb153ca 2135 int err, fput_needed;
1da177e4
LT
2136 struct socket *sock;
2137
89bddce5
SH
2138 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2139 if (sock != NULL) {
1da177e4 2140 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2141 if (!err)
2142 err = sock->ops->shutdown(sock, how);
2143 fput_light(sock->file, fput_needed);
1da177e4
LT
2144 }
2145 return err;
2146}
2147
005a1aea
DB
2148SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2149{
2150 return __sys_shutdown(fd, how);
2151}
2152
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() relies on two names being in scope where it is expanded:
 * a variable called 'flags', and a pointer named '<msg>_compat' next to
 * the pointer passed as 'msg'. When MSG_CMSG_COMPAT is set in flags the
 * member of the compat structure is addressed, otherwise the native one.
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2159
c71d8ebe
TH
2160struct used_address {
2161 struct sockaddr_storage name;
2162 unsigned int name_len;
2163};
2164
da184284
AV
2165static int copy_msghdr_from_user(struct msghdr *kmsg,
2166 struct user_msghdr __user *umsg,
2167 struct sockaddr __user **save_addr,
2168 struct iovec **iov)
1661bf36 2169{
ffb07550 2170 struct user_msghdr msg;
08adb7da
AV
2171 ssize_t err;
2172
ffb07550 2173 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2174 return -EFAULT;
dbb490b9 2175
864d9664 2176 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2177 kmsg->msg_controllen = msg.msg_controllen;
2178 kmsg->msg_flags = msg.msg_flags;
2179
2180 kmsg->msg_namelen = msg.msg_namelen;
2181 if (!msg.msg_name)
6a2a2b3a
AS
2182 kmsg->msg_namelen = 0;
2183
dbb490b9
ML
2184 if (kmsg->msg_namelen < 0)
2185 return -EINVAL;
2186
1661bf36 2187 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2188 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2189
2190 if (save_addr)
ffb07550 2191 *save_addr = msg.msg_name;
08adb7da 2192
ffb07550 2193 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2194 if (!save_addr) {
864d9664
PA
2195 err = move_addr_to_kernel(msg.msg_name,
2196 kmsg->msg_namelen,
08adb7da
AV
2197 kmsg->msg_name);
2198 if (err < 0)
2199 return err;
2200 }
2201 } else {
2202 kmsg->msg_name = NULL;
2203 kmsg->msg_namelen = 0;
2204 }
2205
ffb07550 2206 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2207 return -EMSGSIZE;
2208
0345f931 2209 kmsg->msg_iocb = NULL;
2210
ffb07550
AV
2211 return import_iovec(save_addr ? READ : WRITE,
2212 msg.msg_iov, msg.msg_iovlen,
da184284 2213 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2214}
2215
666547ff 2216static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2217 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2218 struct used_address *used_address,
2219 unsigned int allowed_msghdr_flags)
1da177e4 2220{
89bddce5
SH
2221 struct compat_msghdr __user *msg_compat =
2222 (struct compat_msghdr __user *)msg;
230b1839 2223 struct sockaddr_storage address;
1da177e4 2224 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2225 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2226 __aligned(sizeof(__kernel_size_t));
89bddce5 2227 /* 20 is size of ipv6_pktinfo */
1da177e4 2228 unsigned char *ctl_buf = ctl;
d8725c86 2229 int ctl_len;
08adb7da 2230 ssize_t err;
89bddce5 2231
08adb7da 2232 msg_sys->msg_name = &address;
1da177e4 2233
08449320 2234 if (MSG_CMSG_COMPAT & flags)
08adb7da 2235 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2236 else
08adb7da 2237 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2238 if (err < 0)
da184284 2239 return err;
1da177e4
LT
2240
2241 err = -ENOBUFS;
2242
228e548e 2243 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2244 goto out_freeiov;
28a94d8f 2245 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2246 ctl_len = msg_sys->msg_controllen;
1da177e4 2247 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2248 err =
228e548e 2249 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2250 sizeof(ctl));
1da177e4
LT
2251 if (err)
2252 goto out_freeiov;
228e548e
AB
2253 ctl_buf = msg_sys->msg_control;
2254 ctl_len = msg_sys->msg_controllen;
1da177e4 2255 } else if (ctl_len) {
ac4340fc
DM
2256 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2257 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2258 if (ctl_len > sizeof(ctl)) {
1da177e4 2259 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2260 if (ctl_buf == NULL)
1da177e4
LT
2261 goto out_freeiov;
2262 }
2263 err = -EFAULT;
2264 /*
228e548e 2265 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2266 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2267 * checking falls down on this.
2268 */
fb8621bb 2269 if (copy_from_user(ctl_buf,
228e548e 2270 (void __user __force *)msg_sys->msg_control,
89bddce5 2271 ctl_len))
1da177e4 2272 goto out_freectl;
228e548e 2273 msg_sys->msg_control = ctl_buf;
1da177e4 2274 }
228e548e 2275 msg_sys->msg_flags = flags;
1da177e4
LT
2276
2277 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2278 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2279 /*
2280 * If this is sendmmsg() and current destination address is same as
2281 * previously succeeded address, omit asking LSM's decision.
2282 * used_address->name_len is initialized to UINT_MAX so that the first
2283 * destination address never matches.
2284 */
bc909d9d
MD
2285 if (used_address && msg_sys->msg_name &&
2286 used_address->name_len == msg_sys->msg_namelen &&
2287 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2288 used_address->name_len)) {
d8725c86 2289 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2290 goto out_freectl;
2291 }
d8725c86 2292 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2293 /*
2294 * If this is sendmmsg() and sending to current destination address was
2295 * successful, remember it.
2296 */
2297 if (used_address && err >= 0) {
2298 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2299 if (msg_sys->msg_name)
2300 memcpy(&used_address->name, msg_sys->msg_name,
2301 used_address->name_len);
c71d8ebe 2302 }
1da177e4
LT
2303
2304out_freectl:
89bddce5 2305 if (ctl_buf != ctl)
1da177e4
LT
2306 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2307out_freeiov:
da184284 2308 kfree(iov);
228e548e
AB
2309 return err;
2310}
2311
2312/*
2313 * BSD sendmsg interface
2314 */
2315
e1834a32
DB
2316long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2317 bool forbid_cmsg_compat)
228e548e
AB
2318{
2319 int fput_needed, err;
2320 struct msghdr msg_sys;
1be374a0
AL
2321 struct socket *sock;
2322
e1834a32
DB
2323 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2324 return -EINVAL;
2325
1be374a0 2326 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2327 if (!sock)
2328 goto out;
2329
28a94d8f 2330 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2331
6cb153ca 2332 fput_light(sock->file, fput_needed);
89bddce5 2333out:
1da177e4
LT
2334 return err;
2335}
2336
666547ff 2337SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2338{
e1834a32 2339 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2340}
2341
228e548e
AB
2342/*
2343 * Linux sendmmsg interface
2344 */
2345
2346int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2347 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2348{
2349 int fput_needed, err, datagrams;
2350 struct socket *sock;
2351 struct mmsghdr __user *entry;
2352 struct compat_mmsghdr __user *compat_entry;
2353 struct msghdr msg_sys;
c71d8ebe 2354 struct used_address used_address;
f092276d 2355 unsigned int oflags = flags;
228e548e 2356
e1834a32
DB
2357 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2358 return -EINVAL;
2359
98382f41
AB
2360 if (vlen > UIO_MAXIOV)
2361 vlen = UIO_MAXIOV;
228e548e
AB
2362
2363 datagrams = 0;
2364
2365 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2366 if (!sock)
2367 return err;
2368
c71d8ebe 2369 used_address.name_len = UINT_MAX;
228e548e
AB
2370 entry = mmsg;
2371 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2372 err = 0;
f092276d 2373 flags |= MSG_BATCH;
228e548e
AB
2374
2375 while (datagrams < vlen) {
f092276d
TH
2376 if (datagrams == vlen - 1)
2377 flags = oflags;
2378
228e548e 2379 if (MSG_CMSG_COMPAT & flags) {
666547ff 2380 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2381 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2382 if (err < 0)
2383 break;
2384 err = __put_user(err, &compat_entry->msg_len);
2385 ++compat_entry;
2386 } else {
a7526eb5 2387 err = ___sys_sendmsg(sock,
666547ff 2388 (struct user_msghdr __user *)entry,
28a94d8f 2389 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2390 if (err < 0)
2391 break;
2392 err = put_user(err, &entry->msg_len);
2393 ++entry;
2394 }
2395
2396 if (err)
2397 break;
2398 ++datagrams;
3023898b
SHY
2399 if (msg_data_left(&msg_sys))
2400 break;
a78cb84c 2401 cond_resched();
228e548e
AB
2402 }
2403
228e548e
AB
2404 fput_light(sock->file, fput_needed);
2405
728ffb86
AB
2406 /* We only return an error if no datagrams were able to be sent */
2407 if (datagrams != 0)
228e548e
AB
2408 return datagrams;
2409
228e548e
AB
2410 return err;
2411}
2412
2413SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2414 unsigned int, vlen, unsigned int, flags)
2415{
e1834a32 2416 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2417}
2418
666547ff 2419static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2420 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2421{
89bddce5
SH
2422 struct compat_msghdr __user *msg_compat =
2423 (struct compat_msghdr __user *)msg;
1da177e4 2424 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2425 struct iovec *iov = iovstack;
1da177e4 2426 unsigned long cmsg_ptr;
2da62906 2427 int len;
08adb7da 2428 ssize_t err;
1da177e4
LT
2429
2430 /* kernel mode address */
230b1839 2431 struct sockaddr_storage addr;
1da177e4
LT
2432
2433 /* user mode address pointers */
2434 struct sockaddr __user *uaddr;
08adb7da 2435 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2436
08adb7da 2437 msg_sys->msg_name = &addr;
1da177e4 2438
f3d33426 2439 if (MSG_CMSG_COMPAT & flags)
08adb7da 2440 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2441 else
08adb7da 2442 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2443 if (err < 0)
da184284 2444 return err;
1da177e4 2445
a2e27255
ACM
2446 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2447 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2448
f3d33426
HFS
2449 /* We assume all kernel code knows the size of sockaddr_storage */
2450 msg_sys->msg_namelen = 0;
2451
1da177e4
LT
2452 if (sock->file->f_flags & O_NONBLOCK)
2453 flags |= MSG_DONTWAIT;
2da62906 2454 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2455 if (err < 0)
2456 goto out_freeiov;
2457 len = err;
2458
2459 if (uaddr != NULL) {
43db362d 2460 err = move_addr_to_user(&addr,
a2e27255 2461 msg_sys->msg_namelen, uaddr,
89bddce5 2462 uaddr_len);
1da177e4
LT
2463 if (err < 0)
2464 goto out_freeiov;
2465 }
a2e27255 2466 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2467 COMPAT_FLAGS(msg));
1da177e4
LT
2468 if (err)
2469 goto out_freeiov;
2470 if (MSG_CMSG_COMPAT & flags)
a2e27255 2471 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2472 &msg_compat->msg_controllen);
2473 else
a2e27255 2474 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2475 &msg->msg_controllen);
2476 if (err)
2477 goto out_freeiov;
2478 err = len;
2479
2480out_freeiov:
da184284 2481 kfree(iov);
a2e27255
ACM
2482 return err;
2483}
2484
2485/*
2486 * BSD recvmsg interface
2487 */
2488
e1834a32
DB
2489long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2490 bool forbid_cmsg_compat)
a2e27255
ACM
2491{
2492 int fput_needed, err;
2493 struct msghdr msg_sys;
1be374a0
AL
2494 struct socket *sock;
2495
e1834a32
DB
2496 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2497 return -EINVAL;
2498
1be374a0 2499 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2500 if (!sock)
2501 goto out;
2502
a7526eb5 2503 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2504
6cb153ca 2505 fput_light(sock->file, fput_needed);
1da177e4
LT
2506out:
2507 return err;
2508}
2509
666547ff 2510SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2511 unsigned int, flags)
2512{
e1834a32 2513 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2514}
2515
a2e27255
ACM
2516/*
2517 * Linux recvmmsg interface
2518 */
2519
e11d4284
AB
2520static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2521 unsigned int vlen, unsigned int flags,
2522 struct timespec64 *timeout)
a2e27255
ACM
2523{
2524 int fput_needed, err, datagrams;
2525 struct socket *sock;
2526 struct mmsghdr __user *entry;
d7256d0e 2527 struct compat_mmsghdr __user *compat_entry;
a2e27255 2528 struct msghdr msg_sys;
766b9f92
DD
2529 struct timespec64 end_time;
2530 struct timespec64 timeout64;
a2e27255
ACM
2531
2532 if (timeout &&
2533 poll_select_set_timeout(&end_time, timeout->tv_sec,
2534 timeout->tv_nsec))
2535 return -EINVAL;
2536
2537 datagrams = 0;
2538
2539 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2540 if (!sock)
2541 return err;
2542
7797dc41
SHY
2543 if (likely(!(flags & MSG_ERRQUEUE))) {
2544 err = sock_error(sock->sk);
2545 if (err) {
2546 datagrams = err;
2547 goto out_put;
2548 }
e623a9e9 2549 }
a2e27255
ACM
2550
2551 entry = mmsg;
d7256d0e 2552 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2553
2554 while (datagrams < vlen) {
2555 /*
2556 * No need to ask LSM for more than the first datagram.
2557 */
d7256d0e 2558 if (MSG_CMSG_COMPAT & flags) {
666547ff 2559 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2560 &msg_sys, flags & ~MSG_WAITFORONE,
2561 datagrams);
d7256d0e
JMG
2562 if (err < 0)
2563 break;
2564 err = __put_user(err, &compat_entry->msg_len);
2565 ++compat_entry;
2566 } else {
a7526eb5 2567 err = ___sys_recvmsg(sock,
666547ff 2568 (struct user_msghdr __user *)entry,
a7526eb5
AL
2569 &msg_sys, flags & ~MSG_WAITFORONE,
2570 datagrams);
d7256d0e
JMG
2571 if (err < 0)
2572 break;
2573 err = put_user(err, &entry->msg_len);
2574 ++entry;
2575 }
2576
a2e27255
ACM
2577 if (err)
2578 break;
a2e27255
ACM
2579 ++datagrams;
2580
71c5c159
BB
2581 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2582 if (flags & MSG_WAITFORONE)
2583 flags |= MSG_DONTWAIT;
2584
a2e27255 2585 if (timeout) {
766b9f92 2586 ktime_get_ts64(&timeout64);
c2e6c856 2587 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2588 if (timeout->tv_sec < 0) {
2589 timeout->tv_sec = timeout->tv_nsec = 0;
2590 break;
2591 }
2592
2593 /* Timeout, return less than vlen datagrams */
2594 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2595 break;
2596 }
2597
2598 /* Out of band data, return right away */
2599 if (msg_sys.msg_flags & MSG_OOB)
2600 break;
a78cb84c 2601 cond_resched();
a2e27255
ACM
2602 }
2603
a2e27255 2604 if (err == 0)
34b88a68
ACM
2605 goto out_put;
2606
2607 if (datagrams == 0) {
2608 datagrams = err;
2609 goto out_put;
2610 }
a2e27255 2611
34b88a68
ACM
2612 /*
2613 * We may return less entries than requested (vlen) if the
2614 * sock is non block and there aren't enough datagrams...
2615 */
2616 if (err != -EAGAIN) {
a2e27255 2617 /*
34b88a68
ACM
2618 * ... or if recvmsg returns an error after we
2619 * received some datagrams, where we record the
2620 * error to return on the next call or if the
2621 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2622 */
34b88a68 2623 sock->sk->sk_err = -err;
a2e27255 2624 }
34b88a68
ACM
2625out_put:
2626 fput_light(sock->file, fput_needed);
a2e27255 2627
34b88a68 2628 return datagrams;
a2e27255
ACM
2629}
2630
e11d4284
AB
2631int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2632 unsigned int vlen, unsigned int flags,
2633 struct __kernel_timespec __user *timeout,
2634 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2635{
2636 int datagrams;
c2e6c856 2637 struct timespec64 timeout_sys;
a2e27255 2638
e11d4284
AB
2639 if (timeout && get_timespec64(&timeout_sys, timeout))
2640 return -EFAULT;
a2e27255 2641
e11d4284 2642 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2643 return -EFAULT;
2644
e11d4284
AB
2645 if (!timeout && !timeout32)
2646 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2647
2648 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2649
e11d4284
AB
2650 if (datagrams <= 0)
2651 return datagrams;
2652
2653 if (timeout && put_timespec64(&timeout_sys, timeout))
2654 datagrams = -EFAULT;
2655
2656 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2657 datagrams = -EFAULT;
2658
2659 return datagrams;
2660}
2661
1255e269
DB
2662SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2663 unsigned int, vlen, unsigned int, flags,
c2e6c856 2664 struct __kernel_timespec __user *, timeout)
1255e269 2665{
e11d4284
AB
2666 if (flags & MSG_CMSG_COMPAT)
2667 return -EINVAL;
2668
2669 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2670}
2671
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg entry point taking an old_timespec32 timeout.
 * MSG_CMSG_COMPAT is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if ((flags & MSG_CMSG_COMPAT) != 0)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2683
a2e27255 2684#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number. Entry 0 is unused; AL(n) is the size of n unsigned longs.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),
	AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4),
	AL(6), AL(6), AL(2), AL(5), AL(5),
	AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2695
2696/*
89bddce5 2697 * System call vectors.
1da177e4
LT
2698 *
2699 * Argument checking cleaned up. Saved 20% in size.
2700 * This function doesn't need to set the kernel lock because
89bddce5 2701 * it is set by the callees.
1da177e4
LT
2702 */
2703
3e0fa65f 2704SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2705{
2950fa9d 2706 unsigned long a[AUDITSC_ARGS];
89bddce5 2707 unsigned long a0, a1;
1da177e4 2708 int err;
47379052 2709 unsigned int len;
1da177e4 2710
228e548e 2711 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2712 return -EINVAL;
c8e8cd57 2713 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2714
47379052
AV
2715 len = nargs[call];
2716 if (len > sizeof(a))
2717 return -EINVAL;
2718
1da177e4 2719 /* copy_from_user should be SMP safe. */
47379052 2720 if (copy_from_user(a, args, len))
1da177e4 2721 return -EFAULT;
3ec3b2fb 2722
2950fa9d
CG
2723 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2724 if (err)
2725 return err;
3ec3b2fb 2726
89bddce5
SH
2727 a0 = a[0];
2728 a1 = a[1];
2729
2730 switch (call) {
2731 case SYS_SOCKET:
9d6a15c3 2732 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2733 break;
2734 case SYS_BIND:
a87d35d8 2735 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2736 break;
2737 case SYS_CONNECT:
1387c2c2 2738 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2739 break;
2740 case SYS_LISTEN:
25e290ee 2741 err = __sys_listen(a0, a1);
89bddce5
SH
2742 break;
2743 case SYS_ACCEPT:
4541e805
DB
2744 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2745 (int __user *)a[2], 0);
89bddce5
SH
2746 break;
2747 case SYS_GETSOCKNAME:
2748 err =
8882a107
DB
2749 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2750 (int __user *)a[2]);
89bddce5
SH
2751 break;
2752 case SYS_GETPEERNAME:
2753 err =
b21c8f83
DB
2754 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2755 (int __user *)a[2]);
89bddce5
SH
2756 break;
2757 case SYS_SOCKETPAIR:
6debc8d8 2758 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2759 break;
2760 case SYS_SEND:
f3bf896b
DB
2761 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2762 NULL, 0);
89bddce5
SH
2763 break;
2764 case SYS_SENDTO:
211b634b
DB
2765 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2766 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2767 break;
2768 case SYS_RECV:
d27e9afc
DB
2769 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2770 NULL, NULL);
89bddce5
SH
2771 break;
2772 case SYS_RECVFROM:
7a09e1eb
DB
2773 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2774 (struct sockaddr __user *)a[4],
2775 (int __user *)a[5]);
89bddce5
SH
2776 break;
2777 case SYS_SHUTDOWN:
005a1aea 2778 err = __sys_shutdown(a0, a1);
89bddce5
SH
2779 break;
2780 case SYS_SETSOCKOPT:
cc36dca0
DB
2781 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2782 a[4]);
89bddce5
SH
2783 break;
2784 case SYS_GETSOCKOPT:
2785 err =
13a2d70e
DB
2786 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2787 (int __user *)a[4]);
89bddce5
SH
2788 break;
2789 case SYS_SENDMSG:
e1834a32
DB
2790 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2791 a[2], true);
89bddce5 2792 break;
228e548e 2793 case SYS_SENDMMSG:
e1834a32
DB
2794 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2795 a[3], true);
228e548e 2796 break;
89bddce5 2797 case SYS_RECVMSG:
e1834a32
DB
2798 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2799 a[2], true);
89bddce5 2800 break;
a2e27255 2801 case SYS_RECVMMSG:
e11d4284
AB
2802 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2803 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2804 a[2], a[3],
2805 (struct __kernel_timespec __user *)a[4],
2806 NULL);
2807 else
2808 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2809 a[2], a[3], NULL,
2810 (struct old_timespec32 __user *)a[4]);
a2e27255 2811 break;
de11defe 2812 case SYS_ACCEPT4:
4541e805
DB
2813 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2814 (int __user *)a[2], a[3]);
aaca0bdc 2815 break;
89bddce5
SH
2816 default:
2817 err = -EINVAL;
2818 break;
1da177e4
LT
2819 }
2820 return err;
2821}
2822
89bddce5 2823#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2824
55737fda
SH
2825/**
2826 * sock_register - add a socket protocol handler
2827 * @ops: description of protocol
2828 *
1da177e4
LT
2829 * This function is called by a protocol handler that wants to
2830 * advertise its address family, and have it linked into the
e793c0f7 2831 * socket interface. The value ops->family corresponds to the
55737fda 2832 * socket system call protocol family.
1da177e4 2833 */
f0fd27d4 2834int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2835{
2836 int err;
2837
2838 if (ops->family >= NPROTO) {
3410f22e 2839 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2840 return -ENOBUFS;
2841 }
55737fda
SH
2842
2843 spin_lock(&net_family_lock);
190683a9
ED
2844 if (rcu_dereference_protected(net_families[ops->family],
2845 lockdep_is_held(&net_family_lock)))
55737fda
SH
2846 err = -EEXIST;
2847 else {
cf778b00 2848 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2849 err = 0;
2850 }
55737fda
SH
2851 spin_unlock(&net_family_lock);
2852
3410f22e 2853 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2854 return err;
2855}
c6d409cf 2856EXPORT_SYMBOL(sock_register);
1da177e4 2857
55737fda
SH
2858/**
2859 * sock_unregister - remove a protocol handler
2860 * @family: protocol family to remove
2861 *
1da177e4
LT
2862 * This function is called by a protocol handler that wants to
2863 * remove its address family, and have it unlinked from the
55737fda
SH
2864 * new socket creation.
2865 *
2866 * If protocol handler is a module, then it can use module reference
2867 * counts to protect against new references. If protocol handler is not
2868 * a module then it needs to provide its own protection in
2869 * the ops->create routine.
1da177e4 2870 */
f0fd27d4 2871void sock_unregister(int family)
1da177e4 2872{
f0fd27d4 2873 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2874
55737fda 2875 spin_lock(&net_family_lock);
a9b3cd7f 2876 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2877 spin_unlock(&net_family_lock);
2878
2879 synchronize_rcu();
2880
3410f22e 2881 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2882}
c6d409cf 2883EXPORT_SYMBOL(sock_unregister);
1da177e4 2884
bf2ae2e4
XL
2885bool sock_is_registered(int family)
2886{
66b51b0a 2887 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2888}
2889
77d76ea3 2890static int __init sock_init(void)
1da177e4 2891{
b3e19d92 2892 int err;
2ca794e5
EB
2893 /*
2894 * Initialize the network sysctl infrastructure.
2895 */
2896 err = net_sysctl_init();
2897 if (err)
2898 goto out;
b3e19d92 2899
1da177e4 2900 /*
89bddce5 2901 * Initialize skbuff SLAB cache
1da177e4
LT
2902 */
2903 skb_init();
1da177e4
LT
2904
2905 /*
89bddce5 2906 * Initialize the protocols module.
1da177e4
LT
2907 */
2908
2909 init_inodecache();
b3e19d92
NP
2910
2911 err = register_filesystem(&sock_fs_type);
2912 if (err)
2913 goto out_fs;
1da177e4 2914 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2915 if (IS_ERR(sock_mnt)) {
2916 err = PTR_ERR(sock_mnt);
2917 goto out_mount;
2918 }
77d76ea3
AK
2919
2920 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2921 */
2922
2923#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2924 err = netfilter_init();
2925 if (err)
2926 goto out;
1da177e4 2927#endif
cbeb321a 2928
408eccce 2929 ptp_classifier_init();
c1f19b51 2930
b3e19d92
NP
2931out:
2932 return err;
2933
2934out_mount:
2935 unregister_filesystem(&sock_fs_type);
2936out_fs:
2937 goto out;
1da177e4
LT
2938}
2939
77d76ea3
AK
2940core_initcall(sock_init); /* early initcall */
2941
#ifdef CONFIG_PROC_FS
/* Print the in-use socket count for the seq_file's private context. */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", in_use);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2949
89bbfc95 2950#ifdef CONFIG_COMPAT
36fd633e 2951static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2952{
6b96018b 2953 struct compat_ifconf ifc32;
7a229387 2954 struct ifconf ifc;
7a229387
AB
2955 int err;
2956
6b96018b 2957 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2958 return -EFAULT;
2959
36fd633e
AV
2960 ifc.ifc_len = ifc32.ifc_len;
2961 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2962
36fd633e
AV
2963 rtnl_lock();
2964 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2965 rtnl_unlock();
7a229387
AB
2966 if (err)
2967 return err;
2968
36fd633e 2969 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2970 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2971 return -EFAULT;
2972
2973 return 0;
2974}
2975
6b96018b 2976static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2977{
3a7da39d
BH
2978 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2979 bool convert_in = false, convert_out = false;
44c02a2c
AV
2980 size_t buf_size = 0;
2981 struct ethtool_rxnfc __user *rxnfc = NULL;
2982 struct ifreq ifr;
3a7da39d
BH
2983 u32 rule_cnt = 0, actual_rule_cnt;
2984 u32 ethcmd;
7a229387 2985 u32 data;
3a7da39d 2986 int ret;
7a229387 2987
3a7da39d
BH
2988 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2989 return -EFAULT;
7a229387 2990
3a7da39d
BH
2991 compat_rxnfc = compat_ptr(data);
2992
2993 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2994 return -EFAULT;
2995
3a7da39d
BH
2996 /* Most ethtool structures are defined without padding.
2997 * Unfortunately struct ethtool_rxnfc is an exception.
2998 */
2999 switch (ethcmd) {
3000 default:
3001 break;
3002 case ETHTOOL_GRXCLSRLALL:
3003 /* Buffer size is variable */
3004 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3005 return -EFAULT;
3006 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3007 return -ENOMEM;
3008 buf_size += rule_cnt * sizeof(u32);
3009 /* fall through */
3010 case ETHTOOL_GRXRINGS:
3011 case ETHTOOL_GRXCLSRLCNT:
3012 case ETHTOOL_GRXCLSRULE:
55664f32 3013 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3014 convert_out = true;
3015 /* fall through */
3016 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3017 buf_size += sizeof(struct ethtool_rxnfc);
3018 convert_in = true;
44c02a2c 3019 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3020 break;
3021 }
3022
44c02a2c 3023 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3024 return -EFAULT;
3025
44c02a2c 3026 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3027
3a7da39d 3028 if (convert_in) {
127fe533 3029 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3030 * fs.ring_cookie and at the end of fs, but nowhere else.
3031 */
127fe533
AD
3032 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3033 sizeof(compat_rxnfc->fs.m_ext) !=
3034 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3035 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3036 BUILD_BUG_ON(
3037 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3038 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3039 offsetof(struct ethtool_rxnfc, fs.location) -
3040 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3041
3042 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3043 (void __user *)(&rxnfc->fs.m_ext + 1) -
3044 (void __user *)rxnfc) ||
3a7da39d
BH
3045 copy_in_user(&rxnfc->fs.ring_cookie,
3046 &compat_rxnfc->fs.ring_cookie,
954b1244 3047 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3048 (void __user *)&rxnfc->fs.ring_cookie))
3049 return -EFAULT;
3050 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3051 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3052 return -EFAULT;
3053 } else if (copy_in_user(&rxnfc->rule_cnt,
3054 &compat_rxnfc->rule_cnt,
3055 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3056 return -EFAULT;
3057 }
3058
44c02a2c 3059 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3060 if (ret)
3061 return ret;
3062
3063 if (convert_out) {
3064 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3065 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3066 (const void __user *)rxnfc) ||
3a7da39d
BH
3067 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3068 &rxnfc->fs.ring_cookie,
954b1244
SH
3069 (const void __user *)(&rxnfc->fs.location + 1) -
3070 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3071 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3072 sizeof(rxnfc->rule_cnt)))
3073 return -EFAULT;
3074
3075 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3076 /* As an optimisation, we only copy the actual
3077 * number of rules that the underlying
3078 * function returned. Since Mallory might
3079 * change the rule count in user memory, we
3080 * check that it is less than the rule count
3081 * originally given (as the user buffer size),
3082 * which has been range-checked.
3083 */
3084 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3085 return -EFAULT;
3086 if (actual_rule_cnt < rule_cnt)
3087 rule_cnt = actual_rule_cnt;
3088 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3089 &rxnfc->rule_locs[0],
3090 rule_cnt * sizeof(u32)))
3091 return -EFAULT;
3092 }
3093 }
3094
3095 return 0;
7a229387
AB
3096}
3097
7a50a240
AB
3098static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3099{
7a50a240 3100 compat_uptr_t uptr32;
44c02a2c
AV
3101 struct ifreq ifr;
3102 void __user *saved;
3103 int err;
7a50a240 3104
44c02a2c 3105 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3106 return -EFAULT;
3107
3108 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3109 return -EFAULT;
3110
44c02a2c
AV
3111 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3112 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3113
44c02a2c
AV
3114 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3115 if (!err) {
3116 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3117 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3118 err = -EFAULT;
ccbd6a5a 3119 }
44c02a2c 3120 return err;
7a229387
AB
3121}
3122
590d4693
BH
3123/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3124static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3125 struct compat_ifreq __user *u_ifreq32)
7a229387 3126{
44c02a2c 3127 struct ifreq ifreq;
7a229387
AB
3128 u32 data32;
3129
44c02a2c 3130 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3131 return -EFAULT;
44c02a2c 3132 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3133 return -EFAULT;
44c02a2c 3134 ifreq.ifr_data = compat_ptr(data32);
7a229387 3135
44c02a2c 3136 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3137}
3138
37ac39bd
JB
3139static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3140 unsigned int cmd,
3141 struct compat_ifreq __user *uifr32)
3142{
3143 struct ifreq __user *uifr;
3144 int err;
3145
3146 /* Handle the fact that while struct ifreq has the same *layout* on
3147 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3148 * which are handled elsewhere, it still has different *size* due to
3149 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3150 * resulting in struct ifreq being 32 and 40 bytes respectively).
3151 * As a result, if the struct happens to be at the end of a page and
3152 * the next page isn't readable/writable, we get a fault. To prevent
3153 * that, copy back and forth to the full size.
3154 */
3155
3156 uifr = compat_alloc_user_space(sizeof(*uifr));
3157 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3158 return -EFAULT;
3159
3160 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3161
3162 if (!err) {
3163 switch (cmd) {
3164 case SIOCGIFFLAGS:
3165 case SIOCGIFMETRIC:
3166 case SIOCGIFMTU:
3167 case SIOCGIFMEM:
3168 case SIOCGIFHWADDR:
3169 case SIOCGIFINDEX:
3170 case SIOCGIFADDR:
3171 case SIOCGIFBRDADDR:
3172 case SIOCGIFDSTADDR:
3173 case SIOCGIFNETMASK:
3174 case SIOCGIFPFLAGS:
3175 case SIOCGIFTXQLEN:
3176 case SIOCGMIIPHY:
3177 case SIOCGMIIREG:
c6c9fee3 3178 case SIOCGIFNAME:
37ac39bd
JB
3179 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3180 err = -EFAULT;
3181 break;
3182 }
3183 }
3184 return err;
3185}
3186
a2116ed2
AB
3187static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3188 struct compat_ifreq __user *uifr32)
3189{
3190 struct ifreq ifr;
3191 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3192 int err;
3193
3194 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3195 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3196 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3197 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3198 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3199 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3200 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3201 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3202 if (err)
3203 return -EFAULT;
3204
44c02a2c 3205 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3206
3207 if (cmd == SIOCGIFMAP && !err) {
3208 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3209 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3210 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3211 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3212 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3213 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3214 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3215 if (err)
3216 err = -EFAULT;
3217 }
3218 return err;
3219}
3220
7a229387 3221struct rtentry32 {
c6d409cf 3222 u32 rt_pad1;
7a229387
AB
3223 struct sockaddr rt_dst; /* target address */
3224 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3225 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3226 unsigned short rt_flags;
3227 short rt_pad2;
3228 u32 rt_pad3;
3229 unsigned char rt_tos;
3230 unsigned char rt_class;
3231 short rt_pad4;
3232 short rt_metric; /* +1 for binary compatibility! */
7a229387 3233 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3234 u32 rt_mtu; /* per route MTU/Window */
3235 u32 rt_window; /* Window clamping */
7a229387
AB
3236 unsigned short rt_irtt; /* Initial RTT */
3237};
3238
3239struct in6_rtmsg32 {
3240 struct in6_addr rtmsg_dst;
3241 struct in6_addr rtmsg_src;
3242 struct in6_addr rtmsg_gateway;
3243 u32 rtmsg_type;
3244 u16 rtmsg_dst_len;
3245 u16 rtmsg_src_len;
3246 u32 rtmsg_metric;
3247 u32 rtmsg_info;
3248 u32 rtmsg_flags;
3249 s32 rtmsg_ifindex;
3250};
3251
6b96018b
AB
3252static int routing_ioctl(struct net *net, struct socket *sock,
3253 unsigned int cmd, void __user *argp)
7a229387
AB
3254{
3255 int ret;
3256 void *r = NULL;
3257 struct in6_rtmsg r6;
3258 struct rtentry r4;
3259 char devname[16];
3260 u32 rtdev;
3261 mm_segment_t old_fs = get_fs();
3262
6b96018b
AB
3263 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3264 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3265 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3266 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3267 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3268 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3269 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3270 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3271 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3272 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3273 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3274
3275 r = (void *) &r6;
3276 } else { /* ipv4 */
6b96018b 3277 struct rtentry32 __user *ur4 = argp;
c6d409cf 3278 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3279 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3280 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3281 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3282 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3283 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3284 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3285 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3286 if (rtdev) {
c6d409cf 3287 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3288 r4.rt_dev = (char __user __force *)devname;
3289 devname[15] = 0;
7a229387
AB
3290 } else
3291 r4.rt_dev = NULL;
3292
3293 r = (void *) &r4;
3294 }
3295
3296 if (ret) {
3297 ret = -EFAULT;
3298 goto out;
3299 }
3300
c6d409cf 3301 set_fs(KERNEL_DS);
63ff03ab 3302 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3303 set_fs(old_fs);
7a229387
AB
3304
3305out:
7a229387
AB
3306 return ret;
3307}
3308
3309/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3310 * for some operations; this forces use of the newer bridge-utils that
25985edc 3311 * use compatible ioctls
7a229387 3312 */
6b96018b 3313static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3314{
6b96018b 3315 compat_ulong_t tmp;
7a229387 3316
6b96018b 3317 if (get_user(tmp, argp))
7a229387
AB
3318 return -EFAULT;
3319 if (tmp == BRCTL_GET_VERSION)
3320 return BRCTL_VERSION + 1;
3321 return -EINVAL;
3322}
3323
6b96018b
AB
3324static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3325 unsigned int cmd, unsigned long arg)
3326{
3327 void __user *argp = compat_ptr(arg);
3328 struct sock *sk = sock->sk;
3329 struct net *net = sock_net(sk);
7a229387 3330
6b96018b 3331 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3332 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3333
3334 switch (cmd) {
3335 case SIOCSIFBR:
3336 case SIOCGIFBR:
3337 return old_bridge_ioctl(argp);
6b96018b 3338 case SIOCGIFCONF:
36fd633e 3339 return compat_dev_ifconf(net, argp);
6b96018b
AB
3340 case SIOCETHTOOL:
3341 return ethtool_ioctl(net, argp);
7a50a240
AB
3342 case SIOCWANDEV:
3343 return compat_siocwandev(net, argp);
a2116ed2
AB
3344 case SIOCGIFMAP:
3345 case SIOCSIFMAP:
3346 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3347 case SIOCADDRT:
3348 case SIOCDELRT:
3349 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3350 case SIOCGSTAMP_OLD:
3351 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3352 if (!sock->ops->gettstamp)
3353 return -ENOIOCTLCMD;
0768e170 3354 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3355 !COMPAT_USE_64BIT_TIME);
3356
590d4693
BH
3357 case SIOCBONDSLAVEINFOQUERY:
3358 case SIOCBONDINFOQUERY:
a2116ed2 3359 case SIOCSHWTSTAMP:
fd468c74 3360 case SIOCGHWTSTAMP:
590d4693 3361 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3362
3363 case FIOSETOWN:
3364 case SIOCSPGRP:
3365 case FIOGETOWN:
3366 case SIOCGPGRP:
3367 case SIOCBRADDBR:
3368 case SIOCBRDELBR:
3369 case SIOCGIFVLAN:
3370 case SIOCSIFVLAN:
3371 case SIOCADDDLCI:
3372 case SIOCDELDLCI:
c62cce2c 3373 case SIOCGSKNS:
0768e170
AB
3374 case SIOCGSTAMP_NEW:
3375 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3376 return sock_ioctl(file, cmd, arg);
3377
3378 case SIOCGIFFLAGS:
3379 case SIOCSIFFLAGS:
3380 case SIOCGIFMETRIC:
3381 case SIOCSIFMETRIC:
3382 case SIOCGIFMTU:
3383 case SIOCSIFMTU:
3384 case SIOCGIFMEM:
3385 case SIOCSIFMEM:
3386 case SIOCGIFHWADDR:
3387 case SIOCSIFHWADDR:
3388 case SIOCADDMULTI:
3389 case SIOCDELMULTI:
3390 case SIOCGIFINDEX:
6b96018b
AB
3391 case SIOCGIFADDR:
3392 case SIOCSIFADDR:
3393 case SIOCSIFHWBROADCAST:
6b96018b 3394 case SIOCDIFADDR:
6b96018b
AB
3395 case SIOCGIFBRDADDR:
3396 case SIOCSIFBRDADDR:
3397 case SIOCGIFDSTADDR:
3398 case SIOCSIFDSTADDR:
3399 case SIOCGIFNETMASK:
3400 case SIOCSIFNETMASK:
3401 case SIOCSIFPFLAGS:
3402 case SIOCGIFPFLAGS:
3403 case SIOCGIFTXQLEN:
3404 case SIOCSIFTXQLEN:
3405 case SIOCBRADDIF:
3406 case SIOCBRDELIF:
c6c9fee3 3407 case SIOCGIFNAME:
9177efd3
AB
3408 case SIOCSIFNAME:
3409 case SIOCGMIIPHY:
3410 case SIOCGMIIREG:
3411 case SIOCSMIIREG:
f92d4fc9
AV
3412 case SIOCBONDENSLAVE:
3413 case SIOCBONDRELEASE:
3414 case SIOCBONDSETHWADDR:
3415 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3416 return compat_ifreq_ioctl(net, sock, cmd, argp);
3417
6b96018b
AB
3418 case SIOCSARP:
3419 case SIOCGARP:
3420 case SIOCDARP:
6b96018b 3421 case SIOCATMARK:
63ff03ab 3422 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3423 }
3424
6b96018b
AB
3425 return -ENOIOCTLCMD;
3426}
7a229387 3427
95c96174 3428static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3429 unsigned long arg)
89bbfc95
SP
3430{
3431 struct socket *sock = file->private_data;
3432 int ret = -ENOIOCTLCMD;
87de87d5
DM
3433 struct sock *sk;
3434 struct net *net;
3435
3436 sk = sock->sk;
3437 net = sock_net(sk);
89bbfc95
SP
3438
3439 if (sock->ops->compat_ioctl)
3440 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3441
87de87d5
DM
3442 if (ret == -ENOIOCTLCMD &&
3443 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3444 ret = compat_wext_handle_ioctl(net, cmd, arg);
3445
6b96018b
AB
3446 if (ret == -ENOIOCTLCMD)
3447 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3448
89bbfc95
SP
3449 return ret;
3450}
3451#endif
3452
8a3c245c
PT
3453/**
3454 * kernel_bind - bind an address to a socket (kernel space)
3455 * @sock: socket
3456 * @addr: address
3457 * @addrlen: length of address
3458 *
3459 * Returns 0 or an error.
3460 */
3461
ac5a488e
SS
3462int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3463{
3464 return sock->ops->bind(sock, addr, addrlen);
3465}
c6d409cf 3466EXPORT_SYMBOL(kernel_bind);
ac5a488e 3467
8a3c245c
PT
3468/**
3469 * kernel_listen - move socket to listening state (kernel space)
3470 * @sock: socket
3471 * @backlog: pending connections queue size
3472 *
3473 * Returns 0 or an error.
3474 */
3475
ac5a488e
SS
3476int kernel_listen(struct socket *sock, int backlog)
3477{
3478 return sock->ops->listen(sock, backlog);
3479}
c6d409cf 3480EXPORT_SYMBOL(kernel_listen);
ac5a488e 3481
8a3c245c
PT
3482/**
3483 * kernel_accept - accept a connection (kernel space)
3484 * @sock: listening socket
3485 * @newsock: new connected socket
3486 * @flags: flags
3487 *
3488 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3489 * If it fails, @newsock is guaranteed to be %NULL.
3490 * Returns 0 or an error.
3491 */
3492
ac5a488e
SS
3493int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3494{
3495 struct sock *sk = sock->sk;
3496 int err;
3497
3498 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3499 newsock);
3500 if (err < 0)
3501 goto done;
3502
cdfbabfb 3503 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3504 if (err < 0) {
3505 sock_release(*newsock);
fa8705b0 3506 *newsock = NULL;
ac5a488e
SS
3507 goto done;
3508 }
3509
3510 (*newsock)->ops = sock->ops;
1b08534e 3511 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3512
3513done:
3514 return err;
3515}
c6d409cf 3516EXPORT_SYMBOL(kernel_accept);
ac5a488e 3517
8a3c245c
PT
3518/**
3519 * kernel_connect - connect a socket (kernel space)
3520 * @sock: socket
3521 * @addr: address
3522 * @addrlen: address length
3523 * @flags: flags (O_NONBLOCK, ...)
3524 *
3525 * For datagram sockets, @addr is the addres to which datagrams are sent
3526 * by default, and the only address from which datagrams are received.
3527 * For stream sockets, attempts to connect to @addr.
3528 * Returns 0 or an error code.
3529 */
3530
ac5a488e 3531int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3532 int flags)
ac5a488e
SS
3533{
3534 return sock->ops->connect(sock, addr, addrlen, flags);
3535}
c6d409cf 3536EXPORT_SYMBOL(kernel_connect);
ac5a488e 3537
8a3c245c
PT
3538/**
3539 * kernel_getsockname - get the address which the socket is bound (kernel space)
3540 * @sock: socket
3541 * @addr: address holder
3542 *
3543 * Fills the @addr pointer with the address which the socket is bound.
3544 * Returns 0 or an error code.
3545 */
3546
9b2c45d4 3547int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3548{
9b2c45d4 3549 return sock->ops->getname(sock, addr, 0);
ac5a488e 3550}
c6d409cf 3551EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3552
8a3c245c
PT
3553/**
3554 * kernel_peername - get the address which the socket is connected (kernel space)
3555 * @sock: socket
3556 * @addr: address holder
3557 *
3558 * Fills the @addr pointer with the address which the socket is connected.
3559 * Returns 0 or an error code.
3560 */
3561
9b2c45d4 3562int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3563{
9b2c45d4 3564 return sock->ops->getname(sock, addr, 1);
ac5a488e 3565}
c6d409cf 3566EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3567
8a3c245c
PT
3568/**
3569 * kernel_getsockopt - get a socket option (kernel space)
3570 * @sock: socket
3571 * @level: API level (SOL_SOCKET, ...)
3572 * @optname: option tag
3573 * @optval: option value
3574 * @optlen: option length
3575 *
3576 * Assigns the option length to @optlen.
3577 * Returns 0 or an error.
3578 */
3579
ac5a488e
SS
3580int kernel_getsockopt(struct socket *sock, int level, int optname,
3581 char *optval, int *optlen)
3582{
3583 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3584 char __user *uoptval;
3585 int __user *uoptlen;
ac5a488e
SS
3586 int err;
3587
fb8621bb
NK
3588 uoptval = (char __user __force *) optval;
3589 uoptlen = (int __user __force *) optlen;
3590
ac5a488e
SS
3591 set_fs(KERNEL_DS);
3592 if (level == SOL_SOCKET)
fb8621bb 3593 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3594 else
fb8621bb
NK
3595 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3596 uoptlen);
ac5a488e
SS
3597 set_fs(oldfs);
3598 return err;
3599}
c6d409cf 3600EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3601
8a3c245c
PT
3602/**
3603 * kernel_setsockopt - set a socket option (kernel space)
3604 * @sock: socket
3605 * @level: API level (SOL_SOCKET, ...)
3606 * @optname: option tag
3607 * @optval: option value
3608 * @optlen: option length
3609 *
3610 * Returns 0 or an error.
3611 */
3612
ac5a488e 3613int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3614 char *optval, unsigned int optlen)
ac5a488e
SS
3615{
3616 mm_segment_t oldfs = get_fs();
fb8621bb 3617 char __user *uoptval;
ac5a488e
SS
3618 int err;
3619
fb8621bb
NK
3620 uoptval = (char __user __force *) optval;
3621
ac5a488e
SS
3622 set_fs(KERNEL_DS);
3623 if (level == SOL_SOCKET)
fb8621bb 3624 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3625 else
fb8621bb 3626 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3627 optlen);
3628 set_fs(oldfs);
3629 return err;
3630}
c6d409cf 3631EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3632
8a3c245c
PT
3633/**
3634 * kernel_sendpage - send a &page through a socket (kernel space)
3635 * @sock: socket
3636 * @page: page
3637 * @offset: page offset
3638 * @size: total size in bytes
3639 * @flags: flags (MSG_DONTWAIT, ...)
3640 *
3641 * Returns the total amount sent in bytes or an error.
3642 */
3643
ac5a488e
SS
3644int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3645 size_t size, int flags)
3646{
3647 if (sock->ops->sendpage)
3648 return sock->ops->sendpage(sock, page, offset, size, flags);
3649
3650 return sock_no_sendpage(sock, page, offset, size, flags);
3651}
c6d409cf 3652EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3653
8a3c245c
PT
3654/**
3655 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3656 * @sk: sock
3657 * @page: page
3658 * @offset: page offset
3659 * @size: total size in bytes
3660 * @flags: flags (MSG_DONTWAIT, ...)
3661 *
3662 * Returns the total amount sent in bytes or an error.
3663 * Caller must hold @sk.
3664 */
3665
306b13eb
TH
3666int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3667 size_t size, int flags)
3668{
3669 struct socket *sock = sk->sk_socket;
3670
3671 if (sock->ops->sendpage_locked)
3672 return sock->ops->sendpage_locked(sk, page, offset, size,
3673 flags);
3674
3675 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3676}
3677EXPORT_SYMBOL(kernel_sendpage_locked);
3678
8a3c245c
PT
3679/**
3680 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3681 * @sock: socket
3682 * @how: connection part
3683 *
3684 * Returns 0 or an error.
3685 */
3686
91cf45f0
TM
3687int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3688{
3689 return sock->ops->shutdown(sock, how);
3690}
91cf45f0 3691EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3692
8a3c245c
PT
3693/**
3694 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3695 * @sk: socket
3696 *
3697 * This routine returns the IP overhead imposed by a socket i.e.
3698 * the length of the underlying IP header, depending on whether
3699 * this is an IPv4 or IPv6 socket and the length from IP options turned
3700 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3701 */
8a3c245c 3702
113c3075
P
3703u32 kernel_sock_ip_overhead(struct sock *sk)
3704{
3705 struct inet_sock *inet;
3706 struct ip_options_rcu *opt;
3707 u32 overhead = 0;
113c3075
P
3708#if IS_ENABLED(CONFIG_IPV6)
3709 struct ipv6_pinfo *np;
3710 struct ipv6_txoptions *optv6 = NULL;
3711#endif /* IS_ENABLED(CONFIG_IPV6) */
3712
3713 if (!sk)
3714 return overhead;
3715
113c3075
P
3716 switch (sk->sk_family) {
3717 case AF_INET:
3718 inet = inet_sk(sk);
3719 overhead += sizeof(struct iphdr);
3720 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3721 sock_owned_by_user(sk));
113c3075
P
3722 if (opt)
3723 overhead += opt->opt.optlen;
3724 return overhead;
3725#if IS_ENABLED(CONFIG_IPV6)
3726 case AF_INET6:
3727 np = inet6_sk(sk);
3728 overhead += sizeof(struct ipv6hdr);
3729 if (np)
3730 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3731 sock_owned_by_user(sk));
113c3075
P
3732 if (optv6)
3733 overhead += (optv6->opt_flen + optv6->opt_nflen);
3734 return overhead;
3735#endif /* IS_ENABLED(CONFIG_IPV6) */
3736 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3737 return overhead;
3738 }
3739}
3740EXPORT_SYMBOL(kernel_sock_ip_overhead);