]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
sockfs: switch to ->free_inode()
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
1da177e4 55#include <linux/mm.h>
1da177e4
LT
56#include <linux/socket.h>
57#include <linux/file.h>
58#include <linux/net.h>
59#include <linux/interrupt.h>
aaca0bdc 60#include <linux/thread_info.h>
55737fda 61#include <linux/rcupdate.h>
1da177e4
LT
62#include <linux/netdevice.h>
63#include <linux/proc_fs.h>
64#include <linux/seq_file.h>
4a3e2f71 65#include <linux/mutex.h>
1da177e4 66#include <linux/if_bridge.h>
20380731
ACM
67#include <linux/if_frad.h>
68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4
LT
75#include <linux/mount.h>
76#include <linux/security.h>
77#include <linux/syscalls.h>
78#include <linux/compat.h>
79#include <linux/kmod.h>
3ec3b2fb 80#include <linux/audit.h>
d86b5e0e 81#include <linux/wireless.h>
1b8d7ae4 82#include <linux/nsproxy.h>
1fd7317d 83#include <linux/magic.h>
5a0e3ad6 84#include <linux/slab.h>
600e1779 85#include <linux/xattr.h>
c8e8cd57 86#include <linux/nospec.h>
8c3c447b 87#include <linux/indirect_call_wrapper.h>
1da177e4 88
7c0f6ba6 89#include <linux/uaccess.h>
1da177e4
LT
90#include <asm/unistd.h>
91
92#include <net/compat.h>
87de87d5 93#include <net/wext.h>
f8451725 94#include <net/cls_cgroup.h>
1da177e4
LT
95
96#include <net/sock.h>
97#include <linux/netfilter.h>
98
6b96018b
AB
99#include <linux/if_tun.h>
100#include <linux/ipv6_route.h>
101#include <linux/route.h>
6b96018b 102#include <linux/sockios.h>
076bb0c8 103#include <net/busy_poll.h>
f24b9be5 104#include <linux/errqueue.h>
06021292 105
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 110
8ae5e030
AV
111static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
112static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
116static __poll_t sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
8ae5e030
AV
138 .read_iter = sock_read_iter,
139 .write_iter = sock_write_iter,
1da177e4
LT
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4 145 .mmap = sock_mmap,
1da177e4
LT
146 .release = sock_close,
147 .fasync = sock_fasync,
5274f052
JA
148 .sendpage = sock_sendpage,
149 .splice_write = generic_splice_sendpage,
9c55e01c 150 .splice_read = sock_splice_read,
1da177e4
LT
151};
152
153/*
154 * The protocol list. Each protocol is registered in here.
155 */
156
1da177e4 157static DEFINE_SPINLOCK(net_family_lock);
190683a9 158static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 159
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
43db362d 177int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 178{
230b1839 179 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5 204
43db362d 205static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 206 void __user *uaddr, int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
68c6beb3 211 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
212 err = get_user(len, ulen);
213 if (err)
1da177e4 214 return err;
89bddce5
SH
215 if (len > klen)
216 len = klen;
68c6beb3 217 if (len < 0)
1da177e4 218 return -EINVAL;
89bddce5 219 if (len) {
d6fe3945
SG
220 if (audit_sockaddr(klen, kaddr))
221 return -ENOMEM;
89bddce5 222 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
223 return -EFAULT;
224 }
225 /*
89bddce5
SH
226 * "fromlen shall refer to the value before truncation.."
227 * 1003.1g
1da177e4
LT
228 */
229 return __put_user(klen, ulen);
230}
231
08009a76 232static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
233
234static struct inode *sock_alloc_inode(struct super_block *sb)
235{
236 struct socket_alloc *ei;
eaefd110 237 struct socket_wq *wq;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
eaefd110
ED
242 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
243 if (!wq) {
43815482
ED
244 kmem_cache_free(sock_inode_cachep, ei);
245 return NULL;
246 }
eaefd110
ED
247 init_waitqueue_head(&wq->wait);
248 wq->fasync_list = NULL;
574aab1e 249 wq->flags = 0;
e6476c21 250 ei->socket.wq = wq;
89bddce5 251
1da177e4
LT
252 ei->socket.state = SS_UNCONNECTED;
253 ei->socket.flags = 0;
254 ei->socket.ops = NULL;
255 ei->socket.sk = NULL;
256 ei->socket.file = NULL;
1da177e4
LT
257
258 return &ei->vfs_inode;
259}
260
6d7855c5 261static void sock_free_inode(struct inode *inode)
1da177e4 262{
43815482
ED
263 struct socket_alloc *ei;
264
265 ei = container_of(inode, struct socket_alloc, vfs_inode);
6d7855c5 266 kfree(ei->socket.wq);
43815482 267 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
268}
269
51cc5068 270static void init_once(void *foo)
1da177e4 271{
89bddce5 272 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 273
a35afb83 274 inode_init_once(&ei->vfs_inode);
1da177e4 275}
89bddce5 276
1e911632 277static void init_inodecache(void)
1da177e4
LT
278{
279 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
280 sizeof(struct socket_alloc),
281 0,
282 (SLAB_HWCACHE_ALIGN |
283 SLAB_RECLAIM_ACCOUNT |
5d097056 284 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 285 init_once);
1e911632 286 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
287}
288
b87221de 289static const struct super_operations sockfs_ops = {
c6d409cf 290 .alloc_inode = sock_alloc_inode,
6d7855c5 291 .free_inode = sock_free_inode,
c6d409cf 292 .statfs = simple_statfs,
1da177e4
LT
293};
294
c23fbb6b
ED
295/*
296 * sockfs_dname() is called from d_path().
297 */
298static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
299{
300 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 301 d_inode(dentry)->i_ino);
c23fbb6b
ED
302}
303
3ba13d17 304static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 305 .d_dname = sockfs_dname,
1da177e4
LT
306};
307
bba0bd31
AG
308static int sockfs_xattr_get(const struct xattr_handler *handler,
309 struct dentry *dentry, struct inode *inode,
310 const char *suffix, void *value, size_t size)
311{
312 if (value) {
313 if (dentry->d_name.len + 1 > size)
314 return -ERANGE;
315 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
316 }
317 return dentry->d_name.len + 1;
318}
319
/* Name of the xattr exposing the socket's protocol name, and its length. */
#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
/* String length, excluding the terminating NUL counted by sizeof. */
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
323
324static const struct xattr_handler sockfs_xattr_handler = {
325 .name = XATTR_NAME_SOCKPROTONAME,
326 .get = sockfs_xattr_get,
327};
328
4a590153
AG
329static int sockfs_security_xattr_set(const struct xattr_handler *handler,
330 struct dentry *dentry, struct inode *inode,
331 const char *suffix, const void *value,
332 size_t size, int flags)
333{
334 /* Handled by LSM. */
335 return -EAGAIN;
336}
337
338static const struct xattr_handler sockfs_security_xattr_handler = {
339 .prefix = XATTR_SECURITY_PREFIX,
340 .set = sockfs_security_xattr_set,
341};
342
bba0bd31
AG
343static const struct xattr_handler *sockfs_xattr_handlers[] = {
344 &sockfs_xattr_handler,
4a590153 345 &sockfs_security_xattr_handler,
bba0bd31
AG
346 NULL
347};
348
c74a1cbb
AV
349static struct dentry *sockfs_mount(struct file_system_type *fs_type,
350 int flags, const char *dev_name, void *data)
351{
bba0bd31
AG
352 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
353 sockfs_xattr_handlers,
354 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
355}
356
357static struct vfsmount *sock_mnt __read_mostly;
358
359static struct file_system_type sock_fs_type = {
360 .name = "sockfs",
361 .mount = sockfs_mount,
362 .kill_sb = kill_anon_super,
363};
364
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success a file descriptor is returned and
 *	the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we return
 *	from this function. We rely on the fact that we do not refer to the
 *	socket after mapping. If one day we need to, this function will have
 *	to increment the reference count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
381
8a3c245c
PT
382/**
383 * sock_alloc_file - Bind a &socket to a &file
384 * @sock: socket
385 * @flags: file status flags
386 * @dname: protocol name
387 *
388 * Returns the &file bound with @sock, implicitly storing it
389 * in sock->file. If dname is %NULL, sets to "".
390 * On failure the return is a ERR pointer (see linux/err.h).
391 * This function uses GFP_KERNEL internally.
392 */
393
aab174f0 394struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 395{
7cbe66b6 396 struct file *file;
1da177e4 397
d93aa9d8
AV
398 if (!dname)
399 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 400
d93aa9d8
AV
401 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
402 O_RDWR | (flags & O_NONBLOCK),
403 &socket_file_ops);
b5ffe634 404 if (IS_ERR(file)) {
8e1611e2 405 sock_release(sock);
39b65252 406 return file;
cc3808f8
AV
407 }
408
409 sock->file = file;
39d8c1b6 410 file->private_data = sock;
28407630 411 return file;
39d8c1b6 412}
56b31d1c 413EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 414
56b31d1c 415static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
416{
417 struct file *newfile;
28407630 418 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
419 if (unlikely(fd < 0)) {
420 sock_release(sock);
28407630 421 return fd;
ce4bb04c 422 }
39d8c1b6 423
aab174f0 424 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 425 if (!IS_ERR(newfile)) {
39d8c1b6 426 fd_install(fd, newfile);
28407630
AV
427 return fd;
428 }
7cbe66b6 429
28407630
AV
430 put_unused_fd(fd);
431 return PTR_ERR(newfile);
1da177e4
LT
432}
433
8a3c245c
PT
434/**
435 * sock_from_file - Return the &socket bounded to @file.
436 * @file: file
437 * @err: pointer to an error code return
438 *
439 * On failure returns %NULL and assigns -ENOTSOCK to @err.
440 */
441
406a3c63 442struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 443{
6cb153ca
BL
444 if (file->f_op == &socket_file_ops)
445 return file->private_data; /* set in sock_map_fd */
446
23bb80d2
ED
447 *err = -ENOTSOCK;
448 return NULL;
6cb153ca 449}
406a3c63 450EXPORT_SYMBOL(sock_from_file);
6cb153ca 451
1da177e4 452/**
c6d409cf 453 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
454 * @fd: file handle
455 * @err: pointer to an error code return
456 *
457 * The file handle passed in is locked and the socket it is bound
241c4667 458 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
459 * with a negative errno code and NULL is returned. The function checks
460 * for both invalid handles and passing a handle which is not a socket.
461 *
462 * On a success the socket object pointer is returned.
463 */
464
465struct socket *sockfd_lookup(int fd, int *err)
466{
467 struct file *file;
1da177e4
LT
468 struct socket *sock;
469
89bddce5
SH
470 file = fget(fd);
471 if (!file) {
1da177e4
LT
472 *err = -EBADF;
473 return NULL;
474 }
89bddce5 475
6cb153ca
BL
476 sock = sock_from_file(file, err);
477 if (!sock)
1da177e4 478 fput(file);
6cb153ca
BL
479 return sock;
480}
c6d409cf 481EXPORT_SYMBOL(sockfd_lookup);
1da177e4 482
6cb153ca
BL
483static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
484{
00e188ef 485 struct fd f = fdget(fd);
6cb153ca
BL
486 struct socket *sock;
487
3672558c 488 *err = -EBADF;
00e188ef
AV
489 if (f.file) {
490 sock = sock_from_file(f.file, err);
491 if (likely(sock)) {
492 *fput_needed = f.flags;
6cb153ca 493 return sock;
00e188ef
AV
494 }
495 fdput(f);
1da177e4 496 }
6cb153ca 497 return NULL;
1da177e4
LT
498}
499
600e1779
MY
500static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
501 size_t size)
502{
503 ssize_t len;
504 ssize_t used = 0;
505
c5ef6035 506 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
507 if (len < 0)
508 return len;
509 used += len;
510 if (buffer) {
511 if (size < used)
512 return -ERANGE;
513 buffer += len;
514 }
515
516 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
517 used += len;
518 if (buffer) {
519 if (size < used)
520 return -ERANGE;
521 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
522 buffer += len;
523 }
524
525 return used;
526}
527
dc647ec8 528static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
529{
530 int err = simple_setattr(dentry, iattr);
531
e1a3a60a 532 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
533 struct socket *sock = SOCKET_I(d_inode(dentry));
534
6d8c50dc
CW
535 if (sock->sk)
536 sock->sk->sk_uid = iattr->ia_uid;
537 else
538 err = -ENOENT;
86741ec2
LC
539 }
540
541 return err;
542}
543
600e1779 544static const struct inode_operations sockfs_inode_ops = {
600e1779 545 .listxattr = sockfs_listxattr,
86741ec2 546 .setattr = sockfs_setattr,
600e1779
MY
547};
548
1da177e4 549/**
8a3c245c 550 * sock_alloc - allocate a socket
89bddce5 551 *
1da177e4
LT
552 * Allocate a new inode and socket object. The two are bound together
553 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 554 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
555 */
556
f4a00aac 557struct socket *sock_alloc(void)
1da177e4 558{
89bddce5
SH
559 struct inode *inode;
560 struct socket *sock;
1da177e4 561
a209dfc7 562 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
563 if (!inode)
564 return NULL;
565
566 sock = SOCKET_I(inode);
567
85fe4025 568 inode->i_ino = get_next_ino();
89bddce5 569 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
570 inode->i_uid = current_fsuid();
571 inode->i_gid = current_fsgid();
600e1779 572 inode->i_op = &sockfs_inode_ops;
1da177e4 573
1da177e4
LT
574 return sock;
575}
f4a00aac 576EXPORT_SYMBOL(sock_alloc);
1da177e4 577
1da177e4 578/**
8a3c245c 579 * sock_release - close a socket
1da177e4
LT
580 * @sock: socket to close
581 *
582 * The socket is released from the protocol stack if it has a release
583 * callback, and the inode is then released if the socket is bound to
89bddce5 584 * an inode not a file.
1da177e4 585 */
89bddce5 586
6d8c50dc 587static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
588{
589 if (sock->ops) {
590 struct module *owner = sock->ops->owner;
591
6d8c50dc
CW
592 if (inode)
593 inode_lock(inode);
1da177e4 594 sock->ops->release(sock);
ff7b11aa 595 sock->sk = NULL;
6d8c50dc
CW
596 if (inode)
597 inode_unlock(inode);
1da177e4
LT
598 sock->ops = NULL;
599 module_put(owner);
600 }
601
e6476c21 602 if (sock->wq->fasync_list)
3410f22e 603 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 604
1da177e4
LT
605 if (!sock->file) {
606 iput(SOCK_INODE(sock));
607 return;
608 }
89bddce5 609 sock->file = NULL;
1da177e4 610}
6d8c50dc
CW
611
612void sock_release(struct socket *sock)
613{
614 __sock_release(sock, NULL);
615}
c6d409cf 616EXPORT_SYMBOL(sock_release);
1da177e4 617
c14ac945 618void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 619{
140c55d4
ED
620 u8 flags = *tx_flags;
621
c14ac945 622 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
623 flags |= SKBTX_HW_TSTAMP;
624
c14ac945 625 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
626 flags |= SKBTX_SW_TSTAMP;
627
c14ac945 628 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
629 flags |= SKBTX_SCHED_TSTAMP;
630
140c55d4 631 *tx_flags = flags;
20d49473 632}
67cc0d40 633EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 634
8c3c447b
PA
635INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
636 size_t));
a648a592
PA
637INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
638 size_t));
d8725c86 639static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 640{
a648a592
PA
641 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
642 inet_sendmsg, sock, msg,
643 msg_data_left(msg));
d8725c86
AV
644 BUG_ON(ret == -EIOCBQUEUED);
645 return ret;
1da177e4
LT
646}
647
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM first: a non-zero
 *	result from security_socket_sendmsg() is returned without sending.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 664
8a3c245c
PT
665/**
666 * kernel_sendmsg - send a message through @sock (kernel-space)
667 * @sock: socket
668 * @msg: message header
669 * @vec: kernel vec
670 * @num: vec array length
671 * @size: total message data size
672 *
673 * Builds the message data with @vec and sends it through @sock.
674 * Returns the number of bytes sent, or an error code.
675 */
676
1da177e4
LT
677int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
678 struct kvec *vec, size_t num, size_t size)
679{
aa563d7b 680 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 681 return sock_sendmsg(sock, msg);
1da177e4 682}
c6d409cf 683EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 684
8a3c245c
PT
685/**
686 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
687 * @sk: sock
688 * @msg: message header
689 * @vec: output s/g array
690 * @num: output s/g array length
691 * @size: total message data size
692 *
693 * Builds the message data with @vec and sends it through @sock.
694 * Returns the number of bytes sent, or an error code.
695 * Caller must hold @sk.
696 */
697
306b13eb
TH
698int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
699 struct kvec *vec, size_t num, size_t size)
700{
701 struct socket *sock = sk->sk_socket;
702
703 if (!sock->ops->sendmsg_locked)
db5980d8 704 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 705
aa563d7b 706 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
707
708 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
709}
710EXPORT_SYMBOL(kernel_sendmsg_locked);
711
8605330a
SHY
712static bool skb_is_err_queue(const struct sk_buff *skb)
713{
714 /* pkt_type of skbs enqueued on the error queue are set to
715 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
716 * in recvmsg, since skbs received on a local socket will never
717 * have a pkt_type of PACKET_OUTGOING.
718 */
719 return skb->pkt_type == PACKET_OUTGOING;
720}
721
b50a5c70
ML
722/* On transmit, software and hardware timestamps are returned independently.
723 * As the two skb clones share the hardware timestamp, which may be updated
724 * before the software timestamp is received, a hardware TX timestamp may be
725 * returned only if there is no software TX timestamp. Ignore false software
726 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 727 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
728 * hardware timestamp.
729 */
730static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
731{
732 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
733}
734
aad9c8c4
ML
735static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
736{
737 struct scm_ts_pktinfo ts_pktinfo;
738 struct net_device *orig_dev;
739
740 if (!skb_mac_header_was_set(skb))
741 return;
742
743 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
744
745 rcu_read_lock();
746 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
747 if (orig_dev)
748 ts_pktinfo.if_index = orig_dev->ifindex;
749 rcu_read_unlock();
750
751 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
752 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
753 sizeof(ts_pktinfo), &ts_pktinfo);
754}
755
92f37fd2
ED
756/*
757 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
758 */
759void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
760 struct sk_buff *skb)
761{
20d49473 762 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 763 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
764 struct scm_timestamping_internal tss;
765
b50a5c70 766 int empty = 1, false_tstamp = 0;
20d49473
PO
767 struct skb_shared_hwtstamps *shhwtstamps =
768 skb_hwtstamps(skb);
769
770 /* Race occurred between timestamp enabling and packet
771 receiving. Fill in the current time for now. */
b50a5c70 772 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 773 __net_timestamp(skb);
b50a5c70
ML
774 false_tstamp = 1;
775 }
20d49473
PO
776
777 if (need_software_tstamp) {
778 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
779 if (new_tstamp) {
780 struct __kernel_sock_timeval tv;
781
782 skb_get_new_timestamp(skb, &tv);
783 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
784 sizeof(tv), &tv);
785 } else {
786 struct __kernel_old_timeval tv;
787
788 skb_get_timestamp(skb, &tv);
789 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
790 sizeof(tv), &tv);
791 }
20d49473 792 } else {
887feae3
DD
793 if (new_tstamp) {
794 struct __kernel_timespec ts;
795
796 skb_get_new_timestampns(skb, &ts);
797 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
798 sizeof(ts), &ts);
799 } else {
800 struct timespec ts;
801
802 skb_get_timestampns(skb, &ts);
803 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
804 sizeof(ts), &ts);
805 }
20d49473
PO
806 }
807 }
808
f24b9be5 809 memset(&tss, 0, sizeof(tss));
c199105d 810 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 811 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 812 empty = 0;
4d276eb6 813 if (shhwtstamps &&
b9f40e21 814 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 815 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 816 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 817 empty = 0;
aad9c8c4
ML
818 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
819 !skb_is_err_queue(skb))
820 put_ts_pktinfo(msg, skb);
821 }
1c885808 822 if (!empty) {
9718475e
DD
823 if (sock_flag(sk, SOCK_TSTAMP_NEW))
824 put_cmsg_scm_timestamping64(msg, &tss);
825 else
826 put_cmsg_scm_timestamping(msg, &tss);
1c885808 827
8605330a 828 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 829 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
830 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
831 skb->len, skb->data);
832 }
92f37fd2 833}
7c81fd8b
ACM
834EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
835
6e3e939f
JB
836void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
837 struct sk_buff *skb)
838{
839 int ack;
840
841 if (!sock_flag(sk, SOCK_WIFI_STATUS))
842 return;
843 if (!skb->wifi_acked_valid)
844 return;
845
846 ack = skb->wifi_acked;
847
848 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
849}
850EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
851
11165f14 852static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
853 struct sk_buff *skb)
3b885787 854{
744d5a3e 855 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 856 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 857 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
858}
859
/* Attach both the timestamp and the drop-count control messages to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 867
8c3c447b 868INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
869 size_t, int));
870INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
871 size_t, int));
1b784140 872static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 873 int flags)
1da177e4 874{
a648a592
PA
875 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
876 inet_recvmsg, sock, msg, msg_data_left(msg),
877 flags);
1da177e4
LT
878}
879
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM first: a non-zero
 *	result from security_socket_recvmsg() is returned without receiving.
 *	Returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 896
c1249c0a 897/**
8a3c245c
PT
898 * kernel_recvmsg - Receive a message from a socket (kernel space)
899 * @sock: The socket to receive the message from
900 * @msg: Received message
901 * @vec: Input s/g array for message data
902 * @num: Size of input s/g array
903 * @size: Number of bytes to read
904 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 905 *
8a3c245c
PT
906 * On return the msg structure contains the scatter/gather array passed in the
907 * vec argument. The array is modified so that it consists of the unfilled
908 * portion of the original array.
c1249c0a 909 *
8a3c245c 910 * The returned value is the total number of bytes received, or an error.
c1249c0a 911 */
8a3c245c 912
89bddce5
SH
913int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
914 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
915{
916 mm_segment_t oldfs = get_fs();
917 int result;
918
aa563d7b 919 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 920 set_fs(KERNEL_DS);
2da62906 921 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
922 set_fs(oldfs);
923 return result;
924}
c6d409cf 925EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 926
ce1d4d3e
CH
927static ssize_t sock_sendpage(struct file *file, struct page *page,
928 int offset, size_t size, loff_t *ppos, int more)
1da177e4 929{
1da177e4
LT
930 struct socket *sock;
931 int flags;
932
ce1d4d3e
CH
933 sock = file->private_data;
934
35f9c09f
ED
935 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
936 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
937 flags |= more;
ce1d4d3e 938
e6949583 939 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 940}
1da177e4 941
9c55e01c 942static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 943 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
944 unsigned int flags)
945{
946 struct socket *sock = file->private_data;
947
997b37da 948 if (unlikely(!sock->ops->splice_read))
95506588 949 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 950
9c55e01c
JA
951 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
952}
953
8ae5e030 954static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 955{
6d652330
AV
956 struct file *file = iocb->ki_filp;
957 struct socket *sock = file->private_data;
0345f931 958 struct msghdr msg = {.msg_iter = *to,
959 .msg_iocb = iocb};
8ae5e030 960 ssize_t res;
ce1d4d3e 961
8ae5e030
AV
962 if (file->f_flags & O_NONBLOCK)
963 msg.msg_flags = MSG_DONTWAIT;
964
965 if (iocb->ki_pos != 0)
1da177e4 966 return -ESPIPE;
027445c3 967
66ee59af 968 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
969 return 0;
970
2da62906 971 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
972 *to = msg.msg_iter;
973 return res;
1da177e4
LT
974}
975
8ae5e030 976static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 977{
6d652330
AV
978 struct file *file = iocb->ki_filp;
979 struct socket *sock = file->private_data;
0345f931 980 struct msghdr msg = {.msg_iter = *from,
981 .msg_iocb = iocb};
8ae5e030 982 ssize_t res;
1da177e4 983
8ae5e030 984 if (iocb->ki_pos != 0)
ce1d4d3e 985 return -ESPIPE;
027445c3 986
8ae5e030
AV
987 if (file->f_flags & O_NONBLOCK)
988 msg.msg_flags = MSG_DONTWAIT;
989
6d652330
AV
990 if (sock->type == SOCK_SEQPACKET)
991 msg.msg_flags |= MSG_EOR;
992
d8725c86 993 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
994 *from = msg.msg_iter;
995 return res;
1da177e4
LT
996}
997
1da177e4
LT
998/*
999 * Atomic setting of ioctl hooks to avoid race
1000 * with module unload.
1001 */
1002
4a3e2f71 1003static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1004static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1005
881d966b 1006void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1007{
4a3e2f71 1008 mutex_lock(&br_ioctl_mutex);
1da177e4 1009 br_ioctl_hook = hook;
4a3e2f71 1010 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1011}
1012EXPORT_SYMBOL(brioctl_set);
1013
4a3e2f71 1014static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1015static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1016
881d966b 1017void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1018{
4a3e2f71 1019 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1020 vlan_ioctl_hook = hook;
4a3e2f71 1021 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1022}
1023EXPORT_SYMBOL(vlan_ioctl_set);
1024
4a3e2f71 1025static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1026static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1027
89bddce5 1028void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1029{
4a3e2f71 1030 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1031 dlci_ioctl_hook = hook;
4a3e2f71 1032 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1033}
1034EXPORT_SYMBOL(dlci_ioctl_set);
1035
6b96018b 1036static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1037 unsigned int cmd, unsigned long arg)
6b96018b
AB
1038{
1039 int err;
1040 void __user *argp = (void __user *)arg;
1041
1042 err = sock->ops->ioctl(sock, cmd, arg);
1043
1044 /*
1045 * If this ioctl is unknown try to hand it down
1046 * to the NIC driver.
1047 */
36fd633e
AV
1048 if (err != -ENOIOCTLCMD)
1049 return err;
6b96018b 1050
36fd633e
AV
1051 if (cmd == SIOCGIFCONF) {
1052 struct ifconf ifc;
1053 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1054 return -EFAULT;
1055 rtnl_lock();
1056 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1057 rtnl_unlock();
1058 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1059 err = -EFAULT;
44c02a2c
AV
1060 } else {
1061 struct ifreq ifr;
1062 bool need_copyout;
63ff03ab 1063 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1064 return -EFAULT;
1065 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1066 if (!err && need_copyout)
63ff03ab 1067 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1068 return -EFAULT;
36fd633e 1069 }
6b96018b
AB
1070 return err;
1071}
1072
1da177e4
LT
1073/*
1074 * With an ioctl, arg may well be a user mode pointer, but we don't know
1075 * what to do with it - that's up to the protocol still.
1076 */
1077
8a3c245c
PT
1078/**
1079 * get_net_ns - increment the refcount of the network namespace
1080 * @ns: common namespace (net)
1081 *
1082 * Returns the net's common namespace.
1083 */
1084
d8d211a2 1085struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1086{
1087 return &get_net(container_of(ns, struct net, ns))->ns;
1088}
d8d211a2 1089EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1090
1da177e4
LT
1091static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1092{
1093 struct socket *sock;
881d966b 1094 struct sock *sk;
1da177e4
LT
1095 void __user *argp = (void __user *)arg;
1096 int pid, err;
881d966b 1097 struct net *net;
1da177e4 1098
b69aee04 1099 sock = file->private_data;
881d966b 1100 sk = sock->sk;
3b1e0a65 1101 net = sock_net(sk);
44c02a2c
AV
1102 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1103 struct ifreq ifr;
1104 bool need_copyout;
1105 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1106 return -EFAULT;
1107 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1108 if (!err && need_copyout)
1109 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1110 return -EFAULT;
1da177e4 1111 } else
3d23e349 1112#ifdef CONFIG_WEXT_CORE
1da177e4 1113 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1114 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1115 } else
3d23e349 1116#endif
89bddce5 1117 switch (cmd) {
1da177e4
LT
1118 case FIOSETOWN:
1119 case SIOCSPGRP:
1120 err = -EFAULT;
1121 if (get_user(pid, (int __user *)argp))
1122 break;
393cc3f5 1123 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1124 break;
1125 case FIOGETOWN:
1126 case SIOCGPGRP:
609d7fa9 1127 err = put_user(f_getown(sock->file),
89bddce5 1128 (int __user *)argp);
1da177e4
LT
1129 break;
1130 case SIOCGIFBR:
1131 case SIOCSIFBR:
1132 case SIOCBRADDBR:
1133 case SIOCBRDELBR:
1134 err = -ENOPKG;
1135 if (!br_ioctl_hook)
1136 request_module("bridge");
1137
4a3e2f71 1138 mutex_lock(&br_ioctl_mutex);
89bddce5 1139 if (br_ioctl_hook)
881d966b 1140 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1141 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1142 break;
1143 case SIOCGIFVLAN:
1144 case SIOCSIFVLAN:
1145 err = -ENOPKG;
1146 if (!vlan_ioctl_hook)
1147 request_module("8021q");
1148
4a3e2f71 1149 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1150 if (vlan_ioctl_hook)
881d966b 1151 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1152 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1153 break;
1da177e4
LT
1154 case SIOCADDDLCI:
1155 case SIOCDELDLCI:
1156 err = -ENOPKG;
1157 if (!dlci_ioctl_hook)
1158 request_module("dlci");
1159
7512cbf6
PE
1160 mutex_lock(&dlci_ioctl_mutex);
1161 if (dlci_ioctl_hook)
1da177e4 1162 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1163 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1164 break;
c62cce2c
AV
1165 case SIOCGSKNS:
1166 err = -EPERM;
1167 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1168 break;
1169
1170 err = open_related_ns(&net->ns, get_net_ns);
1171 break;
0768e170
AB
1172 case SIOCGSTAMP_OLD:
1173 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1174 if (!sock->ops->gettstamp) {
1175 err = -ENOIOCTLCMD;
1176 break;
1177 }
1178 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1179 cmd == SIOCGSTAMP_OLD,
1180 !IS_ENABLED(CONFIG_64BIT));
60747828 1181 break;
0768e170
AB
1182 case SIOCGSTAMP_NEW:
1183 case SIOCGSTAMPNS_NEW:
1184 if (!sock->ops->gettstamp) {
1185 err = -ENOIOCTLCMD;
1186 break;
1187 }
1188 err = sock->ops->gettstamp(sock, argp,
1189 cmd == SIOCGSTAMP_NEW,
1190 false);
c7cbdbf2 1191 break;
1da177e4 1192 default:
63ff03ab 1193 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1194 break;
89bddce5 1195 }
1da177e4
LT
1196 return err;
1197}
1198
8a3c245c
PT
1199/**
1200 * sock_create_lite - creates a socket
1201 * @family: protocol family (AF_INET, ...)
1202 * @type: communication type (SOCK_STREAM, ...)
1203 * @protocol: protocol (0, ...)
1204 * @res: new socket
1205 *
1206 * Creates a new socket and assigns it to @res, passing through LSM.
1207 * The new socket initialization is not complete, see kernel_accept().
1208 * Returns 0 or an error. On failure @res is set to %NULL.
1209 * This function internally uses GFP_KERNEL.
1210 */
1211
1da177e4
LT
1212int sock_create_lite(int family, int type, int protocol, struct socket **res)
1213{
1214 int err;
1215 struct socket *sock = NULL;
89bddce5 1216
1da177e4
LT
1217 err = security_socket_create(family, type, protocol, 1);
1218 if (err)
1219 goto out;
1220
1221 sock = sock_alloc();
1222 if (!sock) {
1223 err = -ENOMEM;
1224 goto out;
1225 }
1226
1da177e4 1227 sock->type = type;
7420ed23
VY
1228 err = security_socket_post_create(sock, family, type, protocol, 1);
1229 if (err)
1230 goto out_release;
1231
1da177e4
LT
1232out:
1233 *res = sock;
1234 return err;
7420ed23
VY
1235out_release:
1236 sock_release(sock);
1237 sock = NULL;
1238 goto out;
1da177e4 1239}
c6d409cf 1240EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1241
1242/* No kernel lock held - perfect */
ade994f4 1243static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1244{
3cafb376 1245 struct socket *sock = file->private_data;
a331de3b 1246 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1247
e88958e6
CH
1248 if (!sock->ops->poll)
1249 return 0;
f641f13b 1250
a331de3b
CH
1251 if (sk_can_busy_loop(sock->sk)) {
1252 /* poll once if requested by the syscall */
1253 if (events & POLL_BUSY_LOOP)
1254 sk_busy_loop(sock->sk, 1);
1255
1256 /* if this socket can poll_ll, tell the system call */
1257 flag = POLL_BUSY_LOOP;
1258 }
1259
1260 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1261}
1262
89bddce5 1263static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1264{
b69aee04 1265 struct socket *sock = file->private_data;
1da177e4
LT
1266
1267 return sock->ops->mmap(file, sock, vma);
1268}
1269
/*
 * Release hook for socket files: releases the socket embedded in @inode via
 * __sock_release().  @filp is unused and the result is always 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1275
1276/*
1277 * Update the socket async list
1278 *
1279 * Fasync_list locking strategy.
1280 *
1281 * 1. fasync_list is modified only under process context socket lock
1282 * i.e. under semaphore.
1283 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1284 * or under socket lock
1da177e4
LT
1285 */
1286
1287static int sock_fasync(int fd, struct file *filp, int on)
1288{
989a2979
ED
1289 struct socket *sock = filp->private_data;
1290 struct sock *sk = sock->sk;
eaefd110 1291 struct socket_wq *wq;
1da177e4 1292
989a2979 1293 if (sk == NULL)
1da177e4 1294 return -EINVAL;
1da177e4
LT
1295
1296 lock_sock(sk);
e6476c21 1297 wq = sock->wq;
eaefd110 1298 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1299
eaefd110 1300 if (!wq->fasync_list)
989a2979
ED
1301 sock_reset_flag(sk, SOCK_FASYNC);
1302 else
bcdce719 1303 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1304
989a2979 1305 release_sock(sk);
1da177e4
LT
1306 return 0;
1307}
1308
ceb5d58b 1309/* This function may be called only under rcu_lock */
1da177e4 1310
ceb5d58b 1311int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1312{
ceb5d58b 1313 if (!wq || !wq->fasync_list)
1da177e4 1314 return -1;
ceb5d58b 1315
89bddce5 1316 switch (how) {
8d8ad9d7 1317 case SOCK_WAKE_WAITD:
ceb5d58b 1318 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1319 break;
1320 goto call_kill;
8d8ad9d7 1321 case SOCK_WAKE_SPACE:
ceb5d58b 1322 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1323 break;
1324 /* fall through */
8d8ad9d7 1325 case SOCK_WAKE_IO:
89bddce5 1326call_kill:
43815482 1327 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1328 break;
8d8ad9d7 1329 case SOCK_WAKE_URG:
43815482 1330 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1331 }
ceb5d58b 1332
1da177e4
LT
1333 return 0;
1334}
c6d409cf 1335EXPORT_SYMBOL(sock_wake_async);
1da177e4 1336
8a3c245c
PT
1337/**
1338 * __sock_create - creates a socket
1339 * @net: net namespace
1340 * @family: protocol family (AF_INET, ...)
1341 * @type: communication type (SOCK_STREAM, ...)
1342 * @protocol: protocol (0, ...)
1343 * @res: new socket
1344 * @kern: boolean for kernel space sockets
1345 *
1346 * Creates a new socket and assigns it to @res, passing through LSM.
1347 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1348 * be set to true if the socket resides in kernel space.
1349 * This function internally uses GFP_KERNEL.
1350 */
1351
721db93a 1352int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1353 struct socket **res, int kern)
1da177e4
LT
1354{
1355 int err;
1356 struct socket *sock;
55737fda 1357 const struct net_proto_family *pf;
1da177e4
LT
1358
1359 /*
89bddce5 1360 * Check protocol is in range
1da177e4
LT
1361 */
1362 if (family < 0 || family >= NPROTO)
1363 return -EAFNOSUPPORT;
1364 if (type < 0 || type >= SOCK_MAX)
1365 return -EINVAL;
1366
1367 /* Compatibility.
1368
1369 This uglymoron is moved from INET layer to here to avoid
1370 deadlock in module load.
1371 */
1372 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1373 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1374 current->comm);
1da177e4
LT
1375 family = PF_PACKET;
1376 }
1377
1378 err = security_socket_create(family, type, protocol, kern);
1379 if (err)
1380 return err;
89bddce5 1381
55737fda
SH
1382 /*
1383 * Allocate the socket and allow the family to set things up. if
1384 * the protocol is 0, the family is instructed to select an appropriate
1385 * default.
1386 */
1387 sock = sock_alloc();
1388 if (!sock) {
e87cc472 1389 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1390 return -ENFILE; /* Not exactly a match, but its the
1391 closest posix thing */
1392 }
1393
1394 sock->type = type;
1395
95a5afca 1396#ifdef CONFIG_MODULES
89bddce5
SH
1397 /* Attempt to load a protocol module if the find failed.
1398 *
1399 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1400 * requested real, full-featured networking support upon configuration.
1401 * Otherwise module support will break!
1402 */
190683a9 1403 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1404 request_module("net-pf-%d", family);
1da177e4
LT
1405#endif
1406
55737fda
SH
1407 rcu_read_lock();
1408 pf = rcu_dereference(net_families[family]);
1409 err = -EAFNOSUPPORT;
1410 if (!pf)
1411 goto out_release;
1da177e4
LT
1412
1413 /*
1414 * We will call the ->create function, that possibly is in a loadable
1415 * module, so we have to bump that loadable module refcnt first.
1416 */
55737fda 1417 if (!try_module_get(pf->owner))
1da177e4
LT
1418 goto out_release;
1419
55737fda
SH
1420 /* Now protected by module ref count */
1421 rcu_read_unlock();
1422
3f378b68 1423 err = pf->create(net, sock, protocol, kern);
55737fda 1424 if (err < 0)
1da177e4 1425 goto out_module_put;
a79af59e 1426
1da177e4
LT
1427 /*
1428 * Now to bump the refcnt of the [loadable] module that owns this
1429 * socket at sock_release time we decrement its refcnt.
1430 */
55737fda
SH
1431 if (!try_module_get(sock->ops->owner))
1432 goto out_module_busy;
1433
1da177e4
LT
1434 /*
1435 * Now that we're done with the ->create function, the [loadable]
1436 * module can have its refcnt decremented
1437 */
55737fda 1438 module_put(pf->owner);
7420ed23
VY
1439 err = security_socket_post_create(sock, family, type, protocol, kern);
1440 if (err)
3b185525 1441 goto out_sock_release;
55737fda 1442 *res = sock;
1da177e4 1443
55737fda
SH
1444 return 0;
1445
1446out_module_busy:
1447 err = -EAFNOSUPPORT;
1da177e4 1448out_module_put:
55737fda
SH
1449 sock->ops = NULL;
1450 module_put(pf->owner);
1451out_sock_release:
1da177e4 1452 sock_release(sock);
55737fda
SH
1453 return err;
1454
1455out_release:
1456 rcu_read_unlock();
1457 goto out_sock_release;
1da177e4 1458}
721db93a 1459EXPORT_SYMBOL(__sock_create);
1da177e4 1460
8a3c245c
PT
1461/**
1462 * sock_create - creates a socket
1463 * @family: protocol family (AF_INET, ...)
1464 * @type: communication type (SOCK_STREAM, ...)
1465 * @protocol: protocol (0, ...)
1466 * @res: new socket
1467 *
1468 * A wrapper around __sock_create().
1469 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1470 */
1471
1da177e4
LT
1472int sock_create(int family, int type, int protocol, struct socket **res)
1473{
1b8d7ae4 1474 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1475}
c6d409cf 1476EXPORT_SYMBOL(sock_create);
1da177e4 1477
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1495
9d6a15c3 1496int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1497{
1498 int retval;
1499 struct socket *sock;
a677a039
UD
1500 int flags;
1501
e38b36f3
UD
1502 /* Check the SOCK_* constants for consistency. */
1503 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1504 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1505 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1506 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1507
a677a039 1508 flags = type & ~SOCK_TYPE_MASK;
77d27200 1509 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1510 return -EINVAL;
1511 type &= SOCK_TYPE_MASK;
1da177e4 1512
aaca0bdc
UD
1513 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1514 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1515
1da177e4
LT
1516 retval = sock_create(family, type, protocol, &sock);
1517 if (retval < 0)
8e1611e2 1518 return retval;
1da177e4 1519
8e1611e2 1520 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1521}
1522
9d6a15c3
DB
1523SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1524{
1525 return __sys_socket(family, type, protocol);
1526}
1527
1da177e4
LT
1528/*
1529 * Create a pair of connected sockets.
1530 */
1531
6debc8d8 1532int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1533{
1534 struct socket *sock1, *sock2;
1535 int fd1, fd2, err;
db349509 1536 struct file *newfile1, *newfile2;
a677a039
UD
1537 int flags;
1538
1539 flags = type & ~SOCK_TYPE_MASK;
77d27200 1540 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1541 return -EINVAL;
1542 type &= SOCK_TYPE_MASK;
1da177e4 1543
aaca0bdc
UD
1544 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1545 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1546
016a266b
AV
1547 /*
1548 * reserve descriptors and make sure we won't fail
1549 * to return them to userland.
1550 */
1551 fd1 = get_unused_fd_flags(flags);
1552 if (unlikely(fd1 < 0))
1553 return fd1;
1554
1555 fd2 = get_unused_fd_flags(flags);
1556 if (unlikely(fd2 < 0)) {
1557 put_unused_fd(fd1);
1558 return fd2;
1559 }
1560
1561 err = put_user(fd1, &usockvec[0]);
1562 if (err)
1563 goto out;
1564
1565 err = put_user(fd2, &usockvec[1]);
1566 if (err)
1567 goto out;
1568
1da177e4
LT
1569 /*
1570 * Obtain the first socket and check if the underlying protocol
1571 * supports the socketpair call.
1572 */
1573
1574 err = sock_create(family, type, protocol, &sock1);
016a266b 1575 if (unlikely(err < 0))
1da177e4
LT
1576 goto out;
1577
1578 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1579 if (unlikely(err < 0)) {
1580 sock_release(sock1);
1581 goto out;
bf3c23d1 1582 }
d73aa286 1583
d47cd945
DH
1584 err = security_socket_socketpair(sock1, sock2);
1585 if (unlikely(err)) {
1586 sock_release(sock2);
1587 sock_release(sock1);
1588 goto out;
1589 }
1590
016a266b
AV
1591 err = sock1->ops->socketpair(sock1, sock2);
1592 if (unlikely(err < 0)) {
1593 sock_release(sock2);
1594 sock_release(sock1);
1595 goto out;
28407630
AV
1596 }
1597
aab174f0 1598 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1599 if (IS_ERR(newfile1)) {
28407630 1600 err = PTR_ERR(newfile1);
016a266b
AV
1601 sock_release(sock2);
1602 goto out;
28407630
AV
1603 }
1604
aab174f0 1605 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1606 if (IS_ERR(newfile2)) {
1607 err = PTR_ERR(newfile2);
016a266b
AV
1608 fput(newfile1);
1609 goto out;
db349509
AV
1610 }
1611
157cf649 1612 audit_fd_pair(fd1, fd2);
d73aa286 1613
db349509
AV
1614 fd_install(fd1, newfile1);
1615 fd_install(fd2, newfile2);
d73aa286 1616 return 0;
1da177e4 1617
016a266b 1618out:
d73aa286 1619 put_unused_fd(fd2);
d73aa286 1620 put_unused_fd(fd1);
1da177e4
LT
1621 return err;
1622}
1623
6debc8d8
DB
1624SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1625 int __user *, usockvec)
1626{
1627 return __sys_socketpair(family, type, protocol, usockvec);
1628}
1629
1da177e4
LT
1630/*
1631 * Bind a name to a socket. Nothing much to do here since it's
1632 * the protocol's responsibility to handle the local address.
1633 *
1634 * We move the socket address to kernel space before we call
1635 * the protocol layer (having also checked the address is ok).
1636 */
1637
a87d35d8 1638int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1639{
1640 struct socket *sock;
230b1839 1641 struct sockaddr_storage address;
6cb153ca 1642 int err, fput_needed;
1da177e4 1643
89bddce5 1644 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1645 if (sock) {
43db362d 1646 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1647 if (!err) {
89bddce5 1648 err = security_socket_bind(sock,
230b1839 1649 (struct sockaddr *)&address,
89bddce5 1650 addrlen);
6cb153ca
BL
1651 if (!err)
1652 err = sock->ops->bind(sock,
89bddce5 1653 (struct sockaddr *)
230b1839 1654 &address, addrlen);
1da177e4 1655 }
6cb153ca 1656 fput_light(sock->file, fput_needed);
89bddce5 1657 }
1da177e4
LT
1658 return err;
1659}
1660
a87d35d8
DB
1661SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1662{
1663 return __sys_bind(fd, umyaddr, addrlen);
1664}
1665
1da177e4
LT
1666/*
1667 * Perform a listen. Basically, we allow the protocol to do anything
1668 * necessary for a listen, and if that works, we mark the socket as
1669 * ready for listening.
1670 */
1671
25e290ee 1672int __sys_listen(int fd, int backlog)
1da177e4
LT
1673{
1674 struct socket *sock;
6cb153ca 1675 int err, fput_needed;
b8e1f9b5 1676 int somaxconn;
89bddce5
SH
1677
1678 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1679 if (sock) {
8efa6e93 1680 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1681 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1682 backlog = somaxconn;
1da177e4
LT
1683
1684 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1685 if (!err)
1686 err = sock->ops->listen(sock, backlog);
1da177e4 1687
6cb153ca 1688 fput_light(sock->file, fput_needed);
1da177e4
LT
1689 }
1690 return err;
1691}
1692
25e290ee
DB
1693SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1694{
1695 return __sys_listen(fd, backlog);
1696}
1697
1da177e4
LT
1698/*
1699 * For accept, we attempt to create a new socket, set up the link
1700 * with the client, wake up the client, then return the new
1701 * connected fd. We collect the address of the connector in kernel
1702 * space and move it to user at the very end. This is unclean because
1703 * we open the socket then return an error.
1704 *
1705 * 1003.1g adds the ability to recvmsg() to query connection pending
1706 * status to recvmsg. We need to add that support in a way thats
b903036a 1707 * clean when we restructure accept also.
1da177e4
LT
1708 */
1709
4541e805
DB
1710int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1711 int __user *upeer_addrlen, int flags)
1da177e4
LT
1712{
1713 struct socket *sock, *newsock;
39d8c1b6 1714 struct file *newfile;
6cb153ca 1715 int err, len, newfd, fput_needed;
230b1839 1716 struct sockaddr_storage address;
1da177e4 1717
77d27200 1718 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1719 return -EINVAL;
1720
1721 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1722 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1723
6cb153ca 1724 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1725 if (!sock)
1726 goto out;
1727
1728 err = -ENFILE;
c6d409cf
ED
1729 newsock = sock_alloc();
1730 if (!newsock)
1da177e4
LT
1731 goto out_put;
1732
1733 newsock->type = sock->type;
1734 newsock->ops = sock->ops;
1735
1da177e4
LT
1736 /*
1737 * We don't need try_module_get here, as the listening socket (sock)
1738 * has the protocol module (sock->ops->owner) held.
1739 */
1740 __module_get(newsock->ops->owner);
1741
28407630 1742 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1743 if (unlikely(newfd < 0)) {
1744 err = newfd;
9a1875e6
DM
1745 sock_release(newsock);
1746 goto out_put;
39d8c1b6 1747 }
aab174f0 1748 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1749 if (IS_ERR(newfile)) {
28407630
AV
1750 err = PTR_ERR(newfile);
1751 put_unused_fd(newfd);
28407630
AV
1752 goto out_put;
1753 }
39d8c1b6 1754
a79af59e
FF
1755 err = security_socket_accept(sock, newsock);
1756 if (err)
39d8c1b6 1757 goto out_fd;
a79af59e 1758
cdfbabfb 1759 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1760 if (err < 0)
39d8c1b6 1761 goto out_fd;
1da177e4
LT
1762
1763 if (upeer_sockaddr) {
9b2c45d4
DV
1764 len = newsock->ops->getname(newsock,
1765 (struct sockaddr *)&address, 2);
1766 if (len < 0) {
1da177e4 1767 err = -ECONNABORTED;
39d8c1b6 1768 goto out_fd;
1da177e4 1769 }
43db362d 1770 err = move_addr_to_user(&address,
230b1839 1771 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1772 if (err < 0)
39d8c1b6 1773 goto out_fd;
1da177e4
LT
1774 }
1775
1776 /* File flags are not inherited via accept() unlike another OSes. */
1777
39d8c1b6
DM
1778 fd_install(newfd, newfile);
1779 err = newfd;
1da177e4 1780
1da177e4 1781out_put:
6cb153ca 1782 fput_light(sock->file, fput_needed);
1da177e4
LT
1783out:
1784 return err;
39d8c1b6 1785out_fd:
9606a216 1786 fput(newfile);
39d8c1b6 1787 put_unused_fd(newfd);
1da177e4
LT
1788 goto out_put;
1789}
1790
4541e805
DB
1791SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1792 int __user *, upeer_addrlen, int, flags)
1793{
1794 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1795}
1796
20f37034
HC
1797SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1798 int __user *, upeer_addrlen)
aaca0bdc 1799{
4541e805 1800 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1801}
1802
1da177e4
LT
1803/*
1804 * Attempt to connect to a socket with the server address. The address
1805 * is in user space so we verify it is OK and move it to kernel space.
1806 *
1807 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1808 * break bindings
1809 *
1810 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1811 * other SEQPACKET protocols that take time to connect() as it doesn't
1812 * include the -EINPROGRESS status for such sockets.
1813 */
1814
1387c2c2 1815int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1816{
1817 struct socket *sock;
230b1839 1818 struct sockaddr_storage address;
6cb153ca 1819 int err, fput_needed;
1da177e4 1820
6cb153ca 1821 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1822 if (!sock)
1823 goto out;
43db362d 1824 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1825 if (err < 0)
1826 goto out_put;
1827
89bddce5 1828 err =
230b1839 1829 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1830 if (err)
1831 goto out_put;
1832
230b1839 1833 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1834 sock->file->f_flags);
1835out_put:
6cb153ca 1836 fput_light(sock->file, fput_needed);
1da177e4
LT
1837out:
1838 return err;
1839}
1840
1387c2c2
DB
1841SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1842 int, addrlen)
1843{
1844 return __sys_connect(fd, uservaddr, addrlen);
1845}
1846
1da177e4
LT
1847/*
1848 * Get the local address ('name') of a socket object. Move the obtained
1849 * name to user space.
1850 */
1851
8882a107
DB
1852int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1853 int __user *usockaddr_len)
1da177e4
LT
1854{
1855 struct socket *sock;
230b1839 1856 struct sockaddr_storage address;
9b2c45d4 1857 int err, fput_needed;
89bddce5 1858
6cb153ca 1859 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1860 if (!sock)
1861 goto out;
1862
1863 err = security_socket_getsockname(sock);
1864 if (err)
1865 goto out_put;
1866
9b2c45d4
DV
1867 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1868 if (err < 0)
1da177e4 1869 goto out_put;
9b2c45d4
DV
1870 /* "err" is actually length in this case */
1871 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1872
1873out_put:
6cb153ca 1874 fput_light(sock->file, fput_needed);
1da177e4
LT
1875out:
1876 return err;
1877}
1878
8882a107
DB
1879SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1880 int __user *, usockaddr_len)
1881{
1882 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1883}
1884
1da177e4
LT
1885/*
1886 * Get the remote address ('name') of a socket object. Move the obtained
1887 * name to user space.
1888 */
1889
b21c8f83
DB
1890int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1891 int __user *usockaddr_len)
1da177e4
LT
1892{
1893 struct socket *sock;
230b1839 1894 struct sockaddr_storage address;
9b2c45d4 1895 int err, fput_needed;
1da177e4 1896
89bddce5
SH
1897 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1898 if (sock != NULL) {
1da177e4
LT
1899 err = security_socket_getpeername(sock);
1900 if (err) {
6cb153ca 1901 fput_light(sock->file, fput_needed);
1da177e4
LT
1902 return err;
1903 }
1904
9b2c45d4
DV
1905 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1906 if (err >= 0)
1907 /* "err" is actually length in this case */
1908 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1909 usockaddr_len);
6cb153ca 1910 fput_light(sock->file, fput_needed);
1da177e4
LT
1911 }
1912 return err;
1913}
1914
b21c8f83
DB
1915SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1916 int __user *, usockaddr_len)
1917{
1918 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1919}
1920
1da177e4
LT
1921/*
1922 * Send a datagram to a given address. We move the address into kernel
1923 * space and check the user space data area is readable before invoking
1924 * the protocol.
1925 */
211b634b
DB
1926int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1927 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1928{
1929 struct socket *sock;
230b1839 1930 struct sockaddr_storage address;
1da177e4
LT
1931 int err;
1932 struct msghdr msg;
1933 struct iovec iov;
6cb153ca 1934 int fput_needed;
6cb153ca 1935
602bd0e9
AV
1936 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1937 if (unlikely(err))
1938 return err;
de0fa95c
PE
1939 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1940 if (!sock)
4387ff75 1941 goto out;
6cb153ca 1942
89bddce5 1943 msg.msg_name = NULL;
89bddce5
SH
1944 msg.msg_control = NULL;
1945 msg.msg_controllen = 0;
1946 msg.msg_namelen = 0;
6cb153ca 1947 if (addr) {
43db362d 1948 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1949 if (err < 0)
1950 goto out_put;
230b1839 1951 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1952 msg.msg_namelen = addr_len;
1da177e4
LT
1953 }
1954 if (sock->file->f_flags & O_NONBLOCK)
1955 flags |= MSG_DONTWAIT;
1956 msg.msg_flags = flags;
d8725c86 1957 err = sock_sendmsg(sock, &msg);
1da177e4 1958
89bddce5 1959out_put:
de0fa95c 1960 fput_light(sock->file, fput_needed);
4387ff75 1961out:
1da177e4
LT
1962 return err;
1963}
1964
211b634b
DB
1965SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1966 unsigned int, flags, struct sockaddr __user *, addr,
1967 int, addr_len)
1968{
1969 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1970}
1971
1da177e4 1972/*
89bddce5 1973 * Send a datagram down a socket.
1da177e4
LT
1974 */
1975
3e0fa65f 1976SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1977 unsigned int, flags)
1da177e4 1978{
211b634b 1979 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1980}
1981
1982/*
89bddce5 1983 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1984 * sender. We verify the buffers are writable and if needed move the
1985 * sender address from kernel to user space.
1986 */
7a09e1eb
DB
1987int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1988 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1989{
1990 struct socket *sock;
1991 struct iovec iov;
1992 struct msghdr msg;
230b1839 1993 struct sockaddr_storage address;
89bddce5 1994 int err, err2;
6cb153ca
BL
1995 int fput_needed;
1996
602bd0e9
AV
1997 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1998 if (unlikely(err))
1999 return err;
de0fa95c 2000 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2001 if (!sock)
de0fa95c 2002 goto out;
1da177e4 2003
89bddce5
SH
2004 msg.msg_control = NULL;
2005 msg.msg_controllen = 0;
f3d33426
HFS
2006 /* Save some cycles and don't copy the address if not needed */
2007 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2008 /* We assume all kernel code knows the size of sockaddr_storage */
2009 msg.msg_namelen = 0;
130ed5d1 2010 msg.msg_iocb = NULL;
9f138fa6 2011 msg.msg_flags = 0;
1da177e4
LT
2012 if (sock->file->f_flags & O_NONBLOCK)
2013 flags |= MSG_DONTWAIT;
2da62906 2014 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2015
89bddce5 2016 if (err >= 0 && addr != NULL) {
43db362d 2017 err2 = move_addr_to_user(&address,
230b1839 2018 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2019 if (err2 < 0)
2020 err = err2;
1da177e4 2021 }
de0fa95c
PE
2022
2023 fput_light(sock->file, fput_needed);
4387ff75 2024out:
1da177e4
LT
2025 return err;
2026}
2027
7a09e1eb
DB
2028SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2029 unsigned int, flags, struct sockaddr __user *, addr,
2030 int __user *, addr_len)
2031{
2032 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2033}
2034
1da177e4 2035/*
89bddce5 2036 * Receive a datagram from a socket.
1da177e4
LT
2037 */
2038
b7c0ddf5
JG
2039SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2040 unsigned int, flags)
1da177e4 2041{
7a09e1eb 2042 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2043}
2044
2045/*
2046 * Set a socket option. Because we don't know the option lengths we have
2047 * to pass the user mode parameter for the protocols to sort out.
2048 */
2049
cc36dca0
DB
2050static int __sys_setsockopt(int fd, int level, int optname,
2051 char __user *optval, int optlen)
1da177e4 2052{
0d01da6a
SF
2053 mm_segment_t oldfs = get_fs();
2054 char *kernel_optval = NULL;
6cb153ca 2055 int err, fput_needed;
1da177e4
LT
2056 struct socket *sock;
2057
2058 if (optlen < 0)
2059 return -EINVAL;
89bddce5
SH
2060
2061 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2062 if (sock != NULL) {
2063 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2064 if (err)
2065 goto out_put;
1da177e4 2066
0d01da6a
SF
2067 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level,
2068 &optname, optval, &optlen,
2069 &kernel_optval);
2070
2071 if (err < 0) {
2072 goto out_put;
2073 } else if (err > 0) {
2074 err = 0;
2075 goto out_put;
2076 }
2077
2078 if (kernel_optval) {
2079 set_fs(KERNEL_DS);
2080 optval = (char __user __force *)kernel_optval;
2081 }
2082
1da177e4 2083 if (level == SOL_SOCKET)
89bddce5
SH
2084 err =
2085 sock_setsockopt(sock, level, optname, optval,
2086 optlen);
1da177e4 2087 else
89bddce5
SH
2088 err =
2089 sock->ops->setsockopt(sock, level, optname, optval,
2090 optlen);
0d01da6a
SF
2091
2092 if (kernel_optval) {
2093 set_fs(oldfs);
2094 kfree(kernel_optval);
2095 }
6cb153ca
BL
2096out_put:
2097 fput_light(sock->file, fput_needed);
1da177e4
LT
2098 }
2099 return err;
2100}
2101
cc36dca0
DB
2102SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2103 char __user *, optval, int, optlen)
2104{
2105 return __sys_setsockopt(fd, level, optname, optval, optlen);
2106}
2107
1da177e4
LT
2108/*
2109 * Get a socket option. Because we don't know the option lengths we have
2110 * to pass a user mode parameter for the protocols to sort out.
2111 */
2112
13a2d70e
DB
2113static int __sys_getsockopt(int fd, int level, int optname,
2114 char __user *optval, int __user *optlen)
1da177e4 2115{
6cb153ca 2116 int err, fput_needed;
1da177e4 2117 struct socket *sock;
0d01da6a 2118 int max_optlen;
1da177e4 2119
89bddce5
SH
2120 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2121 if (sock != NULL) {
6cb153ca
BL
2122 err = security_socket_getsockopt(sock, level, optname);
2123 if (err)
2124 goto out_put;
1da177e4 2125
0d01da6a
SF
2126 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
2127
1da177e4 2128 if (level == SOL_SOCKET)
89bddce5
SH
2129 err =
2130 sock_getsockopt(sock, level, optname, optval,
2131 optlen);
1da177e4 2132 else
89bddce5
SH
2133 err =
2134 sock->ops->getsockopt(sock, level, optname, optval,
2135 optlen);
0d01da6a
SF
2136
2137 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2138 optval, optlen,
2139 max_optlen, err);
6cb153ca
BL
2140out_put:
2141 fput_light(sock->file, fput_needed);
1da177e4
LT
2142 }
2143 return err;
2144}
2145
13a2d70e
DB
2146SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2147 char __user *, optval, int __user *, optlen)
2148{
2149 return __sys_getsockopt(fd, level, optname, optval, optlen);
2150}
2151
1da177e4
LT
2152/*
2153 * Shutdown a socket.
2154 */
2155
005a1aea 2156int __sys_shutdown(int fd, int how)
1da177e4 2157{
6cb153ca 2158 int err, fput_needed;
1da177e4
LT
2159 struct socket *sock;
2160
89bddce5
SH
2161 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2162 if (sock != NULL) {
1da177e4 2163 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2164 if (!err)
2165 err = sock->ops->shutdown(sock, how);
2166 fput_light(sock->file, fput_needed);
1da177e4
LT
2167 }
2168 return err;
2169}
2170
005a1aea
DB
2171SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2172{
2173 return __sys_shutdown(fd, how);
2174}
2175
/*
 * Helper macros yielding the address of a msghdr field that has the same
 * type (int / unsigned) in the 32 and 64 bit layouts on our platforms.
 *
 * They expand in the caller's scope: a variable named `flags` must be
 * visible, and alongside `msg` there must be a `msg_compat` pointer (the
 * name is built with msg##_compat).
 */
#define COMPAT_MSG(msg, member)						\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2182
c71d8ebe
TH
2183struct used_address {
2184 struct sockaddr_storage name;
2185 unsigned int name_len;
2186};
2187
da184284
AV
2188static int copy_msghdr_from_user(struct msghdr *kmsg,
2189 struct user_msghdr __user *umsg,
2190 struct sockaddr __user **save_addr,
2191 struct iovec **iov)
1661bf36 2192{
ffb07550 2193 struct user_msghdr msg;
08adb7da
AV
2194 ssize_t err;
2195
ffb07550 2196 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2197 return -EFAULT;
dbb490b9 2198
864d9664 2199 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2200 kmsg->msg_controllen = msg.msg_controllen;
2201 kmsg->msg_flags = msg.msg_flags;
2202
2203 kmsg->msg_namelen = msg.msg_namelen;
2204 if (!msg.msg_name)
6a2a2b3a
AS
2205 kmsg->msg_namelen = 0;
2206
dbb490b9
ML
2207 if (kmsg->msg_namelen < 0)
2208 return -EINVAL;
2209
1661bf36 2210 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2211 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2212
2213 if (save_addr)
ffb07550 2214 *save_addr = msg.msg_name;
08adb7da 2215
ffb07550 2216 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2217 if (!save_addr) {
864d9664
PA
2218 err = move_addr_to_kernel(msg.msg_name,
2219 kmsg->msg_namelen,
08adb7da
AV
2220 kmsg->msg_name);
2221 if (err < 0)
2222 return err;
2223 }
2224 } else {
2225 kmsg->msg_name = NULL;
2226 kmsg->msg_namelen = 0;
2227 }
2228
ffb07550 2229 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2230 return -EMSGSIZE;
2231
0345f931 2232 kmsg->msg_iocb = NULL;
2233
ffb07550
AV
2234 return import_iovec(save_addr ? READ : WRITE,
2235 msg.msg_iov, msg.msg_iovlen,
da184284 2236 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2237}
2238
666547ff 2239static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2240 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2241 struct used_address *used_address,
2242 unsigned int allowed_msghdr_flags)
1da177e4 2243{
89bddce5
SH
2244 struct compat_msghdr __user *msg_compat =
2245 (struct compat_msghdr __user *)msg;
230b1839 2246 struct sockaddr_storage address;
1da177e4 2247 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2248 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2249 __aligned(sizeof(__kernel_size_t));
89bddce5 2250 /* 20 is size of ipv6_pktinfo */
1da177e4 2251 unsigned char *ctl_buf = ctl;
d8725c86 2252 int ctl_len;
08adb7da 2253 ssize_t err;
89bddce5 2254
08adb7da 2255 msg_sys->msg_name = &address;
1da177e4 2256
08449320 2257 if (MSG_CMSG_COMPAT & flags)
08adb7da 2258 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2259 else
08adb7da 2260 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2261 if (err < 0)
da184284 2262 return err;
1da177e4
LT
2263
2264 err = -ENOBUFS;
2265
228e548e 2266 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2267 goto out_freeiov;
28a94d8f 2268 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2269 ctl_len = msg_sys->msg_controllen;
1da177e4 2270 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2271 err =
228e548e 2272 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2273 sizeof(ctl));
1da177e4
LT
2274 if (err)
2275 goto out_freeiov;
228e548e
AB
2276 ctl_buf = msg_sys->msg_control;
2277 ctl_len = msg_sys->msg_controllen;
1da177e4 2278 } else if (ctl_len) {
ac4340fc
DM
2279 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2280 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2281 if (ctl_len > sizeof(ctl)) {
1da177e4 2282 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2283 if (ctl_buf == NULL)
1da177e4
LT
2284 goto out_freeiov;
2285 }
2286 err = -EFAULT;
2287 /*
228e548e 2288 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2289 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2290 * checking falls down on this.
2291 */
fb8621bb 2292 if (copy_from_user(ctl_buf,
228e548e 2293 (void __user __force *)msg_sys->msg_control,
89bddce5 2294 ctl_len))
1da177e4 2295 goto out_freectl;
228e548e 2296 msg_sys->msg_control = ctl_buf;
1da177e4 2297 }
228e548e 2298 msg_sys->msg_flags = flags;
1da177e4
LT
2299
2300 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2301 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2302 /*
2303 * If this is sendmmsg() and current destination address is same as
2304 * previously succeeded address, omit asking LSM's decision.
2305 * used_address->name_len is initialized to UINT_MAX so that the first
2306 * destination address never matches.
2307 */
bc909d9d
MD
2308 if (used_address && msg_sys->msg_name &&
2309 used_address->name_len == msg_sys->msg_namelen &&
2310 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2311 used_address->name_len)) {
d8725c86 2312 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2313 goto out_freectl;
2314 }
d8725c86 2315 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2316 /*
2317 * If this is sendmmsg() and sending to current destination address was
2318 * successful, remember it.
2319 */
2320 if (used_address && err >= 0) {
2321 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2322 if (msg_sys->msg_name)
2323 memcpy(&used_address->name, msg_sys->msg_name,
2324 used_address->name_len);
c71d8ebe 2325 }
1da177e4
LT
2326
2327out_freectl:
89bddce5 2328 if (ctl_buf != ctl)
1da177e4
LT
2329 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2330out_freeiov:
da184284 2331 kfree(iov);
228e548e
AB
2332 return err;
2333}
2334
2335/*
2336 * BSD sendmsg interface
2337 */
2338
e1834a32
DB
2339long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2340 bool forbid_cmsg_compat)
228e548e
AB
2341{
2342 int fput_needed, err;
2343 struct msghdr msg_sys;
1be374a0
AL
2344 struct socket *sock;
2345
e1834a32
DB
2346 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2347 return -EINVAL;
2348
1be374a0 2349 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2350 if (!sock)
2351 goto out;
2352
28a94d8f 2353 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2354
6cb153ca 2355 fput_light(sock->file, fput_needed);
89bddce5 2356out:
1da177e4
LT
2357 return err;
2358}
2359
666547ff 2360SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2361{
e1834a32 2362 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2363}
2364
228e548e
AB
2365/*
2366 * Linux sendmmsg interface
2367 */
2368
2369int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2370 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2371{
2372 int fput_needed, err, datagrams;
2373 struct socket *sock;
2374 struct mmsghdr __user *entry;
2375 struct compat_mmsghdr __user *compat_entry;
2376 struct msghdr msg_sys;
c71d8ebe 2377 struct used_address used_address;
f092276d 2378 unsigned int oflags = flags;
228e548e 2379
e1834a32
DB
2380 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2381 return -EINVAL;
2382
98382f41
AB
2383 if (vlen > UIO_MAXIOV)
2384 vlen = UIO_MAXIOV;
228e548e
AB
2385
2386 datagrams = 0;
2387
2388 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2389 if (!sock)
2390 return err;
2391
c71d8ebe 2392 used_address.name_len = UINT_MAX;
228e548e
AB
2393 entry = mmsg;
2394 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2395 err = 0;
f092276d 2396 flags |= MSG_BATCH;
228e548e
AB
2397
2398 while (datagrams < vlen) {
f092276d
TH
2399 if (datagrams == vlen - 1)
2400 flags = oflags;
2401
228e548e 2402 if (MSG_CMSG_COMPAT & flags) {
666547ff 2403 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2404 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2405 if (err < 0)
2406 break;
2407 err = __put_user(err, &compat_entry->msg_len);
2408 ++compat_entry;
2409 } else {
a7526eb5 2410 err = ___sys_sendmsg(sock,
666547ff 2411 (struct user_msghdr __user *)entry,
28a94d8f 2412 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2413 if (err < 0)
2414 break;
2415 err = put_user(err, &entry->msg_len);
2416 ++entry;
2417 }
2418
2419 if (err)
2420 break;
2421 ++datagrams;
3023898b
SHY
2422 if (msg_data_left(&msg_sys))
2423 break;
a78cb84c 2424 cond_resched();
228e548e
AB
2425 }
2426
228e548e
AB
2427 fput_light(sock->file, fput_needed);
2428
728ffb86
AB
2429 /* We only return an error if no datagrams were able to be sent */
2430 if (datagrams != 0)
228e548e
AB
2431 return datagrams;
2432
228e548e
AB
2433 return err;
2434}
2435
2436SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2437 unsigned int, vlen, unsigned int, flags)
2438{
e1834a32 2439 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2440}
2441
666547ff 2442static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2443 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2444{
89bddce5
SH
2445 struct compat_msghdr __user *msg_compat =
2446 (struct compat_msghdr __user *)msg;
1da177e4 2447 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2448 struct iovec *iov = iovstack;
1da177e4 2449 unsigned long cmsg_ptr;
2da62906 2450 int len;
08adb7da 2451 ssize_t err;
1da177e4
LT
2452
2453 /* kernel mode address */
230b1839 2454 struct sockaddr_storage addr;
1da177e4
LT
2455
2456 /* user mode address pointers */
2457 struct sockaddr __user *uaddr;
08adb7da 2458 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2459
08adb7da 2460 msg_sys->msg_name = &addr;
1da177e4 2461
f3d33426 2462 if (MSG_CMSG_COMPAT & flags)
08adb7da 2463 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2464 else
08adb7da 2465 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2466 if (err < 0)
da184284 2467 return err;
1da177e4 2468
a2e27255
ACM
2469 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2470 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2471
f3d33426
HFS
2472 /* We assume all kernel code knows the size of sockaddr_storage */
2473 msg_sys->msg_namelen = 0;
2474
1da177e4
LT
2475 if (sock->file->f_flags & O_NONBLOCK)
2476 flags |= MSG_DONTWAIT;
2da62906 2477 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2478 if (err < 0)
2479 goto out_freeiov;
2480 len = err;
2481
2482 if (uaddr != NULL) {
43db362d 2483 err = move_addr_to_user(&addr,
a2e27255 2484 msg_sys->msg_namelen, uaddr,
89bddce5 2485 uaddr_len);
1da177e4
LT
2486 if (err < 0)
2487 goto out_freeiov;
2488 }
a2e27255 2489 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2490 COMPAT_FLAGS(msg));
1da177e4
LT
2491 if (err)
2492 goto out_freeiov;
2493 if (MSG_CMSG_COMPAT & flags)
a2e27255 2494 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2495 &msg_compat->msg_controllen);
2496 else
a2e27255 2497 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2498 &msg->msg_controllen);
2499 if (err)
2500 goto out_freeiov;
2501 err = len;
2502
2503out_freeiov:
da184284 2504 kfree(iov);
a2e27255
ACM
2505 return err;
2506}
2507
2508/*
2509 * BSD recvmsg interface
2510 */
2511
e1834a32
DB
2512long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2513 bool forbid_cmsg_compat)
a2e27255
ACM
2514{
2515 int fput_needed, err;
2516 struct msghdr msg_sys;
1be374a0
AL
2517 struct socket *sock;
2518
e1834a32
DB
2519 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2520 return -EINVAL;
2521
1be374a0 2522 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2523 if (!sock)
2524 goto out;
2525
a7526eb5 2526 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2527
6cb153ca 2528 fput_light(sock->file, fput_needed);
1da177e4
LT
2529out:
2530 return err;
2531}
2532
666547ff 2533SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2534 unsigned int, flags)
2535{
e1834a32 2536 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2537}
2538
a2e27255
ACM
2539/*
2540 * Linux recvmmsg interface
2541 */
2542
e11d4284
AB
2543static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2544 unsigned int vlen, unsigned int flags,
2545 struct timespec64 *timeout)
a2e27255
ACM
2546{
2547 int fput_needed, err, datagrams;
2548 struct socket *sock;
2549 struct mmsghdr __user *entry;
d7256d0e 2550 struct compat_mmsghdr __user *compat_entry;
a2e27255 2551 struct msghdr msg_sys;
766b9f92
DD
2552 struct timespec64 end_time;
2553 struct timespec64 timeout64;
a2e27255
ACM
2554
2555 if (timeout &&
2556 poll_select_set_timeout(&end_time, timeout->tv_sec,
2557 timeout->tv_nsec))
2558 return -EINVAL;
2559
2560 datagrams = 0;
2561
2562 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2563 if (!sock)
2564 return err;
2565
7797dc41
SHY
2566 if (likely(!(flags & MSG_ERRQUEUE))) {
2567 err = sock_error(sock->sk);
2568 if (err) {
2569 datagrams = err;
2570 goto out_put;
2571 }
e623a9e9 2572 }
a2e27255
ACM
2573
2574 entry = mmsg;
d7256d0e 2575 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2576
2577 while (datagrams < vlen) {
2578 /*
2579 * No need to ask LSM for more than the first datagram.
2580 */
d7256d0e 2581 if (MSG_CMSG_COMPAT & flags) {
666547ff 2582 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2583 &msg_sys, flags & ~MSG_WAITFORONE,
2584 datagrams);
d7256d0e
JMG
2585 if (err < 0)
2586 break;
2587 err = __put_user(err, &compat_entry->msg_len);
2588 ++compat_entry;
2589 } else {
a7526eb5 2590 err = ___sys_recvmsg(sock,
666547ff 2591 (struct user_msghdr __user *)entry,
a7526eb5
AL
2592 &msg_sys, flags & ~MSG_WAITFORONE,
2593 datagrams);
d7256d0e
JMG
2594 if (err < 0)
2595 break;
2596 err = put_user(err, &entry->msg_len);
2597 ++entry;
2598 }
2599
a2e27255
ACM
2600 if (err)
2601 break;
a2e27255
ACM
2602 ++datagrams;
2603
71c5c159
BB
2604 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2605 if (flags & MSG_WAITFORONE)
2606 flags |= MSG_DONTWAIT;
2607
a2e27255 2608 if (timeout) {
766b9f92 2609 ktime_get_ts64(&timeout64);
c2e6c856 2610 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2611 if (timeout->tv_sec < 0) {
2612 timeout->tv_sec = timeout->tv_nsec = 0;
2613 break;
2614 }
2615
2616 /* Timeout, return less than vlen datagrams */
2617 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2618 break;
2619 }
2620
2621 /* Out of band data, return right away */
2622 if (msg_sys.msg_flags & MSG_OOB)
2623 break;
a78cb84c 2624 cond_resched();
a2e27255
ACM
2625 }
2626
a2e27255 2627 if (err == 0)
34b88a68
ACM
2628 goto out_put;
2629
2630 if (datagrams == 0) {
2631 datagrams = err;
2632 goto out_put;
2633 }
a2e27255 2634
34b88a68
ACM
2635 /*
2636 * We may return less entries than requested (vlen) if the
2637 * sock is non block and there aren't enough datagrams...
2638 */
2639 if (err != -EAGAIN) {
a2e27255 2640 /*
34b88a68
ACM
2641 * ... or if recvmsg returns an error after we
2642 * received some datagrams, where we record the
2643 * error to return on the next call or if the
2644 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2645 */
34b88a68 2646 sock->sk->sk_err = -err;
a2e27255 2647 }
34b88a68
ACM
2648out_put:
2649 fput_light(sock->file, fput_needed);
a2e27255 2650
34b88a68 2651 return datagrams;
a2e27255
ACM
2652}
2653
e11d4284
AB
2654int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2655 unsigned int vlen, unsigned int flags,
2656 struct __kernel_timespec __user *timeout,
2657 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2658{
2659 int datagrams;
c2e6c856 2660 struct timespec64 timeout_sys;
a2e27255 2661
e11d4284
AB
2662 if (timeout && get_timespec64(&timeout_sys, timeout))
2663 return -EFAULT;
a2e27255 2664
e11d4284 2665 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2666 return -EFAULT;
2667
e11d4284
AB
2668 if (!timeout && !timeout32)
2669 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2670
2671 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2672
e11d4284
AB
2673 if (datagrams <= 0)
2674 return datagrams;
2675
2676 if (timeout && put_timespec64(&timeout_sys, timeout))
2677 datagrams = -EFAULT;
2678
2679 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2680 datagrams = -EFAULT;
2681
2682 return datagrams;
2683}
2684
1255e269
DB
2685SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2686 unsigned int, vlen, unsigned int, flags,
c2e6c856 2687 struct __kernel_timespec __user *, timeout)
1255e269 2688{
e11d4284
AB
2689 if (flags & MSG_CMSG_COMPAT)
2690 return -EINVAL;
2691
2692 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2693}
2694
#ifdef CONFIG_COMPAT_32BIT_TIME
/* recvmmsg(2) with the old 32-bit timespec; MSG_CMSG_COMPAT is refused here. */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2706
a2e27255 2707#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, indexed by call number.
 * Each entry is the number of bytes of unsigned long arguments to read.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 ..  5 */ AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	/*  6 .. 11 */ AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	/* 12 .. 17 */ AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	/* 18 .. 20 */ AL(4), AL(5), AL(4)
};

#undef AL
2718
2719/*
89bddce5 2720 * System call vectors.
1da177e4
LT
2721 *
2722 * Argument checking cleaned up. Saved 20% in size.
2723 * This function doesn't need to set the kernel lock because
89bddce5 2724 * it is set by the callees.
1da177e4
LT
2725 */
2726
3e0fa65f 2727SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2728{
2950fa9d 2729 unsigned long a[AUDITSC_ARGS];
89bddce5 2730 unsigned long a0, a1;
1da177e4 2731 int err;
47379052 2732 unsigned int len;
1da177e4 2733
228e548e 2734 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2735 return -EINVAL;
c8e8cd57 2736 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2737
47379052
AV
2738 len = nargs[call];
2739 if (len > sizeof(a))
2740 return -EINVAL;
2741
1da177e4 2742 /* copy_from_user should be SMP safe. */
47379052 2743 if (copy_from_user(a, args, len))
1da177e4 2744 return -EFAULT;
3ec3b2fb 2745
2950fa9d
CG
2746 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2747 if (err)
2748 return err;
3ec3b2fb 2749
89bddce5
SH
2750 a0 = a[0];
2751 a1 = a[1];
2752
2753 switch (call) {
2754 case SYS_SOCKET:
9d6a15c3 2755 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2756 break;
2757 case SYS_BIND:
a87d35d8 2758 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2759 break;
2760 case SYS_CONNECT:
1387c2c2 2761 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2762 break;
2763 case SYS_LISTEN:
25e290ee 2764 err = __sys_listen(a0, a1);
89bddce5
SH
2765 break;
2766 case SYS_ACCEPT:
4541e805
DB
2767 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2768 (int __user *)a[2], 0);
89bddce5
SH
2769 break;
2770 case SYS_GETSOCKNAME:
2771 err =
8882a107
DB
2772 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2773 (int __user *)a[2]);
89bddce5
SH
2774 break;
2775 case SYS_GETPEERNAME:
2776 err =
b21c8f83
DB
2777 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2778 (int __user *)a[2]);
89bddce5
SH
2779 break;
2780 case SYS_SOCKETPAIR:
6debc8d8 2781 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2782 break;
2783 case SYS_SEND:
f3bf896b
DB
2784 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2785 NULL, 0);
89bddce5
SH
2786 break;
2787 case SYS_SENDTO:
211b634b
DB
2788 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2789 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2790 break;
2791 case SYS_RECV:
d27e9afc
DB
2792 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2793 NULL, NULL);
89bddce5
SH
2794 break;
2795 case SYS_RECVFROM:
7a09e1eb
DB
2796 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2797 (struct sockaddr __user *)a[4],
2798 (int __user *)a[5]);
89bddce5
SH
2799 break;
2800 case SYS_SHUTDOWN:
005a1aea 2801 err = __sys_shutdown(a0, a1);
89bddce5
SH
2802 break;
2803 case SYS_SETSOCKOPT:
cc36dca0
DB
2804 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2805 a[4]);
89bddce5
SH
2806 break;
2807 case SYS_GETSOCKOPT:
2808 err =
13a2d70e
DB
2809 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2810 (int __user *)a[4]);
89bddce5
SH
2811 break;
2812 case SYS_SENDMSG:
e1834a32
DB
2813 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2814 a[2], true);
89bddce5 2815 break;
228e548e 2816 case SYS_SENDMMSG:
e1834a32
DB
2817 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2818 a[3], true);
228e548e 2819 break;
89bddce5 2820 case SYS_RECVMSG:
e1834a32
DB
2821 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2822 a[2], true);
89bddce5 2823 break;
a2e27255 2824 case SYS_RECVMMSG:
e11d4284
AB
2825 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2826 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2827 a[2], a[3],
2828 (struct __kernel_timespec __user *)a[4],
2829 NULL);
2830 else
2831 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2832 a[2], a[3], NULL,
2833 (struct old_timespec32 __user *)a[4]);
a2e27255 2834 break;
de11defe 2835 case SYS_ACCEPT4:
4541e805
DB
2836 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2837 (int __user *)a[2], a[3]);
aaca0bdc 2838 break;
89bddce5
SH
2839 default:
2840 err = -EINVAL;
2841 break;
1da177e4
LT
2842 }
2843 return err;
2844}
2845
89bddce5 2846#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2847
55737fda
SH
2848/**
2849 * sock_register - add a socket protocol handler
2850 * @ops: description of protocol
2851 *
1da177e4
LT
2852 * This function is called by a protocol handler that wants to
2853 * advertise its address family, and have it linked into the
e793c0f7 2854 * socket interface. The value ops->family corresponds to the
55737fda 2855 * socket system call protocol family.
1da177e4 2856 */
f0fd27d4 2857int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2858{
2859 int err;
2860
2861 if (ops->family >= NPROTO) {
3410f22e 2862 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2863 return -ENOBUFS;
2864 }
55737fda
SH
2865
2866 spin_lock(&net_family_lock);
190683a9
ED
2867 if (rcu_dereference_protected(net_families[ops->family],
2868 lockdep_is_held(&net_family_lock)))
55737fda
SH
2869 err = -EEXIST;
2870 else {
cf778b00 2871 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2872 err = 0;
2873 }
55737fda
SH
2874 spin_unlock(&net_family_lock);
2875
3410f22e 2876 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2877 return err;
2878}
c6d409cf 2879EXPORT_SYMBOL(sock_register);
1da177e4 2880
55737fda
SH
2881/**
2882 * sock_unregister - remove a protocol handler
2883 * @family: protocol family to remove
2884 *
1da177e4
LT
2885 * This function is called by a protocol handler that wants to
2886 * remove its address family, and have it unlinked from the
55737fda
SH
2887 * new socket creation.
2888 *
2889 * If protocol handler is a module, then it can use module reference
2890 * counts to protect against new references. If protocol handler is not
2891 * a module then it needs to provide its own protection in
2892 * the ops->create routine.
1da177e4 2893 */
f0fd27d4 2894void sock_unregister(int family)
1da177e4 2895{
f0fd27d4 2896 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2897
55737fda 2898 spin_lock(&net_family_lock);
a9b3cd7f 2899 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2900 spin_unlock(&net_family_lock);
2901
2902 synchronize_rcu();
2903
3410f22e 2904 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2905}
c6d409cf 2906EXPORT_SYMBOL(sock_unregister);
1da177e4 2907
bf2ae2e4
XL
2908bool sock_is_registered(int family)
2909{
66b51b0a 2910 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2911}
2912
77d76ea3 2913static int __init sock_init(void)
1da177e4 2914{
b3e19d92 2915 int err;
2ca794e5
EB
2916 /*
2917 * Initialize the network sysctl infrastructure.
2918 */
2919 err = net_sysctl_init();
2920 if (err)
2921 goto out;
b3e19d92 2922
1da177e4 2923 /*
89bddce5 2924 * Initialize skbuff SLAB cache
1da177e4
LT
2925 */
2926 skb_init();
1da177e4
LT
2927
2928 /*
89bddce5 2929 * Initialize the protocols module.
1da177e4
LT
2930 */
2931
2932 init_inodecache();
b3e19d92
NP
2933
2934 err = register_filesystem(&sock_fs_type);
2935 if (err)
2936 goto out_fs;
1da177e4 2937 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2938 if (IS_ERR(sock_mnt)) {
2939 err = PTR_ERR(sock_mnt);
2940 goto out_mount;
2941 }
77d76ea3
AK
2942
2943 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2944 */
2945
2946#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2947 err = netfilter_init();
2948 if (err)
2949 goto out;
1da177e4 2950#endif
cbeb321a 2951
408eccce 2952 ptp_classifier_init();
c1f19b51 2953
b3e19d92
NP
2954out:
2955 return err;
2956
2957out_mount:
2958 unregister_filesystem(&sock_fs_type);
2959out_fs:
2960 goto out;
1da177e4
LT
2961}
2962
77d76ea3
AK
2963core_initcall(sock_init); /* early initcall */
2964
1da177e4
LT
2965#ifdef CONFIG_PROC_FS
2966void socket_seq_show(struct seq_file *seq)
2967{
648845ab
TZ
2968 seq_printf(seq, "sockets: used %d\n",
2969 sock_inuse_get(seq->private));
1da177e4 2970}
89bddce5 2971#endif /* CONFIG_PROC_FS */
1da177e4 2972
89bbfc95 2973#ifdef CONFIG_COMPAT
36fd633e 2974static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2975{
6b96018b 2976 struct compat_ifconf ifc32;
7a229387 2977 struct ifconf ifc;
7a229387
AB
2978 int err;
2979
6b96018b 2980 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2981 return -EFAULT;
2982
36fd633e
AV
2983 ifc.ifc_len = ifc32.ifc_len;
2984 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2985
36fd633e
AV
2986 rtnl_lock();
2987 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2988 rtnl_unlock();
7a229387
AB
2989 if (err)
2990 return err;
2991
36fd633e 2992 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2993 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2994 return -EFAULT;
2995
2996 return 0;
2997}
2998
/*
 * ethtool_ioctl - handle SIOCETHTOOL issued by a compat caller
 * @net: network namespace
 * @ifr32: user pointer to the caller's struct compat_ifreq
 *
 * Reads the ethtool command word from the buffer referenced by ifr_data.
 * For the rxnfc commands listed in the switch below, a native
 * struct ethtool_rxnfc is staged in memory obtained from
 * compat_alloc_user_space(): it is filled from the compat layout before
 * dev_ioctl() (convert_in) and, for the commands that set convert_out,
 * copied back to the caller's buffer afterwards.  Every other command is
 * handed to dev_ioctl() with the caller's data pointer unchanged.
 *
 * Returns 0 on success, -EFAULT when a user copy fails, -ENOMEM when the
 * supplied rule_cnt is too large, or the error returned by dev_ioctl().
 */
6b96018b 2999static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 3000{
3a7da39d
BH
3001 struct compat_ethtool_rxnfc __user *compat_rxnfc;
3002 bool convert_in = false, convert_out = false;
44c02a2c
AV
3003 size_t buf_size = 0;
3004 struct ethtool_rxnfc __user *rxnfc = NULL;
3005 struct ifreq ifr;
3a7da39d
BH
3006 u32 rule_cnt = 0, actual_rule_cnt;
3007 u32 ethcmd;
7a229387 3008 u32 data;
3a7da39d 3009 int ret;
7a229387 3010
3a7da39d
BH
3011 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
3012 return -EFAULT;
7a229387 3013
3a7da39d
BH
3014 compat_rxnfc = compat_ptr(data);
3015
3016 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
3017 return -EFAULT;
3018
3a7da39d
BH
3019 /* Most ethtool structures are defined without padding.
3020 * Unfortunately struct ethtool_rxnfc is an exception.
3021 */
3022 switch (ethcmd) {
3023 default:
3024 break;
3025 case ETHTOOL_GRXCLSRLALL:
3026 /* Buffer size is variable */
3027 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
3028 return -EFAULT;
	/* Keeps rule_cnt * sizeof(u32) at or below KMALLOC_MAX_SIZE. */
3029 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
3030 return -ENOMEM;
3031 buf_size += rule_cnt * sizeof(u32);
3032 /* fall through */
3033 case ETHTOOL_GRXRINGS:
3034 case ETHTOOL_GRXCLSRLCNT:
3035 case ETHTOOL_GRXCLSRULE:
55664f32 3036 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3037 convert_out = true;
3038 /* fall through */
3039 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3040 buf_size += sizeof(struct ethtool_rxnfc);
3041 convert_in = true;
44c02a2c 3042 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3043 break;
3044 }
3045
44c02a2c 3046 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3047 return -EFAULT;
3048
	/* Point the request at the staged native copy when one is in use. */
44c02a2c 3049 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3050
3a7da39d 3051 if (convert_in) {
127fe533 3052 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3053 * fs.ring_cookie and at the end of fs, but nowhere else.
3054 */
127fe533
AD
3055 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3056 sizeof(compat_rxnfc->fs.m_ext) !=
3057 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3058 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3059 BUILD_BUG_ON(
3060 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3061 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3062 offsetof(struct ethtool_rxnfc, fs.location) -
3063 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3064
3065 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3066 (void __user *)(&rxnfc->fs.m_ext + 1) -
3067 (void __user *)rxnfc) ||
3a7da39d
BH
3068 copy_in_user(&rxnfc->fs.ring_cookie,
3069 &compat_rxnfc->fs.ring_cookie,
954b1244 3070 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3071 (void __user *)&rxnfc->fs.ring_cookie))
3072 return -EFAULT;
	/* For GRXCLSRLALL store the rule_cnt value that was range-checked above. */
3073 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3074 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3075 return -EFAULT;
3076 } else if (copy_in_user(&rxnfc->rule_cnt,
3077 &compat_rxnfc->rule_cnt,
3078 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3079 return -EFAULT;
3080 }
3081
44c02a2c 3082 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3083 if (ret)
3084 return ret;
3085
3086 if (convert_out) {
3087 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3088 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3089 (const void __user *)rxnfc) ||
3a7da39d
BH
3090 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3091 &rxnfc->fs.ring_cookie,
954b1244
SH
3092 (const void __user *)(&rxnfc->fs.location + 1) -
3093 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3094 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3095 sizeof(rxnfc->rule_cnt)))
3096 return -EFAULT;
3097
3098 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3099 /* As an optimisation, we only copy the actual
3100 * number of rules that the underlying
3101 * function returned. Since Mallory might
3102 * change the rule count in user memory, we
3103 * check that it is less than the rule count
3104 * originally given (as the user buffer size),
3105 * which has been range-checked.
3106 */
3107 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3108 return -EFAULT;
3109 if (actual_rule_cnt < rule_cnt)
3110 rule_cnt = actual_rule_cnt;
3111 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3112 &rxnfc->rule_locs[0],
3113 rule_cnt * sizeof(u32)))
3114 return -EFAULT;
3115 }
3116 }
3117
3118 return 0;
7a229387
AB
3119}
3120
7a50a240
AB
3121static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3122{
7a50a240 3123 compat_uptr_t uptr32;
44c02a2c
AV
3124 struct ifreq ifr;
3125 void __user *saved;
3126 int err;
7a50a240 3127
44c02a2c 3128 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3129 return -EFAULT;
3130
3131 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3132 return -EFAULT;
3133
44c02a2c
AV
3134 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3135 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3136
44c02a2c
AV
3137 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3138 if (!err) {
3139 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3140 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3141 err = -EFAULT;
ccbd6a5a 3142 }
44c02a2c 3143 return err;
7a229387
AB
3144}
3145
590d4693
BH
3146/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3147static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3148 struct compat_ifreq __user *u_ifreq32)
7a229387 3149{
44c02a2c 3150 struct ifreq ifreq;
7a229387
AB
3151 u32 data32;
3152
44c02a2c 3153 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3154 return -EFAULT;
44c02a2c 3155 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3156 return -EFAULT;
44c02a2c 3157 ifreq.ifr_data = compat_ptr(data32);
7a229387 3158
44c02a2c 3159 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3160}
3161
37ac39bd
JB
3162static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3163 unsigned int cmd,
3164 struct compat_ifreq __user *uifr32)
3165{
3166 struct ifreq __user *uifr;
3167 int err;
3168
3169 /* Handle the fact that while struct ifreq has the same *layout* on
3170 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3171 * which are handled elsewhere, it still has different *size* due to
3172 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3173 * resulting in struct ifreq being 32 and 40 bytes respectively).
3174 * As a result, if the struct happens to be at the end of a page and
3175 * the next page isn't readable/writable, we get a fault. To prevent
3176 * that, copy back and forth to the full size.
3177 */
3178
3179 uifr = compat_alloc_user_space(sizeof(*uifr));
3180 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3181 return -EFAULT;
3182
3183 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3184
3185 if (!err) {
3186 switch (cmd) {
3187 case SIOCGIFFLAGS:
3188 case SIOCGIFMETRIC:
3189 case SIOCGIFMTU:
3190 case SIOCGIFMEM:
3191 case SIOCGIFHWADDR:
3192 case SIOCGIFINDEX:
3193 case SIOCGIFADDR:
3194 case SIOCGIFBRDADDR:
3195 case SIOCGIFDSTADDR:
3196 case SIOCGIFNETMASK:
3197 case SIOCGIFPFLAGS:
3198 case SIOCGIFTXQLEN:
3199 case SIOCGMIIPHY:
3200 case SIOCGMIIREG:
c6c9fee3 3201 case SIOCGIFNAME:
37ac39bd
JB
3202 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3203 err = -EFAULT;
3204 break;
3205 }
3206 }
3207 return err;
3208}
3209
a2116ed2
AB
3210static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3211 struct compat_ifreq __user *uifr32)
3212{
3213 struct ifreq ifr;
3214 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3215 int err;
3216
3217 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3218 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3219 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3220 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3221 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3222 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3223 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3224 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3225 if (err)
3226 return -EFAULT;
3227
44c02a2c 3228 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3229
3230 if (cmd == SIOCGIFMAP && !err) {
3231 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3232 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3233 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3234 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3235 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3236 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3237 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3238 if (err)
3239 err = -EFAULT;
3240 }
3241 return err;
3242}
3243
7a229387 3244struct rtentry32 {
c6d409cf 3245 u32 rt_pad1;
7a229387
AB
3246 struct sockaddr rt_dst; /* target address */
3247 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3248 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3249 unsigned short rt_flags;
3250 short rt_pad2;
3251 u32 rt_pad3;
3252 unsigned char rt_tos;
3253 unsigned char rt_class;
3254 short rt_pad4;
3255 short rt_metric; /* +1 for binary compatibility! */
7a229387 3256 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3257 u32 rt_mtu; /* per route MTU/Window */
3258 u32 rt_window; /* Window clamping */
7a229387
AB
3259 unsigned short rt_irtt; /* Initial RTT */
3260};
3261
3262struct in6_rtmsg32 {
3263 struct in6_addr rtmsg_dst;
3264 struct in6_addr rtmsg_src;
3265 struct in6_addr rtmsg_gateway;
3266 u32 rtmsg_type;
3267 u16 rtmsg_dst_len;
3268 u16 rtmsg_src_len;
3269 u32 rtmsg_metric;
3270 u32 rtmsg_info;
3271 u32 rtmsg_flags;
3272 s32 rtmsg_ifindex;
3273};
3274
6b96018b
AB
3275static int routing_ioctl(struct net *net, struct socket *sock,
3276 unsigned int cmd, void __user *argp)
7a229387
AB
3277{
3278 int ret;
3279 void *r = NULL;
3280 struct in6_rtmsg r6;
3281 struct rtentry r4;
3282 char devname[16];
3283 u32 rtdev;
3284 mm_segment_t old_fs = get_fs();
3285
6b96018b
AB
3286 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3287 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3288 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3289 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3290 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3291 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3292 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3293 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3294 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3295 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3296 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3297
3298 r = (void *) &r6;
3299 } else { /* ipv4 */
6b96018b 3300 struct rtentry32 __user *ur4 = argp;
c6d409cf 3301 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3302 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3303 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3304 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3305 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3306 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3307 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3308 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3309 if (rtdev) {
c6d409cf 3310 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3311 r4.rt_dev = (char __user __force *)devname;
3312 devname[15] = 0;
7a229387
AB
3313 } else
3314 r4.rt_dev = NULL;
3315
3316 r = (void *) &r4;
3317 }
3318
3319 if (ret) {
3320 ret = -EFAULT;
3321 goto out;
3322 }
3323
c6d409cf 3324 set_fs(KERNEL_DS);
63ff03ab 3325 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3326 set_fs(old_fs);
7a229387
AB
3327
3328out:
7a229387
AB
3329 return ret;
3330}
3331
3332/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3333 * for some operations; this forces use of the newer bridge-utils that
25985edc 3334 * use compatible ioctls
7a229387 3335 */
6b96018b 3336static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3337{
6b96018b 3338 compat_ulong_t tmp;
7a229387 3339
6b96018b 3340 if (get_user(tmp, argp))
7a229387
AB
3341 return -EFAULT;
3342 if (tmp == BRCTL_GET_VERSION)
3343 return BRCTL_VERSION + 1;
3344 return -EINVAL;
3345}
3346
6b96018b
AB
3347static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3348 unsigned int cmd, unsigned long arg)
3349{
3350 void __user *argp = compat_ptr(arg);
3351 struct sock *sk = sock->sk;
3352 struct net *net = sock_net(sk);
7a229387 3353
6b96018b 3354 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3355 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3356
3357 switch (cmd) {
3358 case SIOCSIFBR:
3359 case SIOCGIFBR:
3360 return old_bridge_ioctl(argp);
6b96018b 3361 case SIOCGIFCONF:
36fd633e 3362 return compat_dev_ifconf(net, argp);
6b96018b
AB
3363 case SIOCETHTOOL:
3364 return ethtool_ioctl(net, argp);
7a50a240
AB
3365 case SIOCWANDEV:
3366 return compat_siocwandev(net, argp);
a2116ed2
AB
3367 case SIOCGIFMAP:
3368 case SIOCSIFMAP:
3369 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3370 case SIOCADDRT:
3371 case SIOCDELRT:
3372 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3373 case SIOCGSTAMP_OLD:
3374 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3375 if (!sock->ops->gettstamp)
3376 return -ENOIOCTLCMD;
0768e170 3377 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3378 !COMPAT_USE_64BIT_TIME);
3379
590d4693
BH
3380 case SIOCBONDSLAVEINFOQUERY:
3381 case SIOCBONDINFOQUERY:
a2116ed2 3382 case SIOCSHWTSTAMP:
fd468c74 3383 case SIOCGHWTSTAMP:
590d4693 3384 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3385
3386 case FIOSETOWN:
3387 case SIOCSPGRP:
3388 case FIOGETOWN:
3389 case SIOCGPGRP:
3390 case SIOCBRADDBR:
3391 case SIOCBRDELBR:
3392 case SIOCGIFVLAN:
3393 case SIOCSIFVLAN:
3394 case SIOCADDDLCI:
3395 case SIOCDELDLCI:
c62cce2c 3396 case SIOCGSKNS:
0768e170
AB
3397 case SIOCGSTAMP_NEW:
3398 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3399 return sock_ioctl(file, cmd, arg);
3400
3401 case SIOCGIFFLAGS:
3402 case SIOCSIFFLAGS:
3403 case SIOCGIFMETRIC:
3404 case SIOCSIFMETRIC:
3405 case SIOCGIFMTU:
3406 case SIOCSIFMTU:
3407 case SIOCGIFMEM:
3408 case SIOCSIFMEM:
3409 case SIOCGIFHWADDR:
3410 case SIOCSIFHWADDR:
3411 case SIOCADDMULTI:
3412 case SIOCDELMULTI:
3413 case SIOCGIFINDEX:
6b96018b
AB
3414 case SIOCGIFADDR:
3415 case SIOCSIFADDR:
3416 case SIOCSIFHWBROADCAST:
6b96018b 3417 case SIOCDIFADDR:
6b96018b
AB
3418 case SIOCGIFBRDADDR:
3419 case SIOCSIFBRDADDR:
3420 case SIOCGIFDSTADDR:
3421 case SIOCSIFDSTADDR:
3422 case SIOCGIFNETMASK:
3423 case SIOCSIFNETMASK:
3424 case SIOCSIFPFLAGS:
3425 case SIOCGIFPFLAGS:
3426 case SIOCGIFTXQLEN:
3427 case SIOCSIFTXQLEN:
3428 case SIOCBRADDIF:
3429 case SIOCBRDELIF:
c6c9fee3 3430 case SIOCGIFNAME:
9177efd3
AB
3431 case SIOCSIFNAME:
3432 case SIOCGMIIPHY:
3433 case SIOCGMIIREG:
3434 case SIOCSMIIREG:
f92d4fc9
AV
3435 case SIOCBONDENSLAVE:
3436 case SIOCBONDRELEASE:
3437 case SIOCBONDSETHWADDR:
3438 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3439 return compat_ifreq_ioctl(net, sock, cmd, argp);
3440
6b96018b
AB
3441 case SIOCSARP:
3442 case SIOCGARP:
3443 case SIOCDARP:
6b96018b 3444 case SIOCATMARK:
63ff03ab 3445 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3446 }
3447
6b96018b
AB
3448 return -ENOIOCTLCMD;
3449}
7a229387 3450
95c96174 3451static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3452 unsigned long arg)
89bbfc95
SP
3453{
3454 struct socket *sock = file->private_data;
3455 int ret = -ENOIOCTLCMD;
87de87d5
DM
3456 struct sock *sk;
3457 struct net *net;
3458
3459 sk = sock->sk;
3460 net = sock_net(sk);
89bbfc95
SP
3461
3462 if (sock->ops->compat_ioctl)
3463 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3464
87de87d5
DM
3465 if (ret == -ENOIOCTLCMD &&
3466 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3467 ret = compat_wext_handle_ioctl(net, cmd, arg);
3468
6b96018b
AB
3469 if (ret == -ENOIOCTLCMD)
3470 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3471
89bbfc95
SP
3472 return ret;
3473}
3474#endif
3475
8a3c245c
PT
3476/**
3477 * kernel_bind - bind an address to a socket (kernel space)
3478 * @sock: socket
3479 * @addr: address
3480 * @addrlen: length of address
3481 *
3482 * Returns 0 or an error.
3483 */
3484
ac5a488e
SS
3485int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3486{
3487 return sock->ops->bind(sock, addr, addrlen);
3488}
c6d409cf 3489EXPORT_SYMBOL(kernel_bind);
ac5a488e 3490
8a3c245c
PT
3491/**
3492 * kernel_listen - move socket to listening state (kernel space)
3493 * @sock: socket
3494 * @backlog: pending connections queue size
3495 *
3496 * Returns 0 or an error.
3497 */
3498
ac5a488e
SS
3499int kernel_listen(struct socket *sock, int backlog)
3500{
3501 return sock->ops->listen(sock, backlog);
3502}
c6d409cf 3503EXPORT_SYMBOL(kernel_listen);
ac5a488e 3504
8a3c245c
PT
3505/**
3506 * kernel_accept - accept a connection (kernel space)
3507 * @sock: listening socket
3508 * @newsock: new connected socket
3509 * @flags: flags
3510 *
3511 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3512 * If it fails, @newsock is guaranteed to be %NULL.
3513 * Returns 0 or an error.
3514 */
3515
ac5a488e
SS
3516int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3517{
3518 struct sock *sk = sock->sk;
3519 int err;
3520
3521 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3522 newsock);
3523 if (err < 0)
3524 goto done;
3525
cdfbabfb 3526 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3527 if (err < 0) {
3528 sock_release(*newsock);
fa8705b0 3529 *newsock = NULL;
ac5a488e
SS
3530 goto done;
3531 }
3532
3533 (*newsock)->ops = sock->ops;
1b08534e 3534 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3535
3536done:
3537 return err;
3538}
c6d409cf 3539EXPORT_SYMBOL(kernel_accept);
ac5a488e 3540
8a3c245c
PT
3541/**
3542 * kernel_connect - connect a socket (kernel space)
3543 * @sock: socket
3544 * @addr: address
3545 * @addrlen: address length
3546 * @flags: flags (O_NONBLOCK, ...)
3547 *
3548 * For datagram sockets, @addr is the addres to which datagrams are sent
3549 * by default, and the only address from which datagrams are received.
3550 * For stream sockets, attempts to connect to @addr.
3551 * Returns 0 or an error code.
3552 */
3553
ac5a488e 3554int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3555 int flags)
ac5a488e
SS
3556{
3557 return sock->ops->connect(sock, addr, addrlen, flags);
3558}
c6d409cf 3559EXPORT_SYMBOL(kernel_connect);
ac5a488e 3560
8a3c245c
PT
3561/**
3562 * kernel_getsockname - get the address which the socket is bound (kernel space)
3563 * @sock: socket
3564 * @addr: address holder
3565 *
3566 * Fills the @addr pointer with the address which the socket is bound.
3567 * Returns 0 or an error code.
3568 */
3569
9b2c45d4 3570int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3571{
9b2c45d4 3572 return sock->ops->getname(sock, addr, 0);
ac5a488e 3573}
c6d409cf 3574EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3575
8a3c245c
PT
3576/**
3577 * kernel_peername - get the address which the socket is connected (kernel space)
3578 * @sock: socket
3579 * @addr: address holder
3580 *
3581 * Fills the @addr pointer with the address which the socket is connected.
3582 * Returns 0 or an error code.
3583 */
3584
9b2c45d4 3585int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3586{
9b2c45d4 3587 return sock->ops->getname(sock, addr, 1);
ac5a488e 3588}
c6d409cf 3589EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3590
8a3c245c
PT
3591/**
3592 * kernel_getsockopt - get a socket option (kernel space)
3593 * @sock: socket
3594 * @level: API level (SOL_SOCKET, ...)
3595 * @optname: option tag
3596 * @optval: option value
3597 * @optlen: option length
3598 *
3599 * Assigns the option length to @optlen.
3600 * Returns 0 or an error.
3601 */
3602
ac5a488e
SS
3603int kernel_getsockopt(struct socket *sock, int level, int optname,
3604 char *optval, int *optlen)
3605{
3606 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3607 char __user *uoptval;
3608 int __user *uoptlen;
ac5a488e
SS
3609 int err;
3610
fb8621bb
NK
3611 uoptval = (char __user __force *) optval;
3612 uoptlen = (int __user __force *) optlen;
3613
ac5a488e
SS
3614 set_fs(KERNEL_DS);
3615 if (level == SOL_SOCKET)
fb8621bb 3616 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3617 else
fb8621bb
NK
3618 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3619 uoptlen);
ac5a488e
SS
3620 set_fs(oldfs);
3621 return err;
3622}
c6d409cf 3623EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3624
8a3c245c
PT
3625/**
3626 * kernel_setsockopt - set a socket option (kernel space)
3627 * @sock: socket
3628 * @level: API level (SOL_SOCKET, ...)
3629 * @optname: option tag
3630 * @optval: option value
3631 * @optlen: option length
3632 *
3633 * Returns 0 or an error.
3634 */
3635
ac5a488e 3636int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3637 char *optval, unsigned int optlen)
ac5a488e
SS
3638{
3639 mm_segment_t oldfs = get_fs();
fb8621bb 3640 char __user *uoptval;
ac5a488e
SS
3641 int err;
3642
fb8621bb
NK
3643 uoptval = (char __user __force *) optval;
3644
ac5a488e
SS
3645 set_fs(KERNEL_DS);
3646 if (level == SOL_SOCKET)
fb8621bb 3647 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3648 else
fb8621bb 3649 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3650 optlen);
3651 set_fs(oldfs);
3652 return err;
3653}
c6d409cf 3654EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3655
8a3c245c
PT
3656/**
3657 * kernel_sendpage - send a &page through a socket (kernel space)
3658 * @sock: socket
3659 * @page: page
3660 * @offset: page offset
3661 * @size: total size in bytes
3662 * @flags: flags (MSG_DONTWAIT, ...)
3663 *
3664 * Returns the total amount sent in bytes or an error.
3665 */
3666
ac5a488e
SS
3667int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3668 size_t size, int flags)
3669{
3670 if (sock->ops->sendpage)
3671 return sock->ops->sendpage(sock, page, offset, size, flags);
3672
3673 return sock_no_sendpage(sock, page, offset, size, flags);
3674}
c6d409cf 3675EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3676
8a3c245c
PT
3677/**
3678 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3679 * @sk: sock
3680 * @page: page
3681 * @offset: page offset
3682 * @size: total size in bytes
3683 * @flags: flags (MSG_DONTWAIT, ...)
3684 *
3685 * Returns the total amount sent in bytes or an error.
3686 * Caller must hold @sk.
3687 */
3688
306b13eb
TH
3689int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3690 size_t size, int flags)
3691{
3692 struct socket *sock = sk->sk_socket;
3693
3694 if (sock->ops->sendpage_locked)
3695 return sock->ops->sendpage_locked(sk, page, offset, size,
3696 flags);
3697
3698 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3699}
3700EXPORT_SYMBOL(kernel_sendpage_locked);
3701
8a3c245c
PT
3702/**
3703 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3704 * @sock: socket
3705 * @how: connection part
3706 *
3707 * Returns 0 or an error.
3708 */
3709
91cf45f0
TM
3710int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3711{
3712 return sock->ops->shutdown(sock, how);
3713}
91cf45f0 3714EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3715
8a3c245c
PT
3716/**
3717 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3718 * @sk: socket
3719 *
3720 * This routine returns the IP overhead imposed by a socket i.e.
3721 * the length of the underlying IP header, depending on whether
3722 * this is an IPv4 or IPv6 socket and the length from IP options turned
3723 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3724 */
8a3c245c 3725
113c3075
P
3726u32 kernel_sock_ip_overhead(struct sock *sk)
3727{
3728 struct inet_sock *inet;
3729 struct ip_options_rcu *opt;
3730 u32 overhead = 0;
113c3075
P
3731#if IS_ENABLED(CONFIG_IPV6)
3732 struct ipv6_pinfo *np;
3733 struct ipv6_txoptions *optv6 = NULL;
3734#endif /* IS_ENABLED(CONFIG_IPV6) */
3735
3736 if (!sk)
3737 return overhead;
3738
113c3075
P
3739 switch (sk->sk_family) {
3740 case AF_INET:
3741 inet = inet_sk(sk);
3742 overhead += sizeof(struct iphdr);
3743 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3744 sock_owned_by_user(sk));
113c3075
P
3745 if (opt)
3746 overhead += opt->opt.optlen;
3747 return overhead;
3748#if IS_ENABLED(CONFIG_IPV6)
3749 case AF_INET6:
3750 np = inet6_sk(sk);
3751 overhead += sizeof(struct ipv6hdr);
3752 if (np)
3753 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3754 sock_owned_by_user(sk));
113c3075
P
3755 if (optv6)
3756 overhead += (optv6->opt_flen + optv6->opt_nflen);
3757 return overhead;
3758#endif /* IS_ENABLED(CONFIG_IPV6) */
3759 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3760 return overhead;
3761 }
3762}
3763EXPORT_SYMBOL(kernel_sock_ip_overhead);