]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
wan: cosa: remove dead cosa_net_ioctl() function
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
d7c08826 107#include <linux/ptp_clock_kernel.h>
06021292 108
e0d1095a 109#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
110unsigned int sysctl_net_busy_read __read_mostly;
111unsigned int sysctl_net_busy_poll __read_mostly;
06021292 112#endif
6b96018b 113
8ae5e030
AV
114static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
115static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 116static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
117
118static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
119static __poll_t sock_poll(struct file *file,
120 struct poll_table_struct *wait);
89bddce5 121static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
122#ifdef CONFIG_COMPAT
123static long compat_sock_ioctl(struct file *file,
89bddce5 124 unsigned int cmd, unsigned long arg);
89bbfc95 125#endif
1da177e4 126static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
127static ssize_t sock_sendpage(struct file *file, struct page *page,
128 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 129static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 130 struct pipe_inode_info *pipe, size_t len,
9c55e01c 131 unsigned int flags);
542d3065
AB
132
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: hand the request to the protocol's
 * show_fdinfo operation when one is provided, otherwise print nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 144
1da177e4
LT
145/*
146 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
147 * in the operation structures but are done directly via the socketcall() multiplexor.
148 */
149
da7071d7 150static const struct file_operations socket_file_ops = {
1da177e4
LT
151 .owner = THIS_MODULE,
152 .llseek = no_llseek,
8ae5e030
AV
153 .read_iter = sock_read_iter,
154 .write_iter = sock_write_iter,
1da177e4
LT
155 .poll = sock_poll,
156 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
157#ifdef CONFIG_COMPAT
158 .compat_ioctl = compat_sock_ioctl,
159#endif
1da177e4 160 .mmap = sock_mmap,
1da177e4
LT
161 .release = sock_close,
162 .fasync = sock_fasync,
5274f052
JA
163 .sendpage = sock_sendpage,
164 .splice_write = generic_splice_sendpage,
9c55e01c 165 .splice_read = sock_splice_read,
b4653342 166 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
167};
168
fe0bdbde
YD
169static const char * const pf_family_names[] = {
170 [PF_UNSPEC] = "PF_UNSPEC",
171 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
172 [PF_INET] = "PF_INET",
173 [PF_AX25] = "PF_AX25",
174 [PF_IPX] = "PF_IPX",
175 [PF_APPLETALK] = "PF_APPLETALK",
176 [PF_NETROM] = "PF_NETROM",
177 [PF_BRIDGE] = "PF_BRIDGE",
178 [PF_ATMPVC] = "PF_ATMPVC",
179 [PF_X25] = "PF_X25",
180 [PF_INET6] = "PF_INET6",
181 [PF_ROSE] = "PF_ROSE",
182 [PF_DECnet] = "PF_DECnet",
183 [PF_NETBEUI] = "PF_NETBEUI",
184 [PF_SECURITY] = "PF_SECURITY",
185 [PF_KEY] = "PF_KEY",
186 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
187 [PF_PACKET] = "PF_PACKET",
188 [PF_ASH] = "PF_ASH",
189 [PF_ECONET] = "PF_ECONET",
190 [PF_ATMSVC] = "PF_ATMSVC",
191 [PF_RDS] = "PF_RDS",
192 [PF_SNA] = "PF_SNA",
193 [PF_IRDA] = "PF_IRDA",
194 [PF_PPPOX] = "PF_PPPOX",
195 [PF_WANPIPE] = "PF_WANPIPE",
196 [PF_LLC] = "PF_LLC",
197 [PF_IB] = "PF_IB",
198 [PF_MPLS] = "PF_MPLS",
199 [PF_CAN] = "PF_CAN",
200 [PF_TIPC] = "PF_TIPC",
201 [PF_BLUETOOTH] = "PF_BLUETOOTH",
202 [PF_IUCV] = "PF_IUCV",
203 [PF_RXRPC] = "PF_RXRPC",
204 [PF_ISDN] = "PF_ISDN",
205 [PF_PHONET] = "PF_PHONET",
206 [PF_IEEE802154] = "PF_IEEE802154",
207 [PF_CAIF] = "PF_CAIF",
208 [PF_ALG] = "PF_ALG",
209 [PF_NFC] = "PF_NFC",
210 [PF_VSOCK] = "PF_VSOCK",
211 [PF_KCM] = "PF_KCM",
212 [PF_QIPCRTR] = "PF_QIPCRTR",
213 [PF_SMC] = "PF_SMC",
214 [PF_XDP] = "PF_XDP",
215};
216
1da177e4
LT
217/*
218 * The protocol list. Each protocol is registered in here.
219 */
220
1da177e4 221static DEFINE_SPINLOCK(net_family_lock);
190683a9 222static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 223
1da177e4 224/*
89bddce5
SH
225 * Support routines.
226 * Move socket addresses back and forth across the kernel/user
227 * divide and look after the messy bits.
1da177e4
LT
228 */
229
1da177e4
LT
230/**
231 * move_addr_to_kernel - copy a socket address into kernel space
232 * @uaddr: Address in user space
233 * @kaddr: Address in kernel space
234 * @ulen: Length in user space
235 *
236 * The address is copied into kernel space. If the provided address is
237 * too long an error code of -EINVAL is returned. If the copy gives
238 * invalid addresses -EFAULT is returned. On a success 0 is returned.
239 */
240
43db362d 241int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 242{
230b1839 243 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 244 return -EINVAL;
89bddce5 245 if (ulen == 0)
1da177e4 246 return 0;
89bddce5 247 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 248 return -EFAULT;
3ec3b2fb 249 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
250}
251
252/**
253 * move_addr_to_user - copy an address to user space
254 * @kaddr: kernel space address
255 * @klen: length of address in kernel
256 * @uaddr: user space address
257 * @ulen: pointer to user length field
258 *
259 * The value pointed to by ulen on entry is the buffer length available.
260 * This is overwritten with the buffer space used. -EINVAL is returned
261 * if an overlong buffer is specified or a negative buffer size. -EFAULT
262 * is returned if either the buffer or the length field are not
263 * accessible.
264 * After copying the data up to the limit the user specifies, the true
265 * length of the data is written over the length limit the user
266 * specified. Zero is returned for a success.
267 */
89bddce5 268
43db362d 269static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 270 void __user *uaddr, int __user *ulen)
1da177e4
LT
271{
272 int err;
273 int len;
274
68c6beb3 275 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
276 err = get_user(len, ulen);
277 if (err)
1da177e4 278 return err;
89bddce5
SH
279 if (len > klen)
280 len = klen;
68c6beb3 281 if (len < 0)
1da177e4 282 return -EINVAL;
89bddce5 283 if (len) {
d6fe3945
SG
284 if (audit_sockaddr(klen, kaddr))
285 return -ENOMEM;
89bddce5 286 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
287 return -EFAULT;
288 }
289 /*
89bddce5
SH
290 * "fromlen shall refer to the value before truncation.."
291 * 1003.1g
1da177e4
LT
292 */
293 return __put_user(klen, ulen);
294}
295
08009a76 296static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
297
298static struct inode *sock_alloc_inode(struct super_block *sb)
299{
300 struct socket_alloc *ei;
89bddce5 301
e94b1766 302 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
303 if (!ei)
304 return NULL;
333f7909
AV
305 init_waitqueue_head(&ei->socket.wq.wait);
306 ei->socket.wq.fasync_list = NULL;
307 ei->socket.wq.flags = 0;
89bddce5 308
1da177e4
LT
309 ei->socket.state = SS_UNCONNECTED;
310 ei->socket.flags = 0;
311 ei->socket.ops = NULL;
312 ei->socket.sk = NULL;
313 ei->socket.file = NULL;
1da177e4
LT
314
315 return &ei->vfs_inode;
316}
317
6d7855c5 318static void sock_free_inode(struct inode *inode)
1da177e4 319{
43815482
ED
320 struct socket_alloc *ei;
321
322 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 323 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
324}
325
51cc5068 326static void init_once(void *foo)
1da177e4 327{
89bddce5 328 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 329
a35afb83 330 inode_init_once(&ei->vfs_inode);
1da177e4 331}
89bddce5 332
1e911632 333static void init_inodecache(void)
1da177e4
LT
334{
335 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
336 sizeof(struct socket_alloc),
337 0,
338 (SLAB_HWCACHE_ALIGN |
339 SLAB_RECLAIM_ACCOUNT |
5d097056 340 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 341 init_once);
1e911632 342 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
343}
344
b87221de 345static const struct super_operations sockfs_ops = {
c6d409cf 346 .alloc_inode = sock_alloc_inode,
6d7855c5 347 .free_inode = sock_free_inode,
c6d409cf 348 .statfs = simple_statfs,
1da177e4
LT
349};
350
c23fbb6b
ED
351/*
352 * sockfs_dname() is called from d_path().
353 */
354static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
355{
356 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 357 d_inode(dentry)->i_ino);
c23fbb6b
ED
358}
359
3ba13d17 360static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 361 .d_dname = sockfs_dname,
1da177e4
LT
362};
363
bba0bd31
AG
364static int sockfs_xattr_get(const struct xattr_handler *handler,
365 struct dentry *dentry, struct inode *inode,
366 const char *suffix, void *value, size_t size)
367{
368 if (value) {
369 if (dentry->d_name.len + 1 > size)
370 return -ERANGE;
371 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
372 }
373 return dentry->d_name.len + 1;
374}
375
376#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
377#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
378#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
379
380static const struct xattr_handler sockfs_xattr_handler = {
381 .name = XATTR_NAME_SOCKPROTONAME,
382 .get = sockfs_xattr_get,
383};
384
4a590153 385static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 386 struct user_namespace *mnt_userns,
4a590153
AG
387 struct dentry *dentry, struct inode *inode,
388 const char *suffix, const void *value,
389 size_t size, int flags)
390{
391 /* Handled by LSM. */
392 return -EAGAIN;
393}
394
395static const struct xattr_handler sockfs_security_xattr_handler = {
396 .prefix = XATTR_SECURITY_PREFIX,
397 .set = sockfs_security_xattr_set,
398};
399
bba0bd31
AG
400static const struct xattr_handler *sockfs_xattr_handlers[] = {
401 &sockfs_xattr_handler,
4a590153 402 &sockfs_security_xattr_handler,
bba0bd31
AG
403 NULL
404};
405
fba9be49 406static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 407{
fba9be49
DH
408 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
409 if (!ctx)
410 return -ENOMEM;
411 ctx->ops = &sockfs_ops;
412 ctx->dops = &sockfs_dentry_operations;
413 ctx->xattr = sockfs_xattr_handlers;
414 return 0;
c74a1cbb
AV
415}
416
417static struct vfsmount *sock_mnt __read_mostly;
418
419static struct file_system_type sock_fs_type = {
420 .name = "sockfs",
fba9be49 421 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
422 .kill_sb = kill_anon_super,
423};
424
1da177e4
LT
425/*
426 * Obtains the first available file descriptor and sets it up for use.
427 *
39d8c1b6
DM
428 * These functions create file structures and maps them to fd space
429 * of the current process. On success it returns file descriptor
1da177e4
LT
430 * and file struct implicitly stored in sock->file.
431 * Note that another thread may close file descriptor before we return
432 * from this function. We use the fact that now we do not refer
433 * to socket after mapping. If one day we will need it, this
434 * function will increment ref. count on file by 1.
435 *
436 * In any case returned fd MAY BE not valid!
437 * This race condition is unavoidable
438 * with shared fd spaces, we cannot solve it inside kernel,
439 * but we take care of internal coherence yet.
440 */
441
8a3c245c
PT
442/**
443 * sock_alloc_file - Bind a &socket to a &file
444 * @sock: socket
445 * @flags: file status flags
446 * @dname: protocol name
447 *
448 * Returns the &file bound with @sock, implicitly storing it
449 * in sock->file. If dname is %NULL, sets to "".
450 * On failure the return is a ERR pointer (see linux/err.h).
451 * This function uses GFP_KERNEL internally.
452 */
453
aab174f0 454struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 455{
7cbe66b6 456 struct file *file;
1da177e4 457
d93aa9d8
AV
458 if (!dname)
459 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 460
d93aa9d8
AV
461 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
462 O_RDWR | (flags & O_NONBLOCK),
463 &socket_file_ops);
b5ffe634 464 if (IS_ERR(file)) {
8e1611e2 465 sock_release(sock);
39b65252 466 return file;
cc3808f8
AV
467 }
468
469 sock->file = file;
39d8c1b6 470 file->private_data = sock;
d8e464ec 471 stream_open(SOCK_INODE(sock), file);
28407630 472 return file;
39d8c1b6 473}
56b31d1c 474EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 475
56b31d1c 476static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
477{
478 struct file *newfile;
28407630 479 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
480 if (unlikely(fd < 0)) {
481 sock_release(sock);
28407630 482 return fd;
ce4bb04c 483 }
39d8c1b6 484
aab174f0 485 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 486 if (!IS_ERR(newfile)) {
39d8c1b6 487 fd_install(fd, newfile);
28407630
AV
488 return fd;
489 }
7cbe66b6 490
28407630
AV
491 put_unused_fd(fd);
492 return PTR_ERR(newfile);
1da177e4
LT
493}
494
8a3c245c
PT
495/**
496 * sock_from_file - Return the &socket bounded to @file.
497 * @file: file
8a3c245c 498 *
dba4a925 499 * On failure returns %NULL.
8a3c245c
PT
500 */
501
dba4a925 502struct socket *sock_from_file(struct file *file)
6cb153ca 503{
6cb153ca
BL
504 if (file->f_op == &socket_file_ops)
505 return file->private_data; /* set in sock_map_fd */
506
23bb80d2 507 return NULL;
6cb153ca 508}
406a3c63 509EXPORT_SYMBOL(sock_from_file);
6cb153ca 510
1da177e4 511/**
c6d409cf 512 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
513 * @fd: file handle
514 * @err: pointer to an error code return
515 *
516 * The file handle passed in is locked and the socket it is bound
241c4667 517 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
518 * with a negative errno code and NULL is returned. The function checks
519 * for both invalid handles and passing a handle which is not a socket.
520 *
521 * On a success the socket object pointer is returned.
522 */
523
524struct socket *sockfd_lookup(int fd, int *err)
525{
526 struct file *file;
1da177e4
LT
527 struct socket *sock;
528
89bddce5
SH
529 file = fget(fd);
530 if (!file) {
1da177e4
LT
531 *err = -EBADF;
532 return NULL;
533 }
89bddce5 534
dba4a925
FR
535 sock = sock_from_file(file);
536 if (!sock) {
537 *err = -ENOTSOCK;
1da177e4 538 fput(file);
dba4a925 539 }
6cb153ca
BL
540 return sock;
541}
c6d409cf 542EXPORT_SYMBOL(sockfd_lookup);
1da177e4 543
6cb153ca
BL
544static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
545{
00e188ef 546 struct fd f = fdget(fd);
6cb153ca
BL
547 struct socket *sock;
548
3672558c 549 *err = -EBADF;
00e188ef 550 if (f.file) {
dba4a925 551 sock = sock_from_file(f.file);
00e188ef 552 if (likely(sock)) {
ce787a5a 553 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 554 return sock;
00e188ef 555 }
dba4a925 556 *err = -ENOTSOCK;
00e188ef 557 fdput(f);
1da177e4 558 }
6cb153ca 559 return NULL;
1da177e4
LT
560}
561
600e1779
MY
562static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
563 size_t size)
564{
565 ssize_t len;
566 ssize_t used = 0;
567
c5ef6035 568 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
569 if (len < 0)
570 return len;
571 used += len;
572 if (buffer) {
573 if (size < used)
574 return -ERANGE;
575 buffer += len;
576 }
577
578 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
579 used += len;
580 if (buffer) {
581 if (size < used)
582 return -ERANGE;
583 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
584 buffer += len;
585 }
586
587 return used;
588}
589
549c7297
CB
590static int sockfs_setattr(struct user_namespace *mnt_userns,
591 struct dentry *dentry, struct iattr *iattr)
86741ec2 592{
549c7297 593 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 594
e1a3a60a 595 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
596 struct socket *sock = SOCKET_I(d_inode(dentry));
597
6d8c50dc
CW
598 if (sock->sk)
599 sock->sk->sk_uid = iattr->ia_uid;
600 else
601 err = -ENOENT;
86741ec2
LC
602 }
603
604 return err;
605}
606
600e1779 607static const struct inode_operations sockfs_inode_ops = {
600e1779 608 .listxattr = sockfs_listxattr,
86741ec2 609 .setattr = sockfs_setattr,
600e1779
MY
610};
611
1da177e4 612/**
8a3c245c 613 * sock_alloc - allocate a socket
89bddce5 614 *
1da177e4
LT
615 * Allocate a new inode and socket object. The two are bound together
616 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 617 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
618 */
619
f4a00aac 620struct socket *sock_alloc(void)
1da177e4 621{
89bddce5
SH
622 struct inode *inode;
623 struct socket *sock;
1da177e4 624
a209dfc7 625 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
626 if (!inode)
627 return NULL;
628
629 sock = SOCKET_I(inode);
630
85fe4025 631 inode->i_ino = get_next_ino();
89bddce5 632 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
633 inode->i_uid = current_fsuid();
634 inode->i_gid = current_fsgid();
600e1779 635 inode->i_op = &sockfs_inode_ops;
1da177e4 636
1da177e4
LT
637 return sock;
638}
f4a00aac 639EXPORT_SYMBOL(sock_alloc);
1da177e4 640
6d8c50dc 641static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
642{
643 if (sock->ops) {
644 struct module *owner = sock->ops->owner;
645
6d8c50dc
CW
646 if (inode)
647 inode_lock(inode);
1da177e4 648 sock->ops->release(sock);
ff7b11aa 649 sock->sk = NULL;
6d8c50dc
CW
650 if (inode)
651 inode_unlock(inode);
1da177e4
LT
652 sock->ops = NULL;
653 module_put(owner);
654 }
655
333f7909 656 if (sock->wq.fasync_list)
3410f22e 657 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 658
1da177e4
LT
659 if (!sock->file) {
660 iput(SOCK_INODE(sock));
661 return;
662 }
89bddce5 663 sock->file = NULL;
1da177e4 664}
6d8c50dc 665
9a8ad9ac
AL
666/**
667 * sock_release - close a socket
668 * @sock: socket to close
669 *
670 * The socket is released from the protocol stack if it has a release
671 * callback, and the inode is then released if the socket is bound to
672 * an inode not a file.
673 */
6d8c50dc
CW
674void sock_release(struct socket *sock)
675{
676 __sock_release(sock, NULL);
677}
c6d409cf 678EXPORT_SYMBOL(sock_release);
1da177e4 679
c14ac945 680void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 681{
140c55d4
ED
682 u8 flags = *tx_flags;
683
c14ac945 684 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
685 flags |= SKBTX_HW_TSTAMP;
686
c14ac945 687 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
688 flags |= SKBTX_SW_TSTAMP;
689
c14ac945 690 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
691 flags |= SKBTX_SCHED_TSTAMP;
692
140c55d4 693 *tx_flags = flags;
20d49473 694}
67cc0d40 695EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 696
8c3c447b
PA
697INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
698 size_t));
a648a592
PA
699INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
700 size_t));
d8725c86 701static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 702{
a648a592
PA
703 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
704 inet_sendmsg, sock, msg,
705 msg_data_left(msg));
d8725c86
AV
706 BUG_ON(ret == -EIOCBQUEUED);
707 return ret;
1da177e4
LT
708}
709
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Asks the LSM for permission first; a non-zero LSM result is
 *	returned as is.  Otherwise the message is handed to the protocol
 *	and its result (bytes sent, or an error code) is returned.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 725EXPORT_SYMBOL(sock_sendmsg);
1da177e4 726
8a3c245c
PT
727/**
728 * kernel_sendmsg - send a message through @sock (kernel-space)
729 * @sock: socket
730 * @msg: message header
731 * @vec: kernel vec
732 * @num: vec array length
733 * @size: total message data size
734 *
735 * Builds the message data with @vec and sends it through @sock.
736 * Returns the number of bytes sent, or an error code.
737 */
738
1da177e4
LT
739int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
740 struct kvec *vec, size_t num, size_t size)
741{
aa563d7b 742 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 743 return sock_sendmsg(sock, msg);
1da177e4 744}
c6d409cf 745EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 746
8a3c245c
PT
747/**
748 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
749 * @sk: sock
750 * @msg: message header
751 * @vec: output s/g array
752 * @num: output s/g array length
753 * @size: total message data size
754 *
755 * Builds the message data with @vec and sends it through @sock.
756 * Returns the number of bytes sent, or an error code.
757 * Caller must hold @sk.
758 */
759
306b13eb
TH
760int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
761 struct kvec *vec, size_t num, size_t size)
762{
763 struct socket *sock = sk->sk_socket;
764
765 if (!sock->ops->sendmsg_locked)
db5980d8 766 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 767
aa563d7b 768 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
769
770 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
771}
772EXPORT_SYMBOL(kernel_sendmsg_locked);
773
8605330a
SHY
774static bool skb_is_err_queue(const struct sk_buff *skb)
775{
776 /* pkt_type of skbs enqueued on the error queue are set to
777 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
778 * in recvmsg, since skbs received on a local socket will never
779 * have a pkt_type of PACKET_OUTGOING.
780 */
781 return skb->pkt_type == PACKET_OUTGOING;
782}
783
b50a5c70
ML
784/* On transmit, software and hardware timestamps are returned independently.
785 * As the two skb clones share the hardware timestamp, which may be updated
786 * before the software timestamp is received, a hardware TX timestamp may be
787 * returned only if there is no software TX timestamp. Ignore false software
788 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 789 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
790 * hardware timestamp.
791 */
792static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
793{
794 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
795}
796
aad9c8c4
ML
797static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
798{
799 struct scm_ts_pktinfo ts_pktinfo;
800 struct net_device *orig_dev;
801
802 if (!skb_mac_header_was_set(skb))
803 return;
804
805 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
806
807 rcu_read_lock();
808 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
809 if (orig_dev)
810 ts_pktinfo.if_index = orig_dev->ifindex;
811 rcu_read_unlock();
812
813 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
814 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
815 sizeof(ts_pktinfo), &ts_pktinfo);
816}
817
92f37fd2
ED
818/*
819 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
820 */
821void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
822 struct sk_buff *skb)
823{
20d49473 824 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 825 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
826 struct scm_timestamping_internal tss;
827
b50a5c70 828 int empty = 1, false_tstamp = 0;
20d49473
PO
829 struct skb_shared_hwtstamps *shhwtstamps =
830 skb_hwtstamps(skb);
831
832 /* Race occurred between timestamp enabling and packet
833 receiving. Fill in the current time for now. */
b50a5c70 834 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 835 __net_timestamp(skb);
b50a5c70
ML
836 false_tstamp = 1;
837 }
20d49473
PO
838
839 if (need_software_tstamp) {
840 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
841 if (new_tstamp) {
842 struct __kernel_sock_timeval tv;
843
844 skb_get_new_timestamp(skb, &tv);
845 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
846 sizeof(tv), &tv);
847 } else {
848 struct __kernel_old_timeval tv;
849
850 skb_get_timestamp(skb, &tv);
851 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
852 sizeof(tv), &tv);
853 }
20d49473 854 } else {
887feae3
DD
855 if (new_tstamp) {
856 struct __kernel_timespec ts;
857
858 skb_get_new_timestampns(skb, &ts);
859 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
860 sizeof(ts), &ts);
861 } else {
df1b4ba9 862 struct __kernel_old_timespec ts;
887feae3
DD
863
864 skb_get_timestampns(skb, &ts);
865 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
866 sizeof(ts), &ts);
867 }
20d49473
PO
868 }
869 }
870
f24b9be5 871 memset(&tss, 0, sizeof(tss));
c199105d 872 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 873 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 874 empty = 0;
4d276eb6 875 if (shhwtstamps &&
b9f40e21 876 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826
YL
877 !skb_is_swtx_tstamp(skb, false_tstamp)) {
878 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
879 ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
880
881 if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
882 tss.ts + 2)) {
883 empty = 0;
884
885 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
886 !skb_is_err_queue(skb))
887 put_ts_pktinfo(msg, skb);
888 }
aad9c8c4 889 }
1c885808 890 if (!empty) {
9718475e
DD
891 if (sock_flag(sk, SOCK_TSTAMP_NEW))
892 put_cmsg_scm_timestamping64(msg, &tss);
893 else
894 put_cmsg_scm_timestamping(msg, &tss);
1c885808 895
8605330a 896 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 897 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
898 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
899 skb->len, skb->data);
900 }
92f37fd2 901}
7c81fd8b
ACM
902EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
903
6e3e939f
JB
904void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
905 struct sk_buff *skb)
906{
907 int ack;
908
909 if (!sock_flag(sk, SOCK_WIFI_STATUS))
910 return;
911 if (!skb->wifi_acked_valid)
912 return;
913
914 ack = skb->wifi_acked;
915
916 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
917}
918EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
919
11165f14 920static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
921 struct sk_buff *skb)
3b885787 922{
744d5a3e 923 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 924 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 925 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
926}
927
/* Emit the timestamp and then the drop-count control messages for @skb. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
767dd033 934EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 935
8c3c447b 936INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
937 size_t, int));
938INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
939 size_t, int));
1b784140 940static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 941 int flags)
1da177e4 942{
a648a592
PA
943 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
944 inet_recvmsg, sock, msg, msg_data_left(msg),
945 flags);
1da177e4
LT
946}
947
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Asks the LSM for permission first; a non-zero LSM result is
 *	returned as is.  Otherwise the protocol's recvmsg result (total
 *	number of bytes received, or an error) is returned.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 963EXPORT_SYMBOL(sock_recvmsg);
1da177e4 964
/**
 *	kernel_recvmsg - Receive a message from a socket (kernel space)
 *	@sock: The socket to receive the message from
 *	@msg: Received message
 *	@vec: Input s/g array for message data
 *	@num: Size of input s/g array
 *	@size: Number of bytes to read
 *	@flags: Message flags (MSG_DONTWAIT, etc...)
 *
 *	On return the msg structure contains the scatter/gather array passed in the
 *	vec argument. The array is modified so that it consists of the unfilled
 *	portion of the original array.
 *
 *	The returned value is the total number of bytes received, or an error.
 */

int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
		   struct kvec *vec, size_t num, size_t size, int flags)
{
	/* The caller is the kernel: flag the control buffer as not user memory. */
	msg->msg_control_is_user = false;
	/* Point the message iterator at the caller's kvec array for reading. */
	iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
	return sock_recvmsg(sock, msg, flags);
}
c6d409cf 988EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 989
ce1d4d3e
CH
990static ssize_t sock_sendpage(struct file *file, struct page *page,
991 int offset, size_t size, loff_t *ppos, int more)
1da177e4 992{
1da177e4
LT
993 struct socket *sock;
994 int flags;
995
ce1d4d3e
CH
996 sock = file->private_data;
997
35f9c09f
ED
998 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
999 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1000 flags |= more;
ce1d4d3e 1001
e6949583 1002 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 1003}
1da177e4 1004
9c55e01c 1005static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1006 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1007 unsigned int flags)
1008{
1009 struct socket *sock = file->private_data;
1010
997b37da 1011 if (unlikely(!sock->ops->splice_read))
95506588 1012 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1013
9c55e01c
JA
1014 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1015}
1016
8ae5e030 1017static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1018{
6d652330
AV
1019 struct file *file = iocb->ki_filp;
1020 struct socket *sock = file->private_data;
0345f931 1021 struct msghdr msg = {.msg_iter = *to,
1022 .msg_iocb = iocb};
8ae5e030 1023 ssize_t res;
ce1d4d3e 1024
ebfcd895 1025 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1026 msg.msg_flags = MSG_DONTWAIT;
1027
1028 if (iocb->ki_pos != 0)
1da177e4 1029 return -ESPIPE;
027445c3 1030
66ee59af 1031 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1032 return 0;
1033
2da62906 1034 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1035 *to = msg.msg_iter;
1036 return res;
1da177e4
LT
1037}
1038
8ae5e030 1039static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1040{
6d652330
AV
1041 struct file *file = iocb->ki_filp;
1042 struct socket *sock = file->private_data;
0345f931 1043 struct msghdr msg = {.msg_iter = *from,
1044 .msg_iocb = iocb};
8ae5e030 1045 ssize_t res;
1da177e4 1046
8ae5e030 1047 if (iocb->ki_pos != 0)
ce1d4d3e 1048 return -ESPIPE;
027445c3 1049
ebfcd895 1050 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1051 msg.msg_flags = MSG_DONTWAIT;
1052
6d652330
AV
1053 if (sock->type == SOCK_SEQPACKET)
1054 msg.msg_flags |= MSG_EOR;
1055
d8725c86 1056 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1057 *from = msg.msg_iter;
1058 return res;
1da177e4
LT
1059}
1060
1da177e4
LT
1061/*
1062 * Atomic setting of ioctl hooks to avoid race
1063 * with module unload.
1064 */
1065
/* Held while br_ioctl_hook is replaced and while sock_ioctl() calls it. */
static DEFINE_MUTEX(br_ioctl_mutex);
/* Bridge ioctl handler; NULL until one is installed with brioctl_set(). */
static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);

/*
 * Install @hook as the bridge ioctl handler.  The store is done under
 * br_ioctl_mutex.
 */
void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
{
	mutex_lock(&br_ioctl_mutex);
	br_ioctl_hook = hook;
	mutex_unlock(&br_ioctl_mutex);
}
1075EXPORT_SYMBOL(brioctl_set);
1076
/* Held while vlan_ioctl_hook is replaced and while sock_ioctl() calls it. */
static DEFINE_MUTEX(vlan_ioctl_mutex);
/* VLAN ioctl handler; NULL until one is installed with vlan_ioctl_set(). */
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);

/*
 * Install @hook as the VLAN ioctl handler.  The store is done under
 * vlan_ioctl_mutex.
 */
void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
{
	mutex_lock(&vlan_ioctl_mutex);
	vlan_ioctl_hook = hook;
	mutex_unlock(&vlan_ioctl_mutex);
}
1086EXPORT_SYMBOL(vlan_ioctl_set);
1087
6b96018b 1088static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1089 unsigned int cmd, unsigned long arg)
6b96018b 1090{
876f0bf9
AB
1091 struct ifreq ifr;
1092 bool need_copyout;
6b96018b
AB
1093 int err;
1094 void __user *argp = (void __user *)arg;
1095
1096 err = sock->ops->ioctl(sock, cmd, arg);
1097
1098 /*
1099 * If this ioctl is unknown try to hand it down
1100 * to the NIC driver.
1101 */
36fd633e
AV
1102 if (err != -ENOIOCTLCMD)
1103 return err;
6b96018b 1104
876f0bf9
AB
1105 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1106 return -EFAULT;
1107 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1108 if (!err && need_copyout)
1109 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1110 return -EFAULT;
876f0bf9 1111
6b96018b
AB
1112 return err;
1113}
1114
1da177e4
LT
1115/*
1116 * With an ioctl, arg may well be a user mode pointer, but we don't know
1117 * what to do with it - that's up to the protocol still.
1118 */
1119
/*
 * ioctl entry point for socket files.
 *
 * Commands are dispatched in this order:
 *  - the 16 device-private commands starting at SIOCDEVPRIVATE go to
 *    dev_ioctl() with a struct ifreq copied in (and, if requested, out);
 *  - with CONFIG_WEXT_CORE, SIOCIWFIRST..SIOCIWLAST go to
 *    wext_handle_ioctl();
 *  - a fixed set of commands is handled in the switch below;
 *  - everything else goes to sock_do_ioctl().
 *
 * Returns the handler's result, or -EFAULT when a user copy fails.
 */
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct socket *sock;
	struct sock *sk;
	void __user *argp = (void __user *)arg;
	int pid, err;
	struct net *net;

	sock = file->private_data;
	sk = sock->sk;
	net = sock_net(sk);
	if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
		struct ifreq ifr;
		bool need_copyout;
		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
			return -EFAULT;
		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
		/* copy the ifreq back only on success and only if asked to */
		if (!err && need_copyout)
			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
				return -EFAULT;
	} else
#ifdef CONFIG_WEXT_CORE
	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
		err = wext_handle_ioctl(net, cmd, argp);
	} else
#endif
		switch (cmd) {
		case FIOSETOWN:
		case SIOCSPGRP:
			/* set the owner of the socket file from a user int */
			err = -EFAULT;
			if (get_user(pid, (int __user *)argp))
				break;
			err = f_setown(sock->file, pid, 1);
			break;
		case FIOGETOWN:
		case SIOCGPGRP:
			/* report the owner of the socket file to user space */
			err = put_user(f_getown(sock->file),
				       (int __user *)argp);
			break;
		case SIOCGIFBR:
		case SIOCSIFBR:
		case SIOCBRADDBR:
		case SIOCBRDELBR:
			/* -ENOPKG unless a bridge hook is (or becomes) available */
			err = -ENOPKG;
			if (!br_ioctl_hook)
				request_module("bridge");

			mutex_lock(&br_ioctl_mutex);
			if (br_ioctl_hook)
				err = br_ioctl_hook(net, cmd, argp);
			mutex_unlock(&br_ioctl_mutex);
			break;
		case SIOCGIFVLAN:
		case SIOCSIFVLAN:
			/* -ENOPKG unless a VLAN hook is (or becomes) available */
			err = -ENOPKG;
			if (!vlan_ioctl_hook)
				request_module("8021q");

			mutex_lock(&vlan_ioctl_mutex);
			if (vlan_ioctl_hook)
				err = vlan_ioctl_hook(net, argp);
			mutex_unlock(&vlan_ioctl_mutex);
			break;
		case SIOCGSKNS:
			/* requires CAP_NET_ADMIN in the netns' user namespace */
			err = -EPERM;
			if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
				break;

			err = open_related_ns(&net->ns, get_net_ns);
			break;
		case SIOCGSTAMP_OLD:
		case SIOCGSTAMPNS_OLD:
			if (!sock->ops->gettstamp) {
				err = -ENOIOCTLCMD;
				break;
			}
			/*
			 * Third argument is true for SIOCGSTAMP_OLD and false
			 * for SIOCGSTAMPNS_OLD; the last is true on non-64-bit
			 * builds only.
			 */
			err = sock->ops->gettstamp(sock, argp,
						   cmd == SIOCGSTAMP_OLD,
						   !IS_ENABLED(CONFIG_64BIT));
			break;
		case SIOCGSTAMP_NEW:
		case SIOCGSTAMPNS_NEW:
			if (!sock->ops->gettstamp) {
				err = -ENOIOCTLCMD;
				break;
			}
			err = sock->ops->gettstamp(sock, argp,
						   cmd == SIOCGSTAMP_NEW,
						   false);
			break;

		case SIOCGIFCONF:
			err = dev_ifconf(net, argp);
			break;

		default:
			/* protocol first, then the device layer */
			err = sock_do_ioctl(net, sock, cmd, arg);
			break;
		}
	return err;
}
1221
8a3c245c
PT
1222/**
1223 * sock_create_lite - creates a socket
1224 * @family: protocol family (AF_INET, ...)
1225 * @type: communication type (SOCK_STREAM, ...)
1226 * @protocol: protocol (0, ...)
1227 * @res: new socket
1228 *
1229 * Creates a new socket and assigns it to @res, passing through LSM.
1230 * The new socket initialization is not complete, see kernel_accept().
1231 * Returns 0 or an error. On failure @res is set to %NULL.
1232 * This function internally uses GFP_KERNEL.
1233 */
1234
1da177e4
LT
1235int sock_create_lite(int family, int type, int protocol, struct socket **res)
1236{
1237 int err;
1238 struct socket *sock = NULL;
89bddce5 1239
1da177e4
LT
1240 err = security_socket_create(family, type, protocol, 1);
1241 if (err)
1242 goto out;
1243
1244 sock = sock_alloc();
1245 if (!sock) {
1246 err = -ENOMEM;
1247 goto out;
1248 }
1249
1da177e4 1250 sock->type = type;
7420ed23
VY
1251 err = security_socket_post_create(sock, family, type, protocol, 1);
1252 if (err)
1253 goto out_release;
1254
1da177e4
LT
1255out:
1256 *res = sock;
1257 return err;
7420ed23
VY
1258out_release:
1259 sock_release(sock);
1260 sock = NULL;
1261 goto out;
1da177e4 1262}
c6d409cf 1263EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1264
1265/* No kernel lock held - perfect */
ade994f4 1266static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1267{
3cafb376 1268 struct socket *sock = file->private_data;
a331de3b 1269 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1270
e88958e6
CH
1271 if (!sock->ops->poll)
1272 return 0;
f641f13b 1273
a331de3b
CH
1274 if (sk_can_busy_loop(sock->sk)) {
1275 /* poll once if requested by the syscall */
1276 if (events & POLL_BUSY_LOOP)
1277 sk_busy_loop(sock->sk, 1);
1278
1279 /* if this socket can poll_ll, tell the system call */
1280 flag = POLL_BUSY_LOOP;
1281 }
1282
1283 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1284}
1285
/*
 * mmap handler for socket files: delegates to the protocol's ->mmap()
 * and returns its result.  The op is called unconditionally.
 */
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct socket *sock = file->private_data;

	return sock->ops->mmap(file, sock, vma);
}
1292
/*
 * release handler for socket files: releases the socket embedded in
 * @inode via __sock_release().  Always returns 0; @filp is unused.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	__sock_release(SOCKET_I(inode), inode);
	return 0;
}
1298
1299/*
1300 * Update the socket async list
1301 *
1302 * Fasync_list locking strategy.
1303 *
1304 * 1. fasync_list is modified only under process context socket lock
1305 * i.e. under semaphore.
1306 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1307 * or under socket lock
1da177e4
LT
1308 */
1309
1310static int sock_fasync(int fd, struct file *filp, int on)
1311{
989a2979
ED
1312 struct socket *sock = filp->private_data;
1313 struct sock *sk = sock->sk;
333f7909 1314 struct socket_wq *wq = &sock->wq;
1da177e4 1315
989a2979 1316 if (sk == NULL)
1da177e4 1317 return -EINVAL;
1da177e4
LT
1318
1319 lock_sock(sk);
eaefd110 1320 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1321
eaefd110 1322 if (!wq->fasync_list)
989a2979
ED
1323 sock_reset_flag(sk, SOCK_FASYNC);
1324 else
bcdce719 1325 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1326
989a2979 1327 release_sock(sk);
1da177e4
LT
1328 return 0;
1329}
1330
ceb5d58b 1331/* This function may be called only under rcu_lock */
1da177e4 1332
ceb5d58b 1333int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1334{
ceb5d58b 1335 if (!wq || !wq->fasync_list)
1da177e4 1336 return -1;
ceb5d58b 1337
89bddce5 1338 switch (how) {
8d8ad9d7 1339 case SOCK_WAKE_WAITD:
ceb5d58b 1340 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1341 break;
1342 goto call_kill;
8d8ad9d7 1343 case SOCK_WAKE_SPACE:
ceb5d58b 1344 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1345 break;
7c7ab580 1346 fallthrough;
8d8ad9d7 1347 case SOCK_WAKE_IO:
89bddce5 1348call_kill:
43815482 1349 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1350 break;
8d8ad9d7 1351 case SOCK_WAKE_URG:
43815482 1352 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1353 }
ceb5d58b 1354
1da177e4
LT
1355 return 0;
1356}
c6d409cf 1357EXPORT_SYMBOL(sock_wake_async);
1da177e4 1358
8a3c245c
PT
/**
 *	__sock_create - creates a socket
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *	@kern: boolean for kernel space sockets
 *
 *	Creates a new socket and assigns it to @res, passing through LSM.
 *	Returns 0 or an error. @res is written only on success; on the
 *	failure paths it is left untouched. @kern must be set to true if
 *	the socket resides in kernel space.
 *	This function internally uses GFP_KERNEL.
 */

int __sock_create(struct net *net, int family, int type, int protocol,
			 struct socket **res, int kern)
{
	int err;
	struct socket *sock;
	const struct net_proto_family *pf;

	/*
	 *      Check protocol is in range
	 */
	if (family < 0 || family >= NPROTO)
		return -EAFNOSUPPORT;
	if (type < 0 || type >= SOCK_MAX)
		return -EINVAL;

	/* Compatibility.

	   This uglymoron is moved from INET layer to here to avoid
	   deadlock in module load.
	 */
	if (family == PF_INET && type == SOCK_PACKET) {
		pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
			     current->comm);
		family = PF_PACKET;
	}

	err = security_socket_create(family, type, protocol, kern);
	if (err)
		return err;

	/*
	 *	Allocate the socket and allow the family to set things up. if
	 *	the protocol is 0, the family is instructed to select an appropriate
	 *	default.
	 */
	sock = sock_alloc();
	if (!sock) {
		net_warn_ratelimited("socket: no more sockets\n");
		return -ENFILE;	/* Not exactly a match, but its the
				   closest posix thing */
	}

	sock->type = type;

#ifdef CONFIG_MODULES
	/* Attempt to load a protocol module if the find failed.
	 *
	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
	 * requested real, full-featured networking support upon configuration.
	 * Otherwise module support will break!
	 */
	if (rcu_access_pointer(net_families[family]) == NULL)
		request_module("net-pf-%d", family);
#endif

	/* Look the family up under RCU; it stays pinned until the module ref is taken. */
	rcu_read_lock();
	pf = rcu_dereference(net_families[family]);
	err = -EAFNOSUPPORT;
	if (!pf)
		goto out_release;

	/*
	 * We will call the ->create function, that possibly is in a loadable
	 * module, so we have to bump that loadable module refcnt first.
	 */
	if (!try_module_get(pf->owner))
		goto out_release;

	/* Now protected by module ref count */
	rcu_read_unlock();

	err = pf->create(net, sock, protocol, kern);
	if (err < 0)
		goto out_module_put;

	/*
	 * Now to bump the refcnt of the [loadable] module that owns this
	 * socket at sock_release time we decrement its refcnt.
	 */
	if (!try_module_get(sock->ops->owner))
		goto out_module_busy;

	/*
	 * Now that we're done with the ->create function, the [loadable]
	 * module can have its refcnt decremented
	 */
	module_put(pf->owner);
	err = security_socket_post_create(sock, family, type, protocol, kern);
	if (err)
		goto out_sock_release;
	*res = sock;

	return 0;

out_module_busy:
	err = -EAFNOSUPPORT;
out_module_put:
	/* Clear ->ops before releasing; no reference on its owner is held here. */
	sock->ops = NULL;
	module_put(pf->owner);
out_sock_release:
	sock_release(sock);
	return err;

out_release:
	/* Failure while still inside the RCU read-side section. */
	rcu_read_unlock();
	goto out_sock_release;
}
721db93a 1481EXPORT_SYMBOL(__sock_create);
1da177e4 1482
8a3c245c
PT
/**
 *	sock_create - creates a socket
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that uses the calling task's
 *	network namespace (current->nsproxy->net_ns) and passes 0 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create(int family, int type, int protocol, struct socket **res)
{
	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
c6d409cf 1498EXPORT_SYMBOL(sock_create);
1da177e4 1499
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for @kern.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
c6d409cf 1516EXPORT_SYMBOL(sock_create_kern);
1da177e4 1517
9d6a15c3 1518int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1519{
1520 int retval;
1521 struct socket *sock;
a677a039
UD
1522 int flags;
1523
e38b36f3
UD
1524 /* Check the SOCK_* constants for consistency. */
1525 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1526 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1527 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1528 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1529
a677a039 1530 flags = type & ~SOCK_TYPE_MASK;
77d27200 1531 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1532 return -EINVAL;
1533 type &= SOCK_TYPE_MASK;
1da177e4 1534
aaca0bdc
UD
1535 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1536 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1537
1da177e4
LT
1538 retval = sock_create(family, type, protocol, &sock);
1539 if (retval < 0)
8e1611e2 1540 return retval;
1da177e4 1541
8e1611e2 1542 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1543}
1544
9d6a15c3
DB
/* socket(2) entry point: forwards its arguments unchanged to __sys_socket(). */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
	return __sys_socket(family, type, protocol);
}
1549
1da177e4
LT
/*
 *	Create a pair of connected sockets.
 *
 *	Two descriptors are reserved and written to @usockvec before any
 *	socket is created.  Both sockets are then created, checked by the
 *	LSM, joined through the protocol's ->socketpair(), wrapped in files
 *	and installed.  Returns 0 on success or a negative error; on every
 *	failure after the reservation both descriptors are given back.
 */

int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
{
	struct socket *sock1, *sock2;
	int fd1, fd2, err;
	struct file *newfile1, *newfile2;
	int flags;

	/* bits outside SOCK_TYPE_MASK are flags; only two are accepted */
	flags = type & ~SOCK_TYPE_MASK;
	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	type &= SOCK_TYPE_MASK;

	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	/*
	 * reserve descriptors and make sure we won't fail
	 * to return them to userland.
	 */
	fd1 = get_unused_fd_flags(flags);
	if (unlikely(fd1 < 0))
		return fd1;

	fd2 = get_unused_fd_flags(flags);
	if (unlikely(fd2 < 0)) {
		put_unused_fd(fd1);
		return fd2;
	}

	err = put_user(fd1, &usockvec[0]);
	if (err)
		goto out;

	err = put_user(fd2, &usockvec[1]);
	if (err)
		goto out;

	/*
	 * Obtain the first socket and check if the underlying protocol
	 * supports the socketpair call.
	 */

	err = sock_create(family, type, protocol, &sock1);
	if (unlikely(err < 0))
		goto out;

	err = sock_create(family, type, protocol, &sock2);
	if (unlikely(err < 0)) {
		sock_release(sock1);
		goto out;
	}

	err = security_socket_socketpair(sock1, sock2);
	if (unlikely(err)) {
		sock_release(sock2);
		sock_release(sock1);
		goto out;
	}

	err = sock1->ops->socketpair(sock1, sock2);
	if (unlikely(err < 0)) {
		sock_release(sock2);
		sock_release(sock1);
		goto out;
	}

	newfile1 = sock_alloc_file(sock1, flags, NULL);
	if (IS_ERR(newfile1)) {
		err = PTR_ERR(newfile1);
		/*
		 * NOTE(review): only sock2 is released here; presumably
		 * sock_alloc_file() disposes of sock1 itself on failure -
		 * confirm against its definition.
		 */
		sock_release(sock2);
		goto out;
	}

	newfile2 = sock_alloc_file(sock2, flags, NULL);
	if (IS_ERR(newfile2)) {
		err = PTR_ERR(newfile2);
		/*
		 * sock1 is reached through newfile1 by now, so dropping the
		 * file is how it is let go; sock2 is not released here
		 * (same assumption as above - TODO confirm).
		 */
		fput(newfile1);
		goto out;
	}

	audit_fd_pair(fd1, fd2);

	fd_install(fd1, newfile1);
	fd_install(fd2, newfile2);
	return 0;

out:
	put_unused_fd(fd2);
	put_unused_fd(fd1);
	return err;
}
1645
6debc8d8
DB
/* socketpair(2) entry point: forwards its arguments unchanged to __sys_socketpair(). */
SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
		int __user *, usockvec)
{
	return __sys_socketpair(family, type, protocol, usockvec);
}
1651
1da177e4
LT
1652/*
1653 * Bind a name to a socket. Nothing much to do here since it's
1654 * the protocol's responsibility to handle the local address.
1655 *
1656 * We move the socket address to kernel space before we call
1657 * the protocol layer (having also checked the address is ok).
1658 */
1659
a87d35d8 1660int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1661{
1662 struct socket *sock;
230b1839 1663 struct sockaddr_storage address;
6cb153ca 1664 int err, fput_needed;
1da177e4 1665
89bddce5 1666 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1667 if (sock) {
43db362d 1668 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1669 if (!err) {
89bddce5 1670 err = security_socket_bind(sock,
230b1839 1671 (struct sockaddr *)&address,
89bddce5 1672 addrlen);
6cb153ca
BL
1673 if (!err)
1674 err = sock->ops->bind(sock,
89bddce5 1675 (struct sockaddr *)
230b1839 1676 &address, addrlen);
1da177e4 1677 }
6cb153ca 1678 fput_light(sock->file, fput_needed);
89bddce5 1679 }
1da177e4
LT
1680 return err;
1681}
1682
a87d35d8
DB
/* bind(2) entry point: forwards its arguments unchanged to __sys_bind(). */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
	return __sys_bind(fd, umyaddr, addrlen);
}
1687
1da177e4
LT
1688/*
1689 * Perform a listen. Basically, we allow the protocol to do anything
1690 * necessary for a listen, and if that works, we mark the socket as
1691 * ready for listening.
1692 */
1693
25e290ee 1694int __sys_listen(int fd, int backlog)
1da177e4
LT
1695{
1696 struct socket *sock;
6cb153ca 1697 int err, fput_needed;
b8e1f9b5 1698 int somaxconn;
89bddce5
SH
1699
1700 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1701 if (sock) {
8efa6e93 1702 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1703 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1704 backlog = somaxconn;
1da177e4
LT
1705
1706 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1707 if (!err)
1708 err = sock->ops->listen(sock, backlog);
1da177e4 1709
6cb153ca 1710 fput_light(sock->file, fput_needed);
1da177e4
LT
1711 }
1712 return err;
1713}
1714
25e290ee
DB
/* listen(2) entry point: forwards its arguments unchanged to __sys_listen(). */
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
	return __sys_listen(fd, backlog);
}
1719
de2ea4b6
JA
/*
 * Accept a connection on the listening socket behind @file.
 *
 * @file_flags is or-ed into the listening file's f_flags for the
 * protocol's ->accept() call; @flags may contain only SOCK_CLOEXEC and
 * SOCK_NONBLOCK and applies to the new descriptor; @nofile is the limit
 * passed to __get_unused_fd_flags().  When @upeer_sockaddr is non-NULL
 * the peer address is copied out to it.
 *
 * Returns the new file descriptor, or a negative error.
 */
int __sys_accept4_file(struct file *file, unsigned file_flags,
		       struct sockaddr __user *upeer_sockaddr,
		       int __user *upeer_addrlen, int flags,
		       unsigned long nofile)
{
	struct socket *sock, *newsock;
	struct file *newfile;
	int err, len, newfd;
	struct sockaddr_storage address;

	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;

	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	sock = sock_from_file(file);
	if (!sock) {
		err = -ENOTSOCK;
		goto out;
	}

	err = -ENFILE;
	newsock = sock_alloc();
	if (!newsock)
		goto out;

	/* the new socket starts out with the listener's type and ops */
	newsock->type = sock->type;
	newsock->ops = sock->ops;

	/*
	 * We don't need try_module_get here, as the listening socket (sock)
	 * has the protocol module (sock->ops->owner) held.
	 */
	__module_get(newsock->ops->owner);

	newfd = __get_unused_fd_flags(flags, nofile);
	if (unlikely(newfd < 0)) {
		err = newfd;
		sock_release(newsock);
		goto out;
	}
	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
	if (IS_ERR(newfile)) {
		err = PTR_ERR(newfile);
		/*
		 * NOTE(review): newsock is not released on this path;
		 * presumably sock_alloc_file() does so on failure - confirm.
		 */
		put_unused_fd(newfd);
		goto out;
	}

	err = security_socket_accept(sock, newsock);
	if (err)
		goto out_fd;

	err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
					false);
	if (err < 0)
		goto out_fd;

	if (upeer_sockaddr) {
		/* getname() returns the address length, or a negative error */
		len = newsock->ops->getname(newsock,
					(struct sockaddr *)&address, 2);
		if (len < 0) {
			err = -ECONNABORTED;
			goto out_fd;
		}
		err = move_addr_to_user(&address,
					len, upeer_sockaddr, upeer_addrlen);
		if (err < 0)
			goto out_fd;
	}

	/* Unlike on other OSes, file flags are not inherited via accept(). */

	fd_install(newfd, newfile);
	err = newfd;
out:
	return err;
out_fd:
	/* newfile was never installed: drop it and give the descriptor back */
	fput(newfile);
	put_unused_fd(newfd);
	goto out;

}
1803
1804/*
1805 * For accept, we attempt to create a new socket, set up the link
1806 * with the client, wake up the client, then return the new
1807 * connected fd. We collect the address of the connector in kernel
1808 * space and move it to user at the very end. This is unclean because
1809 * we open the socket then return an error.
1810 *
1811 * 1003.1g adds the ability to recvmsg() to query connection pending
1812 * status to recvmsg. We need to add that support in a way thats
1813 * clean when we restructure accept also.
1814 */
1815
1816int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1817 int __user *upeer_addrlen, int flags)
1818{
1819 int ret = -EBADF;
1820 struct fd f;
1821
1822 f = fdget(fd);
1823 if (f.file) {
1824 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1825 upeer_addrlen, flags,
1826 rlimit(RLIMIT_NOFILE));
6b07edeb 1827 fdput(f);
de2ea4b6
JA
1828 }
1829
1830 return ret;
1da177e4
LT
1831}
1832
4541e805
DB
/* accept4(2) entry point: forwards its arguments unchanged to __sys_accept4(). */
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen, int, flags)
{
	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
}
1838
20f37034
HC
/* accept(2) entry point: same as accept4(2) with flags fixed to 0. */
SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen)
{
	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}
1844
1da177e4
LT
1845/*
1846 * Attempt to connect to a socket with the server address. The address
1847 * is in user space so we verify it is OK and move it to kernel space.
1848 *
1849 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1850 * break bindings
1851 *
1852 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1853 * other SEQPACKET protocols that take time to connect() as it doesn't
1854 * include the -EINPROGRESS status for such sockets.
1855 */
1856
f499a021 1857int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1858 int addrlen, int file_flags)
1da177e4
LT
1859{
1860 struct socket *sock;
bd3ded31 1861 int err;
1da177e4 1862
dba4a925
FR
1863 sock = sock_from_file(file);
1864 if (!sock) {
1865 err = -ENOTSOCK;
1da177e4 1866 goto out;
dba4a925 1867 }
1da177e4 1868
89bddce5 1869 err =
f499a021 1870 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1871 if (err)
bd3ded31 1872 goto out;
1da177e4 1873
f499a021 1874 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1875 sock->file->f_flags | file_flags);
1da177e4
LT
1876out:
1877 return err;
1878}
1879
bd3ded31
JA
1880int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1881{
1882 int ret = -EBADF;
1883 struct fd f;
1884
1885 f = fdget(fd);
1886 if (f.file) {
f499a021
JA
1887 struct sockaddr_storage address;
1888
1889 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1890 if (!ret)
1891 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1892 fdput(f);
bd3ded31
JA
1893 }
1894
1895 return ret;
1896}
1897
1387c2c2
DB
/* connect(2) entry point: forwards its arguments unchanged to __sys_connect(). */
SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
		int, addrlen)
{
	return __sys_connect(fd, uservaddr, addrlen);
}
1903
1da177e4
LT
1904/*
1905 * Get the local address ('name') of a socket object. Move the obtained
1906 * name to user space.
1907 */
1908
8882a107
DB
1909int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1910 int __user *usockaddr_len)
1da177e4
LT
1911{
1912 struct socket *sock;
230b1839 1913 struct sockaddr_storage address;
9b2c45d4 1914 int err, fput_needed;
89bddce5 1915
6cb153ca 1916 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1917 if (!sock)
1918 goto out;
1919
1920 err = security_socket_getsockname(sock);
1921 if (err)
1922 goto out_put;
1923
9b2c45d4
DV
1924 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1925 if (err < 0)
1da177e4 1926 goto out_put;
9b2c45d4
DV
1927 /* "err" is actually length in this case */
1928 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1929
1930out_put:
6cb153ca 1931 fput_light(sock->file, fput_needed);
1da177e4
LT
1932out:
1933 return err;
1934}
1935
8882a107
DB
/* getsockname(2) entry point: forwards its arguments unchanged to __sys_getsockname(). */
SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
		int __user *, usockaddr_len)
{
	return __sys_getsockname(fd, usockaddr, usockaddr_len);
}
1941
1da177e4
LT
1942/*
1943 * Get the remote address ('name') of a socket object. Move the obtained
1944 * name to user space.
1945 */
1946
b21c8f83
DB
1947int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1948 int __user *usockaddr_len)
1da177e4
LT
1949{
1950 struct socket *sock;
230b1839 1951 struct sockaddr_storage address;
9b2c45d4 1952 int err, fput_needed;
1da177e4 1953
89bddce5
SH
1954 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1955 if (sock != NULL) {
1da177e4
LT
1956 err = security_socket_getpeername(sock);
1957 if (err) {
6cb153ca 1958 fput_light(sock->file, fput_needed);
1da177e4
LT
1959 return err;
1960 }
1961
9b2c45d4
DV
1962 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1963 if (err >= 0)
1964 /* "err" is actually length in this case */
1965 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1966 usockaddr_len);
6cb153ca 1967 fput_light(sock->file, fput_needed);
1da177e4
LT
1968 }
1969 return err;
1970}
1971
b21c8f83
DB
1972SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1973 int __user *, usockaddr_len)
1974{
1975 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1976}
1977
1da177e4
LT
1978/*
1979 * Send a datagram to a given address. We move the address into kernel
1980 * space and check the user space data area is readable before invoking
1981 * the protocol.
1982 */
211b634b
DB
1983int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1984 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1985{
1986 struct socket *sock;
230b1839 1987 struct sockaddr_storage address;
1da177e4
LT
1988 int err;
1989 struct msghdr msg;
1990 struct iovec iov;
6cb153ca 1991 int fput_needed;
6cb153ca 1992
602bd0e9
AV
1993 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1994 if (unlikely(err))
1995 return err;
de0fa95c
PE
1996 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1997 if (!sock)
4387ff75 1998 goto out;
6cb153ca 1999
89bddce5 2000 msg.msg_name = NULL;
89bddce5
SH
2001 msg.msg_control = NULL;
2002 msg.msg_controllen = 0;
2003 msg.msg_namelen = 0;
6cb153ca 2004 if (addr) {
43db362d 2005 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2006 if (err < 0)
2007 goto out_put;
230b1839 2008 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2009 msg.msg_namelen = addr_len;
1da177e4
LT
2010 }
2011 if (sock->file->f_flags & O_NONBLOCK)
2012 flags |= MSG_DONTWAIT;
2013 msg.msg_flags = flags;
d8725c86 2014 err = sock_sendmsg(sock, &msg);
1da177e4 2015
89bddce5 2016out_put:
de0fa95c 2017 fput_light(sock->file, fput_needed);
4387ff75 2018out:
1da177e4
LT
2019 return err;
2020}
2021
211b634b
DB
2022SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2023 unsigned int, flags, struct sockaddr __user *, addr,
2024 int, addr_len)
2025{
2026 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2027}
2028
1da177e4 2029/*
89bddce5 2030 * Send a datagram down a socket.
1da177e4
LT
2031 */
2032
3e0fa65f 2033SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2034 unsigned int, flags)
1da177e4 2035{
211b634b 2036 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2037}
2038
2039/*
89bddce5 2040 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2041 * sender. We verify the buffers are writable and if needed move the
2042 * sender address from kernel to user space.
2043 */
7a09e1eb
DB
2044int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2045 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2046{
2047 struct socket *sock;
2048 struct iovec iov;
2049 struct msghdr msg;
230b1839 2050 struct sockaddr_storage address;
89bddce5 2051 int err, err2;
6cb153ca
BL
2052 int fput_needed;
2053
602bd0e9
AV
2054 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2055 if (unlikely(err))
2056 return err;
de0fa95c 2057 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2058 if (!sock)
de0fa95c 2059 goto out;
1da177e4 2060
89bddce5
SH
2061 msg.msg_control = NULL;
2062 msg.msg_controllen = 0;
f3d33426
HFS
2063 /* Save some cycles and don't copy the address if not needed */
2064 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2065 /* We assume all kernel code knows the size of sockaddr_storage */
2066 msg.msg_namelen = 0;
130ed5d1 2067 msg.msg_iocb = NULL;
9f138fa6 2068 msg.msg_flags = 0;
1da177e4
LT
2069 if (sock->file->f_flags & O_NONBLOCK)
2070 flags |= MSG_DONTWAIT;
2da62906 2071 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2072
89bddce5 2073 if (err >= 0 && addr != NULL) {
43db362d 2074 err2 = move_addr_to_user(&address,
230b1839 2075 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2076 if (err2 < 0)
2077 err = err2;
1da177e4 2078 }
de0fa95c
PE
2079
2080 fput_light(sock->file, fput_needed);
4387ff75 2081out:
1da177e4
LT
2082 return err;
2083}
2084
7a09e1eb
DB
2085SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2086 unsigned int, flags, struct sockaddr __user *, addr,
2087 int __user *, addr_len)
2088{
2089 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2090}
2091
1da177e4 2092/*
89bddce5 2093 * Receive a datagram from a socket.
1da177e4
LT
2094 */
2095
b7c0ddf5
JG
2096SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2097 unsigned int, flags)
1da177e4 2098{
7a09e1eb 2099 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2100}
2101
83f0c10b
FW
2102static bool sock_use_custom_sol_socket(const struct socket *sock)
2103{
2104 const struct sock *sk = sock->sk;
2105
2106 /* Use sock->ops->setsockopt() for MPTCP */
2107 return IS_ENABLED(CONFIG_MPTCP) &&
2108 sk->sk_protocol == IPPROTO_MPTCP &&
2109 sk->sk_type == SOCK_STREAM &&
2110 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2111}
2112
1da177e4
LT
2113/*
2114 * Set a socket option. Because we don't know the option lengths we have
2115 * to pass the user mode parameter for the protocols to sort out.
2116 */
a7b75c5a 2117int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2118 int optlen)
1da177e4 2119{
519a8a6c 2120 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2121 char *kernel_optval = NULL;
6cb153ca 2122 int err, fput_needed;
1da177e4
LT
2123 struct socket *sock;
2124
2125 if (optlen < 0)
2126 return -EINVAL;
89bddce5
SH
2127
2128 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2129 if (!sock)
2130 return err;
1da177e4 2131
4a367299
CH
2132 err = security_socket_setsockopt(sock, level, optname);
2133 if (err)
2134 goto out_put;
0d01da6a 2135
55db9c0e
CH
2136 if (!in_compat_syscall())
2137 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2138 user_optval, &optlen,
55db9c0e 2139 &kernel_optval);
4a367299
CH
2140 if (err < 0)
2141 goto out_put;
2142 if (err > 0) {
2143 err = 0;
2144 goto out_put;
2145 }
0d01da6a 2146
a7b75c5a
CH
2147 if (kernel_optval)
2148 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2149 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2150 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2151 else if (unlikely(!sock->ops->setsockopt))
2152 err = -EOPNOTSUPP;
4a367299
CH
2153 else
2154 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2155 optlen);
a7b75c5a 2156 kfree(kernel_optval);
4a367299
CH
2157out_put:
2158 fput_light(sock->file, fput_needed);
1da177e4
LT
2159 return err;
2160}
2161
cc36dca0
DB
2162SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2163 char __user *, optval, int, optlen)
2164{
2165 return __sys_setsockopt(fd, level, optname, optval, optlen);
2166}
2167
9cacf81f
SF
2168INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2169 int optname));
2170
1da177e4
LT
2171/*
2172 * Get a socket option. Because we don't know the option lengths we have
2173 * to pass a user mode parameter for the protocols to sort out.
2174 */
55db9c0e
CH
2175int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2176 int __user *optlen)
1da177e4 2177{
6cb153ca 2178 int err, fput_needed;
1da177e4 2179 struct socket *sock;
0d01da6a 2180 int max_optlen;
1da177e4 2181
89bddce5 2182 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2183 if (!sock)
2184 return err;
2185
2186 err = security_socket_getsockopt(sock, level, optname);
2187 if (err)
2188 goto out_put;
1da177e4 2189
55db9c0e
CH
2190 if (!in_compat_syscall())
2191 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2192
d8a9b38f
CH
2193 if (level == SOL_SOCKET)
2194 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2195 else if (unlikely(!sock->ops->getsockopt))
2196 err = -EOPNOTSUPP;
d8a9b38f
CH
2197 else
2198 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2199 optlen);
0d01da6a 2200
55db9c0e
CH
2201 if (!in_compat_syscall())
2202 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2203 optval, optlen, max_optlen,
2204 err);
6cb153ca 2205out_put:
d8a9b38f 2206 fput_light(sock->file, fput_needed);
1da177e4
LT
2207 return err;
2208}
2209
13a2d70e
DB
2210SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2211 char __user *, optval, int __user *, optlen)
2212{
2213 return __sys_getsockopt(fd, level, optname, optval, optlen);
2214}
2215
1da177e4
LT
2216/*
2217 * Shutdown a socket.
2218 */
2219
b713c195
JA
2220int __sys_shutdown_sock(struct socket *sock, int how)
2221{
2222 int err;
2223
2224 err = security_socket_shutdown(sock, how);
2225 if (!err)
2226 err = sock->ops->shutdown(sock, how);
2227
2228 return err;
2229}
2230
005a1aea 2231int __sys_shutdown(int fd, int how)
1da177e4 2232{
6cb153ca 2233 int err, fput_needed;
1da177e4
LT
2234 struct socket *sock;
2235
89bddce5
SH
2236 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2237 if (sock != NULL) {
b713c195 2238 err = __sys_shutdown_sock(sock, how);
6cb153ca 2239 fput_light(sock->file, fput_needed);
1da177e4
LT
2240 }
2241 return err;
2242}
2243
005a1aea
DB
2244SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2245{
2246 return __sys_shutdown(fd, how);
2247}
2248
89bddce5 2249/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
2250 * fields which are the same type (int / unsigned) on our platforms.
2251 */
/*
 * NOTE: COMPAT_MSG() expands to code that uses a variable named "flags" and
 * a pointer named "<msg>_compat" from the calling scope, in addition to
 * "<msg>" itself.
 */
#define COMPAT_MSG(msg, member)					\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2255
c71d8ebe
TH
2256struct used_address {
2257 struct sockaddr_storage name;
2258 unsigned int name_len;
2259};
2260
0a384abf
JA
2261int __copy_msghdr_from_user(struct msghdr *kmsg,
2262 struct user_msghdr __user *umsg,
2263 struct sockaddr __user **save_addr,
2264 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2265{
ffb07550 2266 struct user_msghdr msg;
08adb7da
AV
2267 ssize_t err;
2268
ffb07550 2269 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2270 return -EFAULT;
dbb490b9 2271
1f466e1f
CH
2272 kmsg->msg_control_is_user = true;
2273 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2274 kmsg->msg_controllen = msg.msg_controllen;
2275 kmsg->msg_flags = msg.msg_flags;
2276
2277 kmsg->msg_namelen = msg.msg_namelen;
2278 if (!msg.msg_name)
6a2a2b3a
AS
2279 kmsg->msg_namelen = 0;
2280
dbb490b9
ML
2281 if (kmsg->msg_namelen < 0)
2282 return -EINVAL;
2283
1661bf36 2284 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2285 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2286
2287 if (save_addr)
ffb07550 2288 *save_addr = msg.msg_name;
08adb7da 2289
ffb07550 2290 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2291 if (!save_addr) {
864d9664
PA
2292 err = move_addr_to_kernel(msg.msg_name,
2293 kmsg->msg_namelen,
08adb7da
AV
2294 kmsg->msg_name);
2295 if (err < 0)
2296 return err;
2297 }
2298 } else {
2299 kmsg->msg_name = NULL;
2300 kmsg->msg_namelen = 0;
2301 }
2302
ffb07550 2303 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2304 return -EMSGSIZE;
2305
0345f931 2306 kmsg->msg_iocb = NULL;
0a384abf
JA
2307 *uiov = msg.msg_iov;
2308 *nsegs = msg.msg_iovlen;
2309 return 0;
2310}
2311
2312static int copy_msghdr_from_user(struct msghdr *kmsg,
2313 struct user_msghdr __user *umsg,
2314 struct sockaddr __user **save_addr,
2315 struct iovec **iov)
2316{
2317 struct user_msghdr msg;
2318 ssize_t err;
2319
2320 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2321 &msg.msg_iovlen);
2322 if (err)
2323 return err;
0345f931 2324
87e5e6da 2325 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2326 msg.msg_iov, msg.msg_iovlen,
da184284 2327 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2328 return err < 0 ? err : 0;
1661bf36
DC
2329}
2330
4257c8ca
JA
2331static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2332 unsigned int flags, struct used_address *used_address,
2333 unsigned int allowed_msghdr_flags)
1da177e4 2334{
b9d717a7 2335 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2336 __aligned(sizeof(__kernel_size_t));
89bddce5 2337 /* 20 is size of ipv6_pktinfo */
1da177e4 2338 unsigned char *ctl_buf = ctl;
d8725c86 2339 int ctl_len;
08adb7da 2340 ssize_t err;
89bddce5 2341
1da177e4
LT
2342 err = -ENOBUFS;
2343
228e548e 2344 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2345 goto out;
28a94d8f 2346 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2347 ctl_len = msg_sys->msg_controllen;
1da177e4 2348 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2349 err =
228e548e 2350 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2351 sizeof(ctl));
1da177e4 2352 if (err)
4257c8ca 2353 goto out;
228e548e
AB
2354 ctl_buf = msg_sys->msg_control;
2355 ctl_len = msg_sys->msg_controllen;
1da177e4 2356 } else if (ctl_len) {
ac4340fc
DM
2357 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2358 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2359 if (ctl_len > sizeof(ctl)) {
1da177e4 2360 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2361 if (ctl_buf == NULL)
4257c8ca 2362 goto out;
1da177e4
LT
2363 }
2364 err = -EFAULT;
1f466e1f 2365 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2366 goto out_freectl;
228e548e 2367 msg_sys->msg_control = ctl_buf;
1f466e1f 2368 msg_sys->msg_control_is_user = false;
1da177e4 2369 }
228e548e 2370 msg_sys->msg_flags = flags;
1da177e4
LT
2371
2372 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2373 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2374 /*
2375 * If this is sendmmsg() and current destination address is same as
2376 * previously succeeded address, omit asking LSM's decision.
2377 * used_address->name_len is initialized to UINT_MAX so that the first
2378 * destination address never matches.
2379 */
bc909d9d
MD
2380 if (used_address && msg_sys->msg_name &&
2381 used_address->name_len == msg_sys->msg_namelen &&
2382 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2383 used_address->name_len)) {
d8725c86 2384 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2385 goto out_freectl;
2386 }
d8725c86 2387 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2388 /*
2389 * If this is sendmmsg() and sending to current destination address was
2390 * successful, remember it.
2391 */
2392 if (used_address && err >= 0) {
2393 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2394 if (msg_sys->msg_name)
2395 memcpy(&used_address->name, msg_sys->msg_name,
2396 used_address->name_len);
c71d8ebe 2397 }
1da177e4
LT
2398
2399out_freectl:
89bddce5 2400 if (ctl_buf != ctl)
1da177e4 2401 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2402out:
2403 return err;
2404}
2405
03b1230c
JA
2406int sendmsg_copy_msghdr(struct msghdr *msg,
2407 struct user_msghdr __user *umsg, unsigned flags,
2408 struct iovec **iov)
4257c8ca
JA
2409{
2410 int err;
2411
2412 if (flags & MSG_CMSG_COMPAT) {
2413 struct compat_msghdr __user *msg_compat;
2414
2415 msg_compat = (struct compat_msghdr __user *) umsg;
2416 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2417 } else {
2418 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2419 }
2420 if (err < 0)
2421 return err;
2422
2423 return 0;
2424}
2425
2426static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2427 struct msghdr *msg_sys, unsigned int flags,
2428 struct used_address *used_address,
2429 unsigned int allowed_msghdr_flags)
2430{
2431 struct sockaddr_storage address;
2432 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2433 ssize_t err;
2434
2435 msg_sys->msg_name = &address;
2436
2437 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2438 if (err < 0)
2439 return err;
2440
2441 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2442 allowed_msghdr_flags);
da184284 2443 kfree(iov);
228e548e
AB
2444 return err;
2445}
2446
2447/*
2448 * BSD sendmsg interface
2449 */
03b1230c 2450long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2451 unsigned int flags)
2452{
03b1230c 2453 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2454}
228e548e 2455
e1834a32
DB
2456long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2457 bool forbid_cmsg_compat)
228e548e
AB
2458{
2459 int fput_needed, err;
2460 struct msghdr msg_sys;
1be374a0
AL
2461 struct socket *sock;
2462
e1834a32
DB
2463 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2464 return -EINVAL;
2465
1be374a0 2466 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2467 if (!sock)
2468 goto out;
2469
28a94d8f 2470 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2471
6cb153ca 2472 fput_light(sock->file, fput_needed);
89bddce5 2473out:
1da177e4
LT
2474 return err;
2475}
2476
666547ff 2477SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2478{
e1834a32 2479 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2480}
2481
228e548e
AB
2482/*
2483 * Linux sendmmsg interface
2484 */
2485
2486int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2487 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2488{
2489 int fput_needed, err, datagrams;
2490 struct socket *sock;
2491 struct mmsghdr __user *entry;
2492 struct compat_mmsghdr __user *compat_entry;
2493 struct msghdr msg_sys;
c71d8ebe 2494 struct used_address used_address;
f092276d 2495 unsigned int oflags = flags;
228e548e 2496
e1834a32
DB
2497 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2498 return -EINVAL;
2499
98382f41
AB
2500 if (vlen > UIO_MAXIOV)
2501 vlen = UIO_MAXIOV;
228e548e
AB
2502
2503 datagrams = 0;
2504
2505 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2506 if (!sock)
2507 return err;
2508
c71d8ebe 2509 used_address.name_len = UINT_MAX;
228e548e
AB
2510 entry = mmsg;
2511 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2512 err = 0;
f092276d 2513 flags |= MSG_BATCH;
228e548e
AB
2514
2515 while (datagrams < vlen) {
f092276d
TH
2516 if (datagrams == vlen - 1)
2517 flags = oflags;
2518
228e548e 2519 if (MSG_CMSG_COMPAT & flags) {
666547ff 2520 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2521 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2522 if (err < 0)
2523 break;
2524 err = __put_user(err, &compat_entry->msg_len);
2525 ++compat_entry;
2526 } else {
a7526eb5 2527 err = ___sys_sendmsg(sock,
666547ff 2528 (struct user_msghdr __user *)entry,
28a94d8f 2529 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2530 if (err < 0)
2531 break;
2532 err = put_user(err, &entry->msg_len);
2533 ++entry;
2534 }
2535
2536 if (err)
2537 break;
2538 ++datagrams;
3023898b
SHY
2539 if (msg_data_left(&msg_sys))
2540 break;
a78cb84c 2541 cond_resched();
228e548e
AB
2542 }
2543
228e548e
AB
2544 fput_light(sock->file, fput_needed);
2545
728ffb86
AB
2546 /* We only return an error if no datagrams were able to be sent */
2547 if (datagrams != 0)
228e548e
AB
2548 return datagrams;
2549
228e548e
AB
2550 return err;
2551}
2552
2553SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2554 unsigned int, vlen, unsigned int, flags)
2555{
e1834a32 2556 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2557}
2558
03b1230c
JA
2559int recvmsg_copy_msghdr(struct msghdr *msg,
2560 struct user_msghdr __user *umsg, unsigned flags,
2561 struct sockaddr __user **uaddr,
2562 struct iovec **iov)
1da177e4 2563{
08adb7da 2564 ssize_t err;
1da177e4 2565
4257c8ca
JA
2566 if (MSG_CMSG_COMPAT & flags) {
2567 struct compat_msghdr __user *msg_compat;
1da177e4 2568
4257c8ca
JA
2569 msg_compat = (struct compat_msghdr __user *) umsg;
2570 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2571 } else {
2572 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2573 }
1da177e4 2574 if (err < 0)
da184284 2575 return err;
1da177e4 2576
4257c8ca
JA
2577 return 0;
2578}
2579
2580static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2581 struct user_msghdr __user *msg,
2582 struct sockaddr __user *uaddr,
2583 unsigned int flags, int nosec)
2584{
2585 struct compat_msghdr __user *msg_compat =
2586 (struct compat_msghdr __user *) msg;
2587 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2588 struct sockaddr_storage addr;
2589 unsigned long cmsg_ptr;
2590 int len;
2591 ssize_t err;
2592
2593 msg_sys->msg_name = &addr;
a2e27255
ACM
2594 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2595 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2596
f3d33426
HFS
2597 /* We assume all kernel code knows the size of sockaddr_storage */
2598 msg_sys->msg_namelen = 0;
2599
1da177e4
LT
2600 if (sock->file->f_flags & O_NONBLOCK)
2601 flags |= MSG_DONTWAIT;
1af66221
ED
2602
2603 if (unlikely(nosec))
2604 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2605 else
2606 err = sock_recvmsg(sock, msg_sys, flags);
2607
1da177e4 2608 if (err < 0)
4257c8ca 2609 goto out;
1da177e4
LT
2610 len = err;
2611
2612 if (uaddr != NULL) {
43db362d 2613 err = move_addr_to_user(&addr,
a2e27255 2614 msg_sys->msg_namelen, uaddr,
89bddce5 2615 uaddr_len);
1da177e4 2616 if (err < 0)
4257c8ca 2617 goto out;
1da177e4 2618 }
a2e27255 2619 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2620 COMPAT_FLAGS(msg));
1da177e4 2621 if (err)
4257c8ca 2622 goto out;
1da177e4 2623 if (MSG_CMSG_COMPAT & flags)
a2e27255 2624 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2625 &msg_compat->msg_controllen);
2626 else
a2e27255 2627 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2628 &msg->msg_controllen);
2629 if (err)
4257c8ca 2630 goto out;
1da177e4 2631 err = len;
4257c8ca
JA
2632out:
2633 return err;
2634}
2635
2636static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2637 struct msghdr *msg_sys, unsigned int flags, int nosec)
2638{
2639 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2640 /* user mode address pointers */
2641 struct sockaddr __user *uaddr;
2642 ssize_t err;
2643
2644 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2645 if (err < 0)
2646 return err;
1da177e4 2647
4257c8ca 2648 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2649 kfree(iov);
a2e27255
ACM
2650 return err;
2651}
2652
2653/*
2654 * BSD recvmsg interface
2655 */
2656
03b1230c
JA
2657long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2658 struct user_msghdr __user *umsg,
2659 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2660{
03b1230c 2661 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2662}
2663
e1834a32
DB
2664long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2665 bool forbid_cmsg_compat)
a2e27255
ACM
2666{
2667 int fput_needed, err;
2668 struct msghdr msg_sys;
1be374a0
AL
2669 struct socket *sock;
2670
e1834a32
DB
2671 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2672 return -EINVAL;
2673
1be374a0 2674 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2675 if (!sock)
2676 goto out;
2677
a7526eb5 2678 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2679
6cb153ca 2680 fput_light(sock->file, fput_needed);
1da177e4
LT
2681out:
2682 return err;
2683}
2684
666547ff 2685SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2686 unsigned int, flags)
2687{
e1834a32 2688 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2689}
2690
a2e27255
ACM
2691/*
2692 * Linux recvmmsg interface
2693 */
2694
e11d4284
AB
2695static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2696 unsigned int vlen, unsigned int flags,
2697 struct timespec64 *timeout)
a2e27255
ACM
2698{
2699 int fput_needed, err, datagrams;
2700 struct socket *sock;
2701 struct mmsghdr __user *entry;
d7256d0e 2702 struct compat_mmsghdr __user *compat_entry;
a2e27255 2703 struct msghdr msg_sys;
766b9f92
DD
2704 struct timespec64 end_time;
2705 struct timespec64 timeout64;
a2e27255
ACM
2706
2707 if (timeout &&
2708 poll_select_set_timeout(&end_time, timeout->tv_sec,
2709 timeout->tv_nsec))
2710 return -EINVAL;
2711
2712 datagrams = 0;
2713
2714 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2715 if (!sock)
2716 return err;
2717
7797dc41
SHY
2718 if (likely(!(flags & MSG_ERRQUEUE))) {
2719 err = sock_error(sock->sk);
2720 if (err) {
2721 datagrams = err;
2722 goto out_put;
2723 }
e623a9e9 2724 }
a2e27255
ACM
2725
2726 entry = mmsg;
d7256d0e 2727 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2728
2729 while (datagrams < vlen) {
2730 /*
2731 * No need to ask LSM for more than the first datagram.
2732 */
d7256d0e 2733 if (MSG_CMSG_COMPAT & flags) {
666547ff 2734 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2735 &msg_sys, flags & ~MSG_WAITFORONE,
2736 datagrams);
d7256d0e
JMG
2737 if (err < 0)
2738 break;
2739 err = __put_user(err, &compat_entry->msg_len);
2740 ++compat_entry;
2741 } else {
a7526eb5 2742 err = ___sys_recvmsg(sock,
666547ff 2743 (struct user_msghdr __user *)entry,
a7526eb5
AL
2744 &msg_sys, flags & ~MSG_WAITFORONE,
2745 datagrams);
d7256d0e
JMG
2746 if (err < 0)
2747 break;
2748 err = put_user(err, &entry->msg_len);
2749 ++entry;
2750 }
2751
a2e27255
ACM
2752 if (err)
2753 break;
a2e27255
ACM
2754 ++datagrams;
2755
71c5c159
BB
2756 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2757 if (flags & MSG_WAITFORONE)
2758 flags |= MSG_DONTWAIT;
2759
a2e27255 2760 if (timeout) {
766b9f92 2761 ktime_get_ts64(&timeout64);
c2e6c856 2762 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2763 if (timeout->tv_sec < 0) {
2764 timeout->tv_sec = timeout->tv_nsec = 0;
2765 break;
2766 }
2767
2768 /* Timeout, return less than vlen datagrams */
2769 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2770 break;
2771 }
2772
2773 /* Out of band data, return right away */
2774 if (msg_sys.msg_flags & MSG_OOB)
2775 break;
a78cb84c 2776 cond_resched();
a2e27255
ACM
2777 }
2778
a2e27255 2779 if (err == 0)
34b88a68
ACM
2780 goto out_put;
2781
2782 if (datagrams == 0) {
2783 datagrams = err;
2784 goto out_put;
2785 }
a2e27255 2786
34b88a68
ACM
2787 /*
2788 * We may return less entries than requested (vlen) if the
2789 * sock is non block and there aren't enough datagrams...
2790 */
2791 if (err != -EAGAIN) {
a2e27255 2792 /*
34b88a68
ACM
2793 * ... or if recvmsg returns an error after we
2794 * received some datagrams, where we record the
2795 * error to return on the next call or if the
2796 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2797 */
34b88a68 2798 sock->sk->sk_err = -err;
a2e27255 2799 }
34b88a68
ACM
2800out_put:
2801 fput_light(sock->file, fput_needed);
a2e27255 2802
34b88a68 2803 return datagrams;
a2e27255
ACM
2804}
2805
e11d4284
AB
2806int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2807 unsigned int vlen, unsigned int flags,
2808 struct __kernel_timespec __user *timeout,
2809 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2810{
2811 int datagrams;
c2e6c856 2812 struct timespec64 timeout_sys;
a2e27255 2813
e11d4284
AB
2814 if (timeout && get_timespec64(&timeout_sys, timeout))
2815 return -EFAULT;
a2e27255 2816
e11d4284 2817 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2818 return -EFAULT;
2819
e11d4284
AB
2820 if (!timeout && !timeout32)
2821 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2822
2823 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2824
e11d4284
AB
2825 if (datagrams <= 0)
2826 return datagrams;
2827
2828 if (timeout && put_timespec64(&timeout_sys, timeout))
2829 datagrams = -EFAULT;
2830
2831 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2832 datagrams = -EFAULT;
2833
2834 return datagrams;
2835}
2836
1255e269
DB
2837SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2838 unsigned int, vlen, unsigned int, flags,
c2e6c856 2839 struct __kernel_timespec __user *, timeout)
1255e269 2840{
e11d4284
AB
2841 if (flags & MSG_CMSG_COMPAT)
2842 return -EINVAL;
2843
2844 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2845}
2846
2847#ifdef CONFIG_COMPAT_32BIT_TIME
2848SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2849 unsigned int, vlen, unsigned int, flags,
2850 struct old_timespec32 __user *, timeout)
2851{
2852 if (flags & MSG_CMSG_COMPAT)
2853 return -EINVAL;
2854
2855 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 2856}
e11d4284 2857#endif
1255e269 2858
a2e27255 2859#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2860/* Argument list sizes for sys_socketcall */
#define AL(x) ((x) * sizeof(unsigned long))
/*
 * Indexed by socketcall number; each entry is the size in bytes of that
 * call's argument block.  The names are the calls the socketcall switch
 * dispatches for each number's argument count.
 */
static const unsigned char nargs[21] = {
	AL(0),		/*  0: unused */
	AL(3),		/*  1: socket */
	AL(3),		/*  2: bind */
	AL(3),		/*  3: connect */
	AL(2),		/*  4: listen */
	AL(3),		/*  5: accept */
	AL(3),		/*  6: getsockname */
	AL(3),		/*  7: getpeername */
	AL(4),		/*  8: socketpair */
	AL(4),		/*  9: send */
	AL(4),		/* 10: recv */
	AL(6),		/* 11: sendto */
	AL(6),		/* 12: recvfrom */
	AL(2),		/* 13: shutdown */
	AL(5),		/* 14: setsockopt */
	AL(5),		/* 15: getsockopt */
	AL(3),		/* 16: sendmsg */
	AL(3),		/* 17: recvmsg */
	AL(4),		/* 18: accept4 */
	AL(5),		/* 19: recvmmsg */
	AL(4),		/* 20: sendmmsg */
};

#undef AL
2870
2871/*
89bddce5 2872 * System call vectors.
1da177e4
LT
2873 *
2874 * Argument checking cleaned up. Saved 20% in size.
2875 * This function doesn't need to set the kernel lock because
89bddce5 2876 * it is set by the callees.
1da177e4
LT
2877 */
2878
3e0fa65f 2879SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2880{
2950fa9d 2881 unsigned long a[AUDITSC_ARGS];
89bddce5 2882 unsigned long a0, a1;
1da177e4 2883 int err;
47379052 2884 unsigned int len;
1da177e4 2885
228e548e 2886 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2887 return -EINVAL;
c8e8cd57 2888 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2889
47379052
AV
2890 len = nargs[call];
2891 if (len > sizeof(a))
2892 return -EINVAL;
2893
1da177e4 2894 /* copy_from_user should be SMP safe. */
47379052 2895 if (copy_from_user(a, args, len))
1da177e4 2896 return -EFAULT;
3ec3b2fb 2897
2950fa9d
CG
2898 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2899 if (err)
2900 return err;
3ec3b2fb 2901
89bddce5
SH
2902 a0 = a[0];
2903 a1 = a[1];
2904
2905 switch (call) {
2906 case SYS_SOCKET:
9d6a15c3 2907 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2908 break;
2909 case SYS_BIND:
a87d35d8 2910 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2911 break;
2912 case SYS_CONNECT:
1387c2c2 2913 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2914 break;
2915 case SYS_LISTEN:
25e290ee 2916 err = __sys_listen(a0, a1);
89bddce5
SH
2917 break;
2918 case SYS_ACCEPT:
4541e805
DB
2919 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2920 (int __user *)a[2], 0);
89bddce5
SH
2921 break;
2922 case SYS_GETSOCKNAME:
2923 err =
8882a107
DB
2924 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2925 (int __user *)a[2]);
89bddce5
SH
2926 break;
2927 case SYS_GETPEERNAME:
2928 err =
b21c8f83
DB
2929 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2930 (int __user *)a[2]);
89bddce5
SH
2931 break;
2932 case SYS_SOCKETPAIR:
6debc8d8 2933 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2934 break;
2935 case SYS_SEND:
f3bf896b
DB
2936 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2937 NULL, 0);
89bddce5
SH
2938 break;
2939 case SYS_SENDTO:
211b634b
DB
2940 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2941 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2942 break;
2943 case SYS_RECV:
d27e9afc
DB
2944 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2945 NULL, NULL);
89bddce5
SH
2946 break;
2947 case SYS_RECVFROM:
7a09e1eb
DB
2948 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2949 (struct sockaddr __user *)a[4],
2950 (int __user *)a[5]);
89bddce5
SH
2951 break;
2952 case SYS_SHUTDOWN:
005a1aea 2953 err = __sys_shutdown(a0, a1);
89bddce5
SH
2954 break;
2955 case SYS_SETSOCKOPT:
cc36dca0
DB
2956 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2957 a[4]);
89bddce5
SH
2958 break;
2959 case SYS_GETSOCKOPT:
2960 err =
13a2d70e
DB
2961 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2962 (int __user *)a[4]);
89bddce5
SH
2963 break;
2964 case SYS_SENDMSG:
e1834a32
DB
2965 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2966 a[2], true);
89bddce5 2967 break;
228e548e 2968 case SYS_SENDMMSG:
e1834a32
DB
2969 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2970 a[3], true);
228e548e 2971 break;
89bddce5 2972 case SYS_RECVMSG:
e1834a32
DB
2973 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2974 a[2], true);
89bddce5 2975 break;
a2e27255 2976 case SYS_RECVMMSG:
3ca47e95 2977 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2978 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2979 a[2], a[3],
2980 (struct __kernel_timespec __user *)a[4],
2981 NULL);
2982 else
2983 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2984 a[2], a[3], NULL,
2985 (struct old_timespec32 __user *)a[4]);
a2e27255 2986 break;
de11defe 2987 case SYS_ACCEPT4:
4541e805
DB
2988 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2989 (int __user *)a[2], a[3]);
aaca0bdc 2990 break;
89bddce5
SH
2991 default:
2992 err = -EINVAL;
2993 break;
1da177e4
LT
2994 }
2995 return err;
2996}
2997
89bddce5 2998#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2999
55737fda
SH
3000/**
3001 * sock_register - add a socket protocol handler
3002 * @ops: description of protocol
3003 *
1da177e4
LT
3004 * This function is called by a protocol handler that wants to
3005 * advertise its address family, and have it linked into the
e793c0f7 3006 * socket interface. The value ops->family corresponds to the
55737fda 3007 * socket system call protocol family.
1da177e4 3008 */
f0fd27d4 3009int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3010{
3011 int err;
3012
3013 if (ops->family >= NPROTO) {
3410f22e 3014 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3015 return -ENOBUFS;
3016 }
55737fda
SH
3017
3018 spin_lock(&net_family_lock);
190683a9
ED
3019 if (rcu_dereference_protected(net_families[ops->family],
3020 lockdep_is_held(&net_family_lock)))
55737fda
SH
3021 err = -EEXIST;
3022 else {
cf778b00 3023 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3024 err = 0;
3025 }
55737fda
SH
3026 spin_unlock(&net_family_lock);
3027
fe0bdbde 3028 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3029 return err;
3030}
c6d409cf 3031EXPORT_SYMBOL(sock_register);
1da177e4 3032
55737fda
SH
3033/**
3034 * sock_unregister - remove a protocol handler
3035 * @family: protocol family to remove
3036 *
1da177e4
LT
3037 * This function is called by a protocol handler that wants to
3038 * remove its address family, and have it unlinked from the
55737fda
SH
3039 * new socket creation.
3040 *
3041 * If protocol handler is a module, then it can use module reference
3042 * counts to protect against new references. If protocol handler is not
3043 * a module then it needs to provide its own protection in
3044 * the ops->create routine.
1da177e4 3045 */
f0fd27d4 3046void sock_unregister(int family)
1da177e4 3047{
f0fd27d4 3048 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3049
55737fda 3050 spin_lock(&net_family_lock);
a9b3cd7f 3051 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3052 spin_unlock(&net_family_lock);
3053
3054 synchronize_rcu();
3055
fe0bdbde 3056 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3057}
c6d409cf 3058EXPORT_SYMBOL(sock_unregister);
1da177e4 3059
bf2ae2e4
XL
3060bool sock_is_registered(int family)
3061{
66b51b0a 3062 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3063}
3064
77d76ea3 3065static int __init sock_init(void)
1da177e4 3066{
b3e19d92 3067 int err;
2ca794e5
EB
3068 /*
3069 * Initialize the network sysctl infrastructure.
3070 */
3071 err = net_sysctl_init();
3072 if (err)
3073 goto out;
b3e19d92 3074
1da177e4 3075 /*
89bddce5 3076 * Initialize skbuff SLAB cache
1da177e4
LT
3077 */
3078 skb_init();
1da177e4
LT
3079
3080 /*
89bddce5 3081 * Initialize the protocols module.
1da177e4
LT
3082 */
3083
3084 init_inodecache();
b3e19d92
NP
3085
3086 err = register_filesystem(&sock_fs_type);
3087 if (err)
47260ba9 3088 goto out;
1da177e4 3089 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3090 if (IS_ERR(sock_mnt)) {
3091 err = PTR_ERR(sock_mnt);
3092 goto out_mount;
3093 }
77d76ea3
AK
3094
3095 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3096 */
3097
3098#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3099 err = netfilter_init();
3100 if (err)
3101 goto out;
1da177e4 3102#endif
cbeb321a 3103
408eccce 3104 ptp_classifier_init();
c1f19b51 3105
b3e19d92
NP
3106out:
3107 return err;
3108
3109out_mount:
3110 unregister_filesystem(&sock_fs_type);
b3e19d92 3111 goto out;
1da177e4
LT
3112}
3113
77d76ea3
AK
3114core_initcall(sock_init); /* early initcall */
3115
1da177e4
LT
#ifdef CONFIG_PROC_FS
/* Emit the "sockets: used N" line into @seq, using seq->private as context. */
void socket_seq_show(struct seq_file *seq)
{
	int used = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", used);
}
#endif	/* CONFIG_PROC_FS */
1da177e4 3123
29c49648
AB
3124/* Handle the fact that while struct ifreq has the same *layout* on
3125 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3126 * which are handled elsewhere, it still has different *size* due to
3127 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3128 * resulting in struct ifreq being 32 and 40 bytes respectively).
3129 * As a result, if the struct happens to be at the end of a page and
3130 * the next page isn't readable/writable, we get a fault. To prevent
3131 * that, copy back and forth to the full size.
3132 */
3133int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
3134{
3135 if (in_compat_syscall()) {
3136 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
3137
3138 memset(ifr, 0, sizeof(*ifr));
3139 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3140 return -EFAULT;
3141
3142 if (ifrdata)
3143 *ifrdata = compat_ptr(ifr32->ifr_data);
3144
3145 return 0;
3146 }
3147
3148 if (copy_from_user(ifr, arg, sizeof(*ifr)))
3149 return -EFAULT;
3150
3151 if (ifrdata)
3152 *ifrdata = ifr->ifr_data;
3153
3154 return 0;
3155}
3156EXPORT_SYMBOL(get_user_ifreq);
3157
3158int put_user_ifreq(struct ifreq *ifr, void __user *arg)
3159{
3160 size_t size = sizeof(*ifr);
3161
3162 if (in_compat_syscall())
3163 size = sizeof(struct compat_ifreq);
3164
3165 if (copy_to_user(arg, ifr, size))
3166 return -EFAULT;
3167
3168 return 0;
3169}
3170EXPORT_SYMBOL(put_user_ifreq);
3171
89bbfc95 3172#ifdef CONFIG_COMPAT
7a50a240
AB
3173static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3174{
7a50a240 3175 compat_uptr_t uptr32;
44c02a2c
AV
3176 struct ifreq ifr;
3177 void __user *saved;
3178 int err;
7a50a240 3179
29c49648 3180 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3181 return -EFAULT;
3182
3183 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3184 return -EFAULT;
3185
44c02a2c
AV
3186 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3187 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3188
44c02a2c
AV
3189 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3190 if (!err) {
3191 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3192 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3193 err = -EFAULT;
ccbd6a5a 3194 }
44c02a2c 3195 return err;
7a229387
AB
3196}
3197
590d4693
BH
3198/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3199static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3200 struct compat_ifreq __user *u_ifreq32)
7a229387 3201{
44c02a2c 3202 struct ifreq ifreq;
7a229387
AB
3203 u32 data32;
3204
44c02a2c 3205 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3206 return -EFAULT;
44c02a2c 3207 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3208 return -EFAULT;
44c02a2c 3209 ifreq.ifr_data = compat_ptr(data32);
7a229387 3210
44c02a2c 3211 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3212}
3213
37ac39bd
JB
3214static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3215 unsigned int cmd,
29c49648 3216 unsigned long arg,
37ac39bd
JB
3217 struct compat_ifreq __user *uifr32)
3218{
29c49648
AB
3219 struct ifreq ifr;
3220 bool need_copyout;
37ac39bd
JB
3221 int err;
3222
29c49648
AB
3223 err = sock->ops->ioctl(sock, cmd, arg);
3224
3225 /* If this ioctl is unknown try to hand it down
3226 * to the NIC driver.
37ac39bd 3227 */
29c49648
AB
3228 if (err != -ENOIOCTLCMD)
3229 return err;
37ac39bd 3230
29c49648 3231 if (get_user_ifreq(&ifr, NULL, uifr32))
37ac39bd 3232 return -EFAULT;
29c49648
AB
3233 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
3234 if (!err && need_copyout)
3235 if (put_user_ifreq(&ifr, uifr32))
3236 return -EFAULT;
37ac39bd 3237
37ac39bd
JB
3238 return err;
3239}
3240
7a229387
AB
3241/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3242 * for some operations; this forces use of the newer bridge-utils that
25985edc 3243 * use compatible ioctls
7a229387 3244 */
6b96018b 3245static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3246{
6b96018b 3247 compat_ulong_t tmp;
7a229387 3248
6b96018b 3249 if (get_user(tmp, argp))
7a229387
AB
3250 return -EFAULT;
3251 if (tmp == BRCTL_GET_VERSION)
3252 return BRCTL_VERSION + 1;
3253 return -EINVAL;
3254}
3255
6b96018b
AB
3256static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3257 unsigned int cmd, unsigned long arg)
3258{
3259 void __user *argp = compat_ptr(arg);
3260 struct sock *sk = sock->sk;
3261 struct net *net = sock_net(sk);
7a229387 3262
6b96018b 3263 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3264 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3265
3266 switch (cmd) {
3267 case SIOCSIFBR:
3268 case SIOCGIFBR:
3269 return old_bridge_ioctl(argp);
7a50a240
AB
3270 case SIOCWANDEV:
3271 return compat_siocwandev(net, argp);
0768e170
AB
3272 case SIOCGSTAMP_OLD:
3273 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3274 if (!sock->ops->gettstamp)
3275 return -ENOIOCTLCMD;
0768e170 3276 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3277 !COMPAT_USE_64BIT_TIME);
3278
dd98d289 3279 case SIOCETHTOOL:
590d4693
BH
3280 case SIOCBONDSLAVEINFOQUERY:
3281 case SIOCBONDINFOQUERY:
a2116ed2 3282 case SIOCSHWTSTAMP:
fd468c74 3283 case SIOCGHWTSTAMP:
590d4693 3284 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3285
3286 case FIOSETOWN:
3287 case SIOCSPGRP:
3288 case FIOGETOWN:
3289 case SIOCGPGRP:
3290 case SIOCBRADDBR:
3291 case SIOCBRDELBR:
3292 case SIOCGIFVLAN:
3293 case SIOCSIFVLAN:
c62cce2c 3294 case SIOCGSKNS:
0768e170
AB
3295 case SIOCGSTAMP_NEW:
3296 case SIOCGSTAMPNS_NEW:
876f0bf9 3297 case SIOCGIFCONF:
6b96018b
AB
3298 return sock_ioctl(file, cmd, arg);
3299
3300 case SIOCGIFFLAGS:
3301 case SIOCSIFFLAGS:
709566d7
AB
3302 case SIOCGIFMAP:
3303 case SIOCSIFMAP:
6b96018b
AB
3304 case SIOCGIFMETRIC:
3305 case SIOCSIFMETRIC:
3306 case SIOCGIFMTU:
3307 case SIOCSIFMTU:
3308 case SIOCGIFMEM:
3309 case SIOCSIFMEM:
3310 case SIOCGIFHWADDR:
3311 case SIOCSIFHWADDR:
3312 case SIOCADDMULTI:
3313 case SIOCDELMULTI:
3314 case SIOCGIFINDEX:
6b96018b
AB
3315 case SIOCGIFADDR:
3316 case SIOCSIFADDR:
3317 case SIOCSIFHWBROADCAST:
6b96018b 3318 case SIOCDIFADDR:
6b96018b
AB
3319 case SIOCGIFBRDADDR:
3320 case SIOCSIFBRDADDR:
3321 case SIOCGIFDSTADDR:
3322 case SIOCSIFDSTADDR:
3323 case SIOCGIFNETMASK:
3324 case SIOCSIFNETMASK:
3325 case SIOCSIFPFLAGS:
3326 case SIOCGIFPFLAGS:
3327 case SIOCGIFTXQLEN:
3328 case SIOCSIFTXQLEN:
3329 case SIOCBRADDIF:
3330 case SIOCBRDELIF:
c6c9fee3 3331 case SIOCGIFNAME:
9177efd3
AB
3332 case SIOCSIFNAME:
3333 case SIOCGMIIPHY:
3334 case SIOCGMIIREG:
3335 case SIOCSMIIREG:
f92d4fc9
AV
3336 case SIOCBONDENSLAVE:
3337 case SIOCBONDRELEASE:
3338 case SIOCBONDSETHWADDR:
3339 case SIOCBONDCHANGEACTIVE:
29c49648 3340 return compat_ifreq_ioctl(net, sock, cmd, arg, argp);
37ac39bd 3341
6b96018b
AB
3342 case SIOCSARP:
3343 case SIOCGARP:
3344 case SIOCDARP:
c7dc504e 3345 case SIOCOUTQ:
9d7bf41f 3346 case SIOCOUTQNSD:
6b96018b 3347 case SIOCATMARK:
63ff03ab 3348 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3349 }
3350
6b96018b
AB
3351 return -ENOIOCTLCMD;
3352}
7a229387 3353
95c96174 3354static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3355 unsigned long arg)
89bbfc95
SP
3356{
3357 struct socket *sock = file->private_data;
3358 int ret = -ENOIOCTLCMD;
87de87d5
DM
3359 struct sock *sk;
3360 struct net *net;
3361
3362 sk = sock->sk;
3363 net = sock_net(sk);
89bbfc95
SP
3364
3365 if (sock->ops->compat_ioctl)
3366 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3367
87de87d5
DM
3368 if (ret == -ENOIOCTLCMD &&
3369 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3370 ret = compat_wext_handle_ioctl(net, cmd, arg);
3371
6b96018b
AB
3372 if (ret == -ENOIOCTLCMD)
3373 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3374
89bbfc95
SP
3375 return ret;
3376}
3377#endif
3378
8a3c245c
PT
3379/**
3380 * kernel_bind - bind an address to a socket (kernel space)
3381 * @sock: socket
3382 * @addr: address
3383 * @addrlen: length of address
3384 *
3385 * Returns 0 or an error.
3386 */
3387
ac5a488e
SS
3388int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3389{
3390 return sock->ops->bind(sock, addr, addrlen);
3391}
c6d409cf 3392EXPORT_SYMBOL(kernel_bind);
ac5a488e 3393
8a3c245c
PT
3394/**
3395 * kernel_listen - move socket to listening state (kernel space)
3396 * @sock: socket
3397 * @backlog: pending connections queue size
3398 *
3399 * Returns 0 or an error.
3400 */
3401
ac5a488e
SS
3402int kernel_listen(struct socket *sock, int backlog)
3403{
3404 return sock->ops->listen(sock, backlog);
3405}
c6d409cf 3406EXPORT_SYMBOL(kernel_listen);
ac5a488e 3407
8a3c245c
PT
3408/**
3409 * kernel_accept - accept a connection (kernel space)
3410 * @sock: listening socket
3411 * @newsock: new connected socket
3412 * @flags: flags
3413 *
3414 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3415 * If it fails, @newsock is guaranteed to be %NULL.
3416 * Returns 0 or an error.
3417 */
3418
ac5a488e
SS
3419int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3420{
3421 struct sock *sk = sock->sk;
3422 int err;
3423
3424 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3425 newsock);
3426 if (err < 0)
3427 goto done;
3428
cdfbabfb 3429 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3430 if (err < 0) {
3431 sock_release(*newsock);
fa8705b0 3432 *newsock = NULL;
ac5a488e
SS
3433 goto done;
3434 }
3435
3436 (*newsock)->ops = sock->ops;
1b08534e 3437 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3438
3439done:
3440 return err;
3441}
c6d409cf 3442EXPORT_SYMBOL(kernel_accept);
ac5a488e 3443
8a3c245c
PT
3444/**
3445 * kernel_connect - connect a socket (kernel space)
3446 * @sock: socket
3447 * @addr: address
3448 * @addrlen: address length
3449 * @flags: flags (O_NONBLOCK, ...)
3450 *
f1dcffcc 3451 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3452 * by default, and the only address from which datagrams are received.
3453 * For stream sockets, attempts to connect to @addr.
3454 * Returns 0 or an error code.
3455 */
3456
ac5a488e 3457int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3458 int flags)
ac5a488e
SS
3459{
3460 return sock->ops->connect(sock, addr, addrlen, flags);
3461}
c6d409cf 3462EXPORT_SYMBOL(kernel_connect);
ac5a488e 3463
8a3c245c
PT
3464/**
3465 * kernel_getsockname - get the address which the socket is bound (kernel space)
3466 * @sock: socket
3467 * @addr: address holder
3468 *
3469 * Fills the @addr pointer with the address which the socket is bound.
3470 * Returns 0 or an error code.
3471 */
3472
9b2c45d4 3473int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3474{
9b2c45d4 3475 return sock->ops->getname(sock, addr, 0);
ac5a488e 3476}
c6d409cf 3477EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3478
8a3c245c 3479/**
645f0897 3480 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3481 * @sock: socket
3482 * @addr: address holder
3483 *
3484 * Fills the @addr pointer with the address which the socket is connected.
3485 * Returns 0 or an error code.
3486 */
3487
9b2c45d4 3488int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3489{
9b2c45d4 3490 return sock->ops->getname(sock, addr, 1);
ac5a488e 3491}
c6d409cf 3492EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3493
8a3c245c
PT
3494/**
3495 * kernel_sendpage - send a &page through a socket (kernel space)
3496 * @sock: socket
3497 * @page: page
3498 * @offset: page offset
3499 * @size: total size in bytes
3500 * @flags: flags (MSG_DONTWAIT, ...)
3501 *
3502 * Returns the total amount sent in bytes or an error.
3503 */
3504
ac5a488e
SS
3505int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3506 size_t size, int flags)
3507{
7b62d31d
CL
3508 if (sock->ops->sendpage) {
3509 /* Warn in case the improper page to zero-copy send */
3510 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3511 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3512 }
ac5a488e
SS
3513 return sock_no_sendpage(sock, page, offset, size, flags);
3514}
c6d409cf 3515EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3516
8a3c245c
PT
3517/**
3518 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3519 * @sk: sock
3520 * @page: page
3521 * @offset: page offset
3522 * @size: total size in bytes
3523 * @flags: flags (MSG_DONTWAIT, ...)
3524 *
3525 * Returns the total amount sent in bytes or an error.
3526 * Caller must hold @sk.
3527 */
3528
306b13eb
TH
3529int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3530 size_t size, int flags)
3531{
3532 struct socket *sock = sk->sk_socket;
3533
3534 if (sock->ops->sendpage_locked)
3535 return sock->ops->sendpage_locked(sk, page, offset, size,
3536 flags);
3537
3538 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3539}
3540EXPORT_SYMBOL(kernel_sendpage_locked);
3541
8a3c245c 3542/**
645f0897 3543 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3544 * @sock: socket
3545 * @how: connection part
3546 *
3547 * Returns 0 or an error.
3548 */
3549
91cf45f0
TM
3550int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3551{
3552 return sock->ops->shutdown(sock, how);
3553}
91cf45f0 3554EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3555
8a3c245c
PT
3556/**
3557 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3558 * @sk: socket
3559 *
3560 * This routine returns the IP overhead imposed by a socket i.e.
3561 * the length of the underlying IP header, depending on whether
3562 * this is an IPv4 or IPv6 socket and the length from IP options turned
3563 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3564 */
8a3c245c 3565
113c3075
P
3566u32 kernel_sock_ip_overhead(struct sock *sk)
3567{
3568 struct inet_sock *inet;
3569 struct ip_options_rcu *opt;
3570 u32 overhead = 0;
113c3075
P
3571#if IS_ENABLED(CONFIG_IPV6)
3572 struct ipv6_pinfo *np;
3573 struct ipv6_txoptions *optv6 = NULL;
3574#endif /* IS_ENABLED(CONFIG_IPV6) */
3575
3576 if (!sk)
3577 return overhead;
3578
113c3075
P
3579 switch (sk->sk_family) {
3580 case AF_INET:
3581 inet = inet_sk(sk);
3582 overhead += sizeof(struct iphdr);
3583 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3584 sock_owned_by_user(sk));
113c3075
P
3585 if (opt)
3586 overhead += opt->opt.optlen;
3587 return overhead;
3588#if IS_ENABLED(CONFIG_IPV6)
3589 case AF_INET6:
3590 np = inet6_sk(sk);
3591 overhead += sizeof(struct ipv6hdr);
3592 if (np)
3593 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3594 sock_owned_by_user(sk));
113c3075
P
3595 if (optv6)
3596 overhead += (optv6->opt_flen + optv6->opt_nflen);
3597 return overhead;
3598#endif /* IS_ENABLED(CONFIG_IPV6) */
3599 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3600 return overhead;
3601 }
3602}
3603EXPORT_SYMBOL(kernel_sock_ip_overhead);