]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
x86/speculation/mmio: Enable CPU Fill buffer clearing on idle
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
d7c08826 107#include <linux/ptp_clock_kernel.h>
06021292 108
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll settings, only built when CONFIG_NET_RX_BUSY_POLL is enabled.
 * NOTE(review): judging by the names these back the busy_read/busy_poll
 * sysctls - confirm against the sysctl table.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 113
8ae5e030
AV
114static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
115static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 116static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
117
118static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
119static __poll_t sock_poll(struct file *file,
120 struct poll_table_struct *wait);
89bddce5 121static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
122#ifdef CONFIG_COMPAT
123static long compat_sock_ioctl(struct file *file,
89bddce5 124 unsigned int cmd, unsigned long arg);
89bbfc95 125#endif
1da177e4 126static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
127static ssize_t sock_sendpage(struct file *file, struct page *page,
128 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 129static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 130 struct pipe_inode_info *pipe, size_t len,
9c55e01c 131 unsigned int flags);
542d3065
AB
132
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: forward to the socket's proto_ops
 * ->show_fdinfo() callback when one is provided, otherwise print nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
/* Without procfs there is no fdinfo; leave the file_operations slot empty. */
#define sock_show_fdinfo NULL
#endif
1da177e4 144
1da177e4
LT
145/*
146 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
147 * in the operation structures but are done directly via the socketcall() multiplexor.
148 */
149
da7071d7 150static const struct file_operations socket_file_ops = {
1da177e4
LT
151 .owner = THIS_MODULE,
152 .llseek = no_llseek,
8ae5e030
AV
153 .read_iter = sock_read_iter,
154 .write_iter = sock_write_iter,
1da177e4
LT
155 .poll = sock_poll,
156 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
157#ifdef CONFIG_COMPAT
158 .compat_ioctl = compat_sock_ioctl,
159#endif
1da177e4 160 .mmap = sock_mmap,
1da177e4
LT
161 .release = sock_close,
162 .fasync = sock_fasync,
5274f052
JA
163 .sendpage = sock_sendpage,
164 .splice_write = generic_splice_sendpage,
9c55e01c 165 .splice_read = sock_splice_read,
b4653342 166 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
167};
168
fe0bdbde
YD
169static const char * const pf_family_names[] = {
170 [PF_UNSPEC] = "PF_UNSPEC",
171 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
172 [PF_INET] = "PF_INET",
173 [PF_AX25] = "PF_AX25",
174 [PF_IPX] = "PF_IPX",
175 [PF_APPLETALK] = "PF_APPLETALK",
176 [PF_NETROM] = "PF_NETROM",
177 [PF_BRIDGE] = "PF_BRIDGE",
178 [PF_ATMPVC] = "PF_ATMPVC",
179 [PF_X25] = "PF_X25",
180 [PF_INET6] = "PF_INET6",
181 [PF_ROSE] = "PF_ROSE",
182 [PF_DECnet] = "PF_DECnet",
183 [PF_NETBEUI] = "PF_NETBEUI",
184 [PF_SECURITY] = "PF_SECURITY",
185 [PF_KEY] = "PF_KEY",
186 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
187 [PF_PACKET] = "PF_PACKET",
188 [PF_ASH] = "PF_ASH",
189 [PF_ECONET] = "PF_ECONET",
190 [PF_ATMSVC] = "PF_ATMSVC",
191 [PF_RDS] = "PF_RDS",
192 [PF_SNA] = "PF_SNA",
193 [PF_IRDA] = "PF_IRDA",
194 [PF_PPPOX] = "PF_PPPOX",
195 [PF_WANPIPE] = "PF_WANPIPE",
196 [PF_LLC] = "PF_LLC",
197 [PF_IB] = "PF_IB",
198 [PF_MPLS] = "PF_MPLS",
199 [PF_CAN] = "PF_CAN",
200 [PF_TIPC] = "PF_TIPC",
201 [PF_BLUETOOTH] = "PF_BLUETOOTH",
202 [PF_IUCV] = "PF_IUCV",
203 [PF_RXRPC] = "PF_RXRPC",
204 [PF_ISDN] = "PF_ISDN",
205 [PF_PHONET] = "PF_PHONET",
206 [PF_IEEE802154] = "PF_IEEE802154",
207 [PF_CAIF] = "PF_CAIF",
208 [PF_ALG] = "PF_ALG",
209 [PF_NFC] = "PF_NFC",
210 [PF_VSOCK] = "PF_VSOCK",
211 [PF_KCM] = "PF_KCM",
212 [PF_QIPCRTR] = "PF_QIPCRTR",
213 [PF_SMC] = "PF_SMC",
214 [PF_XDP] = "PF_XDP",
bc49d816 215 [PF_MCTP] = "PF_MCTP",
fe0bdbde
YD
216};
217
1da177e4
LT
218/*
219 * The protocol list. Each protocol is registered in here.
220 */
221
1da177e4 222static DEFINE_SPINLOCK(net_family_lock);
190683a9 223static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 224
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */
230
1da177e4
LT
231/**
232 * move_addr_to_kernel - copy a socket address into kernel space
233 * @uaddr: Address in user space
234 * @kaddr: Address in kernel space
235 * @ulen: Length in user space
236 *
237 * The address is copied into kernel space. If the provided address is
238 * too long an error code of -EINVAL is returned. If the copy gives
239 * invalid addresses -EFAULT is returned. On a success 0 is returned.
240 */
241
43db362d 242int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 243{
230b1839 244 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 245 return -EINVAL;
89bddce5 246 if (ulen == 0)
1da177e4 247 return 0;
89bddce5 248 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 249 return -EFAULT;
3ec3b2fb 250 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
251}
252
253/**
254 * move_addr_to_user - copy an address to user space
255 * @kaddr: kernel space address
256 * @klen: length of address in kernel
257 * @uaddr: user space address
258 * @ulen: pointer to user length field
259 *
260 * The value pointed to by ulen on entry is the buffer length available.
261 * This is overwritten with the buffer space used. -EINVAL is returned
262 * if an overlong buffer is specified or a negative buffer size. -EFAULT
263 * is returned if either the buffer or the length field are not
264 * accessible.
265 * After copying the data up to the limit the user specifies, the true
266 * length of the data is written over the length limit the user
267 * specified. Zero is returned for a success.
268 */
89bddce5 269
43db362d 270static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 271 void __user *uaddr, int __user *ulen)
1da177e4
LT
272{
273 int err;
274 int len;
275
68c6beb3 276 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
277 err = get_user(len, ulen);
278 if (err)
1da177e4 279 return err;
89bddce5
SH
280 if (len > klen)
281 len = klen;
68c6beb3 282 if (len < 0)
1da177e4 283 return -EINVAL;
89bddce5 284 if (len) {
d6fe3945
SG
285 if (audit_sockaddr(klen, kaddr))
286 return -ENOMEM;
89bddce5 287 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
288 return -EFAULT;
289 }
290 /*
89bddce5
SH
291 * "fromlen shall refer to the value before truncation.."
292 * 1003.1g
1da177e4
LT
293 */
294 return __put_user(klen, ulen);
295}
296
08009a76 297static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
298
299static struct inode *sock_alloc_inode(struct super_block *sb)
300{
301 struct socket_alloc *ei;
89bddce5 302
e94b1766 303 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
304 if (!ei)
305 return NULL;
333f7909
AV
306 init_waitqueue_head(&ei->socket.wq.wait);
307 ei->socket.wq.fasync_list = NULL;
308 ei->socket.wq.flags = 0;
89bddce5 309
1da177e4
LT
310 ei->socket.state = SS_UNCONNECTED;
311 ei->socket.flags = 0;
312 ei->socket.ops = NULL;
313 ei->socket.sk = NULL;
314 ei->socket.file = NULL;
1da177e4
LT
315
316 return &ei->vfs_inode;
317}
318
6d7855c5 319static void sock_free_inode(struct inode *inode)
1da177e4 320{
43815482
ED
321 struct socket_alloc *ei;
322
323 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 324 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
325}
326
51cc5068 327static void init_once(void *foo)
1da177e4 328{
89bddce5 329 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 330
a35afb83 331 inode_init_once(&ei->vfs_inode);
1da177e4 332}
89bddce5 333
1e911632 334static void init_inodecache(void)
1da177e4
LT
335{
336 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
337 sizeof(struct socket_alloc),
338 0,
339 (SLAB_HWCACHE_ALIGN |
340 SLAB_RECLAIM_ACCOUNT |
5d097056 341 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 342 init_once);
1e911632 343 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
344}
345
b87221de 346static const struct super_operations sockfs_ops = {
c6d409cf 347 .alloc_inode = sock_alloc_inode,
6d7855c5 348 .free_inode = sock_free_inode,
c6d409cf 349 .statfs = simple_statfs,
1da177e4
LT
350};
351
c23fbb6b
ED
352/*
353 * sockfs_dname() is called from d_path().
354 */
355static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
356{
357 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 358 d_inode(dentry)->i_ino);
c23fbb6b
ED
359}
360
3ba13d17 361static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 362 .d_dname = sockfs_dname,
1da177e4
LT
363};
364
bba0bd31
AG
365static int sockfs_xattr_get(const struct xattr_handler *handler,
366 struct dentry *dentry, struct inode *inode,
367 const char *suffix, void *value, size_t size)
368{
369 if (value) {
370 if (dentry->d_name.len + 1 > size)
371 return -ERANGE;
372 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
373 }
374 return dentry->d_name.len + 1;
375}
376
377#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
378#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
379#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
380
381static const struct xattr_handler sockfs_xattr_handler = {
382 .name = XATTR_NAME_SOCKPROTONAME,
383 .get = sockfs_xattr_get,
384};
385
4a590153 386static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 387 struct user_namespace *mnt_userns,
4a590153
AG
388 struct dentry *dentry, struct inode *inode,
389 const char *suffix, const void *value,
390 size_t size, int flags)
391{
392 /* Handled by LSM. */
393 return -EAGAIN;
394}
395
396static const struct xattr_handler sockfs_security_xattr_handler = {
397 .prefix = XATTR_SECURITY_PREFIX,
398 .set = sockfs_security_xattr_set,
399};
400
bba0bd31
AG
401static const struct xattr_handler *sockfs_xattr_handlers[] = {
402 &sockfs_xattr_handler,
4a590153 403 &sockfs_security_xattr_handler,
bba0bd31
AG
404 NULL
405};
406
fba9be49 407static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 408{
fba9be49
DH
409 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
410 if (!ctx)
411 return -ENOMEM;
412 ctx->ops = &sockfs_ops;
413 ctx->dops = &sockfs_dentry_operations;
414 ctx->xattr = sockfs_xattr_handlers;
415 return 0;
c74a1cbb
AV
416}
417
418static struct vfsmount *sock_mnt __read_mostly;
419
420static struct file_system_type sock_fs_type = {
421 .name = "sockfs",
fba9be49 422 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
423 .kill_sb = kill_anon_super,
424};
425
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success they return the file descriptor,
 *	with the file struct implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
442
8a3c245c
PT
443/**
444 * sock_alloc_file - Bind a &socket to a &file
445 * @sock: socket
446 * @flags: file status flags
447 * @dname: protocol name
448 *
449 * Returns the &file bound with @sock, implicitly storing it
450 * in sock->file. If dname is %NULL, sets to "".
451 * On failure the return is a ERR pointer (see linux/err.h).
452 * This function uses GFP_KERNEL internally.
453 */
454
aab174f0 455struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 456{
7cbe66b6 457 struct file *file;
1da177e4 458
d93aa9d8
AV
459 if (!dname)
460 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 461
d93aa9d8
AV
462 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
463 O_RDWR | (flags & O_NONBLOCK),
464 &socket_file_ops);
b5ffe634 465 if (IS_ERR(file)) {
8e1611e2 466 sock_release(sock);
39b65252 467 return file;
cc3808f8
AV
468 }
469
470 sock->file = file;
39d8c1b6 471 file->private_data = sock;
d8e464ec 472 stream_open(SOCK_INODE(sock), file);
28407630 473 return file;
39d8c1b6 474}
56b31d1c 475EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 476
56b31d1c 477static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
478{
479 struct file *newfile;
28407630 480 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
481 if (unlikely(fd < 0)) {
482 sock_release(sock);
28407630 483 return fd;
ce4bb04c 484 }
39d8c1b6 485
aab174f0 486 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 487 if (!IS_ERR(newfile)) {
39d8c1b6 488 fd_install(fd, newfile);
28407630
AV
489 return fd;
490 }
7cbe66b6 491
28407630
AV
492 put_unused_fd(fd);
493 return PTR_ERR(newfile);
1da177e4
LT
494}
495
8a3c245c
PT
496/**
497 * sock_from_file - Return the &socket bounded to @file.
498 * @file: file
8a3c245c 499 *
dba4a925 500 * On failure returns %NULL.
8a3c245c
PT
501 */
502
dba4a925 503struct socket *sock_from_file(struct file *file)
6cb153ca 504{
6cb153ca
BL
505 if (file->f_op == &socket_file_ops)
506 return file->private_data; /* set in sock_map_fd */
507
23bb80d2 508 return NULL;
6cb153ca 509}
406a3c63 510EXPORT_SYMBOL(sock_from_file);
6cb153ca 511
1da177e4 512/**
c6d409cf 513 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
514 * @fd: file handle
515 * @err: pointer to an error code return
516 *
517 * The file handle passed in is locked and the socket it is bound
241c4667 518 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
519 * with a negative errno code and NULL is returned. The function checks
520 * for both invalid handles and passing a handle which is not a socket.
521 *
522 * On a success the socket object pointer is returned.
523 */
524
525struct socket *sockfd_lookup(int fd, int *err)
526{
527 struct file *file;
1da177e4
LT
528 struct socket *sock;
529
89bddce5
SH
530 file = fget(fd);
531 if (!file) {
1da177e4
LT
532 *err = -EBADF;
533 return NULL;
534 }
89bddce5 535
dba4a925
FR
536 sock = sock_from_file(file);
537 if (!sock) {
538 *err = -ENOTSOCK;
1da177e4 539 fput(file);
dba4a925 540 }
6cb153ca
BL
541 return sock;
542}
c6d409cf 543EXPORT_SYMBOL(sockfd_lookup);
1da177e4 544
6cb153ca
BL
545static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
546{
00e188ef 547 struct fd f = fdget(fd);
6cb153ca
BL
548 struct socket *sock;
549
3672558c 550 *err = -EBADF;
00e188ef 551 if (f.file) {
dba4a925 552 sock = sock_from_file(f.file);
00e188ef 553 if (likely(sock)) {
ce787a5a 554 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 555 return sock;
00e188ef 556 }
dba4a925 557 *err = -ENOTSOCK;
00e188ef 558 fdput(f);
1da177e4 559 }
6cb153ca 560 return NULL;
1da177e4
LT
561}
562
600e1779
MY
563static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
564 size_t size)
565{
566 ssize_t len;
567 ssize_t used = 0;
568
c5ef6035 569 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
570 if (len < 0)
571 return len;
572 used += len;
573 if (buffer) {
574 if (size < used)
575 return -ERANGE;
576 buffer += len;
577 }
578
579 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
580 used += len;
581 if (buffer) {
582 if (size < used)
583 return -ERANGE;
584 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
585 buffer += len;
586 }
587
588 return used;
589}
590
549c7297
CB
591static int sockfs_setattr(struct user_namespace *mnt_userns,
592 struct dentry *dentry, struct iattr *iattr)
86741ec2 593{
549c7297 594 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 595
e1a3a60a 596 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
597 struct socket *sock = SOCKET_I(d_inode(dentry));
598
6d8c50dc
CW
599 if (sock->sk)
600 sock->sk->sk_uid = iattr->ia_uid;
601 else
602 err = -ENOENT;
86741ec2
LC
603 }
604
605 return err;
606}
607
600e1779 608static const struct inode_operations sockfs_inode_ops = {
600e1779 609 .listxattr = sockfs_listxattr,
86741ec2 610 .setattr = sockfs_setattr,
600e1779
MY
611};
612
1da177e4 613/**
8a3c245c 614 * sock_alloc - allocate a socket
89bddce5 615 *
1da177e4
LT
616 * Allocate a new inode and socket object. The two are bound together
617 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 618 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
619 */
620
f4a00aac 621struct socket *sock_alloc(void)
1da177e4 622{
89bddce5
SH
623 struct inode *inode;
624 struct socket *sock;
1da177e4 625
a209dfc7 626 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
627 if (!inode)
628 return NULL;
629
630 sock = SOCKET_I(inode);
631
85fe4025 632 inode->i_ino = get_next_ino();
89bddce5 633 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
634 inode->i_uid = current_fsuid();
635 inode->i_gid = current_fsgid();
600e1779 636 inode->i_op = &sockfs_inode_ops;
1da177e4 637
1da177e4
LT
638 return sock;
639}
f4a00aac 640EXPORT_SYMBOL(sock_alloc);
1da177e4 641
6d8c50dc 642static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
643{
644 if (sock->ops) {
645 struct module *owner = sock->ops->owner;
646
6d8c50dc
CW
647 if (inode)
648 inode_lock(inode);
1da177e4 649 sock->ops->release(sock);
ff7b11aa 650 sock->sk = NULL;
6d8c50dc
CW
651 if (inode)
652 inode_unlock(inode);
1da177e4
LT
653 sock->ops = NULL;
654 module_put(owner);
655 }
656
333f7909 657 if (sock->wq.fasync_list)
3410f22e 658 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 659
1da177e4
LT
660 if (!sock->file) {
661 iput(SOCK_INODE(sock));
662 return;
663 }
89bddce5 664 sock->file = NULL;
1da177e4 665}
6d8c50dc 666
9a8ad9ac
AL
667/**
668 * sock_release - close a socket
669 * @sock: socket to close
670 *
671 * The socket is released from the protocol stack if it has a release
672 * callback, and the inode is then released if the socket is bound to
673 * an inode not a file.
674 */
6d8c50dc
CW
675void sock_release(struct socket *sock)
676{
677 __sock_release(sock, NULL);
678}
c6d409cf 679EXPORT_SYMBOL(sock_release);
1da177e4 680
c14ac945 681void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 682{
140c55d4
ED
683 u8 flags = *tx_flags;
684
c14ac945 685 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
686 flags |= SKBTX_HW_TSTAMP;
687
c14ac945 688 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
689 flags |= SKBTX_SW_TSTAMP;
690
c14ac945 691 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
692 flags |= SKBTX_SCHED_TSTAMP;
693
140c55d4 694 *tx_flags = flags;
20d49473 695}
67cc0d40 696EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 697
8c3c447b
PA
698INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
699 size_t));
a648a592
PA
700INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
701 size_t));
d8725c86 702static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 703{
a648a592
PA
704 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
705 inet_sendmsg, sock, msg,
706 msg_data_left(msg));
d8725c86
AV
707 BUG_ON(ret == -EIOCBQUEUED);
708 return ret;
1da177e4
LT
709}
710
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM. If the LSM hook
 *	returns non-zero, that value is returned and nothing is sent.
 *	Otherwise returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 727
8a3c245c
PT
728/**
729 * kernel_sendmsg - send a message through @sock (kernel-space)
730 * @sock: socket
731 * @msg: message header
732 * @vec: kernel vec
733 * @num: vec array length
734 * @size: total message data size
735 *
736 * Builds the message data with @vec and sends it through @sock.
737 * Returns the number of bytes sent, or an error code.
738 */
739
1da177e4
LT
740int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
741 struct kvec *vec, size_t num, size_t size)
742{
aa563d7b 743 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 744 return sock_sendmsg(sock, msg);
1da177e4 745}
c6d409cf 746EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 747
8a3c245c
PT
748/**
749 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
750 * @sk: sock
751 * @msg: message header
752 * @vec: output s/g array
753 * @num: output s/g array length
754 * @size: total message data size
755 *
756 * Builds the message data with @vec and sends it through @sock.
757 * Returns the number of bytes sent, or an error code.
758 * Caller must hold @sk.
759 */
760
306b13eb
TH
761int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
762 struct kvec *vec, size_t num, size_t size)
763{
764 struct socket *sock = sk->sk_socket;
765
766 if (!sock->ops->sendmsg_locked)
db5980d8 767 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 768
aa563d7b 769 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
770
771 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
772}
773EXPORT_SYMBOL(kernel_sendmsg_locked);
774
8605330a
SHY
775static bool skb_is_err_queue(const struct sk_buff *skb)
776{
777 /* pkt_type of skbs enqueued on the error queue are set to
778 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
779 * in recvmsg, since skbs received on a local socket will never
780 * have a pkt_type of PACKET_OUTGOING.
781 */
782 return skb->pkt_type == PACKET_OUTGOING;
783}
784
b50a5c70
ML
785/* On transmit, software and hardware timestamps are returned independently.
786 * As the two skb clones share the hardware timestamp, which may be updated
787 * before the software timestamp is received, a hardware TX timestamp may be
788 * returned only if there is no software TX timestamp. Ignore false software
789 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 790 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
791 * hardware timestamp.
792 */
793static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
794{
795 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
796}
797
aad9c8c4
ML
798static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
799{
800 struct scm_ts_pktinfo ts_pktinfo;
801 struct net_device *orig_dev;
802
803 if (!skb_mac_header_was_set(skb))
804 return;
805
806 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
807
808 rcu_read_lock();
809 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
810 if (orig_dev)
811 ts_pktinfo.if_index = orig_dev->ifindex;
812 rcu_read_unlock();
813
814 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
815 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
816 sizeof(ts_pktinfo), &ts_pktinfo);
817}
818
92f37fd2
ED
819/*
820 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
821 */
822void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
823 struct sk_buff *skb)
824{
20d49473 825 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 826 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
827 struct scm_timestamping_internal tss;
828
b50a5c70 829 int empty = 1, false_tstamp = 0;
20d49473
PO
830 struct skb_shared_hwtstamps *shhwtstamps =
831 skb_hwtstamps(skb);
9f7d49f8 832 ktime_t hwtstamp;
20d49473
PO
833
834 /* Race occurred between timestamp enabling and packet
835 receiving. Fill in the current time for now. */
b50a5c70 836 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 837 __net_timestamp(skb);
b50a5c70
ML
838 false_tstamp = 1;
839 }
20d49473
PO
840
841 if (need_software_tstamp) {
842 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
843 if (new_tstamp) {
844 struct __kernel_sock_timeval tv;
845
846 skb_get_new_timestamp(skb, &tv);
847 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
848 sizeof(tv), &tv);
849 } else {
850 struct __kernel_old_timeval tv;
851
852 skb_get_timestamp(skb, &tv);
853 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
854 sizeof(tv), &tv);
855 }
20d49473 856 } else {
887feae3
DD
857 if (new_tstamp) {
858 struct __kernel_timespec ts;
859
860 skb_get_new_timestampns(skb, &ts);
861 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
862 sizeof(ts), &ts);
863 } else {
df1b4ba9 864 struct __kernel_old_timespec ts;
887feae3
DD
865
866 skb_get_timestampns(skb, &ts);
867 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
868 sizeof(ts), &ts);
869 }
20d49473
PO
870 }
871 }
872
f24b9be5 873 memset(&tss, 0, sizeof(tss));
c199105d 874 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 875 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 876 empty = 0;
4d276eb6 877 if (shhwtstamps &&
b9f40e21 878 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826
YL
879 !skb_is_swtx_tstamp(skb, false_tstamp)) {
880 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
9f7d49f8
ML
881 hwtstamp = ptp_convert_timestamp(shhwtstamps,
882 sk->sk_bind_phc);
883 else
884 hwtstamp = shhwtstamps->hwtstamp;
d7c08826 885
9f7d49f8 886 if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
d7c08826
YL
887 empty = 0;
888
889 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
890 !skb_is_err_queue(skb))
891 put_ts_pktinfo(msg, skb);
892 }
aad9c8c4 893 }
1c885808 894 if (!empty) {
9718475e
DD
895 if (sock_flag(sk, SOCK_TSTAMP_NEW))
896 put_cmsg_scm_timestamping64(msg, &tss);
897 else
898 put_cmsg_scm_timestamping(msg, &tss);
1c885808 899
8605330a 900 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 901 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
902 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
903 skb->len, skb->data);
904 }
92f37fd2 905}
7c81fd8b
ACM
906EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
907
6e3e939f
JB
908void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
909 struct sk_buff *skb)
910{
911 int ack;
912
913 if (!sock_flag(sk, SOCK_WIFI_STATUS))
914 return;
915 if (!skb->wifi_acked_valid)
916 return;
917
918 ack = skb->wifi_acked;
919
920 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
921}
922EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
923
11165f14 924static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
925 struct sk_buff *skb)
3b885787 926{
744d5a3e 927 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 928 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 929 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
930}
931
/*
 * Attach both the receive timestamp and the receive-queue drop count
 * control messages for @skb to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 939
8c3c447b 940INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
941 size_t, int));
942INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
943 size_t, int));
1b784140 944static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 945 int flags)
1da177e4 946{
a648a592
PA
947 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
948 inet_recvmsg, sock, msg, msg_data_left(msg),
949 flags);
1da177e4
LT
950}
951
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM. If the LSM hook
 *	returns non-zero, that value is returned and nothing is received.
 *	Otherwise returns the total number of bytes received, or an error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 968
c1249c0a 969/**
8a3c245c
PT
970 * kernel_recvmsg - Receive a message from a socket (kernel space)
971 * @sock: The socket to receive the message from
972 * @msg: Received message
973 * @vec: Input s/g array for message data
974 * @num: Size of input s/g array
975 * @size: Number of bytes to read
976 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 977 *
8a3c245c
PT
978 * On return the msg structure contains the scatter/gather array passed in the
979 * vec argument. The array is modified so that it consists of the unfilled
980 * portion of the original array.
c1249c0a 981 *
8a3c245c 982 * The returned value is the total number of bytes received, or an error.
c1249c0a 983 */
8a3c245c 984
89bddce5
SH
985int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
986 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 987{
1f466e1f 988 msg->msg_control_is_user = false;
aa563d7b 989 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 990 return sock_recvmsg(sock, msg, flags);
1da177e4 991}
c6d409cf 992EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 993
ce1d4d3e
CH
994static ssize_t sock_sendpage(struct file *file, struct page *page,
995 int offset, size_t size, loff_t *ppos, int more)
1da177e4 996{
1da177e4
LT
997 struct socket *sock;
998 int flags;
999
ce1d4d3e
CH
1000 sock = file->private_data;
1001
35f9c09f
ED
1002 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
1003 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1004 flags |= more;
ce1d4d3e 1005
e6949583 1006 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 1007}
1da177e4 1008
9c55e01c 1009static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1010 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1011 unsigned int flags)
1012{
1013 struct socket *sock = file->private_data;
1014
997b37da 1015 if (unlikely(!sock->ops->splice_read))
95506588 1016 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1017
9c55e01c
JA
1018 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1019}
1020
8ae5e030 1021static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1022{
6d652330
AV
1023 struct file *file = iocb->ki_filp;
1024 struct socket *sock = file->private_data;
0345f931 1025 struct msghdr msg = {.msg_iter = *to,
1026 .msg_iocb = iocb};
8ae5e030 1027 ssize_t res;
ce1d4d3e 1028
ebfcd895 1029 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1030 msg.msg_flags = MSG_DONTWAIT;
1031
1032 if (iocb->ki_pos != 0)
1da177e4 1033 return -ESPIPE;
027445c3 1034
66ee59af 1035 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1036 return 0;
1037
2da62906 1038 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1039 *to = msg.msg_iter;
1040 return res;
1da177e4
LT
1041}
1042
8ae5e030 1043static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1044{
6d652330
AV
1045 struct file *file = iocb->ki_filp;
1046 struct socket *sock = file->private_data;
0345f931 1047 struct msghdr msg = {.msg_iter = *from,
1048 .msg_iocb = iocb};
8ae5e030 1049 ssize_t res;
1da177e4 1050
8ae5e030 1051 if (iocb->ki_pos != 0)
ce1d4d3e 1052 return -ESPIPE;
027445c3 1053
ebfcd895 1054 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1055 msg.msg_flags = MSG_DONTWAIT;
1056
6d652330
AV
1057 if (sock->type == SOCK_SEQPACKET)
1058 msg.msg_flags |= MSG_EOR;
1059
d8725c86 1060 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1061 *from = msg.msg_iter;
1062 return res;
1da177e4
LT
1063}
1064
1da177e4
LT
1065/*
1066 * Atomic setting of ioctl hooks to avoid race
1067 * with module unload.
1068 */
1069
4a3e2f71 1070static DEFINE_MUTEX(br_ioctl_mutex);
ad2f99ae
AB
1071static int (*br_ioctl_hook)(struct net *net, struct net_bridge *br,
1072 unsigned int cmd, struct ifreq *ifr,
1073 void __user *uarg);
1da177e4 1074
ad2f99ae
AB
1075void brioctl_set(int (*hook)(struct net *net, struct net_bridge *br,
1076 unsigned int cmd, struct ifreq *ifr,
1077 void __user *uarg))
1da177e4 1078{
4a3e2f71 1079 mutex_lock(&br_ioctl_mutex);
1da177e4 1080 br_ioctl_hook = hook;
4a3e2f71 1081 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1082}
1083EXPORT_SYMBOL(brioctl_set);
1084
ad2f99ae
AB
1085int br_ioctl_call(struct net *net, struct net_bridge *br, unsigned int cmd,
1086 struct ifreq *ifr, void __user *uarg)
1087{
1088 int err = -ENOPKG;
1089
1090 if (!br_ioctl_hook)
1091 request_module("bridge");
1092
1093 mutex_lock(&br_ioctl_mutex);
1094 if (br_ioctl_hook)
1095 err = br_ioctl_hook(net, br, cmd, ifr, uarg);
1096 mutex_unlock(&br_ioctl_mutex);
1097
1098 return err;
1099}
1100
4a3e2f71 1101static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1102static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1103
881d966b 1104void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1105{
4a3e2f71 1106 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1107 vlan_ioctl_hook = hook;
4a3e2f71 1108 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1109}
1110EXPORT_SYMBOL(vlan_ioctl_set);
1111
6b96018b 1112static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1113 unsigned int cmd, unsigned long arg)
6b96018b 1114{
876f0bf9
AB
1115 struct ifreq ifr;
1116 bool need_copyout;
6b96018b
AB
1117 int err;
1118 void __user *argp = (void __user *)arg;
a554bf96 1119 void __user *data;
6b96018b
AB
1120
1121 err = sock->ops->ioctl(sock, cmd, arg);
1122
1123 /*
1124 * If this ioctl is unknown try to hand it down
1125 * to the NIC driver.
1126 */
36fd633e
AV
1127 if (err != -ENOIOCTLCMD)
1128 return err;
6b96018b 1129
29ce8f97
JK
1130 if (!is_socket_ioctl_cmd(cmd))
1131 return -ENOTTY;
1132
a554bf96 1133 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1134 return -EFAULT;
a554bf96 1135 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1136 if (!err && need_copyout)
a554bf96 1137 if (put_user_ifreq(&ifr, argp))
44c02a2c 1138 return -EFAULT;
876f0bf9 1139
6b96018b
AB
1140 return err;
1141}
1142
1da177e4
LT
1143/*
1144 * With an ioctl, arg may well be a user mode pointer, but we don't know
1145 * what to do with it - that's up to the protocol still.
1146 */
1147
1148static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1149{
1150 struct socket *sock;
881d966b 1151 struct sock *sk;
1da177e4
LT
1152 void __user *argp = (void __user *)arg;
1153 int pid, err;
881d966b 1154 struct net *net;
1da177e4 1155
b69aee04 1156 sock = file->private_data;
881d966b 1157 sk = sock->sk;
3b1e0a65 1158 net = sock_net(sk);
44c02a2c
AV
1159 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1160 struct ifreq ifr;
a554bf96 1161 void __user *data;
44c02a2c 1162 bool need_copyout;
a554bf96 1163 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1164 return -EFAULT;
a554bf96 1165 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1166 if (!err && need_copyout)
a554bf96 1167 if (put_user_ifreq(&ifr, argp))
44c02a2c 1168 return -EFAULT;
1da177e4 1169 } else
3d23e349 1170#ifdef CONFIG_WEXT_CORE
1da177e4 1171 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1172 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1173 } else
3d23e349 1174#endif
89bddce5 1175 switch (cmd) {
1da177e4
LT
1176 case FIOSETOWN:
1177 case SIOCSPGRP:
1178 err = -EFAULT;
1179 if (get_user(pid, (int __user *)argp))
1180 break;
393cc3f5 1181 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1182 break;
1183 case FIOGETOWN:
1184 case SIOCGPGRP:
609d7fa9 1185 err = put_user(f_getown(sock->file),
89bddce5 1186 (int __user *)argp);
1da177e4
LT
1187 break;
1188 case SIOCGIFBR:
1189 case SIOCSIFBR:
1190 case SIOCBRADDBR:
1191 case SIOCBRDELBR:
ad2f99ae 1192 err = br_ioctl_call(net, NULL, cmd, NULL, argp);
1da177e4
LT
1193 break;
1194 case SIOCGIFVLAN:
1195 case SIOCSIFVLAN:
1196 err = -ENOPKG;
1197 if (!vlan_ioctl_hook)
1198 request_module("8021q");
1199
4a3e2f71 1200 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1201 if (vlan_ioctl_hook)
881d966b 1202 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1203 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1204 break;
c62cce2c
AV
1205 case SIOCGSKNS:
1206 err = -EPERM;
1207 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1208 break;
1209
1210 err = open_related_ns(&net->ns, get_net_ns);
1211 break;
0768e170
AB
1212 case SIOCGSTAMP_OLD:
1213 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1214 if (!sock->ops->gettstamp) {
1215 err = -ENOIOCTLCMD;
1216 break;
1217 }
1218 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1219 cmd == SIOCGSTAMP_OLD,
1220 !IS_ENABLED(CONFIG_64BIT));
60747828 1221 break;
0768e170
AB
1222 case SIOCGSTAMP_NEW:
1223 case SIOCGSTAMPNS_NEW:
1224 if (!sock->ops->gettstamp) {
1225 err = -ENOIOCTLCMD;
1226 break;
1227 }
1228 err = sock->ops->gettstamp(sock, argp,
1229 cmd == SIOCGSTAMP_NEW,
1230 false);
c7cbdbf2 1231 break;
876f0bf9
AB
1232
1233 case SIOCGIFCONF:
1234 err = dev_ifconf(net, argp);
1235 break;
1236
1da177e4 1237 default:
63ff03ab 1238 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1239 break;
89bddce5 1240 }
1da177e4
LT
1241 return err;
1242}
1243
8a3c245c
PT
1244/**
1245 * sock_create_lite - creates a socket
1246 * @family: protocol family (AF_INET, ...)
1247 * @type: communication type (SOCK_STREAM, ...)
1248 * @protocol: protocol (0, ...)
1249 * @res: new socket
1250 *
1251 * Creates a new socket and assigns it to @res, passing through LSM.
1252 * The new socket initialization is not complete, see kernel_accept().
1253 * Returns 0 or an error. On failure @res is set to %NULL.
1254 * This function internally uses GFP_KERNEL.
1255 */
1256
1da177e4
LT
1257int sock_create_lite(int family, int type, int protocol, struct socket **res)
1258{
1259 int err;
1260 struct socket *sock = NULL;
89bddce5 1261
1da177e4
LT
1262 err = security_socket_create(family, type, protocol, 1);
1263 if (err)
1264 goto out;
1265
1266 sock = sock_alloc();
1267 if (!sock) {
1268 err = -ENOMEM;
1269 goto out;
1270 }
1271
1da177e4 1272 sock->type = type;
7420ed23
VY
1273 err = security_socket_post_create(sock, family, type, protocol, 1);
1274 if (err)
1275 goto out_release;
1276
1da177e4
LT
1277out:
1278 *res = sock;
1279 return err;
7420ed23
VY
1280out_release:
1281 sock_release(sock);
1282 sock = NULL;
1283 goto out;
1da177e4 1284}
c6d409cf 1285EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1286
1287/* No kernel lock held - perfect */
ade994f4 1288static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1289{
3cafb376 1290 struct socket *sock = file->private_data;
a331de3b 1291 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1292
e88958e6
CH
1293 if (!sock->ops->poll)
1294 return 0;
f641f13b 1295
a331de3b
CH
1296 if (sk_can_busy_loop(sock->sk)) {
1297 /* poll once if requested by the syscall */
1298 if (events & POLL_BUSY_LOOP)
1299 sk_busy_loop(sock->sk, 1);
1300
1301 /* if this socket can poll_ll, tell the system call */
1302 flag = POLL_BUSY_LOOP;
1303 }
1304
1305 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1306}
1307
89bddce5 1308static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1309{
b69aee04 1310 struct socket *sock = file->private_data;
1da177e4
LT
1311
1312 return sock->ops->mmap(file, sock, vma);
1313}
1314
/*
 * ->release file operation for sockets: releases the socket that
 * SOCKET_I() maps @inode to.  Always reports success.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	__sock_release(SOCKET_I(inode), inode);
	return 0;
}
1320
1321/*
1322 * Update the socket async list
1323 *
1324 * Fasync_list locking strategy.
1325 *
1326 * 1. fasync_list is modified only under process context socket lock
1327 * i.e. under semaphore.
1328 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1329 * or under socket lock
1da177e4
LT
1330 */
1331
1332static int sock_fasync(int fd, struct file *filp, int on)
1333{
989a2979
ED
1334 struct socket *sock = filp->private_data;
1335 struct sock *sk = sock->sk;
333f7909 1336 struct socket_wq *wq = &sock->wq;
1da177e4 1337
989a2979 1338 if (sk == NULL)
1da177e4 1339 return -EINVAL;
1da177e4
LT
1340
1341 lock_sock(sk);
eaefd110 1342 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1343
eaefd110 1344 if (!wq->fasync_list)
989a2979
ED
1345 sock_reset_flag(sk, SOCK_FASYNC);
1346 else
bcdce719 1347 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1348
989a2979 1349 release_sock(sk);
1da177e4
LT
1350 return 0;
1351}
1352
ceb5d58b 1353/* This function may be called only under rcu_lock */
1da177e4 1354
ceb5d58b 1355int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1356{
ceb5d58b 1357 if (!wq || !wq->fasync_list)
1da177e4 1358 return -1;
ceb5d58b 1359
89bddce5 1360 switch (how) {
8d8ad9d7 1361 case SOCK_WAKE_WAITD:
ceb5d58b 1362 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1363 break;
1364 goto call_kill;
8d8ad9d7 1365 case SOCK_WAKE_SPACE:
ceb5d58b 1366 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1367 break;
7c7ab580 1368 fallthrough;
8d8ad9d7 1369 case SOCK_WAKE_IO:
89bddce5 1370call_kill:
43815482 1371 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1372 break;
8d8ad9d7 1373 case SOCK_WAKE_URG:
43815482 1374 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1375 }
ceb5d58b 1376
1da177e4
LT
1377 return 0;
1378}
c6d409cf 1379EXPORT_SYMBOL(sock_wake_async);
1da177e4 1380
8a3c245c
PT
1381/**
1382 * __sock_create - creates a socket
1383 * @net: net namespace
1384 * @family: protocol family (AF_INET, ...)
1385 * @type: communication type (SOCK_STREAM, ...)
1386 * @protocol: protocol (0, ...)
1387 * @res: new socket
1388 * @kern: boolean for kernel space sockets
1389 *
1390 * Creates a new socket and assigns it to @res, passing through LSM.
1391 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1392 * be set to true if the socket resides in kernel space.
1393 * This function internally uses GFP_KERNEL.
1394 */
1395
721db93a 1396int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1397 struct socket **res, int kern)
1da177e4
LT
1398{
1399 int err;
1400 struct socket *sock;
55737fda 1401 const struct net_proto_family *pf;
1da177e4
LT
1402
1403 /*
89bddce5 1404 * Check protocol is in range
1da177e4
LT
1405 */
1406 if (family < 0 || family >= NPROTO)
1407 return -EAFNOSUPPORT;
1408 if (type < 0 || type >= SOCK_MAX)
1409 return -EINVAL;
1410
1411 /* Compatibility.
1412
1413 This uglymoron is moved from INET layer to here to avoid
1414 deadlock in module load.
1415 */
1416 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1417 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1418 current->comm);
1da177e4
LT
1419 family = PF_PACKET;
1420 }
1421
1422 err = security_socket_create(family, type, protocol, kern);
1423 if (err)
1424 return err;
89bddce5 1425
55737fda
SH
1426 /*
1427 * Allocate the socket and allow the family to set things up. if
1428 * the protocol is 0, the family is instructed to select an appropriate
1429 * default.
1430 */
1431 sock = sock_alloc();
1432 if (!sock) {
e87cc472 1433 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1434 return -ENFILE; /* Not exactly a match, but its the
1435 closest posix thing */
1436 }
1437
1438 sock->type = type;
1439
95a5afca 1440#ifdef CONFIG_MODULES
89bddce5
SH
1441 /* Attempt to load a protocol module if the find failed.
1442 *
1443 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1444 * requested real, full-featured networking support upon configuration.
1445 * Otherwise module support will break!
1446 */
190683a9 1447 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1448 request_module("net-pf-%d", family);
1da177e4
LT
1449#endif
1450
55737fda
SH
1451 rcu_read_lock();
1452 pf = rcu_dereference(net_families[family]);
1453 err = -EAFNOSUPPORT;
1454 if (!pf)
1455 goto out_release;
1da177e4
LT
1456
1457 /*
1458 * We will call the ->create function, that possibly is in a loadable
1459 * module, so we have to bump that loadable module refcnt first.
1460 */
55737fda 1461 if (!try_module_get(pf->owner))
1da177e4
LT
1462 goto out_release;
1463
55737fda
SH
1464 /* Now protected by module ref count */
1465 rcu_read_unlock();
1466
3f378b68 1467 err = pf->create(net, sock, protocol, kern);
55737fda 1468 if (err < 0)
1da177e4 1469 goto out_module_put;
a79af59e 1470
1da177e4
LT
1471 /*
1472 * Now to bump the refcnt of the [loadable] module that owns this
1473 * socket at sock_release time we decrement its refcnt.
1474 */
55737fda
SH
1475 if (!try_module_get(sock->ops->owner))
1476 goto out_module_busy;
1477
1da177e4
LT
1478 /*
1479 * Now that we're done with the ->create function, the [loadable]
1480 * module can have its refcnt decremented
1481 */
55737fda 1482 module_put(pf->owner);
7420ed23
VY
1483 err = security_socket_post_create(sock, family, type, protocol, kern);
1484 if (err)
3b185525 1485 goto out_sock_release;
55737fda 1486 *res = sock;
1da177e4 1487
55737fda
SH
1488 return 0;
1489
1490out_module_busy:
1491 err = -EAFNOSUPPORT;
1da177e4 1492out_module_put:
55737fda
SH
1493 sock->ops = NULL;
1494 module_put(pf->owner);
1495out_sock_release:
1da177e4 1496 sock_release(sock);
55737fda
SH
1497 return err;
1498
1499out_release:
1500 rcu_read_unlock();
1501 goto out_sock_release;
1da177e4 1502}
721db93a 1503EXPORT_SYMBOL(__sock_create);
1da177e4 1504
8a3c245c
PT
1505/**
1506 * sock_create - creates a socket
1507 * @family: protocol family (AF_INET, ...)
1508 * @type: communication type (SOCK_STREAM, ...)
1509 * @protocol: protocol (0, ...)
1510 * @res: new socket
1511 *
1512 * A wrapper around __sock_create().
1513 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1514 */
1515
1da177e4
LT
1516int sock_create(int family, int type, int protocol, struct socket **res)
1517{
1b8d7ae4 1518 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1519}
c6d409cf 1520EXPORT_SYMBOL(sock_create);
1da177e4 1521
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern as 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1539
9d6a15c3 1540int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1541{
1542 int retval;
1543 struct socket *sock;
a677a039
UD
1544 int flags;
1545
e38b36f3
UD
1546 /* Check the SOCK_* constants for consistency. */
1547 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1548 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1549 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1550 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1551
a677a039 1552 flags = type & ~SOCK_TYPE_MASK;
77d27200 1553 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1554 return -EINVAL;
1555 type &= SOCK_TYPE_MASK;
1da177e4 1556
aaca0bdc
UD
1557 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1558 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1559
1da177e4
LT
1560 retval = sock_create(family, type, protocol, &sock);
1561 if (retval < 0)
8e1611e2 1562 return retval;
1da177e4 1563
8e1611e2 1564 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1565}
1566
9d6a15c3
DB
1567SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1568{
1569 return __sys_socket(family, type, protocol);
1570}
1571
1da177e4
LT
1572/*
1573 * Create a pair of connected sockets.
1574 */
1575
6debc8d8 1576int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1577{
1578 struct socket *sock1, *sock2;
1579 int fd1, fd2, err;
db349509 1580 struct file *newfile1, *newfile2;
a677a039
UD
1581 int flags;
1582
1583 flags = type & ~SOCK_TYPE_MASK;
77d27200 1584 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1585 return -EINVAL;
1586 type &= SOCK_TYPE_MASK;
1da177e4 1587
aaca0bdc
UD
1588 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1589 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1590
016a266b
AV
1591 /*
1592 * reserve descriptors and make sure we won't fail
1593 * to return them to userland.
1594 */
1595 fd1 = get_unused_fd_flags(flags);
1596 if (unlikely(fd1 < 0))
1597 return fd1;
1598
1599 fd2 = get_unused_fd_flags(flags);
1600 if (unlikely(fd2 < 0)) {
1601 put_unused_fd(fd1);
1602 return fd2;
1603 }
1604
1605 err = put_user(fd1, &usockvec[0]);
1606 if (err)
1607 goto out;
1608
1609 err = put_user(fd2, &usockvec[1]);
1610 if (err)
1611 goto out;
1612
1da177e4
LT
1613 /*
1614 * Obtain the first socket and check if the underlying protocol
1615 * supports the socketpair call.
1616 */
1617
1618 err = sock_create(family, type, protocol, &sock1);
016a266b 1619 if (unlikely(err < 0))
1da177e4
LT
1620 goto out;
1621
1622 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1623 if (unlikely(err < 0)) {
1624 sock_release(sock1);
1625 goto out;
bf3c23d1 1626 }
d73aa286 1627
d47cd945
DH
1628 err = security_socket_socketpair(sock1, sock2);
1629 if (unlikely(err)) {
1630 sock_release(sock2);
1631 sock_release(sock1);
1632 goto out;
1633 }
1634
016a266b
AV
1635 err = sock1->ops->socketpair(sock1, sock2);
1636 if (unlikely(err < 0)) {
1637 sock_release(sock2);
1638 sock_release(sock1);
1639 goto out;
28407630
AV
1640 }
1641
aab174f0 1642 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1643 if (IS_ERR(newfile1)) {
28407630 1644 err = PTR_ERR(newfile1);
016a266b
AV
1645 sock_release(sock2);
1646 goto out;
28407630
AV
1647 }
1648
aab174f0 1649 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1650 if (IS_ERR(newfile2)) {
1651 err = PTR_ERR(newfile2);
016a266b
AV
1652 fput(newfile1);
1653 goto out;
db349509
AV
1654 }
1655
157cf649 1656 audit_fd_pair(fd1, fd2);
d73aa286 1657
db349509
AV
1658 fd_install(fd1, newfile1);
1659 fd_install(fd2, newfile2);
d73aa286 1660 return 0;
1da177e4 1661
016a266b 1662out:
d73aa286 1663 put_unused_fd(fd2);
d73aa286 1664 put_unused_fd(fd1);
1da177e4
LT
1665 return err;
1666}
1667
6debc8d8
DB
1668SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1669 int __user *, usockvec)
1670{
1671 return __sys_socketpair(family, type, protocol, usockvec);
1672}
1673
1da177e4
LT
1674/*
1675 * Bind a name to a socket. Nothing much to do here since it's
1676 * the protocol's responsibility to handle the local address.
1677 *
1678 * We move the socket address to kernel space before we call
1679 * the protocol layer (having also checked the address is ok).
1680 */
1681
a87d35d8 1682int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1683{
1684 struct socket *sock;
230b1839 1685 struct sockaddr_storage address;
6cb153ca 1686 int err, fput_needed;
1da177e4 1687
89bddce5 1688 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1689 if (sock) {
43db362d 1690 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1691 if (!err) {
89bddce5 1692 err = security_socket_bind(sock,
230b1839 1693 (struct sockaddr *)&address,
89bddce5 1694 addrlen);
6cb153ca
BL
1695 if (!err)
1696 err = sock->ops->bind(sock,
89bddce5 1697 (struct sockaddr *)
230b1839 1698 &address, addrlen);
1da177e4 1699 }
6cb153ca 1700 fput_light(sock->file, fput_needed);
89bddce5 1701 }
1da177e4
LT
1702 return err;
1703}
1704
a87d35d8
DB
1705SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1706{
1707 return __sys_bind(fd, umyaddr, addrlen);
1708}
1709
1da177e4
LT
1710/*
1711 * Perform a listen. Basically, we allow the protocol to do anything
1712 * necessary for a listen, and if that works, we mark the socket as
1713 * ready for listening.
1714 */
1715
25e290ee 1716int __sys_listen(int fd, int backlog)
1da177e4
LT
1717{
1718 struct socket *sock;
6cb153ca 1719 int err, fput_needed;
b8e1f9b5 1720 int somaxconn;
89bddce5
SH
1721
1722 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1723 if (sock) {
8efa6e93 1724 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1725 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1726 backlog = somaxconn;
1da177e4
LT
1727
1728 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1729 if (!err)
1730 err = sock->ops->listen(sock, backlog);
1da177e4 1731
6cb153ca 1732 fput_light(sock->file, fput_needed);
1da177e4
LT
1733 }
1734 return err;
1735}
1736
25e290ee
DB
1737SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1738{
1739 return __sys_listen(fd, backlog);
1740}
1741
d32f89da 1742struct file *do_accept(struct file *file, unsigned file_flags,
de2ea4b6 1743 struct sockaddr __user *upeer_sockaddr,
d32f89da 1744 int __user *upeer_addrlen, int flags)
1da177e4
LT
1745{
1746 struct socket *sock, *newsock;
39d8c1b6 1747 struct file *newfile;
d32f89da 1748 int err, len;
230b1839 1749 struct sockaddr_storage address;
1da177e4 1750
dba4a925 1751 sock = sock_from_file(file);
d32f89da
PB
1752 if (!sock)
1753 return ERR_PTR(-ENOTSOCK);
1da177e4 1754
c6d409cf
ED
1755 newsock = sock_alloc();
1756 if (!newsock)
d32f89da 1757 return ERR_PTR(-ENFILE);
1da177e4
LT
1758
1759 newsock->type = sock->type;
1760 newsock->ops = sock->ops;
1761
1da177e4
LT
1762 /*
1763 * We don't need try_module_get here, as the listening socket (sock)
1764 * has the protocol module (sock->ops->owner) held.
1765 */
1766 __module_get(newsock->ops->owner);
1767
aab174f0 1768 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
d32f89da
PB
1769 if (IS_ERR(newfile))
1770 return newfile;
39d8c1b6 1771
a79af59e
FF
1772 err = security_socket_accept(sock, newsock);
1773 if (err)
39d8c1b6 1774 goto out_fd;
a79af59e 1775
de2ea4b6
JA
1776 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1777 false);
1da177e4 1778 if (err < 0)
39d8c1b6 1779 goto out_fd;
1da177e4
LT
1780
1781 if (upeer_sockaddr) {
9b2c45d4
DV
1782 len = newsock->ops->getname(newsock,
1783 (struct sockaddr *)&address, 2);
1784 if (len < 0) {
1da177e4 1785 err = -ECONNABORTED;
39d8c1b6 1786 goto out_fd;
1da177e4 1787 }
43db362d 1788 err = move_addr_to_user(&address,
230b1839 1789 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1790 if (err < 0)
39d8c1b6 1791 goto out_fd;
1da177e4
LT
1792 }
1793
1794 /* File flags are not inherited via accept() unlike another OSes. */
d32f89da 1795 return newfile;
39d8c1b6 1796out_fd:
9606a216 1797 fput(newfile);
d32f89da
PB
1798 return ERR_PTR(err);
1799}
1800
1801int __sys_accept4_file(struct file *file, unsigned file_flags,
1802 struct sockaddr __user *upeer_sockaddr,
1803 int __user *upeer_addrlen, int flags,
1804 unsigned long nofile)
1805{
1806 struct file *newfile;
1807 int newfd;
1808
1809 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1810 return -EINVAL;
1811
1812 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1813 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
de2ea4b6 1814
d32f89da
PB
1815 newfd = __get_unused_fd_flags(flags, nofile);
1816 if (unlikely(newfd < 0))
1817 return newfd;
1818
1819 newfile = do_accept(file, file_flags, upeer_sockaddr, upeer_addrlen,
1820 flags);
1821 if (IS_ERR(newfile)) {
1822 put_unused_fd(newfd);
1823 return PTR_ERR(newfile);
1824 }
1825 fd_install(newfd, newfile);
1826 return newfd;
de2ea4b6
JA
1827}
1828
1829/*
1830 * For accept, we attempt to create a new socket, set up the link
1831 * with the client, wake up the client, then return the new
1832 * connected fd. We collect the address of the connector in kernel
1833 * space and move it to user at the very end. This is unclean because
1834 * we open the socket then return an error.
1835 *
1836 * 1003.1g adds the ability to recvmsg() to query connection pending
1837 * status to recvmsg. We need to add that support in a way thats
1838 * clean when we restructure accept also.
1839 */
1840
1841int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1842 int __user *upeer_addrlen, int flags)
1843{
1844 int ret = -EBADF;
1845 struct fd f;
1846
1847 f = fdget(fd);
1848 if (f.file) {
1849 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1850 upeer_addrlen, flags,
1851 rlimit(RLIMIT_NOFILE));
6b07edeb 1852 fdput(f);
de2ea4b6
JA
1853 }
1854
1855 return ret;
1da177e4
LT
1856}
1857
4541e805
DB
1858SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1859 int __user *, upeer_addrlen, int, flags)
1860{
1861 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1862}
1863
20f37034
HC
1864SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1865 int __user *, upeer_addrlen)
aaca0bdc 1866{
4541e805 1867 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1868}
1869
1da177e4
LT
1870/*
1871 * Attempt to connect to a socket with the server address. The address
1872 * is in user space so we verify it is OK and move it to kernel space.
1873 *
1874 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1875 * break bindings
1876 *
1877 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1878 * other SEQPACKET protocols that take time to connect() as it doesn't
1879 * include the -EINPROGRESS status for such sockets.
1880 */
1881
f499a021 1882int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1883 int addrlen, int file_flags)
1da177e4
LT
1884{
1885 struct socket *sock;
bd3ded31 1886 int err;
1da177e4 1887
dba4a925
FR
1888 sock = sock_from_file(file);
1889 if (!sock) {
1890 err = -ENOTSOCK;
1da177e4 1891 goto out;
dba4a925 1892 }
1da177e4 1893
89bddce5 1894 err =
f499a021 1895 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1896 if (err)
bd3ded31 1897 goto out;
1da177e4 1898
f499a021 1899 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1900 sock->file->f_flags | file_flags);
1da177e4
LT
1901out:
1902 return err;
1903}
1904
bd3ded31
JA
1905int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1906{
1907 int ret = -EBADF;
1908 struct fd f;
1909
1910 f = fdget(fd);
1911 if (f.file) {
f499a021
JA
1912 struct sockaddr_storage address;
1913
1914 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1915 if (!ret)
1916 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1917 fdput(f);
bd3ded31
JA
1918 }
1919
1920 return ret;
1921}
1922
1387c2c2
DB
1923SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1924 int, addrlen)
1925{
1926 return __sys_connect(fd, uservaddr, addrlen);
1927}
1928
1da177e4
LT
1929/*
1930 * Get the local address ('name') of a socket object. Move the obtained
1931 * name to user space.
1932 */
1933
8882a107
DB
1934int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1935 int __user *usockaddr_len)
1da177e4
LT
1936{
1937 struct socket *sock;
230b1839 1938 struct sockaddr_storage address;
9b2c45d4 1939 int err, fput_needed;
89bddce5 1940
6cb153ca 1941 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1942 if (!sock)
1943 goto out;
1944
1945 err = security_socket_getsockname(sock);
1946 if (err)
1947 goto out_put;
1948
9b2c45d4
DV
1949 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1950 if (err < 0)
1da177e4 1951 goto out_put;
9b2c45d4
DV
1952 /* "err" is actually length in this case */
1953 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1954
1955out_put:
6cb153ca 1956 fput_light(sock->file, fput_needed);
1da177e4
LT
1957out:
1958 return err;
1959}
1960
8882a107
DB
1961SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1962 int __user *, usockaddr_len)
1963{
1964 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1965}
1966
1da177e4
LT
1967/*
1968 * Get the remote address ('name') of a socket object. Move the obtained
1969 * name to user space.
1970 */
1971
b21c8f83
DB
1972int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1973 int __user *usockaddr_len)
1da177e4
LT
1974{
1975 struct socket *sock;
230b1839 1976 struct sockaddr_storage address;
9b2c45d4 1977 int err, fput_needed;
1da177e4 1978
89bddce5
SH
1979 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1980 if (sock != NULL) {
1da177e4
LT
1981 err = security_socket_getpeername(sock);
1982 if (err) {
6cb153ca 1983 fput_light(sock->file, fput_needed);
1da177e4
LT
1984 return err;
1985 }
1986
9b2c45d4
DV
1987 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1988 if (err >= 0)
1989 /* "err" is actually length in this case */
1990 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1991 usockaddr_len);
6cb153ca 1992 fput_light(sock->file, fput_needed);
1da177e4
LT
1993 }
1994 return err;
1995}
1996
b21c8f83
DB
1997SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1998 int __user *, usockaddr_len)
1999{
2000 return __sys_getpeername(fd, usockaddr, usockaddr_len);
2001}
2002
1da177e4
LT
2003/*
2004 * Send a datagram to a given address. We move the address into kernel
2005 * space and check the user space data area is readable before invoking
2006 * the protocol.
2007 */
211b634b
DB
2008int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
2009 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
2010{
2011 struct socket *sock;
230b1839 2012 struct sockaddr_storage address;
1da177e4
LT
2013 int err;
2014 struct msghdr msg;
2015 struct iovec iov;
6cb153ca 2016 int fput_needed;
6cb153ca 2017
602bd0e9
AV
2018 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
2019 if (unlikely(err))
2020 return err;
de0fa95c
PE
2021 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2022 if (!sock)
4387ff75 2023 goto out;
6cb153ca 2024
89bddce5 2025 msg.msg_name = NULL;
89bddce5
SH
2026 msg.msg_control = NULL;
2027 msg.msg_controllen = 0;
2028 msg.msg_namelen = 0;
6cb153ca 2029 if (addr) {
43db362d 2030 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2031 if (err < 0)
2032 goto out_put;
230b1839 2033 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2034 msg.msg_namelen = addr_len;
1da177e4
LT
2035 }
2036 if (sock->file->f_flags & O_NONBLOCK)
2037 flags |= MSG_DONTWAIT;
2038 msg.msg_flags = flags;
d8725c86 2039 err = sock_sendmsg(sock, &msg);
1da177e4 2040
89bddce5 2041out_put:
de0fa95c 2042 fput_light(sock->file, fput_needed);
4387ff75 2043out:
1da177e4
LT
2044 return err;
2045}
2046
211b634b
DB
2047SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2048 unsigned int, flags, struct sockaddr __user *, addr,
2049 int, addr_len)
2050{
2051 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2052}
2053
1da177e4 2054/*
89bddce5 2055 * Send a datagram down a socket.
1da177e4
LT
2056 */
2057
3e0fa65f 2058SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2059 unsigned int, flags)
1da177e4 2060{
211b634b 2061 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2062}
2063
2064/*
89bddce5 2065 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2066 * sender. We verify the buffers are writable and if needed move the
2067 * sender address from kernel to user space.
2068 */
7a09e1eb
DB
2069int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2070 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2071{
2072 struct socket *sock;
2073 struct iovec iov;
2074 struct msghdr msg;
230b1839 2075 struct sockaddr_storage address;
89bddce5 2076 int err, err2;
6cb153ca
BL
2077 int fput_needed;
2078
602bd0e9
AV
2079 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2080 if (unlikely(err))
2081 return err;
de0fa95c 2082 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2083 if (!sock)
de0fa95c 2084 goto out;
1da177e4 2085
89bddce5
SH
2086 msg.msg_control = NULL;
2087 msg.msg_controllen = 0;
f3d33426
HFS
2088 /* Save some cycles and don't copy the address if not needed */
2089 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2090 /* We assume all kernel code knows the size of sockaddr_storage */
2091 msg.msg_namelen = 0;
130ed5d1 2092 msg.msg_iocb = NULL;
9f138fa6 2093 msg.msg_flags = 0;
1da177e4
LT
2094 if (sock->file->f_flags & O_NONBLOCK)
2095 flags |= MSG_DONTWAIT;
2da62906 2096 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2097
89bddce5 2098 if (err >= 0 && addr != NULL) {
43db362d 2099 err2 = move_addr_to_user(&address,
230b1839 2100 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2101 if (err2 < 0)
2102 err = err2;
1da177e4 2103 }
de0fa95c
PE
2104
2105 fput_light(sock->file, fput_needed);
4387ff75 2106out:
1da177e4
LT
2107 return err;
2108}
2109
7a09e1eb
DB
2110SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2111 unsigned int, flags, struct sockaddr __user *, addr,
2112 int __user *, addr_len)
2113{
2114 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2115}
2116
1da177e4 2117/*
89bddce5 2118 * Receive a datagram from a socket.
1da177e4
LT
2119 */
2120
b7c0ddf5
JG
2121SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2122 unsigned int, flags)
1da177e4 2123{
7a09e1eb 2124 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2125}
2126
83f0c10b
FW
2127static bool sock_use_custom_sol_socket(const struct socket *sock)
2128{
2129 const struct sock *sk = sock->sk;
2130
2131 /* Use sock->ops->setsockopt() for MPTCP */
2132 return IS_ENABLED(CONFIG_MPTCP) &&
2133 sk->sk_protocol == IPPROTO_MPTCP &&
2134 sk->sk_type == SOCK_STREAM &&
2135 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2136}
2137
1da177e4
LT
2138/*
2139 * Set a socket option. Because we don't know the option lengths we have
2140 * to pass the user mode parameter for the protocols to sort out.
2141 */
a7b75c5a 2142int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2143 int optlen)
1da177e4 2144{
519a8a6c 2145 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2146 char *kernel_optval = NULL;
6cb153ca 2147 int err, fput_needed;
1da177e4
LT
2148 struct socket *sock;
2149
2150 if (optlen < 0)
2151 return -EINVAL;
89bddce5
SH
2152
2153 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2154 if (!sock)
2155 return err;
1da177e4 2156
4a367299
CH
2157 err = security_socket_setsockopt(sock, level, optname);
2158 if (err)
2159 goto out_put;
0d01da6a 2160
55db9c0e
CH
2161 if (!in_compat_syscall())
2162 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2163 user_optval, &optlen,
55db9c0e 2164 &kernel_optval);
4a367299
CH
2165 if (err < 0)
2166 goto out_put;
2167 if (err > 0) {
2168 err = 0;
2169 goto out_put;
2170 }
0d01da6a 2171
a7b75c5a
CH
2172 if (kernel_optval)
2173 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2174 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2175 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2176 else if (unlikely(!sock->ops->setsockopt))
2177 err = -EOPNOTSUPP;
4a367299
CH
2178 else
2179 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2180 optlen);
a7b75c5a 2181 kfree(kernel_optval);
4a367299
CH
2182out_put:
2183 fput_light(sock->file, fput_needed);
1da177e4
LT
2184 return err;
2185}
2186
cc36dca0
DB
2187SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2188 char __user *, optval, int, optlen)
2189{
2190 return __sys_setsockopt(fd, level, optname, optval, optlen);
2191}
2192
9cacf81f
SF
2193INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2194 int optname));
2195
1da177e4
LT
2196/*
2197 * Get a socket option. Because we don't know the option lengths we have
2198 * to pass a user mode parameter for the protocols to sort out.
2199 */
55db9c0e
CH
2200int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2201 int __user *optlen)
1da177e4 2202{
6cb153ca 2203 int err, fput_needed;
1da177e4 2204 struct socket *sock;
0d01da6a 2205 int max_optlen;
1da177e4 2206
89bddce5 2207 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2208 if (!sock)
2209 return err;
2210
2211 err = security_socket_getsockopt(sock, level, optname);
2212 if (err)
2213 goto out_put;
1da177e4 2214
55db9c0e
CH
2215 if (!in_compat_syscall())
2216 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2217
d8a9b38f
CH
2218 if (level == SOL_SOCKET)
2219 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2220 else if (unlikely(!sock->ops->getsockopt))
2221 err = -EOPNOTSUPP;
d8a9b38f
CH
2222 else
2223 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2224 optlen);
0d01da6a 2225
55db9c0e
CH
2226 if (!in_compat_syscall())
2227 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2228 optval, optlen, max_optlen,
2229 err);
6cb153ca 2230out_put:
d8a9b38f 2231 fput_light(sock->file, fput_needed);
1da177e4
LT
2232 return err;
2233}
2234
13a2d70e
DB
2235SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2236 char __user *, optval, int __user *, optlen)
2237{
2238 return __sys_getsockopt(fd, level, optname, optval, optlen);
2239}
2240
1da177e4
LT
2241/*
2242 * Shutdown a socket.
2243 */
2244
b713c195
JA
2245int __sys_shutdown_sock(struct socket *sock, int how)
2246{
2247 int err;
2248
2249 err = security_socket_shutdown(sock, how);
2250 if (!err)
2251 err = sock->ops->shutdown(sock, how);
2252
2253 return err;
2254}
2255
005a1aea 2256int __sys_shutdown(int fd, int how)
1da177e4 2257{
6cb153ca 2258 int err, fput_needed;
1da177e4
LT
2259 struct socket *sock;
2260
89bddce5
SH
2261 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2262 if (sock != NULL) {
b713c195 2263 err = __sys_shutdown_sock(sock, how);
6cb153ca 2264 fput_light(sock->file, fput_needed);
1da177e4
LT
2265 }
2266 return err;
2267}
2268
005a1aea
DB
2269SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2270{
2271 return __sys_shutdown(fd, how);
2272}
2273
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These are not hygienic: the expansion reads a variable named "flags"
 * and, for an argument "msg", a second variable named "msg_compat".
 * Both must be in scope at the point of use.
 */
#define COMPAT_MSG(msg, member)						\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member		\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2280
c71d8ebe
TH
2281struct used_address {
2282 struct sockaddr_storage name;
2283 unsigned int name_len;
2284};
2285
0a384abf
JA
2286int __copy_msghdr_from_user(struct msghdr *kmsg,
2287 struct user_msghdr __user *umsg,
2288 struct sockaddr __user **save_addr,
2289 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2290{
ffb07550 2291 struct user_msghdr msg;
08adb7da
AV
2292 ssize_t err;
2293
ffb07550 2294 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2295 return -EFAULT;
dbb490b9 2296
1f466e1f
CH
2297 kmsg->msg_control_is_user = true;
2298 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2299 kmsg->msg_controllen = msg.msg_controllen;
2300 kmsg->msg_flags = msg.msg_flags;
2301
2302 kmsg->msg_namelen = msg.msg_namelen;
2303 if (!msg.msg_name)
6a2a2b3a
AS
2304 kmsg->msg_namelen = 0;
2305
dbb490b9
ML
2306 if (kmsg->msg_namelen < 0)
2307 return -EINVAL;
2308
1661bf36 2309 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2310 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2311
2312 if (save_addr)
ffb07550 2313 *save_addr = msg.msg_name;
08adb7da 2314
ffb07550 2315 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2316 if (!save_addr) {
864d9664
PA
2317 err = move_addr_to_kernel(msg.msg_name,
2318 kmsg->msg_namelen,
08adb7da
AV
2319 kmsg->msg_name);
2320 if (err < 0)
2321 return err;
2322 }
2323 } else {
2324 kmsg->msg_name = NULL;
2325 kmsg->msg_namelen = 0;
2326 }
2327
ffb07550 2328 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2329 return -EMSGSIZE;
2330
0345f931 2331 kmsg->msg_iocb = NULL;
0a384abf
JA
2332 *uiov = msg.msg_iov;
2333 *nsegs = msg.msg_iovlen;
2334 return 0;
2335}
2336
2337static int copy_msghdr_from_user(struct msghdr *kmsg,
2338 struct user_msghdr __user *umsg,
2339 struct sockaddr __user **save_addr,
2340 struct iovec **iov)
2341{
2342 struct user_msghdr msg;
2343 ssize_t err;
2344
2345 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2346 &msg.msg_iovlen);
2347 if (err)
2348 return err;
0345f931 2349
87e5e6da 2350 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2351 msg.msg_iov, msg.msg_iovlen,
da184284 2352 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2353 return err < 0 ? err : 0;
1661bf36
DC
2354}
2355
4257c8ca
JA
2356static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2357 unsigned int flags, struct used_address *used_address,
2358 unsigned int allowed_msghdr_flags)
1da177e4 2359{
b9d717a7 2360 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2361 __aligned(sizeof(__kernel_size_t));
89bddce5 2362 /* 20 is size of ipv6_pktinfo */
1da177e4 2363 unsigned char *ctl_buf = ctl;
d8725c86 2364 int ctl_len;
08adb7da 2365 ssize_t err;
89bddce5 2366
1da177e4
LT
2367 err = -ENOBUFS;
2368
228e548e 2369 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2370 goto out;
28a94d8f 2371 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2372 ctl_len = msg_sys->msg_controllen;
1da177e4 2373 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2374 err =
228e548e 2375 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2376 sizeof(ctl));
1da177e4 2377 if (err)
4257c8ca 2378 goto out;
228e548e
AB
2379 ctl_buf = msg_sys->msg_control;
2380 ctl_len = msg_sys->msg_controllen;
1da177e4 2381 } else if (ctl_len) {
ac4340fc
DM
2382 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2383 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2384 if (ctl_len > sizeof(ctl)) {
1da177e4 2385 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2386 if (ctl_buf == NULL)
4257c8ca 2387 goto out;
1da177e4
LT
2388 }
2389 err = -EFAULT;
1f466e1f 2390 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2391 goto out_freectl;
228e548e 2392 msg_sys->msg_control = ctl_buf;
1f466e1f 2393 msg_sys->msg_control_is_user = false;
1da177e4 2394 }
228e548e 2395 msg_sys->msg_flags = flags;
1da177e4
LT
2396
2397 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2398 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2399 /*
2400 * If this is sendmmsg() and current destination address is same as
2401 * previously succeeded address, omit asking LSM's decision.
2402 * used_address->name_len is initialized to UINT_MAX so that the first
2403 * destination address never matches.
2404 */
bc909d9d
MD
2405 if (used_address && msg_sys->msg_name &&
2406 used_address->name_len == msg_sys->msg_namelen &&
2407 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2408 used_address->name_len)) {
d8725c86 2409 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2410 goto out_freectl;
2411 }
d8725c86 2412 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2413 /*
2414 * If this is sendmmsg() and sending to current destination address was
2415 * successful, remember it.
2416 */
2417 if (used_address && err >= 0) {
2418 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2419 if (msg_sys->msg_name)
2420 memcpy(&used_address->name, msg_sys->msg_name,
2421 used_address->name_len);
c71d8ebe 2422 }
1da177e4
LT
2423
2424out_freectl:
89bddce5 2425 if (ctl_buf != ctl)
1da177e4 2426 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2427out:
2428 return err;
2429}
2430
03b1230c
JA
2431int sendmsg_copy_msghdr(struct msghdr *msg,
2432 struct user_msghdr __user *umsg, unsigned flags,
2433 struct iovec **iov)
4257c8ca
JA
2434{
2435 int err;
2436
2437 if (flags & MSG_CMSG_COMPAT) {
2438 struct compat_msghdr __user *msg_compat;
2439
2440 msg_compat = (struct compat_msghdr __user *) umsg;
2441 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2442 } else {
2443 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2444 }
2445 if (err < 0)
2446 return err;
2447
2448 return 0;
2449}
2450
2451static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2452 struct msghdr *msg_sys, unsigned int flags,
2453 struct used_address *used_address,
2454 unsigned int allowed_msghdr_flags)
2455{
2456 struct sockaddr_storage address;
2457 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2458 ssize_t err;
2459
2460 msg_sys->msg_name = &address;
2461
2462 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2463 if (err < 0)
2464 return err;
2465
2466 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2467 allowed_msghdr_flags);
da184284 2468 kfree(iov);
228e548e
AB
2469 return err;
2470}
2471
2472/*
2473 * BSD sendmsg interface
2474 */
03b1230c 2475long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2476 unsigned int flags)
2477{
03b1230c 2478 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2479}
228e548e 2480
e1834a32
DB
2481long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2482 bool forbid_cmsg_compat)
228e548e
AB
2483{
2484 int fput_needed, err;
2485 struct msghdr msg_sys;
1be374a0
AL
2486 struct socket *sock;
2487
e1834a32
DB
2488 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2489 return -EINVAL;
2490
1be374a0 2491 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2492 if (!sock)
2493 goto out;
2494
28a94d8f 2495 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2496
6cb153ca 2497 fput_light(sock->file, fput_needed);
89bddce5 2498out:
1da177e4
LT
2499 return err;
2500}
2501
666547ff 2502SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2503{
e1834a32 2504 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2505}
2506
228e548e
AB
2507/*
2508 * Linux sendmmsg interface
2509 */
2510
2511int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2512 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2513{
2514 int fput_needed, err, datagrams;
2515 struct socket *sock;
2516 struct mmsghdr __user *entry;
2517 struct compat_mmsghdr __user *compat_entry;
2518 struct msghdr msg_sys;
c71d8ebe 2519 struct used_address used_address;
f092276d 2520 unsigned int oflags = flags;
228e548e 2521
e1834a32
DB
2522 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2523 return -EINVAL;
2524
98382f41
AB
2525 if (vlen > UIO_MAXIOV)
2526 vlen = UIO_MAXIOV;
228e548e
AB
2527
2528 datagrams = 0;
2529
2530 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2531 if (!sock)
2532 return err;
2533
c71d8ebe 2534 used_address.name_len = UINT_MAX;
228e548e
AB
2535 entry = mmsg;
2536 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2537 err = 0;
f092276d 2538 flags |= MSG_BATCH;
228e548e
AB
2539
2540 while (datagrams < vlen) {
f092276d
TH
2541 if (datagrams == vlen - 1)
2542 flags = oflags;
2543
228e548e 2544 if (MSG_CMSG_COMPAT & flags) {
666547ff 2545 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2546 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2547 if (err < 0)
2548 break;
2549 err = __put_user(err, &compat_entry->msg_len);
2550 ++compat_entry;
2551 } else {
a7526eb5 2552 err = ___sys_sendmsg(sock,
666547ff 2553 (struct user_msghdr __user *)entry,
28a94d8f 2554 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2555 if (err < 0)
2556 break;
2557 err = put_user(err, &entry->msg_len);
2558 ++entry;
2559 }
2560
2561 if (err)
2562 break;
2563 ++datagrams;
3023898b
SHY
2564 if (msg_data_left(&msg_sys))
2565 break;
a78cb84c 2566 cond_resched();
228e548e
AB
2567 }
2568
228e548e
AB
2569 fput_light(sock->file, fput_needed);
2570
728ffb86
AB
2571 /* We only return an error if no datagrams were able to be sent */
2572 if (datagrams != 0)
228e548e
AB
2573 return datagrams;
2574
228e548e
AB
2575 return err;
2576}
2577
2578SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2579 unsigned int, vlen, unsigned int, flags)
2580{
e1834a32 2581 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2582}
2583
03b1230c
JA
2584int recvmsg_copy_msghdr(struct msghdr *msg,
2585 struct user_msghdr __user *umsg, unsigned flags,
2586 struct sockaddr __user **uaddr,
2587 struct iovec **iov)
1da177e4 2588{
08adb7da 2589 ssize_t err;
1da177e4 2590
4257c8ca
JA
2591 if (MSG_CMSG_COMPAT & flags) {
2592 struct compat_msghdr __user *msg_compat;
1da177e4 2593
4257c8ca
JA
2594 msg_compat = (struct compat_msghdr __user *) umsg;
2595 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2596 } else {
2597 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2598 }
1da177e4 2599 if (err < 0)
da184284 2600 return err;
1da177e4 2601
4257c8ca
JA
2602 return 0;
2603}
2604
2605static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2606 struct user_msghdr __user *msg,
2607 struct sockaddr __user *uaddr,
2608 unsigned int flags, int nosec)
2609{
2610 struct compat_msghdr __user *msg_compat =
2611 (struct compat_msghdr __user *) msg;
2612 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2613 struct sockaddr_storage addr;
2614 unsigned long cmsg_ptr;
2615 int len;
2616 ssize_t err;
2617
2618 msg_sys->msg_name = &addr;
a2e27255
ACM
2619 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2620 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2621
f3d33426
HFS
2622 /* We assume all kernel code knows the size of sockaddr_storage */
2623 msg_sys->msg_namelen = 0;
2624
1da177e4
LT
2625 if (sock->file->f_flags & O_NONBLOCK)
2626 flags |= MSG_DONTWAIT;
1af66221
ED
2627
2628 if (unlikely(nosec))
2629 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2630 else
2631 err = sock_recvmsg(sock, msg_sys, flags);
2632
1da177e4 2633 if (err < 0)
4257c8ca 2634 goto out;
1da177e4
LT
2635 len = err;
2636
2637 if (uaddr != NULL) {
43db362d 2638 err = move_addr_to_user(&addr,
a2e27255 2639 msg_sys->msg_namelen, uaddr,
89bddce5 2640 uaddr_len);
1da177e4 2641 if (err < 0)
4257c8ca 2642 goto out;
1da177e4 2643 }
a2e27255 2644 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2645 COMPAT_FLAGS(msg));
1da177e4 2646 if (err)
4257c8ca 2647 goto out;
1da177e4 2648 if (MSG_CMSG_COMPAT & flags)
a2e27255 2649 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2650 &msg_compat->msg_controllen);
2651 else
a2e27255 2652 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2653 &msg->msg_controllen);
2654 if (err)
4257c8ca 2655 goto out;
1da177e4 2656 err = len;
4257c8ca
JA
2657out:
2658 return err;
2659}
2660
2661static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2662 struct msghdr *msg_sys, unsigned int flags, int nosec)
2663{
2664 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2665 /* user mode address pointers */
2666 struct sockaddr __user *uaddr;
2667 ssize_t err;
2668
2669 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2670 if (err < 0)
2671 return err;
1da177e4 2672
4257c8ca 2673 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2674 kfree(iov);
a2e27255
ACM
2675 return err;
2676}
2677
2678/*
2679 * BSD recvmsg interface
2680 */
2681
03b1230c
JA
2682long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2683 struct user_msghdr __user *umsg,
2684 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2685{
03b1230c 2686 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2687}
2688
e1834a32
DB
2689long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2690 bool forbid_cmsg_compat)
a2e27255
ACM
2691{
2692 int fput_needed, err;
2693 struct msghdr msg_sys;
1be374a0
AL
2694 struct socket *sock;
2695
e1834a32
DB
2696 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2697 return -EINVAL;
2698
1be374a0 2699 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2700 if (!sock)
2701 goto out;
2702
a7526eb5 2703 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2704
6cb153ca 2705 fput_light(sock->file, fput_needed);
1da177e4
LT
2706out:
2707 return err;
2708}
2709
666547ff 2710SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2711 unsigned int, flags)
2712{
e1834a32 2713 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2714}
2715
a2e27255
ACM
2716/*
2717 * Linux recvmmsg interface
2718 */
2719
e11d4284
AB
2720static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2721 unsigned int vlen, unsigned int flags,
2722 struct timespec64 *timeout)
a2e27255
ACM
2723{
2724 int fput_needed, err, datagrams;
2725 struct socket *sock;
2726 struct mmsghdr __user *entry;
d7256d0e 2727 struct compat_mmsghdr __user *compat_entry;
a2e27255 2728 struct msghdr msg_sys;
766b9f92
DD
2729 struct timespec64 end_time;
2730 struct timespec64 timeout64;
a2e27255
ACM
2731
2732 if (timeout &&
2733 poll_select_set_timeout(&end_time, timeout->tv_sec,
2734 timeout->tv_nsec))
2735 return -EINVAL;
2736
2737 datagrams = 0;
2738
2739 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2740 if (!sock)
2741 return err;
2742
7797dc41
SHY
2743 if (likely(!(flags & MSG_ERRQUEUE))) {
2744 err = sock_error(sock->sk);
2745 if (err) {
2746 datagrams = err;
2747 goto out_put;
2748 }
e623a9e9 2749 }
a2e27255
ACM
2750
2751 entry = mmsg;
d7256d0e 2752 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2753
2754 while (datagrams < vlen) {
2755 /*
2756 * No need to ask LSM for more than the first datagram.
2757 */
d7256d0e 2758 if (MSG_CMSG_COMPAT & flags) {
666547ff 2759 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2760 &msg_sys, flags & ~MSG_WAITFORONE,
2761 datagrams);
d7256d0e
JMG
2762 if (err < 0)
2763 break;
2764 err = __put_user(err, &compat_entry->msg_len);
2765 ++compat_entry;
2766 } else {
a7526eb5 2767 err = ___sys_recvmsg(sock,
666547ff 2768 (struct user_msghdr __user *)entry,
a7526eb5
AL
2769 &msg_sys, flags & ~MSG_WAITFORONE,
2770 datagrams);
d7256d0e
JMG
2771 if (err < 0)
2772 break;
2773 err = put_user(err, &entry->msg_len);
2774 ++entry;
2775 }
2776
a2e27255
ACM
2777 if (err)
2778 break;
a2e27255
ACM
2779 ++datagrams;
2780
71c5c159
BB
2781 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2782 if (flags & MSG_WAITFORONE)
2783 flags |= MSG_DONTWAIT;
2784
a2e27255 2785 if (timeout) {
766b9f92 2786 ktime_get_ts64(&timeout64);
c2e6c856 2787 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2788 if (timeout->tv_sec < 0) {
2789 timeout->tv_sec = timeout->tv_nsec = 0;
2790 break;
2791 }
2792
2793 /* Timeout, return less than vlen datagrams */
2794 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2795 break;
2796 }
2797
2798 /* Out of band data, return right away */
2799 if (msg_sys.msg_flags & MSG_OOB)
2800 break;
a78cb84c 2801 cond_resched();
a2e27255
ACM
2802 }
2803
a2e27255 2804 if (err == 0)
34b88a68
ACM
2805 goto out_put;
2806
2807 if (datagrams == 0) {
2808 datagrams = err;
2809 goto out_put;
2810 }
a2e27255 2811
34b88a68
ACM
2812 /*
2813 * We may return less entries than requested (vlen) if the
2814 * sock is non block and there aren't enough datagrams...
2815 */
2816 if (err != -EAGAIN) {
a2e27255 2817 /*
34b88a68
ACM
2818 * ... or if recvmsg returns an error after we
2819 * received some datagrams, where we record the
2820 * error to return on the next call or if the
2821 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2822 */
34b88a68 2823 sock->sk->sk_err = -err;
a2e27255 2824 }
34b88a68
ACM
2825out_put:
2826 fput_light(sock->file, fput_needed);
a2e27255 2827
34b88a68 2828 return datagrams;
a2e27255
ACM
2829}
2830
e11d4284
AB
2831int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2832 unsigned int vlen, unsigned int flags,
2833 struct __kernel_timespec __user *timeout,
2834 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2835{
2836 int datagrams;
c2e6c856 2837 struct timespec64 timeout_sys;
a2e27255 2838
e11d4284
AB
2839 if (timeout && get_timespec64(&timeout_sys, timeout))
2840 return -EFAULT;
a2e27255 2841
e11d4284 2842 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2843 return -EFAULT;
2844
e11d4284
AB
2845 if (!timeout && !timeout32)
2846 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2847
2848 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2849
e11d4284
AB
2850 if (datagrams <= 0)
2851 return datagrams;
2852
2853 if (timeout && put_timespec64(&timeout_sys, timeout))
2854 datagrams = -EFAULT;
2855
2856 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2857 datagrams = -EFAULT;
2858
2859 return datagrams;
2860}
2861
1255e269
DB
2862SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2863 unsigned int, vlen, unsigned int, flags,
c2e6c856 2864 struct __kernel_timespec __user *, timeout)
1255e269 2865{
e11d4284
AB
2866 if (flags & MSG_CMSG_COMPAT)
2867 return -EINVAL;
2868
2869 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2870}
2871
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg(2) entry point taking the old 32-bit timespec layout.
 * MSG_CMSG_COMPAT from user space is refused with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags,
			      NULL /* 64-bit timeout */, timeout);
}
#endif
1255e269 2883
a2e27255 2884#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes.
 *
 * Indexed by the socketcall number; each entry is the number of
 * unsigned long arguments that call takes, scaled by AL().  Entry 0 is
 * a placeholder.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 ..  3 */	AL(0), AL(3), AL(3), AL(3),
	/*  4 ..  7 */	AL(2), AL(3), AL(3), AL(3),
	/*  8 .. 11 */	AL(4), AL(4), AL(4), AL(6),
	/* 12 .. 15 */	AL(6), AL(2), AL(5), AL(5),
	/* 16 .. 19 */	AL(3), AL(3), AL(4), AL(5),
	/* 20       */	AL(4)
};

#undef AL
2895
2896/*
89bddce5 2897 * System call vectors.
1da177e4
LT
2898 *
2899 * Argument checking cleaned up. Saved 20% in size.
2900 * This function doesn't need to set the kernel lock because
89bddce5 2901 * it is set by the callees.
1da177e4
LT
2902 */
2903
3e0fa65f 2904SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2905{
2950fa9d 2906 unsigned long a[AUDITSC_ARGS];
89bddce5 2907 unsigned long a0, a1;
1da177e4 2908 int err;
47379052 2909 unsigned int len;
1da177e4 2910
228e548e 2911 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2912 return -EINVAL;
c8e8cd57 2913 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2914
47379052
AV
2915 len = nargs[call];
2916 if (len > sizeof(a))
2917 return -EINVAL;
2918
1da177e4 2919 /* copy_from_user should be SMP safe. */
47379052 2920 if (copy_from_user(a, args, len))
1da177e4 2921 return -EFAULT;
3ec3b2fb 2922
2950fa9d
CG
2923 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2924 if (err)
2925 return err;
3ec3b2fb 2926
89bddce5
SH
2927 a0 = a[0];
2928 a1 = a[1];
2929
2930 switch (call) {
2931 case SYS_SOCKET:
9d6a15c3 2932 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2933 break;
2934 case SYS_BIND:
a87d35d8 2935 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2936 break;
2937 case SYS_CONNECT:
1387c2c2 2938 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2939 break;
2940 case SYS_LISTEN:
25e290ee 2941 err = __sys_listen(a0, a1);
89bddce5
SH
2942 break;
2943 case SYS_ACCEPT:
4541e805
DB
2944 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2945 (int __user *)a[2], 0);
89bddce5
SH
2946 break;
2947 case SYS_GETSOCKNAME:
2948 err =
8882a107
DB
2949 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2950 (int __user *)a[2]);
89bddce5
SH
2951 break;
2952 case SYS_GETPEERNAME:
2953 err =
b21c8f83
DB
2954 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2955 (int __user *)a[2]);
89bddce5
SH
2956 break;
2957 case SYS_SOCKETPAIR:
6debc8d8 2958 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2959 break;
2960 case SYS_SEND:
f3bf896b
DB
2961 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2962 NULL, 0);
89bddce5
SH
2963 break;
2964 case SYS_SENDTO:
211b634b
DB
2965 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2966 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2967 break;
2968 case SYS_RECV:
d27e9afc
DB
2969 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2970 NULL, NULL);
89bddce5
SH
2971 break;
2972 case SYS_RECVFROM:
7a09e1eb
DB
2973 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2974 (struct sockaddr __user *)a[4],
2975 (int __user *)a[5]);
89bddce5
SH
2976 break;
2977 case SYS_SHUTDOWN:
005a1aea 2978 err = __sys_shutdown(a0, a1);
89bddce5
SH
2979 break;
2980 case SYS_SETSOCKOPT:
cc36dca0
DB
2981 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2982 a[4]);
89bddce5
SH
2983 break;
2984 case SYS_GETSOCKOPT:
2985 err =
13a2d70e
DB
2986 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2987 (int __user *)a[4]);
89bddce5
SH
2988 break;
2989 case SYS_SENDMSG:
e1834a32
DB
2990 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2991 a[2], true);
89bddce5 2992 break;
228e548e 2993 case SYS_SENDMMSG:
e1834a32
DB
2994 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2995 a[3], true);
228e548e 2996 break;
89bddce5 2997 case SYS_RECVMSG:
e1834a32
DB
2998 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2999 a[2], true);
89bddce5 3000 break;
a2e27255 3001 case SYS_RECVMMSG:
3ca47e95 3002 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
3003 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3004 a[2], a[3],
3005 (struct __kernel_timespec __user *)a[4],
3006 NULL);
3007 else
3008 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
3009 a[2], a[3], NULL,
3010 (struct old_timespec32 __user *)a[4]);
a2e27255 3011 break;
de11defe 3012 case SYS_ACCEPT4:
4541e805
DB
3013 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
3014 (int __user *)a[2], a[3]);
aaca0bdc 3015 break;
89bddce5
SH
3016 default:
3017 err = -EINVAL;
3018 break;
1da177e4
LT
3019 }
3020 return err;
3021}
3022
89bddce5 3023#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3024
55737fda
SH
3025/**
3026 * sock_register - add a socket protocol handler
3027 * @ops: description of protocol
3028 *
1da177e4
LT
3029 * This function is called by a protocol handler that wants to
3030 * advertise its address family, and have it linked into the
e793c0f7 3031 * socket interface. The value ops->family corresponds to the
55737fda 3032 * socket system call protocol family.
1da177e4 3033 */
f0fd27d4 3034int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3035{
3036 int err;
3037
3038 if (ops->family >= NPROTO) {
3410f22e 3039 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3040 return -ENOBUFS;
3041 }
55737fda
SH
3042
3043 spin_lock(&net_family_lock);
190683a9
ED
3044 if (rcu_dereference_protected(net_families[ops->family],
3045 lockdep_is_held(&net_family_lock)))
55737fda
SH
3046 err = -EEXIST;
3047 else {
cf778b00 3048 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3049 err = 0;
3050 }
55737fda
SH
3051 spin_unlock(&net_family_lock);
3052
fe0bdbde 3053 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3054 return err;
3055}
c6d409cf 3056EXPORT_SYMBOL(sock_register);
1da177e4 3057
55737fda
SH
3058/**
3059 * sock_unregister - remove a protocol handler
3060 * @family: protocol family to remove
3061 *
1da177e4
LT
3062 * This function is called by a protocol handler that wants to
3063 * remove its address family, and have it unlinked from the
55737fda
SH
3064 * new socket creation.
3065 *
3066 * If protocol handler is a module, then it can use module reference
3067 * counts to protect against new references. If protocol handler is not
3068 * a module then it needs to provide its own protection in
3069 * the ops->create routine.
1da177e4 3070 */
f0fd27d4 3071void sock_unregister(int family)
1da177e4 3072{
f0fd27d4 3073 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3074
55737fda 3075 spin_lock(&net_family_lock);
a9b3cd7f 3076 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3077 spin_unlock(&net_family_lock);
3078
3079 synchronize_rcu();
3080
fe0bdbde 3081 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3082}
c6d409cf 3083EXPORT_SYMBOL(sock_unregister);
1da177e4 3084
bf2ae2e4
XL
3085bool sock_is_registered(int family)
3086{
66b51b0a 3087 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3088}
3089
77d76ea3 3090static int __init sock_init(void)
1da177e4 3091{
b3e19d92 3092 int err;
2ca794e5
EB
3093 /*
3094 * Initialize the network sysctl infrastructure.
3095 */
3096 err = net_sysctl_init();
3097 if (err)
3098 goto out;
b3e19d92 3099
1da177e4 3100 /*
89bddce5 3101 * Initialize skbuff SLAB cache
1da177e4
LT
3102 */
3103 skb_init();
1da177e4
LT
3104
3105 /*
89bddce5 3106 * Initialize the protocols module.
1da177e4
LT
3107 */
3108
3109 init_inodecache();
b3e19d92
NP
3110
3111 err = register_filesystem(&sock_fs_type);
3112 if (err)
47260ba9 3113 goto out;
1da177e4 3114 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3115 if (IS_ERR(sock_mnt)) {
3116 err = PTR_ERR(sock_mnt);
3117 goto out_mount;
3118 }
77d76ea3
AK
3119
3120 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3121 */
3122
3123#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3124 err = netfilter_init();
3125 if (err)
3126 goto out;
1da177e4 3127#endif
cbeb321a 3128
408eccce 3129 ptp_classifier_init();
c1f19b51 3130
b3e19d92
NP
3131out:
3132 return err;
3133
3134out_mount:
3135 unregister_filesystem(&sock_fs_type);
b3e19d92 3136 goto out;
1da177e4
LT
3137}
3138
77d76ea3
AK
3139core_initcall(sock_init); /* early initcall */
3140
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Write the "sockets: used N" line to @seq, where N is the value
 * sock_inuse_get() reports for seq->private.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", in_use);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 3148
29c49648
AB
3149/* Handle the fact that while struct ifreq has the same *layout* on
3150 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3151 * which are handled elsewhere, it still has different *size* due to
3152 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3153 * resulting in struct ifreq being 32 and 40 bytes respectively).
3154 * As a result, if the struct happens to be at the end of a page and
3155 * the next page isn't readable/writable, we get a fault. To prevent
3156 * that, copy back and forth to the full size.
3157 */
3158int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
7a229387 3159{
29c49648
AB
3160 if (in_compat_syscall()) {
3161 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
7a229387 3162
29c49648
AB
3163 memset(ifr, 0, sizeof(*ifr));
3164 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3165 return -EFAULT;
7a229387 3166
29c49648
AB
3167 if (ifrdata)
3168 *ifrdata = compat_ptr(ifr32->ifr_data);
7a229387 3169
29c49648
AB
3170 return 0;
3171 }
7a229387 3172
29c49648 3173 if (copy_from_user(ifr, arg, sizeof(*ifr)))
7a229387
AB
3174 return -EFAULT;
3175
29c49648
AB
3176 if (ifrdata)
3177 *ifrdata = ifr->ifr_data;
3178
7a229387
AB
3179 return 0;
3180}
29c49648 3181EXPORT_SYMBOL(get_user_ifreq);
7a229387 3182
29c49648 3183int put_user_ifreq(struct ifreq *ifr, void __user *arg)
7a229387 3184{
29c49648 3185 size_t size = sizeof(*ifr);
7a229387 3186
29c49648
AB
3187 if (in_compat_syscall())
3188 size = sizeof(struct compat_ifreq);
7a229387 3189
29c49648 3190 if (copy_to_user(arg, ifr, size))
7a229387
AB
3191 return -EFAULT;
3192
3a7da39d 3193 return 0;
7a229387 3194}
29c49648 3195EXPORT_SYMBOL(put_user_ifreq);
7a229387 3196
89bbfc95 3197#ifdef CONFIG_COMPAT
7a50a240
AB
3198static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3199{
7a50a240 3200 compat_uptr_t uptr32;
44c02a2c
AV
3201 struct ifreq ifr;
3202 void __user *saved;
3203 int err;
7a50a240 3204
29c49648 3205 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3206 return -EFAULT;
3207
3208 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3209 return -EFAULT;
3210
44c02a2c
AV
3211 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3212 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3213
a554bf96 3214 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3215 if (!err) {
3216 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3217 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3218 err = -EFAULT;
ccbd6a5a 3219 }
44c02a2c 3220 return err;
7a229387
AB
3221}
3222
590d4693
BH
3223/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3224static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3225 struct compat_ifreq __user *u_ifreq32)
7a229387 3226{
44c02a2c 3227 struct ifreq ifreq;
a554bf96 3228 void __user *data;
7a229387 3229
d0efb162
PC
3230 if (!is_socket_ioctl_cmd(cmd))
3231 return -ENOTTY;
a554bf96 3232 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3233 return -EFAULT;
a554bf96 3234 ifreq.ifr_data = data;
7a229387 3235
a554bf96 3236 return dev_ioctl(net, cmd, &ifreq, data, NULL);
a2116ed2
AB
3237}
3238
7a229387
AB
3239/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3240 * for some operations; this forces use of the newer bridge-utils that
25985edc 3241 * use compatible ioctls
7a229387 3242 */
6b96018b 3243static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3244{
6b96018b 3245 compat_ulong_t tmp;
7a229387 3246
6b96018b 3247 if (get_user(tmp, argp))
7a229387
AB
3248 return -EFAULT;
3249 if (tmp == BRCTL_GET_VERSION)
3250 return BRCTL_VERSION + 1;
3251 return -EINVAL;
3252}
3253
6b96018b
AB
3254static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3255 unsigned int cmd, unsigned long arg)
3256{
3257 void __user *argp = compat_ptr(arg);
3258 struct sock *sk = sock->sk;
3259 struct net *net = sock_net(sk);
7a229387 3260
6b96018b 3261 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3262 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3263
3264 switch (cmd) {
3265 case SIOCSIFBR:
3266 case SIOCGIFBR:
3267 return old_bridge_ioctl(argp);
7a50a240
AB
3268 case SIOCWANDEV:
3269 return compat_siocwandev(net, argp);
0768e170
AB
3270 case SIOCGSTAMP_OLD:
3271 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3272 if (!sock->ops->gettstamp)
3273 return -ENOIOCTLCMD;
0768e170 3274 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3275 !COMPAT_USE_64BIT_TIME);
3276
dd98d289 3277 case SIOCETHTOOL:
590d4693
BH
3278 case SIOCBONDSLAVEINFOQUERY:
3279 case SIOCBONDINFOQUERY:
a2116ed2 3280 case SIOCSHWTSTAMP:
fd468c74 3281 case SIOCGHWTSTAMP:
590d4693 3282 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3283
3284 case FIOSETOWN:
3285 case SIOCSPGRP:
3286 case FIOGETOWN:
3287 case SIOCGPGRP:
3288 case SIOCBRADDBR:
3289 case SIOCBRDELBR:
3290 case SIOCGIFVLAN:
3291 case SIOCSIFVLAN:
c62cce2c 3292 case SIOCGSKNS:
0768e170
AB
3293 case SIOCGSTAMP_NEW:
3294 case SIOCGSTAMPNS_NEW:
876f0bf9 3295 case SIOCGIFCONF:
6b96018b
AB
3296 return sock_ioctl(file, cmd, arg);
3297
3298 case SIOCGIFFLAGS:
3299 case SIOCSIFFLAGS:
709566d7
AB
3300 case SIOCGIFMAP:
3301 case SIOCSIFMAP:
6b96018b
AB
3302 case SIOCGIFMETRIC:
3303 case SIOCSIFMETRIC:
3304 case SIOCGIFMTU:
3305 case SIOCSIFMTU:
3306 case SIOCGIFMEM:
3307 case SIOCSIFMEM:
3308 case SIOCGIFHWADDR:
3309 case SIOCSIFHWADDR:
3310 case SIOCADDMULTI:
3311 case SIOCDELMULTI:
3312 case SIOCGIFINDEX:
6b96018b
AB
3313 case SIOCGIFADDR:
3314 case SIOCSIFADDR:
3315 case SIOCSIFHWBROADCAST:
6b96018b 3316 case SIOCDIFADDR:
6b96018b
AB
3317 case SIOCGIFBRDADDR:
3318 case SIOCSIFBRDADDR:
3319 case SIOCGIFDSTADDR:
3320 case SIOCSIFDSTADDR:
3321 case SIOCGIFNETMASK:
3322 case SIOCSIFNETMASK:
3323 case SIOCSIFPFLAGS:
3324 case SIOCGIFPFLAGS:
3325 case SIOCGIFTXQLEN:
3326 case SIOCSIFTXQLEN:
3327 case SIOCBRADDIF:
3328 case SIOCBRDELIF:
c6c9fee3 3329 case SIOCGIFNAME:
9177efd3
AB
3330 case SIOCSIFNAME:
3331 case SIOCGMIIPHY:
3332 case SIOCGMIIREG:
3333 case SIOCSMIIREG:
f92d4fc9
AV
3334 case SIOCBONDENSLAVE:
3335 case SIOCBONDRELEASE:
3336 case SIOCBONDSETHWADDR:
3337 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3338 case SIOCSARP:
3339 case SIOCGARP:
3340 case SIOCDARP:
c7dc504e 3341 case SIOCOUTQ:
9d7bf41f 3342 case SIOCOUTQNSD:
6b96018b 3343 case SIOCATMARK:
63ff03ab 3344 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3345 }
3346
6b96018b
AB
3347 return -ENOIOCTLCMD;
3348}
7a229387 3349
95c96174 3350static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3351 unsigned long arg)
89bbfc95
SP
3352{
3353 struct socket *sock = file->private_data;
3354 int ret = -ENOIOCTLCMD;
87de87d5
DM
3355 struct sock *sk;
3356 struct net *net;
3357
3358 sk = sock->sk;
3359 net = sock_net(sk);
89bbfc95
SP
3360
3361 if (sock->ops->compat_ioctl)
3362 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3363
87de87d5
DM
3364 if (ret == -ENOIOCTLCMD &&
3365 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3366 ret = compat_wext_handle_ioctl(net, cmd, arg);
3367
6b96018b
AB
3368 if (ret == -ENOIOCTLCMD)
3369 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3370
89bbfc95
SP
3371 return ret;
3372}
3373#endif
3374
8a3c245c
PT
3375/**
3376 * kernel_bind - bind an address to a socket (kernel space)
3377 * @sock: socket
3378 * @addr: address
3379 * @addrlen: length of address
3380 *
3381 * Returns 0 or an error.
3382 */
3383
ac5a488e
SS
3384int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3385{
3386 return sock->ops->bind(sock, addr, addrlen);
3387}
c6d409cf 3388EXPORT_SYMBOL(kernel_bind);
ac5a488e 3389
8a3c245c
PT
3390/**
3391 * kernel_listen - move socket to listening state (kernel space)
3392 * @sock: socket
3393 * @backlog: pending connections queue size
3394 *
3395 * Returns 0 or an error.
3396 */
3397
ac5a488e
SS
3398int kernel_listen(struct socket *sock, int backlog)
3399{
3400 return sock->ops->listen(sock, backlog);
3401}
c6d409cf 3402EXPORT_SYMBOL(kernel_listen);
ac5a488e 3403
8a3c245c
PT
3404/**
3405 * kernel_accept - accept a connection (kernel space)
3406 * @sock: listening socket
3407 * @newsock: new connected socket
3408 * @flags: flags
3409 *
3410 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3411 * If it fails, @newsock is guaranteed to be %NULL.
3412 * Returns 0 or an error.
3413 */
3414
ac5a488e
SS
3415int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3416{
3417 struct sock *sk = sock->sk;
3418 int err;
3419
3420 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3421 newsock);
3422 if (err < 0)
3423 goto done;
3424
cdfbabfb 3425 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3426 if (err < 0) {
3427 sock_release(*newsock);
fa8705b0 3428 *newsock = NULL;
ac5a488e
SS
3429 goto done;
3430 }
3431
3432 (*newsock)->ops = sock->ops;
1b08534e 3433 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3434
3435done:
3436 return err;
3437}
c6d409cf 3438EXPORT_SYMBOL(kernel_accept);
ac5a488e 3439
8a3c245c
PT
3440/**
3441 * kernel_connect - connect a socket (kernel space)
3442 * @sock: socket
3443 * @addr: address
3444 * @addrlen: address length
3445 * @flags: flags (O_NONBLOCK, ...)
3446 *
f1dcffcc 3447 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3448 * by default, and the only address from which datagrams are received.
3449 * For stream sockets, attempts to connect to @addr.
3450 * Returns 0 or an error code.
3451 */
3452
ac5a488e 3453int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3454 int flags)
ac5a488e
SS
3455{
3456 return sock->ops->connect(sock, addr, addrlen, flags);
3457}
c6d409cf 3458EXPORT_SYMBOL(kernel_connect);
ac5a488e 3459
8a3c245c
PT
3460/**
3461 * kernel_getsockname - get the address which the socket is bound (kernel space)
3462 * @sock: socket
3463 * @addr: address holder
3464 *
3465 * Fills the @addr pointer with the address which the socket is bound.
3466 * Returns 0 or an error code.
3467 */
3468
9b2c45d4 3469int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3470{
9b2c45d4 3471 return sock->ops->getname(sock, addr, 0);
ac5a488e 3472}
c6d409cf 3473EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3474
8a3c245c 3475/**
645f0897 3476 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3477 * @sock: socket
3478 * @addr: address holder
3479 *
3480 * Fills the @addr pointer with the address which the socket is connected.
3481 * Returns 0 or an error code.
3482 */
3483
9b2c45d4 3484int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3485{
9b2c45d4 3486 return sock->ops->getname(sock, addr, 1);
ac5a488e 3487}
c6d409cf 3488EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3489
8a3c245c
PT
3490/**
3491 * kernel_sendpage - send a &page through a socket (kernel space)
3492 * @sock: socket
3493 * @page: page
3494 * @offset: page offset
3495 * @size: total size in bytes
3496 * @flags: flags (MSG_DONTWAIT, ...)
3497 *
3498 * Returns the total amount sent in bytes or an error.
3499 */
3500
ac5a488e
SS
3501int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3502 size_t size, int flags)
3503{
7b62d31d
CL
3504 if (sock->ops->sendpage) {
3505 /* Warn in case the improper page to zero-copy send */
3506 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3507 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3508 }
ac5a488e
SS
3509 return sock_no_sendpage(sock, page, offset, size, flags);
3510}
c6d409cf 3511EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3512
8a3c245c
PT
3513/**
3514 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3515 * @sk: sock
3516 * @page: page
3517 * @offset: page offset
3518 * @size: total size in bytes
3519 * @flags: flags (MSG_DONTWAIT, ...)
3520 *
3521 * Returns the total amount sent in bytes or an error.
3522 * Caller must hold @sk.
3523 */
3524
306b13eb
TH
3525int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3526 size_t size, int flags)
3527{
3528 struct socket *sock = sk->sk_socket;
3529
3530 if (sock->ops->sendpage_locked)
3531 return sock->ops->sendpage_locked(sk, page, offset, size,
3532 flags);
3533
3534 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3535}
3536EXPORT_SYMBOL(kernel_sendpage_locked);
3537
8a3c245c 3538/**
645f0897 3539 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3540 * @sock: socket
3541 * @how: connection part
3542 *
3543 * Returns 0 or an error.
3544 */
3545
91cf45f0
TM
3546int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3547{
3548 return sock->ops->shutdown(sock, how);
3549}
91cf45f0 3550EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3551
8a3c245c
PT
3552/**
3553 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3554 * @sk: socket
3555 *
3556 * This routine returns the IP overhead imposed by a socket i.e.
3557 * the length of the underlying IP header, depending on whether
3558 * this is an IPv4 or IPv6 socket and the length from IP options turned
3559 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3560 */
8a3c245c 3561
113c3075
P
3562u32 kernel_sock_ip_overhead(struct sock *sk)
3563{
3564 struct inet_sock *inet;
3565 struct ip_options_rcu *opt;
3566 u32 overhead = 0;
113c3075
P
3567#if IS_ENABLED(CONFIG_IPV6)
3568 struct ipv6_pinfo *np;
3569 struct ipv6_txoptions *optv6 = NULL;
3570#endif /* IS_ENABLED(CONFIG_IPV6) */
3571
3572 if (!sk)
3573 return overhead;
3574
113c3075
P
3575 switch (sk->sk_family) {
3576 case AF_INET:
3577 inet = inet_sk(sk);
3578 overhead += sizeof(struct iphdr);
3579 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3580 sock_owned_by_user(sk));
113c3075
P
3581 if (opt)
3582 overhead += opt->opt.optlen;
3583 return overhead;
3584#if IS_ENABLED(CONFIG_IPV6)
3585 case AF_INET6:
3586 np = inet6_sk(sk);
3587 overhead += sizeof(struct ipv6hdr);
3588 if (np)
3589 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3590 sock_owned_by_user(sk));
113c3075
P
3591 if (optv6)
3592 overhead += (optv6->opt_flen + optv6->opt_nflen);
3593 return overhead;
3594#endif /* IS_ENABLED(CONFIG_IPV6) */
3595 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3596 return overhead;
3597 }
3598}
3599EXPORT_SYMBOL(kernel_sock_ip_overhead);