]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
net: socket: remove register_gifconf
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
d7c08826 107#include <linux/ptp_clock_kernel.h>
06021292 108
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll tunables, only built when CONFIG_NET_RX_BUSY_POLL is enabled.
 * NOTE(review): presumably exposed through sysctl elsewhere - confirm.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 113
8ae5e030
AV
114static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
115static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 116static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
117
118static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
119static __poll_t sock_poll(struct file *file,
120 struct poll_table_struct *wait);
89bddce5 121static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
122#ifdef CONFIG_COMPAT
123static long compat_sock_ioctl(struct file *file,
89bddce5 124 unsigned int cmd, unsigned long arg);
89bbfc95 125#endif
1da177e4 126static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
127static ssize_t sock_sendpage(struct file *file, struct page *page,
128 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 129static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 130 struct pipe_inode_info *pipe, size_t len,
9c55e01c 131 unsigned int flags);
542d3065
AB
132
#ifdef CONFIG_PROC_FS
/*
 * Hand an fdinfo request for a socket file to the protocol's optional
 * show_fdinfo handler. Protocols that do not provide one print nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
/* No procfs: leave the .show_fdinfo slot empty. */
#define sock_show_fdinfo NULL
#endif
1da177e4 144
1da177e4
LT
145/*
146 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
147 * in the operation structures but are done directly via the socketcall() multiplexor.
148 */
149
da7071d7 150static const struct file_operations socket_file_ops = {
1da177e4
LT
151 .owner = THIS_MODULE,
152 .llseek = no_llseek,
8ae5e030
AV
153 .read_iter = sock_read_iter,
154 .write_iter = sock_write_iter,
1da177e4
LT
155 .poll = sock_poll,
156 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
157#ifdef CONFIG_COMPAT
158 .compat_ioctl = compat_sock_ioctl,
159#endif
1da177e4 160 .mmap = sock_mmap,
1da177e4
LT
161 .release = sock_close,
162 .fasync = sock_fasync,
5274f052
JA
163 .sendpage = sock_sendpage,
164 .splice_write = generic_splice_sendpage,
9c55e01c 165 .splice_read = sock_splice_read,
b4653342 166 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
167};
168
fe0bdbde
YD
169static const char * const pf_family_names[] = {
170 [PF_UNSPEC] = "PF_UNSPEC",
171 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
172 [PF_INET] = "PF_INET",
173 [PF_AX25] = "PF_AX25",
174 [PF_IPX] = "PF_IPX",
175 [PF_APPLETALK] = "PF_APPLETALK",
176 [PF_NETROM] = "PF_NETROM",
177 [PF_BRIDGE] = "PF_BRIDGE",
178 [PF_ATMPVC] = "PF_ATMPVC",
179 [PF_X25] = "PF_X25",
180 [PF_INET6] = "PF_INET6",
181 [PF_ROSE] = "PF_ROSE",
182 [PF_DECnet] = "PF_DECnet",
183 [PF_NETBEUI] = "PF_NETBEUI",
184 [PF_SECURITY] = "PF_SECURITY",
185 [PF_KEY] = "PF_KEY",
186 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
187 [PF_PACKET] = "PF_PACKET",
188 [PF_ASH] = "PF_ASH",
189 [PF_ECONET] = "PF_ECONET",
190 [PF_ATMSVC] = "PF_ATMSVC",
191 [PF_RDS] = "PF_RDS",
192 [PF_SNA] = "PF_SNA",
193 [PF_IRDA] = "PF_IRDA",
194 [PF_PPPOX] = "PF_PPPOX",
195 [PF_WANPIPE] = "PF_WANPIPE",
196 [PF_LLC] = "PF_LLC",
197 [PF_IB] = "PF_IB",
198 [PF_MPLS] = "PF_MPLS",
199 [PF_CAN] = "PF_CAN",
200 [PF_TIPC] = "PF_TIPC",
201 [PF_BLUETOOTH] = "PF_BLUETOOTH",
202 [PF_IUCV] = "PF_IUCV",
203 [PF_RXRPC] = "PF_RXRPC",
204 [PF_ISDN] = "PF_ISDN",
205 [PF_PHONET] = "PF_PHONET",
206 [PF_IEEE802154] = "PF_IEEE802154",
207 [PF_CAIF] = "PF_CAIF",
208 [PF_ALG] = "PF_ALG",
209 [PF_NFC] = "PF_NFC",
210 [PF_VSOCK] = "PF_VSOCK",
211 [PF_KCM] = "PF_KCM",
212 [PF_QIPCRTR] = "PF_QIPCRTR",
213 [PF_SMC] = "PF_SMC",
214 [PF_XDP] = "PF_XDP",
215};
216
1da177e4
LT
217/*
218 * The protocol list. Each protocol is registered in here.
219 */
220
1da177e4 221static DEFINE_SPINLOCK(net_family_lock);
190683a9 222static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 223
1da177e4 224/*
89bddce5
SH
225 * Support routines.
226 * Move socket addresses back and forth across the kernel/user
227 * divide and look after the messy bits.
1da177e4
LT
228 */
229
1da177e4
LT
230/**
231 * move_addr_to_kernel - copy a socket address into kernel space
232 * @uaddr: Address in user space
233 * @kaddr: Address in kernel space
234 * @ulen: Length in user space
235 *
236 * The address is copied into kernel space. If the provided address is
237 * too long an error code of -EINVAL is returned. If the copy gives
238 * invalid addresses -EFAULT is returned. On a success 0 is returned.
239 */
240
43db362d 241int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 242{
230b1839 243 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 244 return -EINVAL;
89bddce5 245 if (ulen == 0)
1da177e4 246 return 0;
89bddce5 247 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 248 return -EFAULT;
3ec3b2fb 249 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
250}
251
252/**
253 * move_addr_to_user - copy an address to user space
254 * @kaddr: kernel space address
255 * @klen: length of address in kernel
256 * @uaddr: user space address
257 * @ulen: pointer to user length field
258 *
259 * The value pointed to by ulen on entry is the buffer length available.
260 * This is overwritten with the buffer space used. -EINVAL is returned
261 * if an overlong buffer is specified or a negative buffer size. -EFAULT
262 * is returned if either the buffer or the length field are not
263 * accessible.
264 * After copying the data up to the limit the user specifies, the true
265 * length of the data is written over the length limit the user
266 * specified. Zero is returned for a success.
267 */
89bddce5 268
43db362d 269static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 270 void __user *uaddr, int __user *ulen)
1da177e4
LT
271{
272 int err;
273 int len;
274
68c6beb3 275 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
276 err = get_user(len, ulen);
277 if (err)
1da177e4 278 return err;
89bddce5
SH
279 if (len > klen)
280 len = klen;
68c6beb3 281 if (len < 0)
1da177e4 282 return -EINVAL;
89bddce5 283 if (len) {
d6fe3945
SG
284 if (audit_sockaddr(klen, kaddr))
285 return -ENOMEM;
89bddce5 286 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
287 return -EFAULT;
288 }
289 /*
89bddce5
SH
290 * "fromlen shall refer to the value before truncation.."
291 * 1003.1g
1da177e4
LT
292 */
293 return __put_user(klen, ulen);
294}
295
08009a76 296static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
297
298static struct inode *sock_alloc_inode(struct super_block *sb)
299{
300 struct socket_alloc *ei;
89bddce5 301
e94b1766 302 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
303 if (!ei)
304 return NULL;
333f7909
AV
305 init_waitqueue_head(&ei->socket.wq.wait);
306 ei->socket.wq.fasync_list = NULL;
307 ei->socket.wq.flags = 0;
89bddce5 308
1da177e4
LT
309 ei->socket.state = SS_UNCONNECTED;
310 ei->socket.flags = 0;
311 ei->socket.ops = NULL;
312 ei->socket.sk = NULL;
313 ei->socket.file = NULL;
1da177e4
LT
314
315 return &ei->vfs_inode;
316}
317
6d7855c5 318static void sock_free_inode(struct inode *inode)
1da177e4 319{
43815482
ED
320 struct socket_alloc *ei;
321
322 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 323 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
324}
325
51cc5068 326static void init_once(void *foo)
1da177e4 327{
89bddce5 328 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 329
a35afb83 330 inode_init_once(&ei->vfs_inode);
1da177e4 331}
89bddce5 332
1e911632 333static void init_inodecache(void)
1da177e4
LT
334{
335 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
336 sizeof(struct socket_alloc),
337 0,
338 (SLAB_HWCACHE_ALIGN |
339 SLAB_RECLAIM_ACCOUNT |
5d097056 340 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 341 init_once);
1e911632 342 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
343}
344
b87221de 345static const struct super_operations sockfs_ops = {
c6d409cf 346 .alloc_inode = sock_alloc_inode,
6d7855c5 347 .free_inode = sock_free_inode,
c6d409cf 348 .statfs = simple_statfs,
1da177e4
LT
349};
350
c23fbb6b
ED
351/*
352 * sockfs_dname() is called from d_path().
353 */
354static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
355{
356 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 357 d_inode(dentry)->i_ino);
c23fbb6b
ED
358}
359
3ba13d17 360static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 361 .d_dname = sockfs_dname,
1da177e4
LT
362};
363
bba0bd31
AG
364static int sockfs_xattr_get(const struct xattr_handler *handler,
365 struct dentry *dentry, struct inode *inode,
366 const char *suffix, void *value, size_t size)
367{
368 if (value) {
369 if (dentry->d_name.len + 1 > size)
370 return -ERANGE;
371 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
372 }
373 return dentry->d_name.len + 1;
374}
375
376#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
377#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
378#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
379
380static const struct xattr_handler sockfs_xattr_handler = {
381 .name = XATTR_NAME_SOCKPROTONAME,
382 .get = sockfs_xattr_get,
383};
384
4a590153 385static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 386 struct user_namespace *mnt_userns,
4a590153
AG
387 struct dentry *dentry, struct inode *inode,
388 const char *suffix, const void *value,
389 size_t size, int flags)
390{
391 /* Handled by LSM. */
392 return -EAGAIN;
393}
394
395static const struct xattr_handler sockfs_security_xattr_handler = {
396 .prefix = XATTR_SECURITY_PREFIX,
397 .set = sockfs_security_xattr_set,
398};
399
bba0bd31
AG
400static const struct xattr_handler *sockfs_xattr_handlers[] = {
401 &sockfs_xattr_handler,
4a590153 402 &sockfs_security_xattr_handler,
bba0bd31
AG
403 NULL
404};
405
fba9be49 406static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 407{
fba9be49
DH
408 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
409 if (!ctx)
410 return -ENOMEM;
411 ctx->ops = &sockfs_ops;
412 ctx->dops = &sockfs_dentry_operations;
413 ctx->xattr = sockfs_xattr_handlers;
414 return 0;
c74a1cbb
AV
415}
416
417static struct vfsmount *sock_mnt __read_mostly;
418
419static struct file_system_type sock_fs_type = {
420 .name = "sockfs",
fba9be49 421 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
422 .kill_sb = kill_anon_super,
423};
424
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we return
 *	from this function. We rely on the fact that we no longer refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
441
8a3c245c
PT
442/**
443 * sock_alloc_file - Bind a &socket to a &file
444 * @sock: socket
445 * @flags: file status flags
446 * @dname: protocol name
447 *
448 * Returns the &file bound with @sock, implicitly storing it
449 * in sock->file. If dname is %NULL, sets to "".
450 * On failure the return is a ERR pointer (see linux/err.h).
451 * This function uses GFP_KERNEL internally.
452 */
453
aab174f0 454struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 455{
7cbe66b6 456 struct file *file;
1da177e4 457
d93aa9d8
AV
458 if (!dname)
459 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 460
d93aa9d8
AV
461 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
462 O_RDWR | (flags & O_NONBLOCK),
463 &socket_file_ops);
b5ffe634 464 if (IS_ERR(file)) {
8e1611e2 465 sock_release(sock);
39b65252 466 return file;
cc3808f8
AV
467 }
468
469 sock->file = file;
39d8c1b6 470 file->private_data = sock;
d8e464ec 471 stream_open(SOCK_INODE(sock), file);
28407630 472 return file;
39d8c1b6 473}
56b31d1c 474EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 475
56b31d1c 476static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
477{
478 struct file *newfile;
28407630 479 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
480 if (unlikely(fd < 0)) {
481 sock_release(sock);
28407630 482 return fd;
ce4bb04c 483 }
39d8c1b6 484
aab174f0 485 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 486 if (!IS_ERR(newfile)) {
39d8c1b6 487 fd_install(fd, newfile);
28407630
AV
488 return fd;
489 }
7cbe66b6 490
28407630
AV
491 put_unused_fd(fd);
492 return PTR_ERR(newfile);
1da177e4
LT
493}
494
8a3c245c
PT
495/**
496 * sock_from_file - Return the &socket bounded to @file.
497 * @file: file
8a3c245c 498 *
dba4a925 499 * On failure returns %NULL.
8a3c245c
PT
500 */
501
dba4a925 502struct socket *sock_from_file(struct file *file)
6cb153ca 503{
6cb153ca
BL
504 if (file->f_op == &socket_file_ops)
505 return file->private_data; /* set in sock_map_fd */
506
23bb80d2 507 return NULL;
6cb153ca 508}
406a3c63 509EXPORT_SYMBOL(sock_from_file);
6cb153ca 510
1da177e4 511/**
c6d409cf 512 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
513 * @fd: file handle
514 * @err: pointer to an error code return
515 *
516 * The file handle passed in is locked and the socket it is bound
241c4667 517 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
518 * with a negative errno code and NULL is returned. The function checks
519 * for both invalid handles and passing a handle which is not a socket.
520 *
521 * On a success the socket object pointer is returned.
522 */
523
524struct socket *sockfd_lookup(int fd, int *err)
525{
526 struct file *file;
1da177e4
LT
527 struct socket *sock;
528
89bddce5
SH
529 file = fget(fd);
530 if (!file) {
1da177e4
LT
531 *err = -EBADF;
532 return NULL;
533 }
89bddce5 534
dba4a925
FR
535 sock = sock_from_file(file);
536 if (!sock) {
537 *err = -ENOTSOCK;
1da177e4 538 fput(file);
dba4a925 539 }
6cb153ca
BL
540 return sock;
541}
c6d409cf 542EXPORT_SYMBOL(sockfd_lookup);
1da177e4 543
6cb153ca
BL
544static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
545{
00e188ef 546 struct fd f = fdget(fd);
6cb153ca
BL
547 struct socket *sock;
548
3672558c 549 *err = -EBADF;
00e188ef 550 if (f.file) {
dba4a925 551 sock = sock_from_file(f.file);
00e188ef 552 if (likely(sock)) {
ce787a5a 553 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 554 return sock;
00e188ef 555 }
dba4a925 556 *err = -ENOTSOCK;
00e188ef 557 fdput(f);
1da177e4 558 }
6cb153ca 559 return NULL;
1da177e4
LT
560}
561
600e1779
MY
562static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
563 size_t size)
564{
565 ssize_t len;
566 ssize_t used = 0;
567
c5ef6035 568 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
569 if (len < 0)
570 return len;
571 used += len;
572 if (buffer) {
573 if (size < used)
574 return -ERANGE;
575 buffer += len;
576 }
577
578 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
579 used += len;
580 if (buffer) {
581 if (size < used)
582 return -ERANGE;
583 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
584 buffer += len;
585 }
586
587 return used;
588}
589
549c7297
CB
590static int sockfs_setattr(struct user_namespace *mnt_userns,
591 struct dentry *dentry, struct iattr *iattr)
86741ec2 592{
549c7297 593 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 594
e1a3a60a 595 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
596 struct socket *sock = SOCKET_I(d_inode(dentry));
597
6d8c50dc
CW
598 if (sock->sk)
599 sock->sk->sk_uid = iattr->ia_uid;
600 else
601 err = -ENOENT;
86741ec2
LC
602 }
603
604 return err;
605}
606
600e1779 607static const struct inode_operations sockfs_inode_ops = {
600e1779 608 .listxattr = sockfs_listxattr,
86741ec2 609 .setattr = sockfs_setattr,
600e1779
MY
610};
611
1da177e4 612/**
8a3c245c 613 * sock_alloc - allocate a socket
89bddce5 614 *
1da177e4
LT
615 * Allocate a new inode and socket object. The two are bound together
616 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 617 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
618 */
619
f4a00aac 620struct socket *sock_alloc(void)
1da177e4 621{
89bddce5
SH
622 struct inode *inode;
623 struct socket *sock;
1da177e4 624
a209dfc7 625 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
626 if (!inode)
627 return NULL;
628
629 sock = SOCKET_I(inode);
630
85fe4025 631 inode->i_ino = get_next_ino();
89bddce5 632 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
633 inode->i_uid = current_fsuid();
634 inode->i_gid = current_fsgid();
600e1779 635 inode->i_op = &sockfs_inode_ops;
1da177e4 636
1da177e4
LT
637 return sock;
638}
f4a00aac 639EXPORT_SYMBOL(sock_alloc);
1da177e4 640
6d8c50dc 641static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
642{
643 if (sock->ops) {
644 struct module *owner = sock->ops->owner;
645
6d8c50dc
CW
646 if (inode)
647 inode_lock(inode);
1da177e4 648 sock->ops->release(sock);
ff7b11aa 649 sock->sk = NULL;
6d8c50dc
CW
650 if (inode)
651 inode_unlock(inode);
1da177e4
LT
652 sock->ops = NULL;
653 module_put(owner);
654 }
655
333f7909 656 if (sock->wq.fasync_list)
3410f22e 657 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 658
1da177e4
LT
659 if (!sock->file) {
660 iput(SOCK_INODE(sock));
661 return;
662 }
89bddce5 663 sock->file = NULL;
1da177e4 664}
6d8c50dc 665
9a8ad9ac
AL
666/**
667 * sock_release - close a socket
668 * @sock: socket to close
669 *
670 * The socket is released from the protocol stack if it has a release
671 * callback, and the inode is then released if the socket is bound to
672 * an inode not a file.
673 */
6d8c50dc
CW
674void sock_release(struct socket *sock)
675{
676 __sock_release(sock, NULL);
677}
c6d409cf 678EXPORT_SYMBOL(sock_release);
1da177e4 679
c14ac945 680void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 681{
140c55d4
ED
682 u8 flags = *tx_flags;
683
c14ac945 684 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
685 flags |= SKBTX_HW_TSTAMP;
686
c14ac945 687 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
688 flags |= SKBTX_SW_TSTAMP;
689
c14ac945 690 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
691 flags |= SKBTX_SCHED_TSTAMP;
692
140c55d4 693 *tx_flags = flags;
20d49473 694}
67cc0d40 695EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 696
8c3c447b
PA
697INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
698 size_t));
a648a592
PA
699INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
700 size_t));
d8725c86 701static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 702{
a648a592
PA
703 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
704 inet_sendmsg, sock, msg,
705 msg_data_left(msg));
d8725c86
AV
706 BUG_ON(ret == -EIOCBQUEUED);
707 return ret;
1da177e4
LT
708}
709
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Asks the LSM whether the send is allowed and, if so, hands @msg to the
 *	protocol. Returns the number of bytes sent, or an error code (the
 *	LSM's error when the send is refused).
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4 726
8a3c245c
PT
727/**
728 * kernel_sendmsg - send a message through @sock (kernel-space)
729 * @sock: socket
730 * @msg: message header
731 * @vec: kernel vec
732 * @num: vec array length
733 * @size: total message data size
734 *
735 * Builds the message data with @vec and sends it through @sock.
736 * Returns the number of bytes sent, or an error code.
737 */
738
1da177e4
LT
739int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
740 struct kvec *vec, size_t num, size_t size)
741{
aa563d7b 742 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 743 return sock_sendmsg(sock, msg);
1da177e4 744}
c6d409cf 745EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 746
8a3c245c
PT
747/**
748 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
749 * @sk: sock
750 * @msg: message header
751 * @vec: output s/g array
752 * @num: output s/g array length
753 * @size: total message data size
754 *
755 * Builds the message data with @vec and sends it through @sock.
756 * Returns the number of bytes sent, or an error code.
757 * Caller must hold @sk.
758 */
759
306b13eb
TH
760int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
761 struct kvec *vec, size_t num, size_t size)
762{
763 struct socket *sock = sk->sk_socket;
764
765 if (!sock->ops->sendmsg_locked)
db5980d8 766 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 767
aa563d7b 768 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
769
770 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
771}
772EXPORT_SYMBOL(kernel_sendmsg_locked);
773
8605330a
SHY
774static bool skb_is_err_queue(const struct sk_buff *skb)
775{
776 /* pkt_type of skbs enqueued on the error queue are set to
777 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
778 * in recvmsg, since skbs received on a local socket will never
779 * have a pkt_type of PACKET_OUTGOING.
780 */
781 return skb->pkt_type == PACKET_OUTGOING;
782}
783
b50a5c70
ML
784/* On transmit, software and hardware timestamps are returned independently.
785 * As the two skb clones share the hardware timestamp, which may be updated
786 * before the software timestamp is received, a hardware TX timestamp may be
787 * returned only if there is no software TX timestamp. Ignore false software
788 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 789 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
790 * hardware timestamp.
791 */
792static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
793{
794 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
795}
796
aad9c8c4
ML
797static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
798{
799 struct scm_ts_pktinfo ts_pktinfo;
800 struct net_device *orig_dev;
801
802 if (!skb_mac_header_was_set(skb))
803 return;
804
805 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
806
807 rcu_read_lock();
808 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
809 if (orig_dev)
810 ts_pktinfo.if_index = orig_dev->ifindex;
811 rcu_read_unlock();
812
813 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
814 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
815 sizeof(ts_pktinfo), &ts_pktinfo);
816}
817
92f37fd2
ED
818/*
819 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
820 */
821void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
822 struct sk_buff *skb)
823{
20d49473 824 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 825 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
826 struct scm_timestamping_internal tss;
827
b50a5c70 828 int empty = 1, false_tstamp = 0;
20d49473
PO
829 struct skb_shared_hwtstamps *shhwtstamps =
830 skb_hwtstamps(skb);
831
832 /* Race occurred between timestamp enabling and packet
833 receiving. Fill in the current time for now. */
b50a5c70 834 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 835 __net_timestamp(skb);
b50a5c70
ML
836 false_tstamp = 1;
837 }
20d49473
PO
838
839 if (need_software_tstamp) {
840 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
841 if (new_tstamp) {
842 struct __kernel_sock_timeval tv;
843
844 skb_get_new_timestamp(skb, &tv);
845 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
846 sizeof(tv), &tv);
847 } else {
848 struct __kernel_old_timeval tv;
849
850 skb_get_timestamp(skb, &tv);
851 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
852 sizeof(tv), &tv);
853 }
20d49473 854 } else {
887feae3
DD
855 if (new_tstamp) {
856 struct __kernel_timespec ts;
857
858 skb_get_new_timestampns(skb, &ts);
859 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
860 sizeof(ts), &ts);
861 } else {
df1b4ba9 862 struct __kernel_old_timespec ts;
887feae3
DD
863
864 skb_get_timestampns(skb, &ts);
865 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
866 sizeof(ts), &ts);
867 }
20d49473
PO
868 }
869 }
870
f24b9be5 871 memset(&tss, 0, sizeof(tss));
c199105d 872 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 873 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 874 empty = 0;
4d276eb6 875 if (shhwtstamps &&
b9f40e21 876 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826
YL
877 !skb_is_swtx_tstamp(skb, false_tstamp)) {
878 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
879 ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
880
881 if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
882 tss.ts + 2)) {
883 empty = 0;
884
885 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
886 !skb_is_err_queue(skb))
887 put_ts_pktinfo(msg, skb);
888 }
aad9c8c4 889 }
1c885808 890 if (!empty) {
9718475e
DD
891 if (sock_flag(sk, SOCK_TSTAMP_NEW))
892 put_cmsg_scm_timestamping64(msg, &tss);
893 else
894 put_cmsg_scm_timestamping(msg, &tss);
1c885808 895
8605330a 896 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 897 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
898 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
899 skb->len, skb->data);
900 }
92f37fd2 901}
7c81fd8b
ACM
902EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
903
6e3e939f
JB
904void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
905 struct sk_buff *skb)
906{
907 int ack;
908
909 if (!sock_flag(sk, SOCK_WIFI_STATUS))
910 return;
911 if (!skb->wifi_acked_valid)
912 return;
913
914 ack = skb->wifi_acked;
915
916 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
917}
918EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
919
11165f14 920static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
921 struct sk_buff *skb)
3b885787 922{
744d5a3e 923 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 924 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 925 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
926}
927
/*
 * Emit both the timestamp and the drop-count control messages for @skb.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 935
8c3c447b 936INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
937 size_t, int));
938INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
939 size_t, int));
1b784140 940static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 941 int flags)
1da177e4 942{
a648a592
PA
943 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
944 inet_recvmsg, sock, msg, msg_data_left(msg),
945 flags);
1da177e4
LT
946}
947
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Asks the LSM whether the receive is allowed and, if so, receives @msg
 *	from the protocol. Returns the total number of bytes received, or an
 *	error (the LSM's error when the receive is refused).
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 964
c1249c0a 965/**
8a3c245c
PT
966 * kernel_recvmsg - Receive a message from a socket (kernel space)
967 * @sock: The socket to receive the message from
968 * @msg: Received message
969 * @vec: Input s/g array for message data
970 * @num: Size of input s/g array
971 * @size: Number of bytes to read
972 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 973 *
8a3c245c
PT
974 * On return the msg structure contains the scatter/gather array passed in the
975 * vec argument. The array is modified so that it consists of the unfilled
976 * portion of the original array.
c1249c0a 977 *
8a3c245c 978 * The returned value is the total number of bytes received, or an error.
c1249c0a 979 */
8a3c245c 980
89bddce5
SH
981int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
982 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 983{
1f466e1f 984 msg->msg_control_is_user = false;
aa563d7b 985 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 986 return sock_recvmsg(sock, msg, flags);
1da177e4 987}
c6d409cf 988EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 989
ce1d4d3e
CH
990static ssize_t sock_sendpage(struct file *file, struct page *page,
991 int offset, size_t size, loff_t *ppos, int more)
1da177e4 992{
1da177e4
LT
993 struct socket *sock;
994 int flags;
995
ce1d4d3e
CH
996 sock = file->private_data;
997
35f9c09f
ED
998 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
999 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1000 flags |= more;
ce1d4d3e 1001
e6949583 1002 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 1003}
1da177e4 1004
9c55e01c 1005static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1006 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1007 unsigned int flags)
1008{
1009 struct socket *sock = file->private_data;
1010
997b37da 1011 if (unlikely(!sock->ops->splice_read))
95506588 1012 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1013
9c55e01c
JA
1014 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1015}
1016
8ae5e030 1017static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1018{
6d652330
AV
1019 struct file *file = iocb->ki_filp;
1020 struct socket *sock = file->private_data;
0345f931 1021 struct msghdr msg = {.msg_iter = *to,
1022 .msg_iocb = iocb};
8ae5e030 1023 ssize_t res;
ce1d4d3e 1024
ebfcd895 1025 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1026 msg.msg_flags = MSG_DONTWAIT;
1027
1028 if (iocb->ki_pos != 0)
1da177e4 1029 return -ESPIPE;
027445c3 1030
66ee59af 1031 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1032 return 0;
1033
2da62906 1034 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1035 *to = msg.msg_iter;
1036 return res;
1da177e4
LT
1037}
1038
8ae5e030 1039static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1040{
6d652330
AV
1041 struct file *file = iocb->ki_filp;
1042 struct socket *sock = file->private_data;
0345f931 1043 struct msghdr msg = {.msg_iter = *from,
1044 .msg_iocb = iocb};
8ae5e030 1045 ssize_t res;
1da177e4 1046
8ae5e030 1047 if (iocb->ki_pos != 0)
ce1d4d3e 1048 return -ESPIPE;
027445c3 1049
ebfcd895 1050 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1051 msg.msg_flags = MSG_DONTWAIT;
1052
6d652330
AV
1053 if (sock->type == SOCK_SEQPACKET)
1054 msg.msg_flags |= MSG_EOR;
1055
d8725c86 1056 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1057 *from = msg.msg_iter;
1058 return res;
1da177e4
LT
1059}
1060
1da177e4
LT
1061/*
1062 * Atomic setting of ioctl hooks to avoid race
1063 * with module unload.
1064 */
1065
4a3e2f71 1066static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1067static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1068
881d966b 1069void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1070{
4a3e2f71 1071 mutex_lock(&br_ioctl_mutex);
1da177e4 1072 br_ioctl_hook = hook;
4a3e2f71 1073 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1074}
1075EXPORT_SYMBOL(brioctl_set);
1076
4a3e2f71 1077static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1078static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1079
881d966b 1080void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1081{
4a3e2f71 1082 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1083 vlan_ioctl_hook = hook;
4a3e2f71 1084 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1085}
1086EXPORT_SYMBOL(vlan_ioctl_set);
1087
6b96018b 1088static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1089 unsigned int cmd, unsigned long arg)
6b96018b
AB
1090{
1091 int err;
1092 void __user *argp = (void __user *)arg;
1093
1094 err = sock->ops->ioctl(sock, cmd, arg);
1095
1096 /*
1097 * If this ioctl is unknown try to hand it down
1098 * to the NIC driver.
1099 */
36fd633e
AV
1100 if (err != -ENOIOCTLCMD)
1101 return err;
6b96018b 1102
36fd633e
AV
1103 if (cmd == SIOCGIFCONF) {
1104 struct ifconf ifc;
1105 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1106 return -EFAULT;
1107 rtnl_lock();
1108 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1109 rtnl_unlock();
1110 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1111 err = -EFAULT;
44c02a2c
AV
1112 } else {
1113 struct ifreq ifr;
1114 bool need_copyout;
63ff03ab 1115 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1116 return -EFAULT;
1117 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1118 if (!err && need_copyout)
63ff03ab 1119 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1120 return -EFAULT;
36fd633e 1121 }
6b96018b
AB
1122 return err;
1123}
1124
1da177e4
LT
1125/*
1126 * With an ioctl, arg may well be a user mode pointer, but we don't know
1127 * what to do with it - that's up to the protocol still.
1128 */
1129
1130static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1131{
1132 struct socket *sock;
881d966b 1133 struct sock *sk;
1da177e4
LT
1134 void __user *argp = (void __user *)arg;
1135 int pid, err;
881d966b 1136 struct net *net;
1da177e4 1137
b69aee04 1138 sock = file->private_data;
881d966b 1139 sk = sock->sk;
3b1e0a65 1140 net = sock_net(sk);
44c02a2c
AV
1141 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1142 struct ifreq ifr;
1143 bool need_copyout;
1144 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1145 return -EFAULT;
1146 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1147 if (!err && need_copyout)
1148 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1149 return -EFAULT;
1da177e4 1150 } else
3d23e349 1151#ifdef CONFIG_WEXT_CORE
1da177e4 1152 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1153 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1154 } else
3d23e349 1155#endif
89bddce5 1156 switch (cmd) {
1da177e4
LT
1157 case FIOSETOWN:
1158 case SIOCSPGRP:
1159 err = -EFAULT;
1160 if (get_user(pid, (int __user *)argp))
1161 break;
393cc3f5 1162 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1163 break;
1164 case FIOGETOWN:
1165 case SIOCGPGRP:
609d7fa9 1166 err = put_user(f_getown(sock->file),
89bddce5 1167 (int __user *)argp);
1da177e4
LT
1168 break;
1169 case SIOCGIFBR:
1170 case SIOCSIFBR:
1171 case SIOCBRADDBR:
1172 case SIOCBRDELBR:
1173 err = -ENOPKG;
1174 if (!br_ioctl_hook)
1175 request_module("bridge");
1176
4a3e2f71 1177 mutex_lock(&br_ioctl_mutex);
89bddce5 1178 if (br_ioctl_hook)
881d966b 1179 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1180 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1181 break;
1182 case SIOCGIFVLAN:
1183 case SIOCSIFVLAN:
1184 err = -ENOPKG;
1185 if (!vlan_ioctl_hook)
1186 request_module("8021q");
1187
4a3e2f71 1188 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1189 if (vlan_ioctl_hook)
881d966b 1190 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1191 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1192 break;
c62cce2c
AV
1193 case SIOCGSKNS:
1194 err = -EPERM;
1195 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1196 break;
1197
1198 err = open_related_ns(&net->ns, get_net_ns);
1199 break;
0768e170
AB
1200 case SIOCGSTAMP_OLD:
1201 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1202 if (!sock->ops->gettstamp) {
1203 err = -ENOIOCTLCMD;
1204 break;
1205 }
1206 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1207 cmd == SIOCGSTAMP_OLD,
1208 !IS_ENABLED(CONFIG_64BIT));
60747828 1209 break;
0768e170
AB
1210 case SIOCGSTAMP_NEW:
1211 case SIOCGSTAMPNS_NEW:
1212 if (!sock->ops->gettstamp) {
1213 err = -ENOIOCTLCMD;
1214 break;
1215 }
1216 err = sock->ops->gettstamp(sock, argp,
1217 cmd == SIOCGSTAMP_NEW,
1218 false);
c7cbdbf2 1219 break;
1da177e4 1220 default:
63ff03ab 1221 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1222 break;
89bddce5 1223 }
1da177e4
LT
1224 return err;
1225}
1226
8a3c245c
PT
1227/**
1228 * sock_create_lite - creates a socket
1229 * @family: protocol family (AF_INET, ...)
1230 * @type: communication type (SOCK_STREAM, ...)
1231 * @protocol: protocol (0, ...)
1232 * @res: new socket
1233 *
1234 * Creates a new socket and assigns it to @res, passing through LSM.
1235 * The new socket initialization is not complete, see kernel_accept().
1236 * Returns 0 or an error. On failure @res is set to %NULL.
1237 * This function internally uses GFP_KERNEL.
1238 */
1239
1da177e4
LT
1240int sock_create_lite(int family, int type, int protocol, struct socket **res)
1241{
1242 int err;
1243 struct socket *sock = NULL;
89bddce5 1244
1da177e4
LT
1245 err = security_socket_create(family, type, protocol, 1);
1246 if (err)
1247 goto out;
1248
1249 sock = sock_alloc();
1250 if (!sock) {
1251 err = -ENOMEM;
1252 goto out;
1253 }
1254
1da177e4 1255 sock->type = type;
7420ed23
VY
1256 err = security_socket_post_create(sock, family, type, protocol, 1);
1257 if (err)
1258 goto out_release;
1259
1da177e4
LT
1260out:
1261 *res = sock;
1262 return err;
7420ed23
VY
1263out_release:
1264 sock_release(sock);
1265 sock = NULL;
1266 goto out;
1da177e4 1267}
c6d409cf 1268EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1269
1270/* No kernel lock held - perfect */
ade994f4 1271static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1272{
3cafb376 1273 struct socket *sock = file->private_data;
a331de3b 1274 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1275
e88958e6
CH
1276 if (!sock->ops->poll)
1277 return 0;
f641f13b 1278
a331de3b
CH
1279 if (sk_can_busy_loop(sock->sk)) {
1280 /* poll once if requested by the syscall */
1281 if (events & POLL_BUSY_LOOP)
1282 sk_busy_loop(sock->sk, 1);
1283
1284 /* if this socket can poll_ll, tell the system call */
1285 flag = POLL_BUSY_LOOP;
1286 }
1287
1288 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1289}
1290
89bddce5 1291static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1292{
b69aee04 1293 struct socket *sock = file->private_data;
1da177e4
LT
1294
1295 return sock->ops->mmap(file, sock, vma);
1296}
1297
/*
 * ->release() for sockets: release the socket that SOCKET_I() yields for
 * @inode.  @filp is not used.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1303
1304/*
1305 * Update the socket async list
1306 *
1307 * Fasync_list locking strategy.
1308 *
1309 * 1. fasync_list is modified only under process context socket lock
1310 * i.e. under semaphore.
1311 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1312 * or under socket lock
1da177e4
LT
1313 */
1314
1315static int sock_fasync(int fd, struct file *filp, int on)
1316{
989a2979
ED
1317 struct socket *sock = filp->private_data;
1318 struct sock *sk = sock->sk;
333f7909 1319 struct socket_wq *wq = &sock->wq;
1da177e4 1320
989a2979 1321 if (sk == NULL)
1da177e4 1322 return -EINVAL;
1da177e4
LT
1323
1324 lock_sock(sk);
eaefd110 1325 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1326
eaefd110 1327 if (!wq->fasync_list)
989a2979
ED
1328 sock_reset_flag(sk, SOCK_FASYNC);
1329 else
bcdce719 1330 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1331
989a2979 1332 release_sock(sk);
1da177e4
LT
1333 return 0;
1334}
1335
ceb5d58b 1336/* This function may be called only under rcu_lock */
1da177e4 1337
ceb5d58b 1338int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1339{
ceb5d58b 1340 if (!wq || !wq->fasync_list)
1da177e4 1341 return -1;
ceb5d58b 1342
89bddce5 1343 switch (how) {
8d8ad9d7 1344 case SOCK_WAKE_WAITD:
ceb5d58b 1345 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1346 break;
1347 goto call_kill;
8d8ad9d7 1348 case SOCK_WAKE_SPACE:
ceb5d58b 1349 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1350 break;
7c7ab580 1351 fallthrough;
8d8ad9d7 1352 case SOCK_WAKE_IO:
89bddce5 1353call_kill:
43815482 1354 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1355 break;
8d8ad9d7 1356 case SOCK_WAKE_URG:
43815482 1357 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1358 }
ceb5d58b 1359
1da177e4
LT
1360 return 0;
1361}
c6d409cf 1362EXPORT_SYMBOL(sock_wake_async);
1da177e4 1363
8a3c245c
PT
1364/**
1365 * __sock_create - creates a socket
1366 * @net: net namespace
1367 * @family: protocol family (AF_INET, ...)
1368 * @type: communication type (SOCK_STREAM, ...)
1369 * @protocol: protocol (0, ...)
1370 * @res: new socket
1371 * @kern: boolean for kernel space sockets
1372 *
1373 * Creates a new socket and assigns it to @res, passing through LSM.
1374 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1375 * be set to true if the socket resides in kernel space.
1376 * This function internally uses GFP_KERNEL.
1377 */
1378
721db93a 1379int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1380 struct socket **res, int kern)
1da177e4
LT
1381{
1382 int err;
1383 struct socket *sock;
55737fda 1384 const struct net_proto_family *pf;
1da177e4
LT
1385
1386 /*
89bddce5 1387 * Check protocol is in range
1da177e4
LT
1388 */
1389 if (family < 0 || family >= NPROTO)
1390 return -EAFNOSUPPORT;
1391 if (type < 0 || type >= SOCK_MAX)
1392 return -EINVAL;
1393
1394 /* Compatibility.
1395
1396 This uglymoron is moved from INET layer to here to avoid
1397 deadlock in module load.
1398 */
1399 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1400 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1401 current->comm);
1da177e4
LT
1402 family = PF_PACKET;
1403 }
1404
1405 err = security_socket_create(family, type, protocol, kern);
1406 if (err)
1407 return err;
89bddce5 1408
55737fda
SH
1409 /*
1410 * Allocate the socket and allow the family to set things up. if
1411 * the protocol is 0, the family is instructed to select an appropriate
1412 * default.
1413 */
1414 sock = sock_alloc();
1415 if (!sock) {
e87cc472 1416 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1417 return -ENFILE; /* Not exactly a match, but its the
1418 closest posix thing */
1419 }
1420
1421 sock->type = type;
1422
95a5afca 1423#ifdef CONFIG_MODULES
89bddce5
SH
1424 /* Attempt to load a protocol module if the find failed.
1425 *
1426 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1427 * requested real, full-featured networking support upon configuration.
1428 * Otherwise module support will break!
1429 */
190683a9 1430 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1431 request_module("net-pf-%d", family);
1da177e4
LT
1432#endif
1433
55737fda
SH
1434 rcu_read_lock();
1435 pf = rcu_dereference(net_families[family]);
1436 err = -EAFNOSUPPORT;
1437 if (!pf)
1438 goto out_release;
1da177e4
LT
1439
1440 /*
1441 * We will call the ->create function, that possibly is in a loadable
1442 * module, so we have to bump that loadable module refcnt first.
1443 */
55737fda 1444 if (!try_module_get(pf->owner))
1da177e4
LT
1445 goto out_release;
1446
55737fda
SH
1447 /* Now protected by module ref count */
1448 rcu_read_unlock();
1449
3f378b68 1450 err = pf->create(net, sock, protocol, kern);
55737fda 1451 if (err < 0)
1da177e4 1452 goto out_module_put;
a79af59e 1453
1da177e4
LT
1454 /*
1455 * Now to bump the refcnt of the [loadable] module that owns this
1456 * socket at sock_release time we decrement its refcnt.
1457 */
55737fda
SH
1458 if (!try_module_get(sock->ops->owner))
1459 goto out_module_busy;
1460
1da177e4
LT
1461 /*
1462 * Now that we're done with the ->create function, the [loadable]
1463 * module can have its refcnt decremented
1464 */
55737fda 1465 module_put(pf->owner);
7420ed23
VY
1466 err = security_socket_post_create(sock, family, type, protocol, kern);
1467 if (err)
3b185525 1468 goto out_sock_release;
55737fda 1469 *res = sock;
1da177e4 1470
55737fda
SH
1471 return 0;
1472
1473out_module_busy:
1474 err = -EAFNOSUPPORT;
1da177e4 1475out_module_put:
55737fda
SH
1476 sock->ops = NULL;
1477 module_put(pf->owner);
1478out_sock_release:
1da177e4 1479 sock_release(sock);
55737fda
SH
1480 return err;
1481
1482out_release:
1483 rcu_read_unlock();
1484 goto out_sock_release;
1da177e4 1485}
721db93a 1486EXPORT_SYMBOL(__sock_create);
1da177e4 1487
8a3c245c
PT
1488/**
1489 * sock_create - creates a socket
1490 * @family: protocol family (AF_INET, ...)
1491 * @type: communication type (SOCK_STREAM, ...)
1492 * @protocol: protocol (0, ...)
1493 * @res: new socket
1494 *
1495 * A wrapper around __sock_create().
1496 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1497 */
1498
1da177e4
LT
1499int sock_create(int family, int type, int protocol, struct socket **res)
1500{
1b8d7ae4 1501 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1502}
c6d409cf 1503EXPORT_SYMBOL(sock_create);
1da177e4 1504
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes 1 for the kern argument.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1522
9d6a15c3 1523int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1524{
1525 int retval;
1526 struct socket *sock;
a677a039
UD
1527 int flags;
1528
e38b36f3
UD
1529 /* Check the SOCK_* constants for consistency. */
1530 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1531 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1532 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1533 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1534
a677a039 1535 flags = type & ~SOCK_TYPE_MASK;
77d27200 1536 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1537 return -EINVAL;
1538 type &= SOCK_TYPE_MASK;
1da177e4 1539
aaca0bdc
UD
1540 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1541 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1542
1da177e4
LT
1543 retval = sock_create(family, type, protocol, &sock);
1544 if (retval < 0)
8e1611e2 1545 return retval;
1da177e4 1546
8e1611e2 1547 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1548}
1549
9d6a15c3
DB
1550SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1551{
1552 return __sys_socket(family, type, protocol);
1553}
1554
1da177e4
LT
1555/*
1556 * Create a pair of connected sockets.
1557 */
1558
6debc8d8 1559int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1560{
1561 struct socket *sock1, *sock2;
1562 int fd1, fd2, err;
db349509 1563 struct file *newfile1, *newfile2;
a677a039
UD
1564 int flags;
1565
1566 flags = type & ~SOCK_TYPE_MASK;
77d27200 1567 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1568 return -EINVAL;
1569 type &= SOCK_TYPE_MASK;
1da177e4 1570
aaca0bdc
UD
1571 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1572 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1573
016a266b
AV
1574 /*
1575 * reserve descriptors and make sure we won't fail
1576 * to return them to userland.
1577 */
1578 fd1 = get_unused_fd_flags(flags);
1579 if (unlikely(fd1 < 0))
1580 return fd1;
1581
1582 fd2 = get_unused_fd_flags(flags);
1583 if (unlikely(fd2 < 0)) {
1584 put_unused_fd(fd1);
1585 return fd2;
1586 }
1587
1588 err = put_user(fd1, &usockvec[0]);
1589 if (err)
1590 goto out;
1591
1592 err = put_user(fd2, &usockvec[1]);
1593 if (err)
1594 goto out;
1595
1da177e4
LT
1596 /*
1597 * Obtain the first socket and check if the underlying protocol
1598 * supports the socketpair call.
1599 */
1600
1601 err = sock_create(family, type, protocol, &sock1);
016a266b 1602 if (unlikely(err < 0))
1da177e4
LT
1603 goto out;
1604
1605 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1606 if (unlikely(err < 0)) {
1607 sock_release(sock1);
1608 goto out;
bf3c23d1 1609 }
d73aa286 1610
d47cd945
DH
1611 err = security_socket_socketpair(sock1, sock2);
1612 if (unlikely(err)) {
1613 sock_release(sock2);
1614 sock_release(sock1);
1615 goto out;
1616 }
1617
016a266b
AV
1618 err = sock1->ops->socketpair(sock1, sock2);
1619 if (unlikely(err < 0)) {
1620 sock_release(sock2);
1621 sock_release(sock1);
1622 goto out;
28407630
AV
1623 }
1624
aab174f0 1625 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1626 if (IS_ERR(newfile1)) {
28407630 1627 err = PTR_ERR(newfile1);
016a266b
AV
1628 sock_release(sock2);
1629 goto out;
28407630
AV
1630 }
1631
aab174f0 1632 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1633 if (IS_ERR(newfile2)) {
1634 err = PTR_ERR(newfile2);
016a266b
AV
1635 fput(newfile1);
1636 goto out;
db349509
AV
1637 }
1638
157cf649 1639 audit_fd_pair(fd1, fd2);
d73aa286 1640
db349509
AV
1641 fd_install(fd1, newfile1);
1642 fd_install(fd2, newfile2);
d73aa286 1643 return 0;
1da177e4 1644
016a266b 1645out:
d73aa286 1646 put_unused_fd(fd2);
d73aa286 1647 put_unused_fd(fd1);
1da177e4
LT
1648 return err;
1649}
1650
6debc8d8
DB
1651SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1652 int __user *, usockvec)
1653{
1654 return __sys_socketpair(family, type, protocol, usockvec);
1655}
1656
1da177e4
LT
1657/*
1658 * Bind a name to a socket. Nothing much to do here since it's
1659 * the protocol's responsibility to handle the local address.
1660 *
1661 * We move the socket address to kernel space before we call
1662 * the protocol layer (having also checked the address is ok).
1663 */
1664
a87d35d8 1665int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1666{
1667 struct socket *sock;
230b1839 1668 struct sockaddr_storage address;
6cb153ca 1669 int err, fput_needed;
1da177e4 1670
89bddce5 1671 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1672 if (sock) {
43db362d 1673 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1674 if (!err) {
89bddce5 1675 err = security_socket_bind(sock,
230b1839 1676 (struct sockaddr *)&address,
89bddce5 1677 addrlen);
6cb153ca
BL
1678 if (!err)
1679 err = sock->ops->bind(sock,
89bddce5 1680 (struct sockaddr *)
230b1839 1681 &address, addrlen);
1da177e4 1682 }
6cb153ca 1683 fput_light(sock->file, fput_needed);
89bddce5 1684 }
1da177e4
LT
1685 return err;
1686}
1687
a87d35d8
DB
1688SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1689{
1690 return __sys_bind(fd, umyaddr, addrlen);
1691}
1692
1da177e4
LT
1693/*
1694 * Perform a listen. Basically, we allow the protocol to do anything
1695 * necessary for a listen, and if that works, we mark the socket as
1696 * ready for listening.
1697 */
1698
25e290ee 1699int __sys_listen(int fd, int backlog)
1da177e4
LT
1700{
1701 struct socket *sock;
6cb153ca 1702 int err, fput_needed;
b8e1f9b5 1703 int somaxconn;
89bddce5
SH
1704
1705 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1706 if (sock) {
8efa6e93 1707 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1708 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1709 backlog = somaxconn;
1da177e4
LT
1710
1711 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1712 if (!err)
1713 err = sock->ops->listen(sock, backlog);
1da177e4 1714
6cb153ca 1715 fput_light(sock->file, fput_needed);
1da177e4
LT
1716 }
1717 return err;
1718}
1719
25e290ee
DB
1720SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1721{
1722 return __sys_listen(fd, backlog);
1723}
1724
de2ea4b6
JA
1725int __sys_accept4_file(struct file *file, unsigned file_flags,
1726 struct sockaddr __user *upeer_sockaddr,
09952e3e
JA
1727 int __user *upeer_addrlen, int flags,
1728 unsigned long nofile)
1da177e4
LT
1729{
1730 struct socket *sock, *newsock;
39d8c1b6 1731 struct file *newfile;
de2ea4b6 1732 int err, len, newfd;
230b1839 1733 struct sockaddr_storage address;
1da177e4 1734
77d27200 1735 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1736 return -EINVAL;
1737
1738 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1739 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1740
dba4a925
FR
1741 sock = sock_from_file(file);
1742 if (!sock) {
1743 err = -ENOTSOCK;
1da177e4 1744 goto out;
dba4a925 1745 }
1da177e4
LT
1746
1747 err = -ENFILE;
c6d409cf
ED
1748 newsock = sock_alloc();
1749 if (!newsock)
de2ea4b6 1750 goto out;
1da177e4
LT
1751
1752 newsock->type = sock->type;
1753 newsock->ops = sock->ops;
1754
1da177e4
LT
1755 /*
1756 * We don't need try_module_get here, as the listening socket (sock)
1757 * has the protocol module (sock->ops->owner) held.
1758 */
1759 __module_get(newsock->ops->owner);
1760
09952e3e 1761 newfd = __get_unused_fd_flags(flags, nofile);
39d8c1b6
DM
1762 if (unlikely(newfd < 0)) {
1763 err = newfd;
9a1875e6 1764 sock_release(newsock);
de2ea4b6 1765 goto out;
39d8c1b6 1766 }
aab174f0 1767 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1768 if (IS_ERR(newfile)) {
28407630
AV
1769 err = PTR_ERR(newfile);
1770 put_unused_fd(newfd);
de2ea4b6 1771 goto out;
28407630 1772 }
39d8c1b6 1773
a79af59e
FF
1774 err = security_socket_accept(sock, newsock);
1775 if (err)
39d8c1b6 1776 goto out_fd;
a79af59e 1777
de2ea4b6
JA
1778 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1779 false);
1da177e4 1780 if (err < 0)
39d8c1b6 1781 goto out_fd;
1da177e4
LT
1782
1783 if (upeer_sockaddr) {
9b2c45d4
DV
1784 len = newsock->ops->getname(newsock,
1785 (struct sockaddr *)&address, 2);
1786 if (len < 0) {
1da177e4 1787 err = -ECONNABORTED;
39d8c1b6 1788 goto out_fd;
1da177e4 1789 }
43db362d 1790 err = move_addr_to_user(&address,
230b1839 1791 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1792 if (err < 0)
39d8c1b6 1793 goto out_fd;
1da177e4
LT
1794 }
1795
1796 /* File flags are not inherited via accept() unlike another OSes. */
1797
39d8c1b6
DM
1798 fd_install(newfd, newfile);
1799 err = newfd;
1da177e4
LT
1800out:
1801 return err;
39d8c1b6 1802out_fd:
9606a216 1803 fput(newfile);
39d8c1b6 1804 put_unused_fd(newfd);
de2ea4b6
JA
1805 goto out;
1806
1807}
1808
1809/*
1810 * For accept, we attempt to create a new socket, set up the link
1811 * with the client, wake up the client, then return the new
1812 * connected fd. We collect the address of the connector in kernel
1813 * space and move it to user at the very end. This is unclean because
1814 * we open the socket then return an error.
1815 *
1816 * 1003.1g adds the ability to recvmsg() to query connection pending
1817 * status to recvmsg. We need to add that support in a way thats
1818 * clean when we restructure accept also.
1819 */
1820
1821int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1822 int __user *upeer_addrlen, int flags)
1823{
1824 int ret = -EBADF;
1825 struct fd f;
1826
1827 f = fdget(fd);
1828 if (f.file) {
1829 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1830 upeer_addrlen, flags,
1831 rlimit(RLIMIT_NOFILE));
6b07edeb 1832 fdput(f);
de2ea4b6
JA
1833 }
1834
1835 return ret;
1da177e4
LT
1836}
1837
4541e805
DB
1838SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1839 int __user *, upeer_addrlen, int, flags)
1840{
1841 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1842}
1843
20f37034
HC
1844SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1845 int __user *, upeer_addrlen)
aaca0bdc 1846{
4541e805 1847 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1848}
1849
1da177e4
LT
1850/*
1851 * Attempt to connect to a socket with the server address. The address
1852 * is in user space so we verify it is OK and move it to kernel space.
1853 *
1854 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1855 * break bindings
1856 *
1857 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1858 * other SEQPACKET protocols that take time to connect() as it doesn't
1859 * include the -EINPROGRESS status for such sockets.
1860 */
1861
f499a021 1862int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1863 int addrlen, int file_flags)
1da177e4
LT
1864{
1865 struct socket *sock;
bd3ded31 1866 int err;
1da177e4 1867
dba4a925
FR
1868 sock = sock_from_file(file);
1869 if (!sock) {
1870 err = -ENOTSOCK;
1da177e4 1871 goto out;
dba4a925 1872 }
1da177e4 1873
89bddce5 1874 err =
f499a021 1875 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1876 if (err)
bd3ded31 1877 goto out;
1da177e4 1878
f499a021 1879 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1880 sock->file->f_flags | file_flags);
1da177e4
LT
1881out:
1882 return err;
1883}
1884
bd3ded31
JA
1885int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1886{
1887 int ret = -EBADF;
1888 struct fd f;
1889
1890 f = fdget(fd);
1891 if (f.file) {
f499a021
JA
1892 struct sockaddr_storage address;
1893
1894 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1895 if (!ret)
1896 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1897 fdput(f);
bd3ded31
JA
1898 }
1899
1900 return ret;
1901}
1902
1387c2c2
DB
1903SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1904 int, addrlen)
1905{
1906 return __sys_connect(fd, uservaddr, addrlen);
1907}
1908
1da177e4
LT
1909/*
1910 * Get the local address ('name') of a socket object. Move the obtained
1911 * name to user space.
1912 */
1913
8882a107
DB
1914int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1915 int __user *usockaddr_len)
1da177e4
LT
1916{
1917 struct socket *sock;
230b1839 1918 struct sockaddr_storage address;
9b2c45d4 1919 int err, fput_needed;
89bddce5 1920
6cb153ca 1921 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1922 if (!sock)
1923 goto out;
1924
1925 err = security_socket_getsockname(sock);
1926 if (err)
1927 goto out_put;
1928
9b2c45d4
DV
1929 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1930 if (err < 0)
1da177e4 1931 goto out_put;
9b2c45d4
DV
1932 /* "err" is actually length in this case */
1933 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1934
1935out_put:
6cb153ca 1936 fput_light(sock->file, fput_needed);
1da177e4
LT
1937out:
1938 return err;
1939}
1940
8882a107
DB
1941SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1942 int __user *, usockaddr_len)
1943{
1944 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1945}
1946
1da177e4
LT
1947/*
1948 * Get the remote address ('name') of a socket object. Move the obtained
1949 * name to user space.
1950 */
1951
b21c8f83
DB
1952int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1953 int __user *usockaddr_len)
1da177e4
LT
1954{
1955 struct socket *sock;
230b1839 1956 struct sockaddr_storage address;
9b2c45d4 1957 int err, fput_needed;
1da177e4 1958
89bddce5
SH
1959 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1960 if (sock != NULL) {
1da177e4
LT
1961 err = security_socket_getpeername(sock);
1962 if (err) {
6cb153ca 1963 fput_light(sock->file, fput_needed);
1da177e4
LT
1964 return err;
1965 }
1966
9b2c45d4
DV
1967 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1968 if (err >= 0)
1969 /* "err" is actually length in this case */
1970 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1971 usockaddr_len);
6cb153ca 1972 fput_light(sock->file, fput_needed);
1da177e4
LT
1973 }
1974 return err;
1975}
1976
b21c8f83
DB
1977SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1978 int __user *, usockaddr_len)
1979{
1980 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1981}
1982
1da177e4
LT
1983/*
1984 * Send a datagram to a given address. We move the address into kernel
1985 * space and check the user space data area is readable before invoking
1986 * the protocol.
1987 */
211b634b
DB
1988int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1989 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1990{
1991 struct socket *sock;
230b1839 1992 struct sockaddr_storage address;
1da177e4
LT
1993 int err;
1994 struct msghdr msg;
1995 struct iovec iov;
6cb153ca 1996 int fput_needed;
6cb153ca 1997
602bd0e9
AV
1998 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1999 if (unlikely(err))
2000 return err;
de0fa95c
PE
2001 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2002 if (!sock)
4387ff75 2003 goto out;
6cb153ca 2004
89bddce5 2005 msg.msg_name = NULL;
89bddce5
SH
2006 msg.msg_control = NULL;
2007 msg.msg_controllen = 0;
2008 msg.msg_namelen = 0;
6cb153ca 2009 if (addr) {
43db362d 2010 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2011 if (err < 0)
2012 goto out_put;
230b1839 2013 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2014 msg.msg_namelen = addr_len;
1da177e4
LT
2015 }
2016 if (sock->file->f_flags & O_NONBLOCK)
2017 flags |= MSG_DONTWAIT;
2018 msg.msg_flags = flags;
d8725c86 2019 err = sock_sendmsg(sock, &msg);
1da177e4 2020
89bddce5 2021out_put:
de0fa95c 2022 fput_light(sock->file, fput_needed);
4387ff75 2023out:
1da177e4
LT
2024 return err;
2025}
2026
211b634b
DB
2027SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2028 unsigned int, flags, struct sockaddr __user *, addr,
2029 int, addr_len)
2030{
2031 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2032}
2033
1da177e4 2034/*
89bddce5 2035 * Send a datagram down a socket.
1da177e4
LT
2036 */
2037
3e0fa65f 2038SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2039 unsigned int, flags)
1da177e4 2040{
211b634b 2041 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2042}
2043
2044/*
89bddce5 2045 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2046 * sender. We verify the buffers are writable and if needed move the
2047 * sender address from kernel to user space.
2048 */
7a09e1eb
DB
2049int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2050 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2051{
2052 struct socket *sock;
2053 struct iovec iov;
2054 struct msghdr msg;
230b1839 2055 struct sockaddr_storage address;
89bddce5 2056 int err, err2;
6cb153ca
BL
2057 int fput_needed;
2058
602bd0e9
AV
2059 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2060 if (unlikely(err))
2061 return err;
de0fa95c 2062 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2063 if (!sock)
de0fa95c 2064 goto out;
1da177e4 2065
89bddce5
SH
2066 msg.msg_control = NULL;
2067 msg.msg_controllen = 0;
f3d33426
HFS
2068 /* Save some cycles and don't copy the address if not needed */
2069 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2070 /* We assume all kernel code knows the size of sockaddr_storage */
2071 msg.msg_namelen = 0;
130ed5d1 2072 msg.msg_iocb = NULL;
9f138fa6 2073 msg.msg_flags = 0;
1da177e4
LT
2074 if (sock->file->f_flags & O_NONBLOCK)
2075 flags |= MSG_DONTWAIT;
2da62906 2076 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2077
89bddce5 2078 if (err >= 0 && addr != NULL) {
43db362d 2079 err2 = move_addr_to_user(&address,
230b1839 2080 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2081 if (err2 < 0)
2082 err = err2;
1da177e4 2083 }
de0fa95c
PE
2084
2085 fput_light(sock->file, fput_needed);
4387ff75 2086out:
1da177e4
LT
2087 return err;
2088}
2089
7a09e1eb
DB
2090SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2091 unsigned int, flags, struct sockaddr __user *, addr,
2092 int __user *, addr_len)
2093{
2094 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2095}
2096
1da177e4 2097/*
89bddce5 2098 * Receive a datagram from a socket.
1da177e4
LT
2099 */
2100
b7c0ddf5
JG
2101SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2102 unsigned int, flags)
1da177e4 2103{
7a09e1eb 2104 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2105}
2106
83f0c10b
FW
2107static bool sock_use_custom_sol_socket(const struct socket *sock)
2108{
2109 const struct sock *sk = sock->sk;
2110
2111 /* Use sock->ops->setsockopt() for MPTCP */
2112 return IS_ENABLED(CONFIG_MPTCP) &&
2113 sk->sk_protocol == IPPROTO_MPTCP &&
2114 sk->sk_type == SOCK_STREAM &&
2115 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2116}
2117
1da177e4
LT
2118/*
2119 * Set a socket option. Because we don't know the option lengths we have
2120 * to pass the user mode parameter for the protocols to sort out.
2121 */
a7b75c5a 2122int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2123 int optlen)
1da177e4 2124{
519a8a6c 2125 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2126 char *kernel_optval = NULL;
6cb153ca 2127 int err, fput_needed;
1da177e4
LT
2128 struct socket *sock;
2129
2130 if (optlen < 0)
2131 return -EINVAL;
89bddce5
SH
2132
2133 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2134 if (!sock)
2135 return err;
1da177e4 2136
4a367299
CH
2137 err = security_socket_setsockopt(sock, level, optname);
2138 if (err)
2139 goto out_put;
0d01da6a 2140
55db9c0e
CH
2141 if (!in_compat_syscall())
2142 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2143 user_optval, &optlen,
55db9c0e 2144 &kernel_optval);
4a367299
CH
2145 if (err < 0)
2146 goto out_put;
2147 if (err > 0) {
2148 err = 0;
2149 goto out_put;
2150 }
0d01da6a 2151
a7b75c5a
CH
2152 if (kernel_optval)
2153 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2154 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2155 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2156 else if (unlikely(!sock->ops->setsockopt))
2157 err = -EOPNOTSUPP;
4a367299
CH
2158 else
2159 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2160 optlen);
a7b75c5a 2161 kfree(kernel_optval);
4a367299
CH
2162out_put:
2163 fput_light(sock->file, fput_needed);
1da177e4
LT
2164 return err;
2165}
2166
cc36dca0
DB
2167SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2168 char __user *, optval, int, optlen)
2169{
2170 return __sys_setsockopt(fd, level, optname, optval, optlen);
2171}
2172
9cacf81f
SF
2173INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2174 int optname));
2175
1da177e4
LT
2176/*
2177 * Get a socket option. Because we don't know the option lengths we have
2178 * to pass a user mode parameter for the protocols to sort out.
2179 */
55db9c0e
CH
2180int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2181 int __user *optlen)
1da177e4 2182{
6cb153ca 2183 int err, fput_needed;
1da177e4 2184 struct socket *sock;
0d01da6a 2185 int max_optlen;
1da177e4 2186
89bddce5 2187 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2188 if (!sock)
2189 return err;
2190
2191 err = security_socket_getsockopt(sock, level, optname);
2192 if (err)
2193 goto out_put;
1da177e4 2194
55db9c0e
CH
2195 if (!in_compat_syscall())
2196 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2197
d8a9b38f
CH
2198 if (level == SOL_SOCKET)
2199 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2200 else if (unlikely(!sock->ops->getsockopt))
2201 err = -EOPNOTSUPP;
d8a9b38f
CH
2202 else
2203 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2204 optlen);
0d01da6a 2205
55db9c0e
CH
2206 if (!in_compat_syscall())
2207 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2208 optval, optlen, max_optlen,
2209 err);
6cb153ca 2210out_put:
d8a9b38f 2211 fput_light(sock->file, fput_needed);
1da177e4
LT
2212 return err;
2213}
2214
13a2d70e
DB
2215SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2216 char __user *, optval, int __user *, optlen)
2217{
2218 return __sys_getsockopt(fd, level, optname, optval, optlen);
2219}
2220
1da177e4
LT
2221/*
2222 * Shutdown a socket.
2223 */
2224
b713c195
JA
2225int __sys_shutdown_sock(struct socket *sock, int how)
2226{
2227 int err;
2228
2229 err = security_socket_shutdown(sock, how);
2230 if (!err)
2231 err = sock->ops->shutdown(sock, how);
2232
2233 return err;
2234}
2235
005a1aea 2236int __sys_shutdown(int fd, int how)
1da177e4 2237{
6cb153ca 2238 int err, fput_needed;
1da177e4
LT
2239 struct socket *sock;
2240
89bddce5
SH
2241 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2242 if (sock != NULL) {
b713c195 2243 err = __sys_shutdown_sock(sock, how);
6cb153ca 2244 fput_light(sock->file, fput_needed);
1da177e4
LT
2245 }
2246 return err;
2247}
2248
005a1aea
DB
2249SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2250{
2251 return __sys_shutdown(fd, how);
2252}
2253
89bddce5 2254/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
2255 * fields which are the same type (int / unsigned) on our platforms.
2256 */
/*
 * These expand in the caller's scope and rely on locals named `flags` and
 * `<msg>_compat` being visible there: the compat header is selected when
 * MSG_CMSG_COMPAT is set in `flags`, the native one otherwise.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2260
c71d8ebe
TH
2261struct used_address {
2262 struct sockaddr_storage name;
2263 unsigned int name_len;
2264};
2265
0a384abf
JA
2266int __copy_msghdr_from_user(struct msghdr *kmsg,
2267 struct user_msghdr __user *umsg,
2268 struct sockaddr __user **save_addr,
2269 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2270{
ffb07550 2271 struct user_msghdr msg;
08adb7da
AV
2272 ssize_t err;
2273
ffb07550 2274 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2275 return -EFAULT;
dbb490b9 2276
1f466e1f
CH
2277 kmsg->msg_control_is_user = true;
2278 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2279 kmsg->msg_controllen = msg.msg_controllen;
2280 kmsg->msg_flags = msg.msg_flags;
2281
2282 kmsg->msg_namelen = msg.msg_namelen;
2283 if (!msg.msg_name)
6a2a2b3a
AS
2284 kmsg->msg_namelen = 0;
2285
dbb490b9
ML
2286 if (kmsg->msg_namelen < 0)
2287 return -EINVAL;
2288
1661bf36 2289 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2290 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2291
2292 if (save_addr)
ffb07550 2293 *save_addr = msg.msg_name;
08adb7da 2294
ffb07550 2295 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2296 if (!save_addr) {
864d9664
PA
2297 err = move_addr_to_kernel(msg.msg_name,
2298 kmsg->msg_namelen,
08adb7da
AV
2299 kmsg->msg_name);
2300 if (err < 0)
2301 return err;
2302 }
2303 } else {
2304 kmsg->msg_name = NULL;
2305 kmsg->msg_namelen = 0;
2306 }
2307
ffb07550 2308 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2309 return -EMSGSIZE;
2310
0345f931 2311 kmsg->msg_iocb = NULL;
0a384abf
JA
2312 *uiov = msg.msg_iov;
2313 *nsegs = msg.msg_iovlen;
2314 return 0;
2315}
2316
2317static int copy_msghdr_from_user(struct msghdr *kmsg,
2318 struct user_msghdr __user *umsg,
2319 struct sockaddr __user **save_addr,
2320 struct iovec **iov)
2321{
2322 struct user_msghdr msg;
2323 ssize_t err;
2324
2325 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2326 &msg.msg_iovlen);
2327 if (err)
2328 return err;
0345f931 2329
87e5e6da 2330 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2331 msg.msg_iov, msg.msg_iovlen,
da184284 2332 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2333 return err < 0 ? err : 0;
1661bf36
DC
2334}
2335
4257c8ca
JA
2336static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2337 unsigned int flags, struct used_address *used_address,
2338 unsigned int allowed_msghdr_flags)
1da177e4 2339{
b9d717a7 2340 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2341 __aligned(sizeof(__kernel_size_t));
89bddce5 2342 /* 20 is size of ipv6_pktinfo */
1da177e4 2343 unsigned char *ctl_buf = ctl;
d8725c86 2344 int ctl_len;
08adb7da 2345 ssize_t err;
89bddce5 2346
1da177e4
LT
2347 err = -ENOBUFS;
2348
228e548e 2349 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2350 goto out;
28a94d8f 2351 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2352 ctl_len = msg_sys->msg_controllen;
1da177e4 2353 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2354 err =
228e548e 2355 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2356 sizeof(ctl));
1da177e4 2357 if (err)
4257c8ca 2358 goto out;
228e548e
AB
2359 ctl_buf = msg_sys->msg_control;
2360 ctl_len = msg_sys->msg_controllen;
1da177e4 2361 } else if (ctl_len) {
ac4340fc
DM
2362 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2363 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2364 if (ctl_len > sizeof(ctl)) {
1da177e4 2365 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2366 if (ctl_buf == NULL)
4257c8ca 2367 goto out;
1da177e4
LT
2368 }
2369 err = -EFAULT;
1f466e1f 2370 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2371 goto out_freectl;
228e548e 2372 msg_sys->msg_control = ctl_buf;
1f466e1f 2373 msg_sys->msg_control_is_user = false;
1da177e4 2374 }
228e548e 2375 msg_sys->msg_flags = flags;
1da177e4
LT
2376
2377 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2378 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2379 /*
2380 * If this is sendmmsg() and current destination address is same as
2381 * previously succeeded address, omit asking LSM's decision.
2382 * used_address->name_len is initialized to UINT_MAX so that the first
2383 * destination address never matches.
2384 */
bc909d9d
MD
2385 if (used_address && msg_sys->msg_name &&
2386 used_address->name_len == msg_sys->msg_namelen &&
2387 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2388 used_address->name_len)) {
d8725c86 2389 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2390 goto out_freectl;
2391 }
d8725c86 2392 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2393 /*
2394 * If this is sendmmsg() and sending to current destination address was
2395 * successful, remember it.
2396 */
2397 if (used_address && err >= 0) {
2398 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2399 if (msg_sys->msg_name)
2400 memcpy(&used_address->name, msg_sys->msg_name,
2401 used_address->name_len);
c71d8ebe 2402 }
1da177e4
LT
2403
2404out_freectl:
89bddce5 2405 if (ctl_buf != ctl)
1da177e4 2406 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2407out:
2408 return err;
2409}
2410
03b1230c
JA
2411int sendmsg_copy_msghdr(struct msghdr *msg,
2412 struct user_msghdr __user *umsg, unsigned flags,
2413 struct iovec **iov)
4257c8ca
JA
2414{
2415 int err;
2416
2417 if (flags & MSG_CMSG_COMPAT) {
2418 struct compat_msghdr __user *msg_compat;
2419
2420 msg_compat = (struct compat_msghdr __user *) umsg;
2421 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2422 } else {
2423 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2424 }
2425 if (err < 0)
2426 return err;
2427
2428 return 0;
2429}
2430
2431static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2432 struct msghdr *msg_sys, unsigned int flags,
2433 struct used_address *used_address,
2434 unsigned int allowed_msghdr_flags)
2435{
2436 struct sockaddr_storage address;
2437 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2438 ssize_t err;
2439
2440 msg_sys->msg_name = &address;
2441
2442 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2443 if (err < 0)
2444 return err;
2445
2446 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2447 allowed_msghdr_flags);
da184284 2448 kfree(iov);
228e548e
AB
2449 return err;
2450}
2451
2452/*
2453 * BSD sendmsg interface
2454 */
03b1230c 2455long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2456 unsigned int flags)
2457{
03b1230c 2458 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2459}
228e548e 2460
e1834a32
DB
2461long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2462 bool forbid_cmsg_compat)
228e548e
AB
2463{
2464 int fput_needed, err;
2465 struct msghdr msg_sys;
1be374a0
AL
2466 struct socket *sock;
2467
e1834a32
DB
2468 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2469 return -EINVAL;
2470
1be374a0 2471 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2472 if (!sock)
2473 goto out;
2474
28a94d8f 2475 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2476
6cb153ca 2477 fput_light(sock->file, fput_needed);
89bddce5 2478out:
1da177e4
LT
2479 return err;
2480}
2481
666547ff 2482SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2483{
e1834a32 2484 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2485}
2486
228e548e
AB
2487/*
2488 * Linux sendmmsg interface
2489 */
2490
2491int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2492 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2493{
2494 int fput_needed, err, datagrams;
2495 struct socket *sock;
2496 struct mmsghdr __user *entry;
2497 struct compat_mmsghdr __user *compat_entry;
2498 struct msghdr msg_sys;
c71d8ebe 2499 struct used_address used_address;
f092276d 2500 unsigned int oflags = flags;
228e548e 2501
e1834a32
DB
2502 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2503 return -EINVAL;
2504
98382f41
AB
2505 if (vlen > UIO_MAXIOV)
2506 vlen = UIO_MAXIOV;
228e548e
AB
2507
2508 datagrams = 0;
2509
2510 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2511 if (!sock)
2512 return err;
2513
c71d8ebe 2514 used_address.name_len = UINT_MAX;
228e548e
AB
2515 entry = mmsg;
2516 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2517 err = 0;
f092276d 2518 flags |= MSG_BATCH;
228e548e
AB
2519
2520 while (datagrams < vlen) {
f092276d
TH
2521 if (datagrams == vlen - 1)
2522 flags = oflags;
2523
228e548e 2524 if (MSG_CMSG_COMPAT & flags) {
666547ff 2525 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2526 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2527 if (err < 0)
2528 break;
2529 err = __put_user(err, &compat_entry->msg_len);
2530 ++compat_entry;
2531 } else {
a7526eb5 2532 err = ___sys_sendmsg(sock,
666547ff 2533 (struct user_msghdr __user *)entry,
28a94d8f 2534 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2535 if (err < 0)
2536 break;
2537 err = put_user(err, &entry->msg_len);
2538 ++entry;
2539 }
2540
2541 if (err)
2542 break;
2543 ++datagrams;
3023898b
SHY
2544 if (msg_data_left(&msg_sys))
2545 break;
a78cb84c 2546 cond_resched();
228e548e
AB
2547 }
2548
228e548e
AB
2549 fput_light(sock->file, fput_needed);
2550
728ffb86
AB
2551 /* We only return an error if no datagrams were able to be sent */
2552 if (datagrams != 0)
228e548e
AB
2553 return datagrams;
2554
228e548e
AB
2555 return err;
2556}
2557
2558SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2559 unsigned int, vlen, unsigned int, flags)
2560{
e1834a32 2561 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2562}
2563
03b1230c
JA
2564int recvmsg_copy_msghdr(struct msghdr *msg,
2565 struct user_msghdr __user *umsg, unsigned flags,
2566 struct sockaddr __user **uaddr,
2567 struct iovec **iov)
1da177e4 2568{
08adb7da 2569 ssize_t err;
1da177e4 2570
4257c8ca
JA
2571 if (MSG_CMSG_COMPAT & flags) {
2572 struct compat_msghdr __user *msg_compat;
1da177e4 2573
4257c8ca
JA
2574 msg_compat = (struct compat_msghdr __user *) umsg;
2575 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2576 } else {
2577 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2578 }
1da177e4 2579 if (err < 0)
da184284 2580 return err;
1da177e4 2581
4257c8ca
JA
2582 return 0;
2583}
2584
2585static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2586 struct user_msghdr __user *msg,
2587 struct sockaddr __user *uaddr,
2588 unsigned int flags, int nosec)
2589{
2590 struct compat_msghdr __user *msg_compat =
2591 (struct compat_msghdr __user *) msg;
2592 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2593 struct sockaddr_storage addr;
2594 unsigned long cmsg_ptr;
2595 int len;
2596 ssize_t err;
2597
2598 msg_sys->msg_name = &addr;
a2e27255
ACM
2599 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2600 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2601
f3d33426
HFS
2602 /* We assume all kernel code knows the size of sockaddr_storage */
2603 msg_sys->msg_namelen = 0;
2604
1da177e4
LT
2605 if (sock->file->f_flags & O_NONBLOCK)
2606 flags |= MSG_DONTWAIT;
1af66221
ED
2607
2608 if (unlikely(nosec))
2609 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2610 else
2611 err = sock_recvmsg(sock, msg_sys, flags);
2612
1da177e4 2613 if (err < 0)
4257c8ca 2614 goto out;
1da177e4
LT
2615 len = err;
2616
2617 if (uaddr != NULL) {
43db362d 2618 err = move_addr_to_user(&addr,
a2e27255 2619 msg_sys->msg_namelen, uaddr,
89bddce5 2620 uaddr_len);
1da177e4 2621 if (err < 0)
4257c8ca 2622 goto out;
1da177e4 2623 }
a2e27255 2624 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2625 COMPAT_FLAGS(msg));
1da177e4 2626 if (err)
4257c8ca 2627 goto out;
1da177e4 2628 if (MSG_CMSG_COMPAT & flags)
a2e27255 2629 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2630 &msg_compat->msg_controllen);
2631 else
a2e27255 2632 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2633 &msg->msg_controllen);
2634 if (err)
4257c8ca 2635 goto out;
1da177e4 2636 err = len;
4257c8ca
JA
2637out:
2638 return err;
2639}
2640
2641static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2642 struct msghdr *msg_sys, unsigned int flags, int nosec)
2643{
2644 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2645 /* user mode address pointers */
2646 struct sockaddr __user *uaddr;
2647 ssize_t err;
2648
2649 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2650 if (err < 0)
2651 return err;
1da177e4 2652
4257c8ca 2653 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2654 kfree(iov);
a2e27255
ACM
2655 return err;
2656}
2657
2658/*
2659 * BSD recvmsg interface
2660 */
2661
03b1230c
JA
2662long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2663 struct user_msghdr __user *umsg,
2664 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2665{
03b1230c 2666 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2667}
2668
e1834a32
DB
2669long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2670 bool forbid_cmsg_compat)
a2e27255
ACM
2671{
2672 int fput_needed, err;
2673 struct msghdr msg_sys;
1be374a0
AL
2674 struct socket *sock;
2675
e1834a32
DB
2676 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2677 return -EINVAL;
2678
1be374a0 2679 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2680 if (!sock)
2681 goto out;
2682
a7526eb5 2683 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2684
6cb153ca 2685 fput_light(sock->file, fput_needed);
1da177e4
LT
2686out:
2687 return err;
2688}
2689
666547ff 2690SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2691 unsigned int, flags)
2692{
e1834a32 2693 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2694}
2695
a2e27255
ACM
2696/*
2697 * Linux recvmmsg interface
2698 */
2699
e11d4284
AB
2700static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2701 unsigned int vlen, unsigned int flags,
2702 struct timespec64 *timeout)
a2e27255
ACM
2703{
2704 int fput_needed, err, datagrams;
2705 struct socket *sock;
2706 struct mmsghdr __user *entry;
d7256d0e 2707 struct compat_mmsghdr __user *compat_entry;
a2e27255 2708 struct msghdr msg_sys;
766b9f92
DD
2709 struct timespec64 end_time;
2710 struct timespec64 timeout64;
a2e27255
ACM
2711
2712 if (timeout &&
2713 poll_select_set_timeout(&end_time, timeout->tv_sec,
2714 timeout->tv_nsec))
2715 return -EINVAL;
2716
2717 datagrams = 0;
2718
2719 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2720 if (!sock)
2721 return err;
2722
7797dc41
SHY
2723 if (likely(!(flags & MSG_ERRQUEUE))) {
2724 err = sock_error(sock->sk);
2725 if (err) {
2726 datagrams = err;
2727 goto out_put;
2728 }
e623a9e9 2729 }
a2e27255
ACM
2730
2731 entry = mmsg;
d7256d0e 2732 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2733
2734 while (datagrams < vlen) {
2735 /*
2736 * No need to ask LSM for more than the first datagram.
2737 */
d7256d0e 2738 if (MSG_CMSG_COMPAT & flags) {
666547ff 2739 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2740 &msg_sys, flags & ~MSG_WAITFORONE,
2741 datagrams);
d7256d0e
JMG
2742 if (err < 0)
2743 break;
2744 err = __put_user(err, &compat_entry->msg_len);
2745 ++compat_entry;
2746 } else {
a7526eb5 2747 err = ___sys_recvmsg(sock,
666547ff 2748 (struct user_msghdr __user *)entry,
a7526eb5
AL
2749 &msg_sys, flags & ~MSG_WAITFORONE,
2750 datagrams);
d7256d0e
JMG
2751 if (err < 0)
2752 break;
2753 err = put_user(err, &entry->msg_len);
2754 ++entry;
2755 }
2756
a2e27255
ACM
2757 if (err)
2758 break;
a2e27255
ACM
2759 ++datagrams;
2760
71c5c159
BB
2761 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2762 if (flags & MSG_WAITFORONE)
2763 flags |= MSG_DONTWAIT;
2764
a2e27255 2765 if (timeout) {
766b9f92 2766 ktime_get_ts64(&timeout64);
c2e6c856 2767 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2768 if (timeout->tv_sec < 0) {
2769 timeout->tv_sec = timeout->tv_nsec = 0;
2770 break;
2771 }
2772
2773 /* Timeout, return less than vlen datagrams */
2774 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2775 break;
2776 }
2777
2778 /* Out of band data, return right away */
2779 if (msg_sys.msg_flags & MSG_OOB)
2780 break;
a78cb84c 2781 cond_resched();
a2e27255
ACM
2782 }
2783
a2e27255 2784 if (err == 0)
34b88a68
ACM
2785 goto out_put;
2786
2787 if (datagrams == 0) {
2788 datagrams = err;
2789 goto out_put;
2790 }
a2e27255 2791
34b88a68
ACM
2792 /*
2793 * We may return less entries than requested (vlen) if the
2794 * sock is non block and there aren't enough datagrams...
2795 */
2796 if (err != -EAGAIN) {
a2e27255 2797 /*
34b88a68
ACM
2798 * ... or if recvmsg returns an error after we
2799 * received some datagrams, where we record the
2800 * error to return on the next call or if the
2801 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2802 */
34b88a68 2803 sock->sk->sk_err = -err;
a2e27255 2804 }
34b88a68
ACM
2805out_put:
2806 fput_light(sock->file, fput_needed);
a2e27255 2807
34b88a68 2808 return datagrams;
a2e27255
ACM
2809}
2810
e11d4284
AB
2811int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2812 unsigned int vlen, unsigned int flags,
2813 struct __kernel_timespec __user *timeout,
2814 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2815{
2816 int datagrams;
c2e6c856 2817 struct timespec64 timeout_sys;
a2e27255 2818
e11d4284
AB
2819 if (timeout && get_timespec64(&timeout_sys, timeout))
2820 return -EFAULT;
a2e27255 2821
e11d4284 2822 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2823 return -EFAULT;
2824
e11d4284
AB
2825 if (!timeout && !timeout32)
2826 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2827
2828 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2829
e11d4284
AB
2830 if (datagrams <= 0)
2831 return datagrams;
2832
2833 if (timeout && put_timespec64(&timeout_sys, timeout))
2834 datagrams = -EFAULT;
2835
2836 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2837 datagrams = -EFAULT;
2838
2839 return datagrams;
2840}
2841
1255e269
DB
2842SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2843 unsigned int, vlen, unsigned int, flags,
c2e6c856 2844 struct __kernel_timespec __user *, timeout)
1255e269 2845{
e11d4284
AB
2846 if (flags & MSG_CMSG_COMPAT)
2847 return -EINVAL;
2848
2849 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2850}
2851
2852#ifdef CONFIG_COMPAT_32BIT_TIME
2853SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
2854 unsigned int, vlen, unsigned int, flags,
2855 struct old_timespec32 __user *, timeout)
2856{
2857 if (flags & MSG_CMSG_COMPAT)
2858 return -EINVAL;
2859
2860 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
1255e269 2861}
e11d4284 2862#endif
1255e269 2863
a2e27255 2864#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2865/* Argument list sizes for sys_socketcall */
/* AL(n): size in bytes of an argument block of n unsigned longs. */
#define AL(x) ((x) * sizeof(unsigned long))
/*
 * Indexed by the socketcall `call` number; index 0 is unused.  The per-entry
 * names below are presumed from the uapi SYS_* numbering - verify against
 * <linux/net.h>.
 */
static const unsigned char nargs[21] = {
	AL(0),	/*  0: (unused)     */
	AL(3),	/*  1: socket       */
	AL(3),	/*  2: bind         */
	AL(3),	/*  3: connect      */
	AL(2),	/*  4: listen       */
	AL(3),	/*  5: accept       */
	AL(3),	/*  6: getsockname  */
	AL(3),	/*  7: getpeername  */
	AL(4),	/*  8: socketpair   */
	AL(4),	/*  9: send         */
	AL(4),	/* 10: recv         */
	AL(6),	/* 11: sendto       */
	AL(6),	/* 12: recvfrom     */
	AL(2),	/* 13: shutdown     */
	AL(5),	/* 14: setsockopt   */
	AL(5),	/* 15: getsockopt   */
	AL(3),	/* 16: sendmsg      */
	AL(3),	/* 17: recvmsg      */
	AL(4),	/* 18: accept4      */
	AL(5),	/* 19: recvmmsg     */
	AL(4),	/* 20: sendmmsg     */
};

#undef AL
2875
2876/*
89bddce5 2877 * System call vectors.
1da177e4
LT
2878 *
2879 * Argument checking cleaned up. Saved 20% in size.
2880 * This function doesn't need to set the kernel lock because
89bddce5 2881 * it is set by the callees.
1da177e4
LT
2882 */
2883
3e0fa65f 2884SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2885{
2950fa9d 2886 unsigned long a[AUDITSC_ARGS];
89bddce5 2887 unsigned long a0, a1;
1da177e4 2888 int err;
47379052 2889 unsigned int len;
1da177e4 2890
228e548e 2891 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2892 return -EINVAL;
c8e8cd57 2893 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2894
47379052
AV
2895 len = nargs[call];
2896 if (len > sizeof(a))
2897 return -EINVAL;
2898
1da177e4 2899 /* copy_from_user should be SMP safe. */
47379052 2900 if (copy_from_user(a, args, len))
1da177e4 2901 return -EFAULT;
3ec3b2fb 2902
2950fa9d
CG
2903 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2904 if (err)
2905 return err;
3ec3b2fb 2906
89bddce5
SH
2907 a0 = a[0];
2908 a1 = a[1];
2909
2910 switch (call) {
2911 case SYS_SOCKET:
9d6a15c3 2912 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2913 break;
2914 case SYS_BIND:
a87d35d8 2915 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2916 break;
2917 case SYS_CONNECT:
1387c2c2 2918 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2919 break;
2920 case SYS_LISTEN:
25e290ee 2921 err = __sys_listen(a0, a1);
89bddce5
SH
2922 break;
2923 case SYS_ACCEPT:
4541e805
DB
2924 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2925 (int __user *)a[2], 0);
89bddce5
SH
2926 break;
2927 case SYS_GETSOCKNAME:
2928 err =
8882a107
DB
2929 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2930 (int __user *)a[2]);
89bddce5
SH
2931 break;
2932 case SYS_GETPEERNAME:
2933 err =
b21c8f83
DB
2934 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2935 (int __user *)a[2]);
89bddce5
SH
2936 break;
2937 case SYS_SOCKETPAIR:
6debc8d8 2938 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2939 break;
2940 case SYS_SEND:
f3bf896b
DB
2941 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2942 NULL, 0);
89bddce5
SH
2943 break;
2944 case SYS_SENDTO:
211b634b
DB
2945 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2946 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2947 break;
2948 case SYS_RECV:
d27e9afc
DB
2949 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2950 NULL, NULL);
89bddce5
SH
2951 break;
2952 case SYS_RECVFROM:
7a09e1eb
DB
2953 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2954 (struct sockaddr __user *)a[4],
2955 (int __user *)a[5]);
89bddce5
SH
2956 break;
2957 case SYS_SHUTDOWN:
005a1aea 2958 err = __sys_shutdown(a0, a1);
89bddce5
SH
2959 break;
2960 case SYS_SETSOCKOPT:
cc36dca0
DB
2961 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2962 a[4]);
89bddce5
SH
2963 break;
2964 case SYS_GETSOCKOPT:
2965 err =
13a2d70e
DB
2966 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2967 (int __user *)a[4]);
89bddce5
SH
2968 break;
2969 case SYS_SENDMSG:
e1834a32
DB
2970 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2971 a[2], true);
89bddce5 2972 break;
228e548e 2973 case SYS_SENDMMSG:
e1834a32
DB
2974 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2975 a[3], true);
228e548e 2976 break;
89bddce5 2977 case SYS_RECVMSG:
e1834a32
DB
2978 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2979 a[2], true);
89bddce5 2980 break;
a2e27255 2981 case SYS_RECVMMSG:
3ca47e95 2982 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2983 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2984 a[2], a[3],
2985 (struct __kernel_timespec __user *)a[4],
2986 NULL);
2987 else
2988 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2989 a[2], a[3], NULL,
2990 (struct old_timespec32 __user *)a[4]);
a2e27255 2991 break;
de11defe 2992 case SYS_ACCEPT4:
4541e805
DB
2993 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2994 (int __user *)a[2], a[3]);
aaca0bdc 2995 break;
89bddce5
SH
2996 default:
2997 err = -EINVAL;
2998 break;
1da177e4
LT
2999 }
3000 return err;
3001}
3002
89bddce5 3003#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3004
55737fda
SH
3005/**
3006 * sock_register - add a socket protocol handler
3007 * @ops: description of protocol
3008 *
1da177e4
LT
3009 * This function is called by a protocol handler that wants to
3010 * advertise its address family, and have it linked into the
e793c0f7 3011 * socket interface. The value ops->family corresponds to the
55737fda 3012 * socket system call protocol family.
1da177e4 3013 */
f0fd27d4 3014int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3015{
3016 int err;
3017
3018 if (ops->family >= NPROTO) {
3410f22e 3019 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3020 return -ENOBUFS;
3021 }
55737fda
SH
3022
3023 spin_lock(&net_family_lock);
190683a9
ED
3024 if (rcu_dereference_protected(net_families[ops->family],
3025 lockdep_is_held(&net_family_lock)))
55737fda
SH
3026 err = -EEXIST;
3027 else {
cf778b00 3028 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3029 err = 0;
3030 }
55737fda
SH
3031 spin_unlock(&net_family_lock);
3032
fe0bdbde 3033 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3034 return err;
3035}
c6d409cf 3036EXPORT_SYMBOL(sock_register);
1da177e4 3037
55737fda
SH
3038/**
3039 * sock_unregister - remove a protocol handler
3040 * @family: protocol family to remove
3041 *
1da177e4
LT
3042 * This function is called by a protocol handler that wants to
3043 * remove its address family, and have it unlinked from the
55737fda
SH
3044 * new socket creation.
3045 *
3046 * If protocol handler is a module, then it can use module reference
3047 * counts to protect against new references. If protocol handler is not
3048 * a module then it needs to provide its own protection in
3049 * the ops->create routine.
1da177e4 3050 */
f0fd27d4 3051void sock_unregister(int family)
1da177e4 3052{
f0fd27d4 3053 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3054
55737fda 3055 spin_lock(&net_family_lock);
a9b3cd7f 3056 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3057 spin_unlock(&net_family_lock);
3058
3059 synchronize_rcu();
3060
fe0bdbde 3061 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3062}
c6d409cf 3063EXPORT_SYMBOL(sock_unregister);
1da177e4 3064
bf2ae2e4
XL
3065bool sock_is_registered(int family)
3066{
66b51b0a 3067 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3068}
3069
77d76ea3 3070static int __init sock_init(void)
1da177e4 3071{
b3e19d92 3072 int err;
2ca794e5
EB
3073 /*
3074 * Initialize the network sysctl infrastructure.
3075 */
3076 err = net_sysctl_init();
3077 if (err)
3078 goto out;
b3e19d92 3079
1da177e4 3080 /*
89bddce5 3081 * Initialize skbuff SLAB cache
1da177e4
LT
3082 */
3083 skb_init();
1da177e4
LT
3084
3085 /*
89bddce5 3086 * Initialize the protocols module.
1da177e4
LT
3087 */
3088
3089 init_inodecache();
b3e19d92
NP
3090
3091 err = register_filesystem(&sock_fs_type);
3092 if (err)
47260ba9 3093 goto out;
1da177e4 3094 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3095 if (IS_ERR(sock_mnt)) {
3096 err = PTR_ERR(sock_mnt);
3097 goto out_mount;
3098 }
77d76ea3
AK
3099
3100 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3101 */
3102
3103#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3104 err = netfilter_init();
3105 if (err)
3106 goto out;
1da177e4 3107#endif
cbeb321a 3108
408eccce 3109 ptp_classifier_init();
c1f19b51 3110
b3e19d92
NP
3111out:
3112 return err;
3113
3114out_mount:
3115 unregister_filesystem(&sock_fs_type);
b3e19d92 3116 goto out;
1da177e4
LT
3117}
3118
77d76ea3
AK
3119core_initcall(sock_init); /* early initcall */
3120
1da177e4
LT
3121#ifdef CONFIG_PROC_FS
3122void socket_seq_show(struct seq_file *seq)
3123{
648845ab
TZ
3124 seq_printf(seq, "sockets: used %d\n",
3125 sock_inuse_get(seq->private));
1da177e4 3126}
89bddce5 3127#endif /* CONFIG_PROC_FS */
1da177e4 3128
89bbfc95 3129#ifdef CONFIG_COMPAT
36fd633e 3130static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 3131{
6b96018b 3132 struct compat_ifconf ifc32;
7a229387 3133 struct ifconf ifc;
7a229387
AB
3134 int err;
3135
6b96018b 3136 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3137 return -EFAULT;
3138
36fd633e
AV
3139 ifc.ifc_len = ifc32.ifc_len;
3140 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 3141
36fd633e
AV
3142 rtnl_lock();
3143 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
3144 rtnl_unlock();
7a229387
AB
3145 if (err)
3146 return err;
3147
36fd633e 3148 ifc32.ifc_len = ifc.ifc_len;
6b96018b 3149 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
3150 return -EFAULT;
3151
3152 return 0;
3153}
3154
7a50a240
AB
3155static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3156{
7a50a240 3157 compat_uptr_t uptr32;
44c02a2c
AV
3158 struct ifreq ifr;
3159 void __user *saved;
3160 int err;
7a50a240 3161
44c02a2c 3162 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3163 return -EFAULT;
3164
3165 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3166 return -EFAULT;
3167
44c02a2c
AV
3168 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3169 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3170
44c02a2c
AV
3171 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3172 if (!err) {
3173 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3174 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3175 err = -EFAULT;
ccbd6a5a 3176 }
44c02a2c 3177 return err;
7a229387
AB
3178}
3179
590d4693
BH
3180/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3181static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3182 struct compat_ifreq __user *u_ifreq32)
7a229387 3183{
44c02a2c 3184 struct ifreq ifreq;
7a229387
AB
3185 u32 data32;
3186
44c02a2c 3187 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3188 return -EFAULT;
44c02a2c 3189 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3190 return -EFAULT;
44c02a2c 3191 ifreq.ifr_data = compat_ptr(data32);
7a229387 3192
44c02a2c 3193 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3194}
3195
37ac39bd
JB
3196static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3197 unsigned int cmd,
3198 struct compat_ifreq __user *uifr32)
3199{
3200 struct ifreq __user *uifr;
3201 int err;
3202
3203 /* Handle the fact that while struct ifreq has the same *layout* on
3204 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3205 * which are handled elsewhere, it still has different *size* due to
3206 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3207 * resulting in struct ifreq being 32 and 40 bytes respectively).
3208 * As a result, if the struct happens to be at the end of a page and
3209 * the next page isn't readable/writable, we get a fault. To prevent
3210 * that, copy back and forth to the full size.
3211 */
3212
3213 uifr = compat_alloc_user_space(sizeof(*uifr));
3214 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3215 return -EFAULT;
3216
3217 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3218
3219 if (!err) {
3220 switch (cmd) {
3221 case SIOCGIFFLAGS:
3222 case SIOCGIFMETRIC:
3223 case SIOCGIFMTU:
3224 case SIOCGIFMEM:
3225 case SIOCGIFHWADDR:
3226 case SIOCGIFINDEX:
3227 case SIOCGIFADDR:
3228 case SIOCGIFBRDADDR:
3229 case SIOCGIFDSTADDR:
3230 case SIOCGIFNETMASK:
3231 case SIOCGIFPFLAGS:
3232 case SIOCGIFTXQLEN:
3233 case SIOCGMIIPHY:
3234 case SIOCGMIIREG:
c6c9fee3 3235 case SIOCGIFNAME:
37ac39bd
JB
3236 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3237 err = -EFAULT;
3238 break;
3239 }
3240 }
3241 return err;
3242}
3243
7a229387
AB
3244/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3245 * for some operations; this forces use of the newer bridge-utils that
25985edc 3246 * use compatible ioctls
7a229387 3247 */
6b96018b 3248static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3249{
6b96018b 3250 compat_ulong_t tmp;
7a229387 3251
6b96018b 3252 if (get_user(tmp, argp))
7a229387
AB
3253 return -EFAULT;
3254 if (tmp == BRCTL_GET_VERSION)
3255 return BRCTL_VERSION + 1;
3256 return -EINVAL;
3257}
3258
6b96018b
AB
3259static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3260 unsigned int cmd, unsigned long arg)
3261{
3262 void __user *argp = compat_ptr(arg);
3263 struct sock *sk = sock->sk;
3264 struct net *net = sock_net(sk);
7a229387 3265
6b96018b 3266 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3267 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3268
3269 switch (cmd) {
3270 case SIOCSIFBR:
3271 case SIOCGIFBR:
3272 return old_bridge_ioctl(argp);
6b96018b 3273 case SIOCGIFCONF:
36fd633e 3274 return compat_dev_ifconf(net, argp);
7a50a240
AB
3275 case SIOCWANDEV:
3276 return compat_siocwandev(net, argp);
0768e170
AB
3277 case SIOCGSTAMP_OLD:
3278 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3279 if (!sock->ops->gettstamp)
3280 return -ENOIOCTLCMD;
0768e170 3281 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3282 !COMPAT_USE_64BIT_TIME);
3283
dd98d289 3284 case SIOCETHTOOL:
590d4693
BH
3285 case SIOCBONDSLAVEINFOQUERY:
3286 case SIOCBONDINFOQUERY:
a2116ed2 3287 case SIOCSHWTSTAMP:
fd468c74 3288 case SIOCGHWTSTAMP:
590d4693 3289 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3290
3291 case FIOSETOWN:
3292 case SIOCSPGRP:
3293 case FIOGETOWN:
3294 case SIOCGPGRP:
3295 case SIOCBRADDBR:
3296 case SIOCBRDELBR:
3297 case SIOCGIFVLAN:
3298 case SIOCSIFVLAN:
c62cce2c 3299 case SIOCGSKNS:
0768e170
AB
3300 case SIOCGSTAMP_NEW:
3301 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3302 return sock_ioctl(file, cmd, arg);
3303
3304 case SIOCGIFFLAGS:
3305 case SIOCSIFFLAGS:
709566d7
AB
3306 case SIOCGIFMAP:
3307 case SIOCSIFMAP:
6b96018b
AB
3308 case SIOCGIFMETRIC:
3309 case SIOCSIFMETRIC:
3310 case SIOCGIFMTU:
3311 case SIOCSIFMTU:
3312 case SIOCGIFMEM:
3313 case SIOCSIFMEM:
3314 case SIOCGIFHWADDR:
3315 case SIOCSIFHWADDR:
3316 case SIOCADDMULTI:
3317 case SIOCDELMULTI:
3318 case SIOCGIFINDEX:
6b96018b
AB
3319 case SIOCGIFADDR:
3320 case SIOCSIFADDR:
3321 case SIOCSIFHWBROADCAST:
6b96018b 3322 case SIOCDIFADDR:
6b96018b
AB
3323 case SIOCGIFBRDADDR:
3324 case SIOCSIFBRDADDR:
3325 case SIOCGIFDSTADDR:
3326 case SIOCSIFDSTADDR:
3327 case SIOCGIFNETMASK:
3328 case SIOCSIFNETMASK:
3329 case SIOCSIFPFLAGS:
3330 case SIOCGIFPFLAGS:
3331 case SIOCGIFTXQLEN:
3332 case SIOCSIFTXQLEN:
3333 case SIOCBRADDIF:
3334 case SIOCBRDELIF:
c6c9fee3 3335 case SIOCGIFNAME:
9177efd3
AB
3336 case SIOCSIFNAME:
3337 case SIOCGMIIPHY:
3338 case SIOCGMIIREG:
3339 case SIOCSMIIREG:
f92d4fc9
AV
3340 case SIOCBONDENSLAVE:
3341 case SIOCBONDRELEASE:
3342 case SIOCBONDSETHWADDR:
3343 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3344 return compat_ifreq_ioctl(net, sock, cmd, argp);
3345
6b96018b
AB
3346 case SIOCSARP:
3347 case SIOCGARP:
3348 case SIOCDARP:
c7dc504e 3349 case SIOCOUTQ:
9d7bf41f 3350 case SIOCOUTQNSD:
6b96018b 3351 case SIOCATMARK:
63ff03ab 3352 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3353 }
3354
6b96018b
AB
3355 return -ENOIOCTLCMD;
3356}
7a229387 3357
95c96174 3358static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3359 unsigned long arg)
89bbfc95
SP
3360{
3361 struct socket *sock = file->private_data;
3362 int ret = -ENOIOCTLCMD;
87de87d5
DM
3363 struct sock *sk;
3364 struct net *net;
3365
3366 sk = sock->sk;
3367 net = sock_net(sk);
89bbfc95
SP
3368
3369 if (sock->ops->compat_ioctl)
3370 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3371
87de87d5
DM
3372 if (ret == -ENOIOCTLCMD &&
3373 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3374 ret = compat_wext_handle_ioctl(net, cmd, arg);
3375
6b96018b
AB
3376 if (ret == -ENOIOCTLCMD)
3377 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3378
89bbfc95
SP
3379 return ret;
3380}
3381#endif
3382
8a3c245c
PT
3383/**
3384 * kernel_bind - bind an address to a socket (kernel space)
3385 * @sock: socket
3386 * @addr: address
3387 * @addrlen: length of address
3388 *
3389 * Returns 0 or an error.
3390 */
3391
ac5a488e
SS
3392int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3393{
3394 return sock->ops->bind(sock, addr, addrlen);
3395}
c6d409cf 3396EXPORT_SYMBOL(kernel_bind);
ac5a488e 3397
8a3c245c
PT
3398/**
3399 * kernel_listen - move socket to listening state (kernel space)
3400 * @sock: socket
3401 * @backlog: pending connections queue size
3402 *
3403 * Returns 0 or an error.
3404 */
3405
ac5a488e
SS
3406int kernel_listen(struct socket *sock, int backlog)
3407{
3408 return sock->ops->listen(sock, backlog);
3409}
c6d409cf 3410EXPORT_SYMBOL(kernel_listen);
ac5a488e 3411
8a3c245c
PT
3412/**
3413 * kernel_accept - accept a connection (kernel space)
3414 * @sock: listening socket
3415 * @newsock: new connected socket
3416 * @flags: flags
3417 *
3418 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3419 * If it fails, @newsock is guaranteed to be %NULL.
3420 * Returns 0 or an error.
3421 */
3422
ac5a488e
SS
3423int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3424{
3425 struct sock *sk = sock->sk;
3426 int err;
3427
3428 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3429 newsock);
3430 if (err < 0)
3431 goto done;
3432
cdfbabfb 3433 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3434 if (err < 0) {
3435 sock_release(*newsock);
fa8705b0 3436 *newsock = NULL;
ac5a488e
SS
3437 goto done;
3438 }
3439
3440 (*newsock)->ops = sock->ops;
1b08534e 3441 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3442
3443done:
3444 return err;
3445}
c6d409cf 3446EXPORT_SYMBOL(kernel_accept);
ac5a488e 3447
8a3c245c
PT
3448/**
3449 * kernel_connect - connect a socket (kernel space)
3450 * @sock: socket
3451 * @addr: address
3452 * @addrlen: address length
3453 * @flags: flags (O_NONBLOCK, ...)
3454 *
f1dcffcc 3455 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3456 * by default, and the only address from which datagrams are received.
3457 * For stream sockets, attempts to connect to @addr.
3458 * Returns 0 or an error code.
3459 */
3460
ac5a488e 3461int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3462 int flags)
ac5a488e
SS
3463{
3464 return sock->ops->connect(sock, addr, addrlen, flags);
3465}
c6d409cf 3466EXPORT_SYMBOL(kernel_connect);
ac5a488e 3467
8a3c245c
PT
3468/**
3469 * kernel_getsockname - get the address which the socket is bound (kernel space)
3470 * @sock: socket
3471 * @addr: address holder
3472 *
3473 * Fills the @addr pointer with the address which the socket is bound.
3474 * Returns 0 or an error code.
3475 */
3476
9b2c45d4 3477int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3478{
9b2c45d4 3479 return sock->ops->getname(sock, addr, 0);
ac5a488e 3480}
c6d409cf 3481EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3482
8a3c245c 3483/**
645f0897 3484 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3485 * @sock: socket
3486 * @addr: address holder
3487 *
3488 * Fills the @addr pointer with the address which the socket is connected.
3489 * Returns 0 or an error code.
3490 */
3491
9b2c45d4 3492int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3493{
9b2c45d4 3494 return sock->ops->getname(sock, addr, 1);
ac5a488e 3495}
c6d409cf 3496EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3497
8a3c245c
PT
3498/**
3499 * kernel_sendpage - send a &page through a socket (kernel space)
3500 * @sock: socket
3501 * @page: page
3502 * @offset: page offset
3503 * @size: total size in bytes
3504 * @flags: flags (MSG_DONTWAIT, ...)
3505 *
3506 * Returns the total amount sent in bytes or an error.
3507 */
3508
ac5a488e
SS
3509int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3510 size_t size, int flags)
3511{
7b62d31d
CL
3512 if (sock->ops->sendpage) {
3513 /* Warn in case the improper page to zero-copy send */
3514 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3515 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3516 }
ac5a488e
SS
3517 return sock_no_sendpage(sock, page, offset, size, flags);
3518}
c6d409cf 3519EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3520
8a3c245c
PT
3521/**
3522 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3523 * @sk: sock
3524 * @page: page
3525 * @offset: page offset
3526 * @size: total size in bytes
3527 * @flags: flags (MSG_DONTWAIT, ...)
3528 *
3529 * Returns the total amount sent in bytes or an error.
3530 * Caller must hold @sk.
3531 */
3532
306b13eb
TH
3533int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3534 size_t size, int flags)
3535{
3536 struct socket *sock = sk->sk_socket;
3537
3538 if (sock->ops->sendpage_locked)
3539 return sock->ops->sendpage_locked(sk, page, offset, size,
3540 flags);
3541
3542 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3543}
3544EXPORT_SYMBOL(kernel_sendpage_locked);
3545
8a3c245c 3546/**
645f0897 3547 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3548 * @sock: socket
3549 * @how: connection part
3550 *
3551 * Returns 0 or an error.
3552 */
3553
91cf45f0
TM
3554int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3555{
3556 return sock->ops->shutdown(sock, how);
3557}
91cf45f0 3558EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3559
8a3c245c
PT
3560/**
3561 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3562 * @sk: socket
3563 *
3564 * This routine returns the IP overhead imposed by a socket i.e.
3565 * the length of the underlying IP header, depending on whether
3566 * this is an IPv4 or IPv6 socket and the length from IP options turned
3567 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3568 */
8a3c245c 3569
113c3075
P
3570u32 kernel_sock_ip_overhead(struct sock *sk)
3571{
3572 struct inet_sock *inet;
3573 struct ip_options_rcu *opt;
3574 u32 overhead = 0;
113c3075
P
3575#if IS_ENABLED(CONFIG_IPV6)
3576 struct ipv6_pinfo *np;
3577 struct ipv6_txoptions *optv6 = NULL;
3578#endif /* IS_ENABLED(CONFIG_IPV6) */
3579
3580 if (!sk)
3581 return overhead;
3582
113c3075
P
3583 switch (sk->sk_family) {
3584 case AF_INET:
3585 inet = inet_sk(sk);
3586 overhead += sizeof(struct iphdr);
3587 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3588 sock_owned_by_user(sk));
113c3075
P
3589 if (opt)
3590 overhead += opt->opt.optlen;
3591 return overhead;
3592#if IS_ENABLED(CONFIG_IPV6)
3593 case AF_INET6:
3594 np = inet6_sk(sk);
3595 overhead += sizeof(struct ipv6hdr);
3596 if (np)
3597 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3598 sock_owned_by_user(sk));
113c3075
P
3599 if (optv6)
3600 overhead += (optv6->opt_flen + optv6->opt_nflen);
3601 return overhead;
3602#endif /* IS_ENABLED(CONFIG_IPV6) */
3603 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3604 return overhead;
3605 }
3606}
3607EXPORT_SYMBOL(kernel_sock_ip_overhead);