]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
net: socket: return changed ifreq from SIOCDEVPRIVATE
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET An implementation of the SOCKET network access protocol.
4 *
5 * Version: @(#)socket.c 1.1.93 18/02/95
6 *
7 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 8 * Ross Biro
1da177e4
LT
9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 *
11 * Fixes:
12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
13 * shutdown()
14 * Alan Cox : verify_area() fixes
15 * Alan Cox : Removed DDI
16 * Jonathan Kamens : SOCK_DGRAM reconnect bug
17 * Alan Cox : Moved a load of checks to the very
18 * top level.
19 * Alan Cox : Move address structures to/from user
20 * mode above the protocol layers.
21 * Rob Janssen : Allow 0 length sends.
22 * Alan Cox : Asynchronous I/O support (cribbed from the
23 * tty drivers).
24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
25 * Jeff Uphoff : Made max number of sockets command-line
26 * configurable.
27 * Matti Aarnio : Made the number of sockets dynamic,
28 * to be allocated when needed, and mr.
29 * Uphoff's max is used as max to be
30 * allowed to allocate.
31 * Linus : Argh. removed all the socket allocation
32 * altogether: it's in the inode now.
33 * Alan Cox : Made sock_alloc()/sock_release() public
34 * for NetROM and future kernel nfsd type
35 * stuff.
36 * Alan Cox : sendmsg/recvmsg basics.
37 * Tom Dyas : Export net symbols.
38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
39 * Alan Cox : Added thread locking to sys_* calls
40 * for sockets. May have errors at the
41 * moment.
42 * Kevin Buhr : Fixed the dumb errors in the above.
43 * Andi Kleen : Some small cleanups, optimizations,
44 * and fixed a copy_from_user() bug.
45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 46 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
47 * protocol-independent
48 *
1da177e4 49 * This module is effectively the top level interface to the BSD socket
89bddce5 50 * paradigm.
1da177e4
LT
51 *
52 * Based upon Swansea University Computer Society NET3.039
53 */
54
cc69837f 55#include <linux/ethtool.h>
1da177e4 56#include <linux/mm.h>
1da177e4
LT
57#include <linux/socket.h>
58#include <linux/file.h>
59#include <linux/net.h>
60#include <linux/interrupt.h>
aaca0bdc 61#include <linux/thread_info.h>
55737fda 62#include <linux/rcupdate.h>
1da177e4
LT
63#include <linux/netdevice.h>
64#include <linux/proc_fs.h>
65#include <linux/seq_file.h>
4a3e2f71 66#include <linux/mutex.h>
1da177e4 67#include <linux/if_bridge.h>
20380731 68#include <linux/if_vlan.h>
408eccce 69#include <linux/ptp_classify.h>
1da177e4
LT
70#include <linux/init.h>
71#include <linux/poll.h>
72#include <linux/cache.h>
73#include <linux/module.h>
74#include <linux/highmem.h>
1da177e4 75#include <linux/mount.h>
fba9be49 76#include <linux/pseudo_fs.h>
1da177e4
LT
77#include <linux/security.h>
78#include <linux/syscalls.h>
79#include <linux/compat.h>
80#include <linux/kmod.h>
3ec3b2fb 81#include <linux/audit.h>
d86b5e0e 82#include <linux/wireless.h>
1b8d7ae4 83#include <linux/nsproxy.h>
1fd7317d 84#include <linux/magic.h>
5a0e3ad6 85#include <linux/slab.h>
600e1779 86#include <linux/xattr.h>
c8e8cd57 87#include <linux/nospec.h>
8c3c447b 88#include <linux/indirect_call_wrapper.h>
1da177e4 89
7c0f6ba6 90#include <linux/uaccess.h>
1da177e4
LT
91#include <asm/unistd.h>
92
93#include <net/compat.h>
87de87d5 94#include <net/wext.h>
f8451725 95#include <net/cls_cgroup.h>
1da177e4
LT
96
97#include <net/sock.h>
98#include <linux/netfilter.h>
99
6b96018b
AB
100#include <linux/if_tun.h>
101#include <linux/ipv6_route.h>
102#include <linux/route.h>
c7dc504e 103#include <linux/termios.h>
6b96018b 104#include <linux/sockios.h>
076bb0c8 105#include <net/busy_poll.h>
f24b9be5 106#include <linux/errqueue.h>
d7c08826 107#include <linux/ptp_clock_kernel.h>
06021292 108
e0d1095a 109#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
110unsigned int sysctl_net_busy_read __read_mostly;
111unsigned int sysctl_net_busy_poll __read_mostly;
06021292 112#endif
6b96018b 113
8ae5e030
AV
114static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
115static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 116static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
117
118static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
119static __poll_t sock_poll(struct file *file,
120 struct poll_table_struct *wait);
89bddce5 121static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
122#ifdef CONFIG_COMPAT
123static long compat_sock_ioctl(struct file *file,
89bddce5 124 unsigned int cmd, unsigned long arg);
89bbfc95 125#endif
1da177e4 126static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
127static ssize_t sock_sendpage(struct file *file, struct page *page,
128 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 129static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 130 struct pipe_inode_info *pipe, size_t len,
9c55e01c 131 unsigned int flags);
542d3065
AB
132
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo() handler for socket files.  The socket is taken from
 * file->private_data and the request is forwarded to the protocol's
 * show_fdinfo operation; protocols without one print nothing.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1da177e4 144
1da177e4
LT
145/*
146 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
147 * in the operation structures but are done directly via the socketcall() multiplexor.
148 */
149
da7071d7 150static const struct file_operations socket_file_ops = {
1da177e4
LT
151 .owner = THIS_MODULE,
152 .llseek = no_llseek,
8ae5e030
AV
153 .read_iter = sock_read_iter,
154 .write_iter = sock_write_iter,
1da177e4
LT
155 .poll = sock_poll,
156 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
157#ifdef CONFIG_COMPAT
158 .compat_ioctl = compat_sock_ioctl,
159#endif
1da177e4 160 .mmap = sock_mmap,
1da177e4
LT
161 .release = sock_close,
162 .fasync = sock_fasync,
5274f052
JA
163 .sendpage = sock_sendpage,
164 .splice_write = generic_splice_sendpage,
9c55e01c 165 .splice_read = sock_splice_read,
b4653342 166 .show_fdinfo = sock_show_fdinfo,
1da177e4
LT
167};
168
fe0bdbde
YD
169static const char * const pf_family_names[] = {
170 [PF_UNSPEC] = "PF_UNSPEC",
171 [PF_UNIX] = "PF_UNIX/PF_LOCAL",
172 [PF_INET] = "PF_INET",
173 [PF_AX25] = "PF_AX25",
174 [PF_IPX] = "PF_IPX",
175 [PF_APPLETALK] = "PF_APPLETALK",
176 [PF_NETROM] = "PF_NETROM",
177 [PF_BRIDGE] = "PF_BRIDGE",
178 [PF_ATMPVC] = "PF_ATMPVC",
179 [PF_X25] = "PF_X25",
180 [PF_INET6] = "PF_INET6",
181 [PF_ROSE] = "PF_ROSE",
182 [PF_DECnet] = "PF_DECnet",
183 [PF_NETBEUI] = "PF_NETBEUI",
184 [PF_SECURITY] = "PF_SECURITY",
185 [PF_KEY] = "PF_KEY",
186 [PF_NETLINK] = "PF_NETLINK/PF_ROUTE",
187 [PF_PACKET] = "PF_PACKET",
188 [PF_ASH] = "PF_ASH",
189 [PF_ECONET] = "PF_ECONET",
190 [PF_ATMSVC] = "PF_ATMSVC",
191 [PF_RDS] = "PF_RDS",
192 [PF_SNA] = "PF_SNA",
193 [PF_IRDA] = "PF_IRDA",
194 [PF_PPPOX] = "PF_PPPOX",
195 [PF_WANPIPE] = "PF_WANPIPE",
196 [PF_LLC] = "PF_LLC",
197 [PF_IB] = "PF_IB",
198 [PF_MPLS] = "PF_MPLS",
199 [PF_CAN] = "PF_CAN",
200 [PF_TIPC] = "PF_TIPC",
201 [PF_BLUETOOTH] = "PF_BLUETOOTH",
202 [PF_IUCV] = "PF_IUCV",
203 [PF_RXRPC] = "PF_RXRPC",
204 [PF_ISDN] = "PF_ISDN",
205 [PF_PHONET] = "PF_PHONET",
206 [PF_IEEE802154] = "PF_IEEE802154",
207 [PF_CAIF] = "PF_CAIF",
208 [PF_ALG] = "PF_ALG",
209 [PF_NFC] = "PF_NFC",
210 [PF_VSOCK] = "PF_VSOCK",
211 [PF_KCM] = "PF_KCM",
212 [PF_QIPCRTR] = "PF_QIPCRTR",
213 [PF_SMC] = "PF_SMC",
214 [PF_XDP] = "PF_XDP",
215};
216
1da177e4
LT
217/*
218 * The protocol list. Each protocol is registered in here.
219 */
220
1da177e4 221static DEFINE_SPINLOCK(net_family_lock);
190683a9 222static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 223
1da177e4 224/*
89bddce5
SH
225 * Support routines.
226 * Move socket addresses back and forth across the kernel/user
227 * divide and look after the messy bits.
1da177e4
LT
228 */
229
1da177e4
LT
230/**
231 * move_addr_to_kernel - copy a socket address into kernel space
232 * @uaddr: Address in user space
233 * @kaddr: Address in kernel space
234 * @ulen: Length in user space
235 *
236 * The address is copied into kernel space. If the provided address is
237 * too long an error code of -EINVAL is returned. If the copy gives
238 * invalid addresses -EFAULT is returned. On a success 0 is returned.
239 */
240
43db362d 241int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 242{
230b1839 243 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 244 return -EINVAL;
89bddce5 245 if (ulen == 0)
1da177e4 246 return 0;
89bddce5 247 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 248 return -EFAULT;
3ec3b2fb 249 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
250}
251
252/**
253 * move_addr_to_user - copy an address to user space
254 * @kaddr: kernel space address
255 * @klen: length of address in kernel
256 * @uaddr: user space address
257 * @ulen: pointer to user length field
258 *
259 * The value pointed to by ulen on entry is the buffer length available.
260 * This is overwritten with the buffer space used. -EINVAL is returned
261 * if an overlong buffer is specified or a negative buffer size. -EFAULT
262 * is returned if either the buffer or the length field are not
263 * accessible.
264 * After copying the data up to the limit the user specifies, the true
265 * length of the data is written over the length limit the user
266 * specified. Zero is returned for a success.
267 */
89bddce5 268
43db362d 269static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 270 void __user *uaddr, int __user *ulen)
1da177e4
LT
271{
272 int err;
273 int len;
274
68c6beb3 275 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
276 err = get_user(len, ulen);
277 if (err)
1da177e4 278 return err;
89bddce5
SH
279 if (len > klen)
280 len = klen;
68c6beb3 281 if (len < 0)
1da177e4 282 return -EINVAL;
89bddce5 283 if (len) {
d6fe3945
SG
284 if (audit_sockaddr(klen, kaddr))
285 return -ENOMEM;
89bddce5 286 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
287 return -EFAULT;
288 }
289 /*
89bddce5
SH
290 * "fromlen shall refer to the value before truncation.."
291 * 1003.1g
1da177e4
LT
292 */
293 return __put_user(klen, ulen);
294}
295
08009a76 296static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
297
298static struct inode *sock_alloc_inode(struct super_block *sb)
299{
300 struct socket_alloc *ei;
89bddce5 301
e94b1766 302 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
303 if (!ei)
304 return NULL;
333f7909
AV
305 init_waitqueue_head(&ei->socket.wq.wait);
306 ei->socket.wq.fasync_list = NULL;
307 ei->socket.wq.flags = 0;
89bddce5 308
1da177e4
LT
309 ei->socket.state = SS_UNCONNECTED;
310 ei->socket.flags = 0;
311 ei->socket.ops = NULL;
312 ei->socket.sk = NULL;
313 ei->socket.file = NULL;
1da177e4
LT
314
315 return &ei->vfs_inode;
316}
317
6d7855c5 318static void sock_free_inode(struct inode *inode)
1da177e4 319{
43815482
ED
320 struct socket_alloc *ei;
321
322 ei = container_of(inode, struct socket_alloc, vfs_inode);
43815482 323 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
324}
325
51cc5068 326static void init_once(void *foo)
1da177e4 327{
89bddce5 328 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 329
a35afb83 330 inode_init_once(&ei->vfs_inode);
1da177e4 331}
89bddce5 332
1e911632 333static void init_inodecache(void)
1da177e4
LT
334{
335 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
336 sizeof(struct socket_alloc),
337 0,
338 (SLAB_HWCACHE_ALIGN |
339 SLAB_RECLAIM_ACCOUNT |
5d097056 340 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 341 init_once);
1e911632 342 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
343}
344
b87221de 345static const struct super_operations sockfs_ops = {
c6d409cf 346 .alloc_inode = sock_alloc_inode,
6d7855c5 347 .free_inode = sock_free_inode,
c6d409cf 348 .statfs = simple_statfs,
1da177e4
LT
349};
350
c23fbb6b
ED
351/*
352 * sockfs_dname() is called from d_path().
353 */
354static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
355{
356 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 357 d_inode(dentry)->i_ino);
c23fbb6b
ED
358}
359
3ba13d17 360static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 361 .d_dname = sockfs_dname,
1da177e4
LT
362};
363
bba0bd31
AG
364static int sockfs_xattr_get(const struct xattr_handler *handler,
365 struct dentry *dentry, struct inode *inode,
366 const char *suffix, void *value, size_t size)
367{
368 if (value) {
369 if (dentry->d_name.len + 1 > size)
370 return -ERANGE;
371 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
372 }
373 return dentry->d_name.len + 1;
374}
375
376#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
377#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
378#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
379
380static const struct xattr_handler sockfs_xattr_handler = {
381 .name = XATTR_NAME_SOCKPROTONAME,
382 .get = sockfs_xattr_get,
383};
384
4a590153 385static int sockfs_security_xattr_set(const struct xattr_handler *handler,
e65ce2a5 386 struct user_namespace *mnt_userns,
4a590153
AG
387 struct dentry *dentry, struct inode *inode,
388 const char *suffix, const void *value,
389 size_t size, int flags)
390{
391 /* Handled by LSM. */
392 return -EAGAIN;
393}
394
395static const struct xattr_handler sockfs_security_xattr_handler = {
396 .prefix = XATTR_SECURITY_PREFIX,
397 .set = sockfs_security_xattr_set,
398};
399
bba0bd31
AG
400static const struct xattr_handler *sockfs_xattr_handlers[] = {
401 &sockfs_xattr_handler,
4a590153 402 &sockfs_security_xattr_handler,
bba0bd31
AG
403 NULL
404};
405
fba9be49 406static int sockfs_init_fs_context(struct fs_context *fc)
c74a1cbb 407{
fba9be49
DH
408 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
409 if (!ctx)
410 return -ENOMEM;
411 ctx->ops = &sockfs_ops;
412 ctx->dops = &sockfs_dentry_operations;
413 ctx->xattr = sockfs_xattr_handlers;
414 return 0;
c74a1cbb
AV
415}
416
417static struct vfsmount *sock_mnt __read_mostly;
418
419static struct file_system_type sock_fs_type = {
420 .name = "sockfs",
fba9be49 421 .init_fs_context = sockfs_init_fs_context,
c74a1cbb
AV
422 .kill_sb = kill_anon_super,
423};
424
1da177e4
LT
425/*
426 * Obtains the first available file descriptor and sets it up for use.
427 *
39d8c1b6
DM
428 * These functions create file structures and map them to fd space
429 * of the current process. On success it returns file descriptor
1da177e4
LT
430 * and file struct implicitly stored in sock->file.
431 * Note that another thread may close file descriptor before we return
432 * from this function. We use the fact that now we do not refer
433 * to socket after mapping. If one day we will need it, this
434 * function will increment ref. count on file by 1.
435 *
436 * In any case returned fd MAY BE not valid!
437 * This race condition is unavoidable
438 * with shared fd spaces, we cannot solve it inside kernel,
439 * but we take care of internal coherence yet.
440 */
441
8a3c245c
PT
442/**
443 * sock_alloc_file - Bind a &socket to a &file
444 * @sock: socket
445 * @flags: file status flags
446 * @dname: protocol name
447 *
448 * Returns the &file bound with @sock, implicitly storing it
449 * in sock->file. If dname is %NULL, sets to "".
450 * On failure the return is a ERR pointer (see linux/err.h).
451 * This function uses GFP_KERNEL internally.
452 */
453
aab174f0 454struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 455{
7cbe66b6 456 struct file *file;
1da177e4 457
d93aa9d8
AV
458 if (!dname)
459 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 460
d93aa9d8
AV
461 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
462 O_RDWR | (flags & O_NONBLOCK),
463 &socket_file_ops);
b5ffe634 464 if (IS_ERR(file)) {
8e1611e2 465 sock_release(sock);
39b65252 466 return file;
cc3808f8
AV
467 }
468
469 sock->file = file;
39d8c1b6 470 file->private_data = sock;
d8e464ec 471 stream_open(SOCK_INODE(sock), file);
28407630 472 return file;
39d8c1b6 473}
56b31d1c 474EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 475
56b31d1c 476static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
477{
478 struct file *newfile;
28407630 479 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
480 if (unlikely(fd < 0)) {
481 sock_release(sock);
28407630 482 return fd;
ce4bb04c 483 }
39d8c1b6 484
aab174f0 485 newfile = sock_alloc_file(sock, flags, NULL);
4546e44c 486 if (!IS_ERR(newfile)) {
39d8c1b6 487 fd_install(fd, newfile);
28407630
AV
488 return fd;
489 }
7cbe66b6 490
28407630
AV
491 put_unused_fd(fd);
492 return PTR_ERR(newfile);
1da177e4
LT
493}
494
8a3c245c
PT
495/**
496 * sock_from_file - Return the &socket bounded to @file.
497 * @file: file
8a3c245c 498 *
dba4a925 499 * On failure returns %NULL.
8a3c245c
PT
500 */
501
dba4a925 502struct socket *sock_from_file(struct file *file)
6cb153ca 503{
6cb153ca
BL
504 if (file->f_op == &socket_file_ops)
505 return file->private_data; /* set in sock_map_fd */
506
23bb80d2 507 return NULL;
6cb153ca 508}
406a3c63 509EXPORT_SYMBOL(sock_from_file);
6cb153ca 510
1da177e4 511/**
c6d409cf 512 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
513 * @fd: file handle
514 * @err: pointer to an error code return
515 *
516 * The file handle passed in is locked and the socket it is bound
241c4667 517 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
518 * with a negative errno code and NULL is returned. The function checks
519 * for both invalid handles and passing a handle which is not a socket.
520 *
521 * On a success the socket object pointer is returned.
522 */
523
524struct socket *sockfd_lookup(int fd, int *err)
525{
526 struct file *file;
1da177e4
LT
527 struct socket *sock;
528
89bddce5
SH
529 file = fget(fd);
530 if (!file) {
1da177e4
LT
531 *err = -EBADF;
532 return NULL;
533 }
89bddce5 534
dba4a925
FR
535 sock = sock_from_file(file);
536 if (!sock) {
537 *err = -ENOTSOCK;
1da177e4 538 fput(file);
dba4a925 539 }
6cb153ca
BL
540 return sock;
541}
c6d409cf 542EXPORT_SYMBOL(sockfd_lookup);
1da177e4 543
6cb153ca
BL
544static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
545{
00e188ef 546 struct fd f = fdget(fd);
6cb153ca
BL
547 struct socket *sock;
548
3672558c 549 *err = -EBADF;
00e188ef 550 if (f.file) {
dba4a925 551 sock = sock_from_file(f.file);
00e188ef 552 if (likely(sock)) {
ce787a5a 553 *fput_needed = f.flags & FDPUT_FPUT;
6cb153ca 554 return sock;
00e188ef 555 }
dba4a925 556 *err = -ENOTSOCK;
00e188ef 557 fdput(f);
1da177e4 558 }
6cb153ca 559 return NULL;
1da177e4
LT
560}
561
600e1779
MY
562static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
563 size_t size)
564{
565 ssize_t len;
566 ssize_t used = 0;
567
c5ef6035 568 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
569 if (len < 0)
570 return len;
571 used += len;
572 if (buffer) {
573 if (size < used)
574 return -ERANGE;
575 buffer += len;
576 }
577
578 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
579 used += len;
580 if (buffer) {
581 if (size < used)
582 return -ERANGE;
583 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
584 buffer += len;
585 }
586
587 return used;
588}
589
549c7297
CB
590static int sockfs_setattr(struct user_namespace *mnt_userns,
591 struct dentry *dentry, struct iattr *iattr)
86741ec2 592{
549c7297 593 int err = simple_setattr(&init_user_ns, dentry, iattr);
86741ec2 594
e1a3a60a 595 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
596 struct socket *sock = SOCKET_I(d_inode(dentry));
597
6d8c50dc
CW
598 if (sock->sk)
599 sock->sk->sk_uid = iattr->ia_uid;
600 else
601 err = -ENOENT;
86741ec2
LC
602 }
603
604 return err;
605}
606
600e1779 607static const struct inode_operations sockfs_inode_ops = {
600e1779 608 .listxattr = sockfs_listxattr,
86741ec2 609 .setattr = sockfs_setattr,
600e1779
MY
610};
611
1da177e4 612/**
8a3c245c 613 * sock_alloc - allocate a socket
89bddce5 614 *
1da177e4
LT
615 * Allocate a new inode and socket object. The two are bound together
616 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 617 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
618 */
619
f4a00aac 620struct socket *sock_alloc(void)
1da177e4 621{
89bddce5
SH
622 struct inode *inode;
623 struct socket *sock;
1da177e4 624
a209dfc7 625 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
626 if (!inode)
627 return NULL;
628
629 sock = SOCKET_I(inode);
630
85fe4025 631 inode->i_ino = get_next_ino();
89bddce5 632 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
633 inode->i_uid = current_fsuid();
634 inode->i_gid = current_fsgid();
600e1779 635 inode->i_op = &sockfs_inode_ops;
1da177e4 636
1da177e4
LT
637 return sock;
638}
f4a00aac 639EXPORT_SYMBOL(sock_alloc);
1da177e4 640
6d8c50dc 641static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
642{
643 if (sock->ops) {
644 struct module *owner = sock->ops->owner;
645
6d8c50dc
CW
646 if (inode)
647 inode_lock(inode);
1da177e4 648 sock->ops->release(sock);
ff7b11aa 649 sock->sk = NULL;
6d8c50dc
CW
650 if (inode)
651 inode_unlock(inode);
1da177e4
LT
652 sock->ops = NULL;
653 module_put(owner);
654 }
655
333f7909 656 if (sock->wq.fasync_list)
3410f22e 657 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 658
1da177e4
LT
659 if (!sock->file) {
660 iput(SOCK_INODE(sock));
661 return;
662 }
89bddce5 663 sock->file = NULL;
1da177e4 664}
6d8c50dc 665
9a8ad9ac
AL
666/**
667 * sock_release - close a socket
668 * @sock: socket to close
669 *
670 * The socket is released from the protocol stack if it has a release
671 * callback, and the inode is then released if the socket is bound to
672 * an inode not a file.
673 */
6d8c50dc
CW
674void sock_release(struct socket *sock)
675{
676 __sock_release(sock, NULL);
677}
c6d409cf 678EXPORT_SYMBOL(sock_release);
1da177e4 679
c14ac945 680void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 681{
140c55d4
ED
682 u8 flags = *tx_flags;
683
c14ac945 684 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
685 flags |= SKBTX_HW_TSTAMP;
686
c14ac945 687 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
688 flags |= SKBTX_SW_TSTAMP;
689
c14ac945 690 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
691 flags |= SKBTX_SCHED_TSTAMP;
692
140c55d4 693 *tx_flags = flags;
20d49473 694}
67cc0d40 695EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 696
8c3c447b
PA
697INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
698 size_t));
a648a592
PA
699INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
700 size_t));
d8725c86 701static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 702{
a648a592
PA
703 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
704 inet_sendmsg, sock, msg,
705 msg_data_left(msg));
d8725c86
AV
706 BUG_ON(ret == -EIOCBQUEUED);
707 return ret;
1da177e4
LT
708}
709
85806af0
RD
/**
 *	sock_sendmsg - send a message through @sock
 *	@sock: socket
 *	@msg: message to send
 *
 *	Sends @msg through @sock, passing through LSM.  If the security
 *	hook rejects the send, its error is returned and nothing is sent.
 *	Returns the number of bytes sent, or an error code.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 725EXPORT_SYMBOL(sock_sendmsg);
1da177e4 726
8a3c245c
PT
727/**
728 * kernel_sendmsg - send a message through @sock (kernel-space)
729 * @sock: socket
730 * @msg: message header
731 * @vec: kernel vec
732 * @num: vec array length
733 * @size: total message data size
734 *
735 * Builds the message data with @vec and sends it through @sock.
736 * Returns the number of bytes sent, or an error code.
737 */
738
1da177e4
LT
739int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
740 struct kvec *vec, size_t num, size_t size)
741{
aa563d7b 742 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 743 return sock_sendmsg(sock, msg);
1da177e4 744}
c6d409cf 745EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 746
8a3c245c
PT
747/**
748 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
749 * @sk: sock
750 * @msg: message header
751 * @vec: output s/g array
752 * @num: output s/g array length
753 * @size: total message data size
754 *
755 * Builds the message data with @vec and sends it through @sock.
756 * Returns the number of bytes sent, or an error code.
757 * Caller must hold @sk.
758 */
759
306b13eb
TH
760int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
761 struct kvec *vec, size_t num, size_t size)
762{
763 struct socket *sock = sk->sk_socket;
764
765 if (!sock->ops->sendmsg_locked)
db5980d8 766 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 767
aa563d7b 768 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
769
770 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
771}
772EXPORT_SYMBOL(kernel_sendmsg_locked);
773
8605330a
SHY
774static bool skb_is_err_queue(const struct sk_buff *skb)
775{
776 /* pkt_type of skbs enqueued on the error queue are set to
777 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
778 * in recvmsg, since skbs received on a local socket will never
779 * have a pkt_type of PACKET_OUTGOING.
780 */
781 return skb->pkt_type == PACKET_OUTGOING;
782}
783
b50a5c70
ML
784/* On transmit, software and hardware timestamps are returned independently.
785 * As the two skb clones share the hardware timestamp, which may be updated
786 * before the software timestamp is received, a hardware TX timestamp may be
787 * returned only if there is no software TX timestamp. Ignore false software
788 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 789 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
790 * hardware timestamp.
791 */
792static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
793{
794 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
795}
796
aad9c8c4
ML
797static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
798{
799 struct scm_ts_pktinfo ts_pktinfo;
800 struct net_device *orig_dev;
801
802 if (!skb_mac_header_was_set(skb))
803 return;
804
805 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
806
807 rcu_read_lock();
808 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
809 if (orig_dev)
810 ts_pktinfo.if_index = orig_dev->ifindex;
811 rcu_read_unlock();
812
813 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
814 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
815 sizeof(ts_pktinfo), &ts_pktinfo);
816}
817
92f37fd2
ED
818/*
819 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
820 */
821void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
822 struct sk_buff *skb)
823{
20d49473 824 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 825 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
826 struct scm_timestamping_internal tss;
827
b50a5c70 828 int empty = 1, false_tstamp = 0;
20d49473
PO
829 struct skb_shared_hwtstamps *shhwtstamps =
830 skb_hwtstamps(skb);
831
832 /* Race occurred between timestamp enabling and packet
833 receiving. Fill in the current time for now. */
b50a5c70 834 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 835 __net_timestamp(skb);
b50a5c70
ML
836 false_tstamp = 1;
837 }
20d49473
PO
838
839 if (need_software_tstamp) {
840 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
841 if (new_tstamp) {
842 struct __kernel_sock_timeval tv;
843
844 skb_get_new_timestamp(skb, &tv);
845 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
846 sizeof(tv), &tv);
847 } else {
848 struct __kernel_old_timeval tv;
849
850 skb_get_timestamp(skb, &tv);
851 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
852 sizeof(tv), &tv);
853 }
20d49473 854 } else {
887feae3
DD
855 if (new_tstamp) {
856 struct __kernel_timespec ts;
857
858 skb_get_new_timestampns(skb, &ts);
859 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
860 sizeof(ts), &ts);
861 } else {
df1b4ba9 862 struct __kernel_old_timespec ts;
887feae3
DD
863
864 skb_get_timestampns(skb, &ts);
865 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
866 sizeof(ts), &ts);
867 }
20d49473
PO
868 }
869 }
870
f24b9be5 871 memset(&tss, 0, sizeof(tss));
c199105d 872 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 873 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 874 empty = 0;
4d276eb6 875 if (shhwtstamps &&
b9f40e21 876 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
d7c08826
YL
877 !skb_is_swtx_tstamp(skb, false_tstamp)) {
878 if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
879 ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
880
881 if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
882 tss.ts + 2)) {
883 empty = 0;
884
885 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
886 !skb_is_err_queue(skb))
887 put_ts_pktinfo(msg, skb);
888 }
aad9c8c4 889 }
1c885808 890 if (!empty) {
9718475e
DD
891 if (sock_flag(sk, SOCK_TSTAMP_NEW))
892 put_cmsg_scm_timestamping64(msg, &tss);
893 else
894 put_cmsg_scm_timestamping(msg, &tss);
1c885808 895
8605330a 896 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 897 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
898 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
899 skb->len, skb->data);
900 }
92f37fd2 901}
7c81fd8b
ACM
902EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
903
6e3e939f
JB
904void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
905 struct sk_buff *skb)
906{
907 int ack;
908
909 if (!sock_flag(sk, SOCK_WIFI_STATUS))
910 return;
911 if (!skb->wifi_acked_valid)
912 return;
913
914 ack = skb->wifi_acked;
915
916 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
917}
918EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
919
11165f14 920static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
921 struct sk_buff *skb)
3b885787 922{
744d5a3e 923 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 924 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 925 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
926}
927
/*
 * Add both the timestamp and the receive-queue drop count control
 * messages for @skb to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
767dd033 934EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 935
8c3c447b 936INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
a648a592
PA
937 size_t, int));
938INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
939 size_t, int));
1b784140 940static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 941 int flags)
1da177e4 942{
a648a592
PA
943 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
944 inet_recvmsg, sock, msg, msg_data_left(msg),
945 flags);
1da177e4
LT
946}
947
85806af0
RD
/**
 *	sock_recvmsg - receive a message from @sock
 *	@sock: socket
 *	@msg: message to receive
 *	@flags: message flags
 *
 *	Receives @msg from @sock, passing through LSM.  If the security
 *	hook rejects the receive, its error is returned and the protocol
 *	is not called.  Returns the total number of bytes received, or an
 *	error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 963EXPORT_SYMBOL(sock_recvmsg);
1da177e4 964
c1249c0a 965/**
8a3c245c
PT
966 * kernel_recvmsg - Receive a message from a socket (kernel space)
967 * @sock: The socket to receive the message from
968 * @msg: Received message
969 * @vec: Input s/g array for message data
970 * @num: Size of input s/g array
971 * @size: Number of bytes to read
972 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 973 *
8a3c245c
PT
974 * On return the msg structure contains the scatter/gather array passed in the
975 * vec argument. The array is modified so that it consists of the unfilled
976 * portion of the original array.
c1249c0a 977 *
8a3c245c 978 * The returned value is the total number of bytes received, or an error.
c1249c0a 979 */
8a3c245c 980
89bddce5
SH
981int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
982 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4 983{
1f466e1f 984 msg->msg_control_is_user = false;
aa563d7b 985 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1f466e1f 986 return sock_recvmsg(sock, msg, flags);
1da177e4 987}
c6d409cf 988EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 989
ce1d4d3e
CH
990static ssize_t sock_sendpage(struct file *file, struct page *page,
991 int offset, size_t size, loff_t *ppos, int more)
1da177e4 992{
1da177e4
LT
993 struct socket *sock;
994 int flags;
995
ce1d4d3e
CH
996 sock = file->private_data;
997
35f9c09f
ED
998 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
999 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
1000 flags |= more;
ce1d4d3e 1001
e6949583 1002 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 1003}
1da177e4 1004
9c55e01c 1005static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 1006 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
1007 unsigned int flags)
1008{
1009 struct socket *sock = file->private_data;
1010
997b37da 1011 if (unlikely(!sock->ops->splice_read))
95506588 1012 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 1013
9c55e01c
JA
1014 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
1015}
1016
8ae5e030 1017static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 1018{
6d652330
AV
1019 struct file *file = iocb->ki_filp;
1020 struct socket *sock = file->private_data;
0345f931 1021 struct msghdr msg = {.msg_iter = *to,
1022 .msg_iocb = iocb};
8ae5e030 1023 ssize_t res;
ce1d4d3e 1024
ebfcd895 1025 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1026 msg.msg_flags = MSG_DONTWAIT;
1027
1028 if (iocb->ki_pos != 0)
1da177e4 1029 return -ESPIPE;
027445c3 1030
66ee59af 1031 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
1032 return 0;
1033
2da62906 1034 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
1035 *to = msg.msg_iter;
1036 return res;
1da177e4
LT
1037}
1038
8ae5e030 1039static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 1040{
6d652330
AV
1041 struct file *file = iocb->ki_filp;
1042 struct socket *sock = file->private_data;
0345f931 1043 struct msghdr msg = {.msg_iter = *from,
1044 .msg_iocb = iocb};
8ae5e030 1045 ssize_t res;
1da177e4 1046
8ae5e030 1047 if (iocb->ki_pos != 0)
ce1d4d3e 1048 return -ESPIPE;
027445c3 1049
ebfcd895 1050 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
1051 msg.msg_flags = MSG_DONTWAIT;
1052
6d652330
AV
1053 if (sock->type == SOCK_SEQPACKET)
1054 msg.msg_flags |= MSG_EOR;
1055
d8725c86 1056 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
1057 *from = msg.msg_iter;
1058 return res;
1da177e4
LT
1059}
1060
1da177e4
LT
1061/*
1062 * Atomic setting of ioctl hooks to avoid race
1063 * with module unload.
1064 */
1065
4a3e2f71 1066static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 1067static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1068
881d966b 1069void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1070{
4a3e2f71 1071 mutex_lock(&br_ioctl_mutex);
1da177e4 1072 br_ioctl_hook = hook;
4a3e2f71 1073 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1074}
1075EXPORT_SYMBOL(brioctl_set);
1076
4a3e2f71 1077static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 1078static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1079
881d966b 1080void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1081{
4a3e2f71 1082 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1083 vlan_ioctl_hook = hook;
4a3e2f71 1084 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1085}
1086EXPORT_SYMBOL(vlan_ioctl_set);
1087
6b96018b 1088static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1089 unsigned int cmd, unsigned long arg)
6b96018b 1090{
876f0bf9
AB
1091 struct ifreq ifr;
1092 bool need_copyout;
6b96018b
AB
1093 int err;
1094 void __user *argp = (void __user *)arg;
a554bf96 1095 void __user *data;
6b96018b
AB
1096
1097 err = sock->ops->ioctl(sock, cmd, arg);
1098
1099 /*
1100 * If this ioctl is unknown try to hand it down
1101 * to the NIC driver.
1102 */
36fd633e
AV
1103 if (err != -ENOIOCTLCMD)
1104 return err;
6b96018b 1105
a554bf96 1106 if (get_user_ifreq(&ifr, &data, argp))
876f0bf9 1107 return -EFAULT;
a554bf96 1108 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
876f0bf9 1109 if (!err && need_copyout)
a554bf96 1110 if (put_user_ifreq(&ifr, argp))
44c02a2c 1111 return -EFAULT;
876f0bf9 1112
6b96018b
AB
1113 return err;
1114}
1115
1da177e4
LT
1116/*
1117 * With an ioctl, arg may well be a user mode pointer, but we don't know
1118 * what to do with it - that's up to the protocol still.
1119 */
1120
1121static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1122{
1123 struct socket *sock;
881d966b 1124 struct sock *sk;
1da177e4
LT
1125 void __user *argp = (void __user *)arg;
1126 int pid, err;
881d966b 1127 struct net *net;
1da177e4 1128
b69aee04 1129 sock = file->private_data;
881d966b 1130 sk = sock->sk;
3b1e0a65 1131 net = sock_net(sk);
44c02a2c
AV
1132 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1133 struct ifreq ifr;
a554bf96 1134 void __user *data;
44c02a2c 1135 bool need_copyout;
a554bf96 1136 if (get_user_ifreq(&ifr, &data, argp))
44c02a2c 1137 return -EFAULT;
a554bf96 1138 err = dev_ioctl(net, cmd, &ifr, data, &need_copyout);
44c02a2c 1139 if (!err && need_copyout)
a554bf96 1140 if (put_user_ifreq(&ifr, argp))
44c02a2c 1141 return -EFAULT;
1da177e4 1142 } else
3d23e349 1143#ifdef CONFIG_WEXT_CORE
1da177e4 1144 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1145 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1146 } else
3d23e349 1147#endif
89bddce5 1148 switch (cmd) {
1da177e4
LT
1149 case FIOSETOWN:
1150 case SIOCSPGRP:
1151 err = -EFAULT;
1152 if (get_user(pid, (int __user *)argp))
1153 break;
393cc3f5 1154 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1155 break;
1156 case FIOGETOWN:
1157 case SIOCGPGRP:
609d7fa9 1158 err = put_user(f_getown(sock->file),
89bddce5 1159 (int __user *)argp);
1da177e4
LT
1160 break;
1161 case SIOCGIFBR:
1162 case SIOCSIFBR:
1163 case SIOCBRADDBR:
1164 case SIOCBRDELBR:
1165 err = -ENOPKG;
1166 if (!br_ioctl_hook)
1167 request_module("bridge");
1168
4a3e2f71 1169 mutex_lock(&br_ioctl_mutex);
89bddce5 1170 if (br_ioctl_hook)
881d966b 1171 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1172 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1173 break;
1174 case SIOCGIFVLAN:
1175 case SIOCSIFVLAN:
1176 err = -ENOPKG;
1177 if (!vlan_ioctl_hook)
1178 request_module("8021q");
1179
4a3e2f71 1180 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1181 if (vlan_ioctl_hook)
881d966b 1182 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1183 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1184 break;
c62cce2c
AV
1185 case SIOCGSKNS:
1186 err = -EPERM;
1187 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1188 break;
1189
1190 err = open_related_ns(&net->ns, get_net_ns);
1191 break;
0768e170
AB
1192 case SIOCGSTAMP_OLD:
1193 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1194 if (!sock->ops->gettstamp) {
1195 err = -ENOIOCTLCMD;
1196 break;
1197 }
1198 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1199 cmd == SIOCGSTAMP_OLD,
1200 !IS_ENABLED(CONFIG_64BIT));
60747828 1201 break;
0768e170
AB
1202 case SIOCGSTAMP_NEW:
1203 case SIOCGSTAMPNS_NEW:
1204 if (!sock->ops->gettstamp) {
1205 err = -ENOIOCTLCMD;
1206 break;
1207 }
1208 err = sock->ops->gettstamp(sock, argp,
1209 cmd == SIOCGSTAMP_NEW,
1210 false);
c7cbdbf2 1211 break;
876f0bf9
AB
1212
1213 case SIOCGIFCONF:
1214 err = dev_ifconf(net, argp);
1215 break;
1216
1da177e4 1217 default:
63ff03ab 1218 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1219 break;
89bddce5 1220 }
1da177e4
LT
1221 return err;
1222}
1223
8a3c245c
PT
1224/**
1225 * sock_create_lite - creates a socket
1226 * @family: protocol family (AF_INET, ...)
1227 * @type: communication type (SOCK_STREAM, ...)
1228 * @protocol: protocol (0, ...)
1229 * @res: new socket
1230 *
1231 * Creates a new socket and assigns it to @res, passing through LSM.
1232 * The new socket initialization is not complete, see kernel_accept().
1233 * Returns 0 or an error. On failure @res is set to %NULL.
1234 * This function internally uses GFP_KERNEL.
1235 */
1236
1da177e4
LT
1237int sock_create_lite(int family, int type, int protocol, struct socket **res)
1238{
1239 int err;
1240 struct socket *sock = NULL;
89bddce5 1241
1da177e4
LT
1242 err = security_socket_create(family, type, protocol, 1);
1243 if (err)
1244 goto out;
1245
1246 sock = sock_alloc();
1247 if (!sock) {
1248 err = -ENOMEM;
1249 goto out;
1250 }
1251
1da177e4 1252 sock->type = type;
7420ed23
VY
1253 err = security_socket_post_create(sock, family, type, protocol, 1);
1254 if (err)
1255 goto out_release;
1256
1da177e4
LT
1257out:
1258 *res = sock;
1259 return err;
7420ed23
VY
1260out_release:
1261 sock_release(sock);
1262 sock = NULL;
1263 goto out;
1da177e4 1264}
c6d409cf 1265EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1266
1267/* No kernel lock held - perfect */
ade994f4 1268static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1269{
3cafb376 1270 struct socket *sock = file->private_data;
a331de3b 1271 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1272
e88958e6
CH
1273 if (!sock->ops->poll)
1274 return 0;
f641f13b 1275
a331de3b
CH
1276 if (sk_can_busy_loop(sock->sk)) {
1277 /* poll once if requested by the syscall */
1278 if (events & POLL_BUSY_LOOP)
1279 sk_busy_loop(sock->sk, 1);
1280
1281 /* if this socket can poll_ll, tell the system call */
1282 flag = POLL_BUSY_LOOP;
1283 }
1284
1285 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1286}
1287
89bddce5 1288static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1289{
b69aee04 1290 struct socket *sock = file->private_data;
1da177e4
LT
1291
1292 return sock->ops->mmap(file, sock, vma);
1293}
1294
/*
 * ->release() file operation for sockets: releases the socket embedded
 * in @inode. Always returns 0; @filp is not used.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1300
1301/*
1302 * Update the socket async list
1303 *
1304 * Fasync_list locking strategy.
1305 *
1306 * 1. fasync_list is modified only under process context socket lock
1307 * i.e. under semaphore.
1308 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1309 * or under socket lock
1da177e4
LT
1310 */
1311
1312static int sock_fasync(int fd, struct file *filp, int on)
1313{
989a2979
ED
1314 struct socket *sock = filp->private_data;
1315 struct sock *sk = sock->sk;
333f7909 1316 struct socket_wq *wq = &sock->wq;
1da177e4 1317
989a2979 1318 if (sk == NULL)
1da177e4 1319 return -EINVAL;
1da177e4
LT
1320
1321 lock_sock(sk);
eaefd110 1322 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1323
eaefd110 1324 if (!wq->fasync_list)
989a2979
ED
1325 sock_reset_flag(sk, SOCK_FASYNC);
1326 else
bcdce719 1327 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1328
989a2979 1329 release_sock(sk);
1da177e4
LT
1330 return 0;
1331}
1332
ceb5d58b 1333/* This function may be called only under rcu_lock */
1da177e4 1334
ceb5d58b 1335int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1336{
ceb5d58b 1337 if (!wq || !wq->fasync_list)
1da177e4 1338 return -1;
ceb5d58b 1339
89bddce5 1340 switch (how) {
8d8ad9d7 1341 case SOCK_WAKE_WAITD:
ceb5d58b 1342 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1343 break;
1344 goto call_kill;
8d8ad9d7 1345 case SOCK_WAKE_SPACE:
ceb5d58b 1346 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4 1347 break;
7c7ab580 1348 fallthrough;
8d8ad9d7 1349 case SOCK_WAKE_IO:
89bddce5 1350call_kill:
43815482 1351 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1352 break;
8d8ad9d7 1353 case SOCK_WAKE_URG:
43815482 1354 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1355 }
ceb5d58b 1356
1da177e4
LT
1357 return 0;
1358}
c6d409cf 1359EXPORT_SYMBOL(sock_wake_async);
1da177e4 1360
8a3c245c
PT
1361/**
1362 * __sock_create - creates a socket
1363 * @net: net namespace
1364 * @family: protocol family (AF_INET, ...)
1365 * @type: communication type (SOCK_STREAM, ...)
1366 * @protocol: protocol (0, ...)
1367 * @res: new socket
1368 * @kern: boolean for kernel space sockets
1369 *
1370 * Creates a new socket and assigns it to @res, passing through LSM.
1371 * Returns 0 or an error. On failure @res is set to %NULL. @kern must
1372 * be set to true if the socket resides in kernel space.
1373 * This function internally uses GFP_KERNEL.
1374 */
1375
721db93a 1376int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1377 struct socket **res, int kern)
1da177e4
LT
1378{
1379 int err;
1380 struct socket *sock;
55737fda 1381 const struct net_proto_family *pf;
1da177e4
LT
1382
1383 /*
89bddce5 1384 * Check protocol is in range
1da177e4
LT
1385 */
1386 if (family < 0 || family >= NPROTO)
1387 return -EAFNOSUPPORT;
1388 if (type < 0 || type >= SOCK_MAX)
1389 return -EINVAL;
1390
1391 /* Compatibility.
1392
1393 This uglymoron is moved from INET layer to here to avoid
1394 deadlock in module load.
1395 */
1396 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1397 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1398 current->comm);
1da177e4
LT
1399 family = PF_PACKET;
1400 }
1401
1402 err = security_socket_create(family, type, protocol, kern);
1403 if (err)
1404 return err;
89bddce5 1405
55737fda
SH
1406 /*
1407 * Allocate the socket and allow the family to set things up. if
1408 * the protocol is 0, the family is instructed to select an appropriate
1409 * default.
1410 */
1411 sock = sock_alloc();
1412 if (!sock) {
e87cc472 1413 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1414 return -ENFILE; /* Not exactly a match, but its the
1415 closest posix thing */
1416 }
1417
1418 sock->type = type;
1419
95a5afca 1420#ifdef CONFIG_MODULES
89bddce5
SH
1421 /* Attempt to load a protocol module if the find failed.
1422 *
1423 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1424 * requested real, full-featured networking support upon configuration.
1425 * Otherwise module support will break!
1426 */
190683a9 1427 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1428 request_module("net-pf-%d", family);
1da177e4
LT
1429#endif
1430
55737fda
SH
1431 rcu_read_lock();
1432 pf = rcu_dereference(net_families[family]);
1433 err = -EAFNOSUPPORT;
1434 if (!pf)
1435 goto out_release;
1da177e4
LT
1436
1437 /*
1438 * We will call the ->create function, that possibly is in a loadable
1439 * module, so we have to bump that loadable module refcnt first.
1440 */
55737fda 1441 if (!try_module_get(pf->owner))
1da177e4
LT
1442 goto out_release;
1443
55737fda
SH
1444 /* Now protected by module ref count */
1445 rcu_read_unlock();
1446
3f378b68 1447 err = pf->create(net, sock, protocol, kern);
55737fda 1448 if (err < 0)
1da177e4 1449 goto out_module_put;
a79af59e 1450
1da177e4
LT
1451 /*
1452 * Now to bump the refcnt of the [loadable] module that owns this
1453 * socket at sock_release time we decrement its refcnt.
1454 */
55737fda
SH
1455 if (!try_module_get(sock->ops->owner))
1456 goto out_module_busy;
1457
1da177e4
LT
1458 /*
1459 * Now that we're done with the ->create function, the [loadable]
1460 * module can have its refcnt decremented
1461 */
55737fda 1462 module_put(pf->owner);
7420ed23
VY
1463 err = security_socket_post_create(sock, family, type, protocol, kern);
1464 if (err)
3b185525 1465 goto out_sock_release;
55737fda 1466 *res = sock;
1da177e4 1467
55737fda
SH
1468 return 0;
1469
1470out_module_busy:
1471 err = -EAFNOSUPPORT;
1da177e4 1472out_module_put:
55737fda
SH
1473 sock->ops = NULL;
1474 module_put(pf->owner);
1475out_sock_release:
1da177e4 1476 sock_release(sock);
55737fda
SH
1477 return err;
1478
1479out_release:
1480 rcu_read_unlock();
1481 goto out_sock_release;
1da177e4 1482}
721db93a 1483EXPORT_SYMBOL(__sock_create);
1da177e4 1484
8a3c245c
PT
1485/**
1486 * sock_create - creates a socket
1487 * @family: protocol family (AF_INET, ...)
1488 * @type: communication type (SOCK_STREAM, ...)
1489 * @protocol: protocol (0, ...)
1490 * @res: new socket
1491 *
1492 * A wrapper around __sock_create().
1493 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1494 */
1495
1da177e4
LT
1496int sock_create(int family, int type, int protocol, struct socket **res)
1497{
1b8d7ae4 1498 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1499}
c6d409cf 1500EXPORT_SYMBOL(sock_create);
1da177e4 1501
8a3c245c
PT
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that marks the socket as a
 *	kernel-space socket (@kern == 1).
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1519
9d6a15c3 1520int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1521{
1522 int retval;
1523 struct socket *sock;
a677a039
UD
1524 int flags;
1525
e38b36f3
UD
1526 /* Check the SOCK_* constants for consistency. */
1527 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1528 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1529 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1530 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1531
a677a039 1532 flags = type & ~SOCK_TYPE_MASK;
77d27200 1533 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1534 return -EINVAL;
1535 type &= SOCK_TYPE_MASK;
1da177e4 1536
aaca0bdc
UD
1537 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1538 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1539
1da177e4
LT
1540 retval = sock_create(family, type, protocol, &sock);
1541 if (retval < 0)
8e1611e2 1542 return retval;
1da177e4 1543
8e1611e2 1544 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1545}
1546
9d6a15c3
DB
1547SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1548{
1549 return __sys_socket(family, type, protocol);
1550}
1551
1da177e4
LT
1552/*
1553 * Create a pair of connected sockets.
1554 */
1555
6debc8d8 1556int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1557{
1558 struct socket *sock1, *sock2;
1559 int fd1, fd2, err;
db349509 1560 struct file *newfile1, *newfile2;
a677a039
UD
1561 int flags;
1562
1563 flags = type & ~SOCK_TYPE_MASK;
77d27200 1564 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1565 return -EINVAL;
1566 type &= SOCK_TYPE_MASK;
1da177e4 1567
aaca0bdc
UD
1568 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1569 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1570
016a266b
AV
1571 /*
1572 * reserve descriptors and make sure we won't fail
1573 * to return them to userland.
1574 */
1575 fd1 = get_unused_fd_flags(flags);
1576 if (unlikely(fd1 < 0))
1577 return fd1;
1578
1579 fd2 = get_unused_fd_flags(flags);
1580 if (unlikely(fd2 < 0)) {
1581 put_unused_fd(fd1);
1582 return fd2;
1583 }
1584
1585 err = put_user(fd1, &usockvec[0]);
1586 if (err)
1587 goto out;
1588
1589 err = put_user(fd2, &usockvec[1]);
1590 if (err)
1591 goto out;
1592
1da177e4
LT
1593 /*
1594 * Obtain the first socket and check if the underlying protocol
1595 * supports the socketpair call.
1596 */
1597
1598 err = sock_create(family, type, protocol, &sock1);
016a266b 1599 if (unlikely(err < 0))
1da177e4
LT
1600 goto out;
1601
1602 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1603 if (unlikely(err < 0)) {
1604 sock_release(sock1);
1605 goto out;
bf3c23d1 1606 }
d73aa286 1607
d47cd945
DH
1608 err = security_socket_socketpair(sock1, sock2);
1609 if (unlikely(err)) {
1610 sock_release(sock2);
1611 sock_release(sock1);
1612 goto out;
1613 }
1614
016a266b
AV
1615 err = sock1->ops->socketpair(sock1, sock2);
1616 if (unlikely(err < 0)) {
1617 sock_release(sock2);
1618 sock_release(sock1);
1619 goto out;
28407630
AV
1620 }
1621
aab174f0 1622 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1623 if (IS_ERR(newfile1)) {
28407630 1624 err = PTR_ERR(newfile1);
016a266b
AV
1625 sock_release(sock2);
1626 goto out;
28407630
AV
1627 }
1628
aab174f0 1629 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1630 if (IS_ERR(newfile2)) {
1631 err = PTR_ERR(newfile2);
016a266b
AV
1632 fput(newfile1);
1633 goto out;
db349509
AV
1634 }
1635
157cf649 1636 audit_fd_pair(fd1, fd2);
d73aa286 1637
db349509
AV
1638 fd_install(fd1, newfile1);
1639 fd_install(fd2, newfile2);
d73aa286 1640 return 0;
1da177e4 1641
016a266b 1642out:
d73aa286 1643 put_unused_fd(fd2);
d73aa286 1644 put_unused_fd(fd1);
1da177e4
LT
1645 return err;
1646}
1647
6debc8d8
DB
1648SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1649 int __user *, usockvec)
1650{
1651 return __sys_socketpair(family, type, protocol, usockvec);
1652}
1653
1da177e4
LT
1654/*
1655 * Bind a name to a socket. Nothing much to do here since it's
1656 * the protocol's responsibility to handle the local address.
1657 *
1658 * We move the socket address to kernel space before we call
1659 * the protocol layer (having also checked the address is ok).
1660 */
1661
a87d35d8 1662int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1663{
1664 struct socket *sock;
230b1839 1665 struct sockaddr_storage address;
6cb153ca 1666 int err, fput_needed;
1da177e4 1667
89bddce5 1668 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1669 if (sock) {
43db362d 1670 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1671 if (!err) {
89bddce5 1672 err = security_socket_bind(sock,
230b1839 1673 (struct sockaddr *)&address,
89bddce5 1674 addrlen);
6cb153ca
BL
1675 if (!err)
1676 err = sock->ops->bind(sock,
89bddce5 1677 (struct sockaddr *)
230b1839 1678 &address, addrlen);
1da177e4 1679 }
6cb153ca 1680 fput_light(sock->file, fput_needed);
89bddce5 1681 }
1da177e4
LT
1682 return err;
1683}
1684
a87d35d8
DB
1685SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1686{
1687 return __sys_bind(fd, umyaddr, addrlen);
1688}
1689
1da177e4
LT
1690/*
1691 * Perform a listen. Basically, we allow the protocol to do anything
1692 * necessary for a listen, and if that works, we mark the socket as
1693 * ready for listening.
1694 */
1695
25e290ee 1696int __sys_listen(int fd, int backlog)
1da177e4
LT
1697{
1698 struct socket *sock;
6cb153ca 1699 int err, fput_needed;
b8e1f9b5 1700 int somaxconn;
89bddce5
SH
1701
1702 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1703 if (sock) {
8efa6e93 1704 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1705 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1706 backlog = somaxconn;
1da177e4
LT
1707
1708 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1709 if (!err)
1710 err = sock->ops->listen(sock, backlog);
1da177e4 1711
6cb153ca 1712 fput_light(sock->file, fput_needed);
1da177e4
LT
1713 }
1714 return err;
1715}
1716
25e290ee
DB
1717SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1718{
1719 return __sys_listen(fd, backlog);
1720}
1721
de2ea4b6
JA
1722int __sys_accept4_file(struct file *file, unsigned file_flags,
1723 struct sockaddr __user *upeer_sockaddr,
09952e3e
JA
1724 int __user *upeer_addrlen, int flags,
1725 unsigned long nofile)
1da177e4
LT
1726{
1727 struct socket *sock, *newsock;
39d8c1b6 1728 struct file *newfile;
de2ea4b6 1729 int err, len, newfd;
230b1839 1730 struct sockaddr_storage address;
1da177e4 1731
77d27200 1732 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1733 return -EINVAL;
1734
1735 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1736 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1737
dba4a925
FR
1738 sock = sock_from_file(file);
1739 if (!sock) {
1740 err = -ENOTSOCK;
1da177e4 1741 goto out;
dba4a925 1742 }
1da177e4
LT
1743
1744 err = -ENFILE;
c6d409cf
ED
1745 newsock = sock_alloc();
1746 if (!newsock)
de2ea4b6 1747 goto out;
1da177e4
LT
1748
1749 newsock->type = sock->type;
1750 newsock->ops = sock->ops;
1751
1da177e4
LT
1752 /*
1753 * We don't need try_module_get here, as the listening socket (sock)
1754 * has the protocol module (sock->ops->owner) held.
1755 */
1756 __module_get(newsock->ops->owner);
1757
09952e3e 1758 newfd = __get_unused_fd_flags(flags, nofile);
39d8c1b6
DM
1759 if (unlikely(newfd < 0)) {
1760 err = newfd;
9a1875e6 1761 sock_release(newsock);
de2ea4b6 1762 goto out;
39d8c1b6 1763 }
aab174f0 1764 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1765 if (IS_ERR(newfile)) {
28407630
AV
1766 err = PTR_ERR(newfile);
1767 put_unused_fd(newfd);
de2ea4b6 1768 goto out;
28407630 1769 }
39d8c1b6 1770
a79af59e
FF
1771 err = security_socket_accept(sock, newsock);
1772 if (err)
39d8c1b6 1773 goto out_fd;
a79af59e 1774
de2ea4b6
JA
1775 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1776 false);
1da177e4 1777 if (err < 0)
39d8c1b6 1778 goto out_fd;
1da177e4
LT
1779
1780 if (upeer_sockaddr) {
9b2c45d4
DV
1781 len = newsock->ops->getname(newsock,
1782 (struct sockaddr *)&address, 2);
1783 if (len < 0) {
1da177e4 1784 err = -ECONNABORTED;
39d8c1b6 1785 goto out_fd;
1da177e4 1786 }
43db362d 1787 err = move_addr_to_user(&address,
230b1839 1788 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1789 if (err < 0)
39d8c1b6 1790 goto out_fd;
1da177e4
LT
1791 }
1792
1793 /* File flags are not inherited via accept() unlike another OSes. */
1794
39d8c1b6
DM
1795 fd_install(newfd, newfile);
1796 err = newfd;
1da177e4
LT
1797out:
1798 return err;
39d8c1b6 1799out_fd:
9606a216 1800 fput(newfile);
39d8c1b6 1801 put_unused_fd(newfd);
de2ea4b6
JA
1802 goto out;
1803
1804}
1805
1806/*
1807 * For accept, we attempt to create a new socket, set up the link
1808 * with the client, wake up the client, then return the new
1809 * connected fd. We collect the address of the connector in kernel
1810 * space and move it to user at the very end. This is unclean because
1811 * we open the socket then return an error.
1812 *
1813 * 1003.1g adds the ability to recvmsg() to query connection pending
1814 * status to recvmsg. We need to add that support in a way thats
1815 * clean when we restructure accept also.
1816 */
1817
1818int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1819 int __user *upeer_addrlen, int flags)
1820{
1821 int ret = -EBADF;
1822 struct fd f;
1823
1824 f = fdget(fd);
1825 if (f.file) {
1826 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
09952e3e
JA
1827 upeer_addrlen, flags,
1828 rlimit(RLIMIT_NOFILE));
6b07edeb 1829 fdput(f);
de2ea4b6
JA
1830 }
1831
1832 return ret;
1da177e4
LT
1833}
1834
4541e805
DB
1835SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1836 int __user *, upeer_addrlen, int, flags)
1837{
1838 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1839}
1840
20f37034
HC
1841SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1842 int __user *, upeer_addrlen)
aaca0bdc 1843{
4541e805 1844 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1845}
1846
1da177e4
LT
1847/*
1848 * Attempt to connect to a socket with the server address. The address
1849 * is in user space so we verify it is OK and move it to kernel space.
1850 *
1851 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1852 * break bindings
1853 *
1854 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1855 * other SEQPACKET protocols that take time to connect() as it doesn't
1856 * include the -EINPROGRESS status for such sockets.
1857 */
1858
f499a021 1859int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
bd3ded31 1860 int addrlen, int file_flags)
1da177e4
LT
1861{
1862 struct socket *sock;
bd3ded31 1863 int err;
1da177e4 1864
dba4a925
FR
1865 sock = sock_from_file(file);
1866 if (!sock) {
1867 err = -ENOTSOCK;
1da177e4 1868 goto out;
dba4a925 1869 }
1da177e4 1870
89bddce5 1871 err =
f499a021 1872 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4 1873 if (err)
bd3ded31 1874 goto out;
1da177e4 1875
f499a021 1876 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
bd3ded31 1877 sock->file->f_flags | file_flags);
1da177e4
LT
1878out:
1879 return err;
1880}
1881
bd3ded31
JA
1882int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1883{
1884 int ret = -EBADF;
1885 struct fd f;
1886
1887 f = fdget(fd);
1888 if (f.file) {
f499a021
JA
1889 struct sockaddr_storage address;
1890
1891 ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1892 if (!ret)
1893 ret = __sys_connect_file(f.file, &address, addrlen, 0);
6b07edeb 1894 fdput(f);
bd3ded31
JA
1895 }
1896
1897 return ret;
1898}
1899
1387c2c2
DB
1900SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1901 int, addrlen)
1902{
1903 return __sys_connect(fd, uservaddr, addrlen);
1904}
1905
1da177e4
LT
1906/*
1907 * Get the local address ('name') of a socket object. Move the obtained
1908 * name to user space.
1909 */
1910
8882a107
DB
1911int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1912 int __user *usockaddr_len)
1da177e4
LT
1913{
1914 struct socket *sock;
230b1839 1915 struct sockaddr_storage address;
9b2c45d4 1916 int err, fput_needed;
89bddce5 1917
6cb153ca 1918 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1919 if (!sock)
1920 goto out;
1921
1922 err = security_socket_getsockname(sock);
1923 if (err)
1924 goto out_put;
1925
9b2c45d4
DV
1926 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1927 if (err < 0)
1da177e4 1928 goto out_put;
9b2c45d4
DV
1929 /* "err" is actually length in this case */
1930 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1931
1932out_put:
6cb153ca 1933 fput_light(sock->file, fput_needed);
1da177e4
LT
1934out:
1935 return err;
1936}
1937
8882a107
DB
1938SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1939 int __user *, usockaddr_len)
1940{
1941 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1942}
1943
1da177e4
LT
1944/*
1945 * Get the remote address ('name') of a socket object. Move the obtained
1946 * name to user space.
1947 */
1948
b21c8f83
DB
1949int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1950 int __user *usockaddr_len)
1da177e4
LT
1951{
1952 struct socket *sock;
230b1839 1953 struct sockaddr_storage address;
9b2c45d4 1954 int err, fput_needed;
1da177e4 1955
89bddce5
SH
1956 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1957 if (sock != NULL) {
1da177e4
LT
1958 err = security_socket_getpeername(sock);
1959 if (err) {
6cb153ca 1960 fput_light(sock->file, fput_needed);
1da177e4
LT
1961 return err;
1962 }
1963
9b2c45d4
DV
1964 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1965 if (err >= 0)
1966 /* "err" is actually length in this case */
1967 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1968 usockaddr_len);
6cb153ca 1969 fput_light(sock->file, fput_needed);
1da177e4
LT
1970 }
1971 return err;
1972}
1973
b21c8f83
DB
1974SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1975 int __user *, usockaddr_len)
1976{
1977 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1978}
1979
1da177e4
LT
1980/*
1981 * Send a datagram to a given address. We move the address into kernel
1982 * space and check the user space data area is readable before invoking
1983 * the protocol.
1984 */
211b634b
DB
1985int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1986 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1987{
1988 struct socket *sock;
230b1839 1989 struct sockaddr_storage address;
1da177e4
LT
1990 int err;
1991 struct msghdr msg;
1992 struct iovec iov;
6cb153ca 1993 int fput_needed;
6cb153ca 1994
602bd0e9
AV
1995 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1996 if (unlikely(err))
1997 return err;
de0fa95c
PE
1998 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1999 if (!sock)
4387ff75 2000 goto out;
6cb153ca 2001
89bddce5 2002 msg.msg_name = NULL;
89bddce5
SH
2003 msg.msg_control = NULL;
2004 msg.msg_controllen = 0;
2005 msg.msg_namelen = 0;
6cb153ca 2006 if (addr) {
43db362d 2007 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
2008 if (err < 0)
2009 goto out_put;
230b1839 2010 msg.msg_name = (struct sockaddr *)&address;
89bddce5 2011 msg.msg_namelen = addr_len;
1da177e4
LT
2012 }
2013 if (sock->file->f_flags & O_NONBLOCK)
2014 flags |= MSG_DONTWAIT;
2015 msg.msg_flags = flags;
d8725c86 2016 err = sock_sendmsg(sock, &msg);
1da177e4 2017
89bddce5 2018out_put:
de0fa95c 2019 fput_light(sock->file, fput_needed);
4387ff75 2020out:
1da177e4
LT
2021 return err;
2022}
2023
211b634b
DB
2024SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2025 unsigned int, flags, struct sockaddr __user *, addr,
2026 int, addr_len)
2027{
2028 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2029}
2030
1da177e4 2031/*
89bddce5 2032 * Send a datagram down a socket.
1da177e4
LT
2033 */
2034
3e0fa65f 2035SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 2036 unsigned int, flags)
1da177e4 2037{
211b634b 2038 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
2039}
2040
2041/*
89bddce5 2042 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
2043 * sender. We verify the buffers are writable and if needed move the
2044 * sender address from kernel to user space.
2045 */
7a09e1eb
DB
2046int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
2047 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
2048{
2049 struct socket *sock;
2050 struct iovec iov;
2051 struct msghdr msg;
230b1839 2052 struct sockaddr_storage address;
89bddce5 2053 int err, err2;
6cb153ca
BL
2054 int fput_needed;
2055
602bd0e9
AV
2056 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2057 if (unlikely(err))
2058 return err;
de0fa95c 2059 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 2060 if (!sock)
de0fa95c 2061 goto out;
1da177e4 2062
89bddce5
SH
2063 msg.msg_control = NULL;
2064 msg.msg_controllen = 0;
f3d33426
HFS
2065 /* Save some cycles and don't copy the address if not needed */
2066 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2067 /* We assume all kernel code knows the size of sockaddr_storage */
2068 msg.msg_namelen = 0;
130ed5d1 2069 msg.msg_iocb = NULL;
9f138fa6 2070 msg.msg_flags = 0;
1da177e4
LT
2071 if (sock->file->f_flags & O_NONBLOCK)
2072 flags |= MSG_DONTWAIT;
2da62906 2073 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2074
89bddce5 2075 if (err >= 0 && addr != NULL) {
43db362d 2076 err2 = move_addr_to_user(&address,
230b1839 2077 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2078 if (err2 < 0)
2079 err = err2;
1da177e4 2080 }
de0fa95c
PE
2081
2082 fput_light(sock->file, fput_needed);
4387ff75 2083out:
1da177e4
LT
2084 return err;
2085}
2086
7a09e1eb
DB
2087SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2088 unsigned int, flags, struct sockaddr __user *, addr,
2089 int __user *, addr_len)
2090{
2091 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2092}
2093
1da177e4 2094/*
89bddce5 2095 * Receive a datagram from a socket.
1da177e4
LT
2096 */
2097
b7c0ddf5
JG
2098SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2099 unsigned int, flags)
1da177e4 2100{
7a09e1eb 2101 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2102}
2103
83f0c10b
FW
2104static bool sock_use_custom_sol_socket(const struct socket *sock)
2105{
2106 const struct sock *sk = sock->sk;
2107
2108 /* Use sock->ops->setsockopt() for MPTCP */
2109 return IS_ENABLED(CONFIG_MPTCP) &&
2110 sk->sk_protocol == IPPROTO_MPTCP &&
2111 sk->sk_type == SOCK_STREAM &&
2112 (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
2113}
2114
1da177e4
LT
2115/*
2116 * Set a socket option. Because we don't know the option lengths we have
2117 * to pass the user mode parameter for the protocols to sort out.
2118 */
a7b75c5a 2119int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
55db9c0e 2120 int optlen)
1da177e4 2121{
519a8a6c 2122 sockptr_t optval = USER_SOCKPTR(user_optval);
0d01da6a 2123 char *kernel_optval = NULL;
6cb153ca 2124 int err, fput_needed;
1da177e4
LT
2125 struct socket *sock;
2126
2127 if (optlen < 0)
2128 return -EINVAL;
89bddce5
SH
2129
2130 sock = sockfd_lookup_light(fd, &err, &fput_needed);
4a367299
CH
2131 if (!sock)
2132 return err;
1da177e4 2133
4a367299
CH
2134 err = security_socket_setsockopt(sock, level, optname);
2135 if (err)
2136 goto out_put;
0d01da6a 2137
55db9c0e
CH
2138 if (!in_compat_syscall())
2139 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
a7b75c5a 2140 user_optval, &optlen,
55db9c0e 2141 &kernel_optval);
4a367299
CH
2142 if (err < 0)
2143 goto out_put;
2144 if (err > 0) {
2145 err = 0;
2146 goto out_put;
2147 }
0d01da6a 2148
a7b75c5a
CH
2149 if (kernel_optval)
2150 optval = KERNEL_SOCKPTR(kernel_optval);
4a367299 2151 if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
a7b75c5a 2152 err = sock_setsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2153 else if (unlikely(!sock->ops->setsockopt))
2154 err = -EOPNOTSUPP;
4a367299
CH
2155 else
2156 err = sock->ops->setsockopt(sock, level, optname, optval,
89bddce5 2157 optlen);
a7b75c5a 2158 kfree(kernel_optval);
4a367299
CH
2159out_put:
2160 fput_light(sock->file, fput_needed);
1da177e4
LT
2161 return err;
2162}
2163
cc36dca0
DB
2164SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2165 char __user *, optval, int, optlen)
2166{
2167 return __sys_setsockopt(fd, level, optname, optval, optlen);
2168}
2169
9cacf81f
SF
2170INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
2171 int optname));
2172
1da177e4
LT
2173/*
2174 * Get a socket option. Because we don't know the option lengths we have
2175 * to pass a user mode parameter for the protocols to sort out.
2176 */
55db9c0e
CH
2177int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
2178 int __user *optlen)
1da177e4 2179{
6cb153ca 2180 int err, fput_needed;
1da177e4 2181 struct socket *sock;
0d01da6a 2182 int max_optlen;
1da177e4 2183
89bddce5 2184 sock = sockfd_lookup_light(fd, &err, &fput_needed);
d8a9b38f
CH
2185 if (!sock)
2186 return err;
2187
2188 err = security_socket_getsockopt(sock, level, optname);
2189 if (err)
2190 goto out_put;
1da177e4 2191
55db9c0e
CH
2192 if (!in_compat_syscall())
2193 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
0d01da6a 2194
d8a9b38f
CH
2195 if (level == SOL_SOCKET)
2196 err = sock_getsockopt(sock, level, optname, optval, optlen);
a44d9e72
CH
2197 else if (unlikely(!sock->ops->getsockopt))
2198 err = -EOPNOTSUPP;
d8a9b38f
CH
2199 else
2200 err = sock->ops->getsockopt(sock, level, optname, optval,
89bddce5 2201 optlen);
0d01da6a 2202
55db9c0e
CH
2203 if (!in_compat_syscall())
2204 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
2205 optval, optlen, max_optlen,
2206 err);
6cb153ca 2207out_put:
d8a9b38f 2208 fput_light(sock->file, fput_needed);
1da177e4
LT
2209 return err;
2210}
2211
13a2d70e
DB
2212SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2213 char __user *, optval, int __user *, optlen)
2214{
2215 return __sys_getsockopt(fd, level, optname, optval, optlen);
2216}
2217
1da177e4
LT
2218/*
2219 * Shutdown a socket.
2220 */
2221
b713c195
JA
2222int __sys_shutdown_sock(struct socket *sock, int how)
2223{
2224 int err;
2225
2226 err = security_socket_shutdown(sock, how);
2227 if (!err)
2228 err = sock->ops->shutdown(sock, how);
2229
2230 return err;
2231}
2232
005a1aea 2233int __sys_shutdown(int fd, int how)
1da177e4 2234{
6cb153ca 2235 int err, fput_needed;
1da177e4
LT
2236 struct socket *sock;
2237
89bddce5
SH
2238 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2239 if (sock != NULL) {
b713c195 2240 err = __sys_shutdown_sock(sock, how);
6cb153ca 2241 fput_light(sock->file, fput_needed);
1da177e4
LT
2242 }
2243 return err;
2244}
2245
005a1aea
DB
2246SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2247{
2248 return __sys_shutdown(fd, how);
2249}
2250
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * The expansion refers to a variable called "flags" and, for an argument
 * <msg>, to a second pointer called <msg>_compat; both have to be in scope
 * wherever these macros are used.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2257
c71d8ebe
TH
2258struct used_address {
2259 struct sockaddr_storage name;
2260 unsigned int name_len;
2261};
2262
0a384abf
JA
2263int __copy_msghdr_from_user(struct msghdr *kmsg,
2264 struct user_msghdr __user *umsg,
2265 struct sockaddr __user **save_addr,
2266 struct iovec __user **uiov, size_t *nsegs)
1661bf36 2267{
ffb07550 2268 struct user_msghdr msg;
08adb7da
AV
2269 ssize_t err;
2270
ffb07550 2271 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2272 return -EFAULT;
dbb490b9 2273
1f466e1f
CH
2274 kmsg->msg_control_is_user = true;
2275 kmsg->msg_control_user = msg.msg_control;
ffb07550
AV
2276 kmsg->msg_controllen = msg.msg_controllen;
2277 kmsg->msg_flags = msg.msg_flags;
2278
2279 kmsg->msg_namelen = msg.msg_namelen;
2280 if (!msg.msg_name)
6a2a2b3a
AS
2281 kmsg->msg_namelen = 0;
2282
dbb490b9
ML
2283 if (kmsg->msg_namelen < 0)
2284 return -EINVAL;
2285
1661bf36 2286 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2287 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2288
2289 if (save_addr)
ffb07550 2290 *save_addr = msg.msg_name;
08adb7da 2291
ffb07550 2292 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2293 if (!save_addr) {
864d9664
PA
2294 err = move_addr_to_kernel(msg.msg_name,
2295 kmsg->msg_namelen,
08adb7da
AV
2296 kmsg->msg_name);
2297 if (err < 0)
2298 return err;
2299 }
2300 } else {
2301 kmsg->msg_name = NULL;
2302 kmsg->msg_namelen = 0;
2303 }
2304
ffb07550 2305 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2306 return -EMSGSIZE;
2307
0345f931 2308 kmsg->msg_iocb = NULL;
0a384abf
JA
2309 *uiov = msg.msg_iov;
2310 *nsegs = msg.msg_iovlen;
2311 return 0;
2312}
2313
2314static int copy_msghdr_from_user(struct msghdr *kmsg,
2315 struct user_msghdr __user *umsg,
2316 struct sockaddr __user **save_addr,
2317 struct iovec **iov)
2318{
2319 struct user_msghdr msg;
2320 ssize_t err;
2321
2322 err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
2323 &msg.msg_iovlen);
2324 if (err)
2325 return err;
0345f931 2326
87e5e6da 2327 err = import_iovec(save_addr ? READ : WRITE,
ffb07550 2328 msg.msg_iov, msg.msg_iovlen,
da184284 2329 UIO_FASTIOV, iov, &kmsg->msg_iter);
87e5e6da 2330 return err < 0 ? err : 0;
1661bf36
DC
2331}
2332
4257c8ca
JA
2333static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
2334 unsigned int flags, struct used_address *used_address,
2335 unsigned int allowed_msghdr_flags)
1da177e4 2336{
b9d717a7 2337 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2338 __aligned(sizeof(__kernel_size_t));
89bddce5 2339 /* 20 is size of ipv6_pktinfo */
1da177e4 2340 unsigned char *ctl_buf = ctl;
d8725c86 2341 int ctl_len;
08adb7da 2342 ssize_t err;
89bddce5 2343
1da177e4
LT
2344 err = -ENOBUFS;
2345
228e548e 2346 if (msg_sys->msg_controllen > INT_MAX)
4257c8ca 2347 goto out;
28a94d8f 2348 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2349 ctl_len = msg_sys->msg_controllen;
1da177e4 2350 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2351 err =
228e548e 2352 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2353 sizeof(ctl));
1da177e4 2354 if (err)
4257c8ca 2355 goto out;
228e548e
AB
2356 ctl_buf = msg_sys->msg_control;
2357 ctl_len = msg_sys->msg_controllen;
1da177e4 2358 } else if (ctl_len) {
ac4340fc
DM
2359 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2360 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2361 if (ctl_len > sizeof(ctl)) {
1da177e4 2362 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2363 if (ctl_buf == NULL)
4257c8ca 2364 goto out;
1da177e4
LT
2365 }
2366 err = -EFAULT;
1f466e1f 2367 if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
1da177e4 2368 goto out_freectl;
228e548e 2369 msg_sys->msg_control = ctl_buf;
1f466e1f 2370 msg_sys->msg_control_is_user = false;
1da177e4 2371 }
228e548e 2372 msg_sys->msg_flags = flags;
1da177e4
LT
2373
2374 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2375 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2376 /*
2377 * If this is sendmmsg() and current destination address is same as
2378 * previously succeeded address, omit asking LSM's decision.
2379 * used_address->name_len is initialized to UINT_MAX so that the first
2380 * destination address never matches.
2381 */
bc909d9d
MD
2382 if (used_address && msg_sys->msg_name &&
2383 used_address->name_len == msg_sys->msg_namelen &&
2384 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2385 used_address->name_len)) {
d8725c86 2386 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2387 goto out_freectl;
2388 }
d8725c86 2389 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2390 /*
2391 * If this is sendmmsg() and sending to current destination address was
2392 * successful, remember it.
2393 */
2394 if (used_address && err >= 0) {
2395 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2396 if (msg_sys->msg_name)
2397 memcpy(&used_address->name, msg_sys->msg_name,
2398 used_address->name_len);
c71d8ebe 2399 }
1da177e4
LT
2400
2401out_freectl:
89bddce5 2402 if (ctl_buf != ctl)
1da177e4 2403 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
4257c8ca
JA
2404out:
2405 return err;
2406}
2407
03b1230c
JA
2408int sendmsg_copy_msghdr(struct msghdr *msg,
2409 struct user_msghdr __user *umsg, unsigned flags,
2410 struct iovec **iov)
4257c8ca
JA
2411{
2412 int err;
2413
2414 if (flags & MSG_CMSG_COMPAT) {
2415 struct compat_msghdr __user *msg_compat;
2416
2417 msg_compat = (struct compat_msghdr __user *) umsg;
2418 err = get_compat_msghdr(msg, msg_compat, NULL, iov);
2419 } else {
2420 err = copy_msghdr_from_user(msg, umsg, NULL, iov);
2421 }
2422 if (err < 0)
2423 return err;
2424
2425 return 0;
2426}
2427
2428static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
2429 struct msghdr *msg_sys, unsigned int flags,
2430 struct used_address *used_address,
2431 unsigned int allowed_msghdr_flags)
2432{
2433 struct sockaddr_storage address;
2434 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2435 ssize_t err;
2436
2437 msg_sys->msg_name = &address;
2438
2439 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
2440 if (err < 0)
2441 return err;
2442
2443 err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
2444 allowed_msghdr_flags);
da184284 2445 kfree(iov);
228e548e
AB
2446 return err;
2447}
2448
2449/*
2450 * BSD sendmsg interface
2451 */
03b1230c 2452long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
0fa03c62
JA
2453 unsigned int flags)
2454{
03b1230c 2455 return ____sys_sendmsg(sock, msg, flags, NULL, 0);
0fa03c62 2456}
228e548e 2457
e1834a32
DB
2458long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2459 bool forbid_cmsg_compat)
228e548e
AB
2460{
2461 int fput_needed, err;
2462 struct msghdr msg_sys;
1be374a0
AL
2463 struct socket *sock;
2464
e1834a32
DB
2465 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2466 return -EINVAL;
2467
1be374a0 2468 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2469 if (!sock)
2470 goto out;
2471
28a94d8f 2472 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2473
6cb153ca 2474 fput_light(sock->file, fput_needed);
89bddce5 2475out:
1da177e4
LT
2476 return err;
2477}
2478
666547ff 2479SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2480{
e1834a32 2481 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2482}
2483
228e548e
AB
2484/*
2485 * Linux sendmmsg interface
2486 */
2487
2488int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2489 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2490{
2491 int fput_needed, err, datagrams;
2492 struct socket *sock;
2493 struct mmsghdr __user *entry;
2494 struct compat_mmsghdr __user *compat_entry;
2495 struct msghdr msg_sys;
c71d8ebe 2496 struct used_address used_address;
f092276d 2497 unsigned int oflags = flags;
228e548e 2498
e1834a32
DB
2499 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2500 return -EINVAL;
2501
98382f41
AB
2502 if (vlen > UIO_MAXIOV)
2503 vlen = UIO_MAXIOV;
228e548e
AB
2504
2505 datagrams = 0;
2506
2507 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2508 if (!sock)
2509 return err;
2510
c71d8ebe 2511 used_address.name_len = UINT_MAX;
228e548e
AB
2512 entry = mmsg;
2513 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2514 err = 0;
f092276d 2515 flags |= MSG_BATCH;
228e548e
AB
2516
2517 while (datagrams < vlen) {
f092276d
TH
2518 if (datagrams == vlen - 1)
2519 flags = oflags;
2520
228e548e 2521 if (MSG_CMSG_COMPAT & flags) {
666547ff 2522 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2523 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2524 if (err < 0)
2525 break;
2526 err = __put_user(err, &compat_entry->msg_len);
2527 ++compat_entry;
2528 } else {
a7526eb5 2529 err = ___sys_sendmsg(sock,
666547ff 2530 (struct user_msghdr __user *)entry,
28a94d8f 2531 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2532 if (err < 0)
2533 break;
2534 err = put_user(err, &entry->msg_len);
2535 ++entry;
2536 }
2537
2538 if (err)
2539 break;
2540 ++datagrams;
3023898b
SHY
2541 if (msg_data_left(&msg_sys))
2542 break;
a78cb84c 2543 cond_resched();
228e548e
AB
2544 }
2545
228e548e
AB
2546 fput_light(sock->file, fput_needed);
2547
728ffb86
AB
2548 /* We only return an error if no datagrams were able to be sent */
2549 if (datagrams != 0)
228e548e
AB
2550 return datagrams;
2551
228e548e
AB
2552 return err;
2553}
2554
2555SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2556 unsigned int, vlen, unsigned int, flags)
2557{
e1834a32 2558 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2559}
2560
03b1230c
JA
2561int recvmsg_copy_msghdr(struct msghdr *msg,
2562 struct user_msghdr __user *umsg, unsigned flags,
2563 struct sockaddr __user **uaddr,
2564 struct iovec **iov)
1da177e4 2565{
08adb7da 2566 ssize_t err;
1da177e4 2567
4257c8ca
JA
2568 if (MSG_CMSG_COMPAT & flags) {
2569 struct compat_msghdr __user *msg_compat;
1da177e4 2570
4257c8ca
JA
2571 msg_compat = (struct compat_msghdr __user *) umsg;
2572 err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
2573 } else {
2574 err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
2575 }
1da177e4 2576 if (err < 0)
da184284 2577 return err;
1da177e4 2578
4257c8ca
JA
2579 return 0;
2580}
2581
2582static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
2583 struct user_msghdr __user *msg,
2584 struct sockaddr __user *uaddr,
2585 unsigned int flags, int nosec)
2586{
2587 struct compat_msghdr __user *msg_compat =
2588 (struct compat_msghdr __user *) msg;
2589 int __user *uaddr_len = COMPAT_NAMELEN(msg);
2590 struct sockaddr_storage addr;
2591 unsigned long cmsg_ptr;
2592 int len;
2593 ssize_t err;
2594
2595 msg_sys->msg_name = &addr;
a2e27255
ACM
2596 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2597 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2598
f3d33426
HFS
2599 /* We assume all kernel code knows the size of sockaddr_storage */
2600 msg_sys->msg_namelen = 0;
2601
1da177e4
LT
2602 if (sock->file->f_flags & O_NONBLOCK)
2603 flags |= MSG_DONTWAIT;
1af66221
ED
2604
2605 if (unlikely(nosec))
2606 err = sock_recvmsg_nosec(sock, msg_sys, flags);
2607 else
2608 err = sock_recvmsg(sock, msg_sys, flags);
2609
1da177e4 2610 if (err < 0)
4257c8ca 2611 goto out;
1da177e4
LT
2612 len = err;
2613
2614 if (uaddr != NULL) {
43db362d 2615 err = move_addr_to_user(&addr,
a2e27255 2616 msg_sys->msg_namelen, uaddr,
89bddce5 2617 uaddr_len);
1da177e4 2618 if (err < 0)
4257c8ca 2619 goto out;
1da177e4 2620 }
a2e27255 2621 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2622 COMPAT_FLAGS(msg));
1da177e4 2623 if (err)
4257c8ca 2624 goto out;
1da177e4 2625 if (MSG_CMSG_COMPAT & flags)
a2e27255 2626 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2627 &msg_compat->msg_controllen);
2628 else
a2e27255 2629 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2630 &msg->msg_controllen);
2631 if (err)
4257c8ca 2632 goto out;
1da177e4 2633 err = len;
4257c8ca
JA
2634out:
2635 return err;
2636}
2637
2638static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
2639 struct msghdr *msg_sys, unsigned int flags, int nosec)
2640{
2641 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
2642 /* user mode address pointers */
2643 struct sockaddr __user *uaddr;
2644 ssize_t err;
2645
2646 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
2647 if (err < 0)
2648 return err;
1da177e4 2649
4257c8ca 2650 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
da184284 2651 kfree(iov);
a2e27255
ACM
2652 return err;
2653}
2654
2655/*
2656 * BSD recvmsg interface
2657 */
2658
03b1230c
JA
2659long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
2660 struct user_msghdr __user *umsg,
2661 struct sockaddr __user *uaddr, unsigned int flags)
aa1fa28f 2662{
03b1230c 2663 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
aa1fa28f
JA
2664}
2665
e1834a32
DB
2666long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2667 bool forbid_cmsg_compat)
a2e27255
ACM
2668{
2669 int fput_needed, err;
2670 struct msghdr msg_sys;
1be374a0
AL
2671 struct socket *sock;
2672
e1834a32
DB
2673 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2674 return -EINVAL;
2675
1be374a0 2676 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2677 if (!sock)
2678 goto out;
2679
a7526eb5 2680 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2681
6cb153ca 2682 fput_light(sock->file, fput_needed);
1da177e4
LT
2683out:
2684 return err;
2685}
2686
666547ff 2687SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2688 unsigned int, flags)
2689{
e1834a32 2690 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2691}
2692
a2e27255
ACM
2693/*
2694 * Linux recvmmsg interface
2695 */
2696
e11d4284
AB
2697static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2698 unsigned int vlen, unsigned int flags,
2699 struct timespec64 *timeout)
a2e27255
ACM
2700{
2701 int fput_needed, err, datagrams;
2702 struct socket *sock;
2703 struct mmsghdr __user *entry;
d7256d0e 2704 struct compat_mmsghdr __user *compat_entry;
a2e27255 2705 struct msghdr msg_sys;
766b9f92
DD
2706 struct timespec64 end_time;
2707 struct timespec64 timeout64;
a2e27255
ACM
2708
2709 if (timeout &&
2710 poll_select_set_timeout(&end_time, timeout->tv_sec,
2711 timeout->tv_nsec))
2712 return -EINVAL;
2713
2714 datagrams = 0;
2715
2716 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2717 if (!sock)
2718 return err;
2719
7797dc41
SHY
2720 if (likely(!(flags & MSG_ERRQUEUE))) {
2721 err = sock_error(sock->sk);
2722 if (err) {
2723 datagrams = err;
2724 goto out_put;
2725 }
e623a9e9 2726 }
a2e27255
ACM
2727
2728 entry = mmsg;
d7256d0e 2729 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2730
2731 while (datagrams < vlen) {
2732 /*
2733 * No need to ask LSM for more than the first datagram.
2734 */
d7256d0e 2735 if (MSG_CMSG_COMPAT & flags) {
666547ff 2736 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2737 &msg_sys, flags & ~MSG_WAITFORONE,
2738 datagrams);
d7256d0e
JMG
2739 if (err < 0)
2740 break;
2741 err = __put_user(err, &compat_entry->msg_len);
2742 ++compat_entry;
2743 } else {
a7526eb5 2744 err = ___sys_recvmsg(sock,
666547ff 2745 (struct user_msghdr __user *)entry,
a7526eb5
AL
2746 &msg_sys, flags & ~MSG_WAITFORONE,
2747 datagrams);
d7256d0e
JMG
2748 if (err < 0)
2749 break;
2750 err = put_user(err, &entry->msg_len);
2751 ++entry;
2752 }
2753
a2e27255
ACM
2754 if (err)
2755 break;
a2e27255
ACM
2756 ++datagrams;
2757
71c5c159
BB
2758 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2759 if (flags & MSG_WAITFORONE)
2760 flags |= MSG_DONTWAIT;
2761
a2e27255 2762 if (timeout) {
766b9f92 2763 ktime_get_ts64(&timeout64);
c2e6c856 2764 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2765 if (timeout->tv_sec < 0) {
2766 timeout->tv_sec = timeout->tv_nsec = 0;
2767 break;
2768 }
2769
2770 /* Timeout, return less than vlen datagrams */
2771 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2772 break;
2773 }
2774
2775 /* Out of band data, return right away */
2776 if (msg_sys.msg_flags & MSG_OOB)
2777 break;
a78cb84c 2778 cond_resched();
a2e27255
ACM
2779 }
2780
a2e27255 2781 if (err == 0)
34b88a68
ACM
2782 goto out_put;
2783
2784 if (datagrams == 0) {
2785 datagrams = err;
2786 goto out_put;
2787 }
a2e27255 2788
34b88a68
ACM
2789 /*
2790 * We may return less entries than requested (vlen) if the
2791 * sock is non block and there aren't enough datagrams...
2792 */
2793 if (err != -EAGAIN) {
a2e27255 2794 /*
34b88a68
ACM
2795 * ... or if recvmsg returns an error after we
2796 * received some datagrams, where we record the
2797 * error to return on the next call or if the
2798 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2799 */
34b88a68 2800 sock->sk->sk_err = -err;
a2e27255 2801 }
34b88a68
ACM
2802out_put:
2803 fput_light(sock->file, fput_needed);
a2e27255 2804
34b88a68 2805 return datagrams;
a2e27255
ACM
2806}
2807
e11d4284
AB
2808int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2809 unsigned int vlen, unsigned int flags,
2810 struct __kernel_timespec __user *timeout,
2811 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2812{
2813 int datagrams;
c2e6c856 2814 struct timespec64 timeout_sys;
a2e27255 2815
e11d4284
AB
2816 if (timeout && get_timespec64(&timeout_sys, timeout))
2817 return -EFAULT;
a2e27255 2818
e11d4284 2819 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2820 return -EFAULT;
2821
e11d4284
AB
2822 if (!timeout && !timeout32)
2823 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2824
2825 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2826
e11d4284
AB
2827 if (datagrams <= 0)
2828 return datagrams;
2829
2830 if (timeout && put_timespec64(&timeout_sys, timeout))
2831 datagrams = -EFAULT;
2832
2833 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2834 datagrams = -EFAULT;
2835
2836 return datagrams;
2837}
2838
1255e269
DB
2839SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2840 unsigned int, vlen, unsigned int, flags,
c2e6c856 2841 struct __kernel_timespec __user *, timeout)
1255e269 2842{
e11d4284
AB
2843 if (flags & MSG_CMSG_COMPAT)
2844 return -EINVAL;
2845
2846 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2847}
2848
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg variant taking the old 32-bit timespec layout. MSG_CMSG_COMPAT is
 * rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if (flags & MSG_CMSG_COMPAT)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2860
a2e27255 2861#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * Entry 0 is a placeholder: socketcall() rejects call numbers below 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 ..  6 */ AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	/*  7 .. 13 */ AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	/* 14 .. 20 */ AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2872
2873/*
89bddce5 2874 * System call vectors.
1da177e4
LT
2875 *
2876 * Argument checking cleaned up. Saved 20% in size.
2877 * This function doesn't need to set the kernel lock because
89bddce5 2878 * it is set by the callees.
1da177e4
LT
2879 */
2880
3e0fa65f 2881SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2882{
2950fa9d 2883 unsigned long a[AUDITSC_ARGS];
89bddce5 2884 unsigned long a0, a1;
1da177e4 2885 int err;
47379052 2886 unsigned int len;
1da177e4 2887
228e548e 2888 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2889 return -EINVAL;
c8e8cd57 2890 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2891
47379052
AV
2892 len = nargs[call];
2893 if (len > sizeof(a))
2894 return -EINVAL;
2895
1da177e4 2896 /* copy_from_user should be SMP safe. */
47379052 2897 if (copy_from_user(a, args, len))
1da177e4 2898 return -EFAULT;
3ec3b2fb 2899
2950fa9d
CG
2900 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2901 if (err)
2902 return err;
3ec3b2fb 2903
89bddce5
SH
2904 a0 = a[0];
2905 a1 = a[1];
2906
2907 switch (call) {
2908 case SYS_SOCKET:
9d6a15c3 2909 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2910 break;
2911 case SYS_BIND:
a87d35d8 2912 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2913 break;
2914 case SYS_CONNECT:
1387c2c2 2915 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2916 break;
2917 case SYS_LISTEN:
25e290ee 2918 err = __sys_listen(a0, a1);
89bddce5
SH
2919 break;
2920 case SYS_ACCEPT:
4541e805
DB
2921 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2922 (int __user *)a[2], 0);
89bddce5
SH
2923 break;
2924 case SYS_GETSOCKNAME:
2925 err =
8882a107
DB
2926 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2927 (int __user *)a[2]);
89bddce5
SH
2928 break;
2929 case SYS_GETPEERNAME:
2930 err =
b21c8f83
DB
2931 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2932 (int __user *)a[2]);
89bddce5
SH
2933 break;
2934 case SYS_SOCKETPAIR:
6debc8d8 2935 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2936 break;
2937 case SYS_SEND:
f3bf896b
DB
2938 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2939 NULL, 0);
89bddce5
SH
2940 break;
2941 case SYS_SENDTO:
211b634b
DB
2942 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2943 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2944 break;
2945 case SYS_RECV:
d27e9afc
DB
2946 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2947 NULL, NULL);
89bddce5
SH
2948 break;
2949 case SYS_RECVFROM:
7a09e1eb
DB
2950 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2951 (struct sockaddr __user *)a[4],
2952 (int __user *)a[5]);
89bddce5
SH
2953 break;
2954 case SYS_SHUTDOWN:
005a1aea 2955 err = __sys_shutdown(a0, a1);
89bddce5
SH
2956 break;
2957 case SYS_SETSOCKOPT:
cc36dca0
DB
2958 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2959 a[4]);
89bddce5
SH
2960 break;
2961 case SYS_GETSOCKOPT:
2962 err =
13a2d70e
DB
2963 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2964 (int __user *)a[4]);
89bddce5
SH
2965 break;
2966 case SYS_SENDMSG:
e1834a32
DB
2967 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2968 a[2], true);
89bddce5 2969 break;
228e548e 2970 case SYS_SENDMMSG:
e1834a32
DB
2971 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2972 a[3], true);
228e548e 2973 break;
89bddce5 2974 case SYS_RECVMSG:
e1834a32
DB
2975 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2976 a[2], true);
89bddce5 2977 break;
a2e27255 2978 case SYS_RECVMMSG:
3ca47e95 2979 if (IS_ENABLED(CONFIG_64BIT))
e11d4284
AB
2980 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2981 a[2], a[3],
2982 (struct __kernel_timespec __user *)a[4],
2983 NULL);
2984 else
2985 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2986 a[2], a[3], NULL,
2987 (struct old_timespec32 __user *)a[4]);
a2e27255 2988 break;
de11defe 2989 case SYS_ACCEPT4:
4541e805
DB
2990 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2991 (int __user *)a[2], a[3]);
aaca0bdc 2992 break;
89bddce5
SH
2993 default:
2994 err = -EINVAL;
2995 break;
1da177e4
LT
2996 }
2997 return err;
2998}
2999
89bddce5 3000#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 3001
55737fda
SH
3002/**
3003 * sock_register - add a socket protocol handler
3004 * @ops: description of protocol
3005 *
1da177e4
LT
3006 * This function is called by a protocol handler that wants to
3007 * advertise its address family, and have it linked into the
e793c0f7 3008 * socket interface. The value ops->family corresponds to the
55737fda 3009 * socket system call protocol family.
1da177e4 3010 */
f0fd27d4 3011int sock_register(const struct net_proto_family *ops)
1da177e4
LT
3012{
3013 int err;
3014
3015 if (ops->family >= NPROTO) {
3410f22e 3016 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
3017 return -ENOBUFS;
3018 }
55737fda
SH
3019
3020 spin_lock(&net_family_lock);
190683a9
ED
3021 if (rcu_dereference_protected(net_families[ops->family],
3022 lockdep_is_held(&net_family_lock)))
55737fda
SH
3023 err = -EEXIST;
3024 else {
cf778b00 3025 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
3026 err = 0;
3027 }
55737fda
SH
3028 spin_unlock(&net_family_lock);
3029
fe0bdbde 3030 pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
1da177e4
LT
3031 return err;
3032}
c6d409cf 3033EXPORT_SYMBOL(sock_register);
1da177e4 3034
55737fda
SH
3035/**
3036 * sock_unregister - remove a protocol handler
3037 * @family: protocol family to remove
3038 *
1da177e4
LT
3039 * This function is called by a protocol handler that wants to
3040 * remove its address family, and have it unlinked from the
55737fda
SH
3041 * new socket creation.
3042 *
3043 * If protocol handler is a module, then it can use module reference
3044 * counts to protect against new references. If protocol handler is not
3045 * a module then it needs to provide its own protection in
3046 * the ops->create routine.
1da177e4 3047 */
f0fd27d4 3048void sock_unregister(int family)
1da177e4 3049{
f0fd27d4 3050 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 3051
55737fda 3052 spin_lock(&net_family_lock);
a9b3cd7f 3053 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
3054 spin_unlock(&net_family_lock);
3055
3056 synchronize_rcu();
3057
fe0bdbde 3058 pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
1da177e4 3059}
c6d409cf 3060EXPORT_SYMBOL(sock_unregister);
1da177e4 3061
bf2ae2e4
XL
3062bool sock_is_registered(int family)
3063{
66b51b0a 3064 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
3065}
3066
77d76ea3 3067static int __init sock_init(void)
1da177e4 3068{
b3e19d92 3069 int err;
2ca794e5
EB
3070 /*
3071 * Initialize the network sysctl infrastructure.
3072 */
3073 err = net_sysctl_init();
3074 if (err)
3075 goto out;
b3e19d92 3076
1da177e4 3077 /*
89bddce5 3078 * Initialize skbuff SLAB cache
1da177e4
LT
3079 */
3080 skb_init();
1da177e4
LT
3081
3082 /*
89bddce5 3083 * Initialize the protocols module.
1da177e4
LT
3084 */
3085
3086 init_inodecache();
b3e19d92
NP
3087
3088 err = register_filesystem(&sock_fs_type);
3089 if (err)
47260ba9 3090 goto out;
1da177e4 3091 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
3092 if (IS_ERR(sock_mnt)) {
3093 err = PTR_ERR(sock_mnt);
3094 goto out_mount;
3095 }
77d76ea3
AK
3096
3097 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
3098 */
3099
3100#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
3101 err = netfilter_init();
3102 if (err)
3103 goto out;
1da177e4 3104#endif
cbeb321a 3105
408eccce 3106 ptp_classifier_init();
c1f19b51 3107
b3e19d92
NP
3108out:
3109 return err;
3110
3111out_mount:
3112 unregister_filesystem(&sock_fs_type);
b3e19d92 3113 goto out;
1da177e4
LT
3114}
3115
77d76ea3
AK
3116core_initcall(sock_init); /* early initcall */
3117
#ifdef CONFIG_PROC_FS
/*
 * Emit the "sockets: used N" line into @seq, where N is whatever
 * sock_inuse_get() reports for the object stored in seq->private.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", in_use);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 3125
29c49648
AB
3126/* Handle the fact that while struct ifreq has the same *layout* on
3127 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3128 * which are handled elsewhere, it still has different *size* due to
3129 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3130 * resulting in struct ifreq being 32 and 40 bytes respectively).
3131 * As a result, if the struct happens to be at the end of a page and
3132 * the next page isn't readable/writable, we get a fault. To prevent
3133 * that, copy back and forth to the full size.
3134 */
3135int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
3136{
3137 if (in_compat_syscall()) {
3138 struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
3139
3140 memset(ifr, 0, sizeof(*ifr));
3141 if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3142 return -EFAULT;
3143
3144 if (ifrdata)
3145 *ifrdata = compat_ptr(ifr32->ifr_data);
3146
3147 return 0;
3148 }
3149
3150 if (copy_from_user(ifr, arg, sizeof(*ifr)))
3151 return -EFAULT;
3152
3153 if (ifrdata)
3154 *ifrdata = ifr->ifr_data;
3155
3156 return 0;
3157}
3158EXPORT_SYMBOL(get_user_ifreq);
3159
3160int put_user_ifreq(struct ifreq *ifr, void __user *arg)
3161{
3162 size_t size = sizeof(*ifr);
3163
3164 if (in_compat_syscall())
3165 size = sizeof(struct compat_ifreq);
3166
3167 if (copy_to_user(arg, ifr, size))
3168 return -EFAULT;
3169
3170 return 0;
3171}
3172EXPORT_SYMBOL(put_user_ifreq);
3173
89bbfc95 3174#ifdef CONFIG_COMPAT
7a50a240
AB
3175static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3176{
7a50a240 3177 compat_uptr_t uptr32;
44c02a2c
AV
3178 struct ifreq ifr;
3179 void __user *saved;
3180 int err;
7a50a240 3181
29c49648 3182 if (get_user_ifreq(&ifr, NULL, uifr32))
7a50a240
AB
3183 return -EFAULT;
3184
3185 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3186 return -EFAULT;
3187
44c02a2c
AV
3188 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3189 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3190
a554bf96 3191 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL, NULL);
44c02a2c
AV
3192 if (!err) {
3193 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
29c49648 3194 if (put_user_ifreq(&ifr, uifr32))
44c02a2c 3195 err = -EFAULT;
ccbd6a5a 3196 }
44c02a2c 3197 return err;
7a229387
AB
3198}
3199
590d4693
BH
3200/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3201static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3202 struct compat_ifreq __user *u_ifreq32)
7a229387 3203{
44c02a2c 3204 struct ifreq ifreq;
a554bf96 3205 void __user *data;
7a229387 3206
a554bf96 3207 if (get_user_ifreq(&ifreq, &data, u_ifreq32))
7a229387 3208 return -EFAULT;
a554bf96 3209 ifreq.ifr_data = data;
7a229387 3210
a554bf96 3211 return dev_ioctl(net, cmd, &ifreq, data, NULL);
37ac39bd
JB
3212}
3213
7a229387
AB
3214/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3215 * for some operations; this forces use of the newer bridge-utils that
25985edc 3216 * use compatible ioctls
7a229387 3217 */
6b96018b 3218static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3219{
6b96018b 3220 compat_ulong_t tmp;
7a229387 3221
6b96018b 3222 if (get_user(tmp, argp))
7a229387
AB
3223 return -EFAULT;
3224 if (tmp == BRCTL_GET_VERSION)
3225 return BRCTL_VERSION + 1;
3226 return -EINVAL;
3227}
3228
6b96018b
AB
3229static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3230 unsigned int cmd, unsigned long arg)
3231{
3232 void __user *argp = compat_ptr(arg);
3233 struct sock *sk = sock->sk;
3234 struct net *net = sock_net(sk);
7a229387 3235
6b96018b 3236 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
88fc023f 3237 return sock_ioctl(file, cmd, (unsigned long)argp);
6b96018b
AB
3238
3239 switch (cmd) {
3240 case SIOCSIFBR:
3241 case SIOCGIFBR:
3242 return old_bridge_ioctl(argp);
7a50a240
AB
3243 case SIOCWANDEV:
3244 return compat_siocwandev(net, argp);
0768e170
AB
3245 case SIOCGSTAMP_OLD:
3246 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3247 if (!sock->ops->gettstamp)
3248 return -ENOIOCTLCMD;
0768e170 3249 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3250 !COMPAT_USE_64BIT_TIME);
3251
dd98d289 3252 case SIOCETHTOOL:
590d4693
BH
3253 case SIOCBONDSLAVEINFOQUERY:
3254 case SIOCBONDINFOQUERY:
a2116ed2 3255 case SIOCSHWTSTAMP:
fd468c74 3256 case SIOCGHWTSTAMP:
590d4693 3257 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3258
3259 case FIOSETOWN:
3260 case SIOCSPGRP:
3261 case FIOGETOWN:
3262 case SIOCGPGRP:
3263 case SIOCBRADDBR:
3264 case SIOCBRDELBR:
3265 case SIOCGIFVLAN:
3266 case SIOCSIFVLAN:
c62cce2c 3267 case SIOCGSKNS:
0768e170
AB
3268 case SIOCGSTAMP_NEW:
3269 case SIOCGSTAMPNS_NEW:
876f0bf9 3270 case SIOCGIFCONF:
6b96018b
AB
3271 return sock_ioctl(file, cmd, arg);
3272
3273 case SIOCGIFFLAGS:
3274 case SIOCSIFFLAGS:
709566d7
AB
3275 case SIOCGIFMAP:
3276 case SIOCSIFMAP:
6b96018b
AB
3277 case SIOCGIFMETRIC:
3278 case SIOCSIFMETRIC:
3279 case SIOCGIFMTU:
3280 case SIOCSIFMTU:
3281 case SIOCGIFMEM:
3282 case SIOCSIFMEM:
3283 case SIOCGIFHWADDR:
3284 case SIOCSIFHWADDR:
3285 case SIOCADDMULTI:
3286 case SIOCDELMULTI:
3287 case SIOCGIFINDEX:
6b96018b
AB
3288 case SIOCGIFADDR:
3289 case SIOCSIFADDR:
3290 case SIOCSIFHWBROADCAST:
6b96018b 3291 case SIOCDIFADDR:
6b96018b
AB
3292 case SIOCGIFBRDADDR:
3293 case SIOCSIFBRDADDR:
3294 case SIOCGIFDSTADDR:
3295 case SIOCSIFDSTADDR:
3296 case SIOCGIFNETMASK:
3297 case SIOCSIFNETMASK:
3298 case SIOCSIFPFLAGS:
3299 case SIOCGIFPFLAGS:
3300 case SIOCGIFTXQLEN:
3301 case SIOCSIFTXQLEN:
3302 case SIOCBRADDIF:
3303 case SIOCBRDELIF:
c6c9fee3 3304 case SIOCGIFNAME:
9177efd3
AB
3305 case SIOCSIFNAME:
3306 case SIOCGMIIPHY:
3307 case SIOCGMIIREG:
3308 case SIOCSMIIREG:
f92d4fc9
AV
3309 case SIOCBONDENSLAVE:
3310 case SIOCBONDRELEASE:
3311 case SIOCBONDSETHWADDR:
3312 case SIOCBONDCHANGEACTIVE:
6b96018b
AB
3313 case SIOCSARP:
3314 case SIOCGARP:
3315 case SIOCDARP:
c7dc504e 3316 case SIOCOUTQ:
9d7bf41f 3317 case SIOCOUTQNSD:
6b96018b 3318 case SIOCATMARK:
63ff03ab 3319 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3320 }
3321
6b96018b
AB
3322 return -ENOIOCTLCMD;
3323}
7a229387 3324
95c96174 3325static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3326 unsigned long arg)
89bbfc95
SP
3327{
3328 struct socket *sock = file->private_data;
3329 int ret = -ENOIOCTLCMD;
87de87d5
DM
3330 struct sock *sk;
3331 struct net *net;
3332
3333 sk = sock->sk;
3334 net = sock_net(sk);
89bbfc95
SP
3335
3336 if (sock->ops->compat_ioctl)
3337 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3338
87de87d5
DM
3339 if (ret == -ENOIOCTLCMD &&
3340 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3341 ret = compat_wext_handle_ioctl(net, cmd, arg);
3342
6b96018b
AB
3343 if (ret == -ENOIOCTLCMD)
3344 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3345
89bbfc95
SP
3346 return ret;
3347}
3348#endif
3349
8a3c245c
PT
3350/**
3351 * kernel_bind - bind an address to a socket (kernel space)
3352 * @sock: socket
3353 * @addr: address
3354 * @addrlen: length of address
3355 *
3356 * Returns 0 or an error.
3357 */
3358
ac5a488e
SS
3359int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3360{
3361 return sock->ops->bind(sock, addr, addrlen);
3362}
c6d409cf 3363EXPORT_SYMBOL(kernel_bind);
ac5a488e 3364
8a3c245c
PT
3365/**
3366 * kernel_listen - move socket to listening state (kernel space)
3367 * @sock: socket
3368 * @backlog: pending connections queue size
3369 *
3370 * Returns 0 or an error.
3371 */
3372
ac5a488e
SS
3373int kernel_listen(struct socket *sock, int backlog)
3374{
3375 return sock->ops->listen(sock, backlog);
3376}
c6d409cf 3377EXPORT_SYMBOL(kernel_listen);
ac5a488e 3378
8a3c245c
PT
3379/**
3380 * kernel_accept - accept a connection (kernel space)
3381 * @sock: listening socket
3382 * @newsock: new connected socket
3383 * @flags: flags
3384 *
3385 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3386 * If it fails, @newsock is guaranteed to be %NULL.
3387 * Returns 0 or an error.
3388 */
3389
ac5a488e
SS
3390int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3391{
3392 struct sock *sk = sock->sk;
3393 int err;
3394
3395 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3396 newsock);
3397 if (err < 0)
3398 goto done;
3399
cdfbabfb 3400 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3401 if (err < 0) {
3402 sock_release(*newsock);
fa8705b0 3403 *newsock = NULL;
ac5a488e
SS
3404 goto done;
3405 }
3406
3407 (*newsock)->ops = sock->ops;
1b08534e 3408 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3409
3410done:
3411 return err;
3412}
c6d409cf 3413EXPORT_SYMBOL(kernel_accept);
ac5a488e 3414
8a3c245c
PT
3415/**
3416 * kernel_connect - connect a socket (kernel space)
3417 * @sock: socket
3418 * @addr: address
3419 * @addrlen: address length
3420 * @flags: flags (O_NONBLOCK, ...)
3421 *
f1dcffcc 3422 * For datagram sockets, @addr is the address to which datagrams are sent
8a3c245c
PT
3423 * by default, and the only address from which datagrams are received.
3424 * For stream sockets, attempts to connect to @addr.
3425 * Returns 0 or an error code.
3426 */
3427
ac5a488e 3428int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3429 int flags)
ac5a488e
SS
3430{
3431 return sock->ops->connect(sock, addr, addrlen, flags);
3432}
c6d409cf 3433EXPORT_SYMBOL(kernel_connect);
ac5a488e 3434
8a3c245c
PT
3435/**
3436 * kernel_getsockname - get the address which the socket is bound (kernel space)
3437 * @sock: socket
3438 * @addr: address holder
3439 *
3440 * Fills the @addr pointer with the address which the socket is bound.
3441 * Returns 0 or an error code.
3442 */
3443
9b2c45d4 3444int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3445{
9b2c45d4 3446 return sock->ops->getname(sock, addr, 0);
ac5a488e 3447}
c6d409cf 3448EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3449
8a3c245c 3450/**
645f0897 3451 * kernel_getpeername - get the address which the socket is connected (kernel space)
8a3c245c
PT
3452 * @sock: socket
3453 * @addr: address holder
3454 *
3455 * Fills the @addr pointer with the address which the socket is connected.
3456 * Returns 0 or an error code.
3457 */
3458
9b2c45d4 3459int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3460{
9b2c45d4 3461 return sock->ops->getname(sock, addr, 1);
ac5a488e 3462}
c6d409cf 3463EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3464
8a3c245c
PT
3465/**
3466 * kernel_sendpage - send a &page through a socket (kernel space)
3467 * @sock: socket
3468 * @page: page
3469 * @offset: page offset
3470 * @size: total size in bytes
3471 * @flags: flags (MSG_DONTWAIT, ...)
3472 *
3473 * Returns the total amount sent in bytes or an error.
3474 */
3475
ac5a488e
SS
3476int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3477 size_t size, int flags)
3478{
7b62d31d
CL
3479 if (sock->ops->sendpage) {
3480 /* Warn in case the improper page to zero-copy send */
3481 WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
ac5a488e 3482 return sock->ops->sendpage(sock, page, offset, size, flags);
7b62d31d 3483 }
ac5a488e
SS
3484 return sock_no_sendpage(sock, page, offset, size, flags);
3485}
c6d409cf 3486EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3487
8a3c245c
PT
3488/**
3489 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3490 * @sk: sock
3491 * @page: page
3492 * @offset: page offset
3493 * @size: total size in bytes
3494 * @flags: flags (MSG_DONTWAIT, ...)
3495 *
3496 * Returns the total amount sent in bytes or an error.
3497 * Caller must hold @sk.
3498 */
3499
306b13eb
TH
3500int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3501 size_t size, int flags)
3502{
3503 struct socket *sock = sk->sk_socket;
3504
3505 if (sock->ops->sendpage_locked)
3506 return sock->ops->sendpage_locked(sk, page, offset, size,
3507 flags);
3508
3509 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3510}
3511EXPORT_SYMBOL(kernel_sendpage_locked);
3512
8a3c245c 3513/**
645f0897 3514 * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
8a3c245c
PT
3515 * @sock: socket
3516 * @how: connection part
3517 *
3518 * Returns 0 or an error.
3519 */
3520
91cf45f0
TM
3521int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3522{
3523 return sock->ops->shutdown(sock, how);
3524}
91cf45f0 3525EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3526
8a3c245c
PT
3527/**
3528 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3529 * @sk: socket
3530 *
3531 * This routine returns the IP overhead imposed by a socket i.e.
3532 * the length of the underlying IP header, depending on whether
3533 * this is an IPv4 or IPv6 socket and the length from IP options turned
3534 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3535 */
8a3c245c 3536
113c3075
P
3537u32 kernel_sock_ip_overhead(struct sock *sk)
3538{
3539 struct inet_sock *inet;
3540 struct ip_options_rcu *opt;
3541 u32 overhead = 0;
113c3075
P
3542#if IS_ENABLED(CONFIG_IPV6)
3543 struct ipv6_pinfo *np;
3544 struct ipv6_txoptions *optv6 = NULL;
3545#endif /* IS_ENABLED(CONFIG_IPV6) */
3546
3547 if (!sk)
3548 return overhead;
3549
113c3075
P
3550 switch (sk->sk_family) {
3551 case AF_INET:
3552 inet = inet_sk(sk);
3553 overhead += sizeof(struct iphdr);
3554 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3555 sock_owned_by_user(sk));
113c3075
P
3556 if (opt)
3557 overhead += opt->opt.optlen;
3558 return overhead;
3559#if IS_ENABLED(CONFIG_IPV6)
3560 case AF_INET6:
3561 np = inet6_sk(sk);
3562 overhead += sizeof(struct ipv6hdr);
3563 if (np)
3564 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3565 sock_owned_by_user(sk));
113c3075
P
3566 if (optv6)
3567 overhead += (optv6->opt_flen + optv6->opt_nflen);
3568 return overhead;
3569#endif /* IS_ENABLED(CONFIG_IPV6) */
3570 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3571 return overhead;
3572 }
3573}
3574EXPORT_SYMBOL(kernel_sock_ip_overhead);