]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
Merge tag 'xfs-4.18-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b 106#include <linux/sockios.h>
076bb0c8 107#include <net/busy_poll.h>
f24b9be5 108#include <linux/errqueue.h>
06021292 109
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif /* CONFIG_NET_RX_BUSY_POLL */
6b96018b 114
8ae5e030
AV
115static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
116static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 117static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
118
119static int sock_close(struct inode *inode, struct file *file);
15252423
CH
120static struct wait_queue_head *sock_get_poll_head(struct file *file,
121 __poll_t events);
122static __poll_t sock_poll_mask(struct file *file, __poll_t);
123static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 133 struct pipe_inode_info *pipe, size_t len,
9c55e01c 134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
8ae5e030
AV
144 .read_iter = sock_read_iter,
145 .write_iter = sock_write_iter,
15252423
CH
146 .get_poll_head = sock_get_poll_head,
147 .poll_mask = sock_poll_mask,
1da177e4
LT
148 .poll = sock_poll,
149 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
150#ifdef CONFIG_COMPAT
151 .compat_ioctl = compat_sock_ioctl,
152#endif
1da177e4 153 .mmap = sock_mmap,
1da177e4
LT
154 .release = sock_close,
155 .fasync = sock_fasync,
5274f052
JA
156 .sendpage = sock_sendpage,
157 .splice_write = generic_splice_sendpage,
9c55e01c 158 .splice_read = sock_splice_read,
1da177e4
LT
159};
160
161/*
162 * The protocol list. Each protocol is registered in here.
163 */
164
1da177e4 165static DEFINE_SPINLOCK(net_family_lock);
190683a9 166static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 167
1da177e4 168/*
89bddce5
SH
169 * Support routines.
170 * Move socket addresses back and forth across the kernel/user
171 * divide and look after the messy bits.
1da177e4
LT
172 */
173
1da177e4
LT
174/**
175 * move_addr_to_kernel - copy a socket address into kernel space
176 * @uaddr: Address in user space
177 * @kaddr: Address in kernel space
178 * @ulen: Length in user space
179 *
180 * The address is copied into kernel space. If the provided address is
181 * too long an error code of -EINVAL is returned. If the copy gives
182 * invalid addresses -EFAULT is returned. On a success 0 is returned.
183 */
184
43db362d 185int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 186{
230b1839 187 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 188 return -EINVAL;
89bddce5 189 if (ulen == 0)
1da177e4 190 return 0;
89bddce5 191 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 192 return -EFAULT;
3ec3b2fb 193 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
194}
195
196/**
197 * move_addr_to_user - copy an address to user space
198 * @kaddr: kernel space address
199 * @klen: length of address in kernel
200 * @uaddr: user space address
201 * @ulen: pointer to user length field
202 *
203 * The value pointed to by ulen on entry is the buffer length available.
204 * This is overwritten with the buffer space used. -EINVAL is returned
205 * if an overlong buffer is specified or a negative buffer size. -EFAULT
206 * is returned if either the buffer or the length field are not
207 * accessible.
208 * After copying the data up to the limit the user specifies, the true
209 * length of the data is written over the length limit the user
210 * specified. Zero is returned for a success.
211 */
89bddce5 212
43db362d 213static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 214 void __user *uaddr, int __user *ulen)
1da177e4
LT
215{
216 int err;
217 int len;
218
68c6beb3 219 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
220 err = get_user(len, ulen);
221 if (err)
1da177e4 222 return err;
89bddce5
SH
223 if (len > klen)
224 len = klen;
68c6beb3 225 if (len < 0)
1da177e4 226 return -EINVAL;
89bddce5 227 if (len) {
d6fe3945
SG
228 if (audit_sockaddr(klen, kaddr))
229 return -ENOMEM;
89bddce5 230 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
231 return -EFAULT;
232 }
233 /*
89bddce5
SH
234 * "fromlen shall refer to the value before truncation.."
235 * 1003.1g
1da177e4
LT
236 */
237 return __put_user(klen, ulen);
238}
239
08009a76 240static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
241
242static struct inode *sock_alloc_inode(struct super_block *sb)
243{
244 struct socket_alloc *ei;
eaefd110 245 struct socket_wq *wq;
89bddce5 246
e94b1766 247 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
248 if (!ei)
249 return NULL;
eaefd110
ED
250 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
251 if (!wq) {
43815482
ED
252 kmem_cache_free(sock_inode_cachep, ei);
253 return NULL;
254 }
eaefd110
ED
255 init_waitqueue_head(&wq->wait);
256 wq->fasync_list = NULL;
574aab1e 257 wq->flags = 0;
eaefd110 258 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 259
1da177e4
LT
260 ei->socket.state = SS_UNCONNECTED;
261 ei->socket.flags = 0;
262 ei->socket.ops = NULL;
263 ei->socket.sk = NULL;
264 ei->socket.file = NULL;
1da177e4
LT
265
266 return &ei->vfs_inode;
267}
268
269static void sock_destroy_inode(struct inode *inode)
270{
43815482 271 struct socket_alloc *ei;
eaefd110 272 struct socket_wq *wq;
43815482
ED
273
274 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 275 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 276 kfree_rcu(wq, rcu);
43815482 277 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
278}
279
51cc5068 280static void init_once(void *foo)
1da177e4 281{
89bddce5 282 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 283
a35afb83 284 inode_init_once(&ei->vfs_inode);
1da177e4 285}
89bddce5 286
1e911632 287static void init_inodecache(void)
1da177e4
LT
288{
289 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
290 sizeof(struct socket_alloc),
291 0,
292 (SLAB_HWCACHE_ALIGN |
293 SLAB_RECLAIM_ACCOUNT |
5d097056 294 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 295 init_once);
1e911632 296 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
297}
298
b87221de 299static const struct super_operations sockfs_ops = {
c6d409cf
ED
300 .alloc_inode = sock_alloc_inode,
301 .destroy_inode = sock_destroy_inode,
302 .statfs = simple_statfs,
1da177e4
LT
303};
304
c23fbb6b
ED
305/*
306 * sockfs_dname() is called from d_path().
307 */
308static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
309{
310 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 311 d_inode(dentry)->i_ino);
c23fbb6b
ED
312}
313
3ba13d17 314static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 315 .d_dname = sockfs_dname,
1da177e4
LT
316};
317
bba0bd31
AG
318static int sockfs_xattr_get(const struct xattr_handler *handler,
319 struct dentry *dentry, struct inode *inode,
320 const char *suffix, void *value, size_t size)
321{
322 if (value) {
323 if (dentry->d_name.len + 1 > size)
324 return -ERANGE;
325 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
326 }
327 return dentry->d_name.len + 1;
328}
329
330#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
331#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
332#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
333
334static const struct xattr_handler sockfs_xattr_handler = {
335 .name = XATTR_NAME_SOCKPROTONAME,
336 .get = sockfs_xattr_get,
337};
338
4a590153
AG
339static int sockfs_security_xattr_set(const struct xattr_handler *handler,
340 struct dentry *dentry, struct inode *inode,
341 const char *suffix, const void *value,
342 size_t size, int flags)
343{
344 /* Handled by LSM. */
345 return -EAGAIN;
346}
347
348static const struct xattr_handler sockfs_security_xattr_handler = {
349 .prefix = XATTR_SECURITY_PREFIX,
350 .set = sockfs_security_xattr_set,
351};
352
bba0bd31
AG
353static const struct xattr_handler *sockfs_xattr_handlers[] = {
354 &sockfs_xattr_handler,
4a590153 355 &sockfs_security_xattr_handler,
bba0bd31
AG
356 NULL
357};
358
c74a1cbb
AV
359static struct dentry *sockfs_mount(struct file_system_type *fs_type,
360 int flags, const char *dev_name, void *data)
361{
bba0bd31
AG
362 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
363 sockfs_xattr_handlers,
364 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
365}
366
367static struct vfsmount *sock_mnt __read_mostly;
368
369static struct file_system_type sock_fs_type = {
370 .name = "sockfs",
371 .mount = sockfs_mount,
372 .kill_sb = kill_anon_super,
373};
374
1da177e4
LT
375/*
376 * Obtains the first available file descriptor and sets it up for use.
377 *
39d8c1b6
DM
378 * These functions create file structures and maps them to fd space
379 * of the current process. On success it returns file descriptor
1da177e4
LT
380 * and file struct implicitly stored in sock->file.
381 * Note that another thread may close file descriptor before we return
382 * from this function. We use the fact that now we do not refer
383 * to socket after mapping. If one day we will need it, this
384 * function will increment ref. count on file by 1.
385 *
386 * In any case returned fd MAY BE not valid!
387 * This race condition is unavoidable
388 * with shared fd spaces, we cannot solve it inside kernel,
389 * but we take care of internal coherence yet.
390 */
391
aab174f0 392struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 393{
7cbe66b6 394 struct qstr name = { .name = "" };
2c48b9c4 395 struct path path;
7cbe66b6 396 struct file *file;
1da177e4 397
600e1779
MY
398 if (dname) {
399 name.name = dname;
400 name.len = strlen(name.name);
401 } else if (sock->sk) {
402 name.name = sock->sk->sk_prot_creator->name;
403 name.len = strlen(name.name);
404 }
4b936885 405 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
8e1611e2
AV
406 if (unlikely(!path.dentry)) {
407 sock_release(sock);
28407630 408 return ERR_PTR(-ENOMEM);
8e1611e2 409 }
2c48b9c4 410 path.mnt = mntget(sock_mnt);
39d8c1b6 411
2c48b9c4 412 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 413
2c48b9c4 414 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 415 &socket_file_ops);
b5ffe634 416 if (IS_ERR(file)) {
8e1611e2 417 /* drop dentry, keep inode for a bit */
c5ef6035 418 ihold(d_inode(path.dentry));
2c48b9c4 419 path_put(&path);
8e1611e2
AV
420 /* ... and now kill it properly */
421 sock_release(sock);
39b65252 422 return file;
cc3808f8
AV
423 }
424
425 sock->file = file;
77d27200 426 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 427 file->private_data = sock;
28407630 428 return file;
39d8c1b6 429}
56b31d1c 430EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 431
56b31d1c 432static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
433{
434 struct file *newfile;
28407630 435 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
436 if (unlikely(fd < 0)) {
437 sock_release(sock);
28407630 438 return fd;
ce4bb04c 439 }
39d8c1b6 440
aab174f0 441 newfile = sock_alloc_file(sock, flags, NULL);
28407630 442 if (likely(!IS_ERR(newfile))) {
39d8c1b6 443 fd_install(fd, newfile);
28407630
AV
444 return fd;
445 }
7cbe66b6 446
28407630
AV
447 put_unused_fd(fd);
448 return PTR_ERR(newfile);
1da177e4
LT
449}
450
406a3c63 451struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 452{
6cb153ca
BL
453 if (file->f_op == &socket_file_ops)
454 return file->private_data; /* set in sock_map_fd */
455
23bb80d2
ED
456 *err = -ENOTSOCK;
457 return NULL;
6cb153ca 458}
406a3c63 459EXPORT_SYMBOL(sock_from_file);
6cb153ca 460
1da177e4 461/**
c6d409cf 462 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
463 * @fd: file handle
464 * @err: pointer to an error code return
465 *
466 * The file handle passed in is locked and the socket it is bound
241c4667 467 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
468 * with a negative errno code and NULL is returned. The function checks
469 * for both invalid handles and passing a handle which is not a socket.
470 *
471 * On a success the socket object pointer is returned.
472 */
473
474struct socket *sockfd_lookup(int fd, int *err)
475{
476 struct file *file;
1da177e4
LT
477 struct socket *sock;
478
89bddce5
SH
479 file = fget(fd);
480 if (!file) {
1da177e4
LT
481 *err = -EBADF;
482 return NULL;
483 }
89bddce5 484
6cb153ca
BL
485 sock = sock_from_file(file, err);
486 if (!sock)
1da177e4 487 fput(file);
6cb153ca
BL
488 return sock;
489}
c6d409cf 490EXPORT_SYMBOL(sockfd_lookup);
1da177e4 491
6cb153ca
BL
492static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
493{
00e188ef 494 struct fd f = fdget(fd);
6cb153ca
BL
495 struct socket *sock;
496
3672558c 497 *err = -EBADF;
00e188ef
AV
498 if (f.file) {
499 sock = sock_from_file(f.file, err);
500 if (likely(sock)) {
501 *fput_needed = f.flags;
6cb153ca 502 return sock;
00e188ef
AV
503 }
504 fdput(f);
1da177e4 505 }
6cb153ca 506 return NULL;
1da177e4
LT
507}
508
600e1779
MY
509static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
510 size_t size)
511{
512 ssize_t len;
513 ssize_t used = 0;
514
c5ef6035 515 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
516 if (len < 0)
517 return len;
518 used += len;
519 if (buffer) {
520 if (size < used)
521 return -ERANGE;
522 buffer += len;
523 }
524
525 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
526 used += len;
527 if (buffer) {
528 if (size < used)
529 return -ERANGE;
530 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
531 buffer += len;
532 }
533
534 return used;
535}
536
dc647ec8 537static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
538{
539 int err = simple_setattr(dentry, iattr);
540
e1a3a60a 541 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
542 struct socket *sock = SOCKET_I(d_inode(dentry));
543
6d8c50dc
CW
544 if (sock->sk)
545 sock->sk->sk_uid = iattr->ia_uid;
546 else
547 err = -ENOENT;
86741ec2
LC
548 }
549
550 return err;
551}
552
600e1779 553static const struct inode_operations sockfs_inode_ops = {
600e1779 554 .listxattr = sockfs_listxattr,
86741ec2 555 .setattr = sockfs_setattr,
600e1779
MY
556};
557
1da177e4
LT
558/**
559 * sock_alloc - allocate a socket
89bddce5 560 *
1da177e4
LT
561 * Allocate a new inode and socket object. The two are bound together
562 * and initialised. The socket is then returned. If we are out of inodes
563 * NULL is returned.
564 */
565
f4a00aac 566struct socket *sock_alloc(void)
1da177e4 567{
89bddce5
SH
568 struct inode *inode;
569 struct socket *sock;
1da177e4 570
a209dfc7 571 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
572 if (!inode)
573 return NULL;
574
575 sock = SOCKET_I(inode);
576
85fe4025 577 inode->i_ino = get_next_ino();
89bddce5 578 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
579 inode->i_uid = current_fsuid();
580 inode->i_gid = current_fsgid();
600e1779 581 inode->i_op = &sockfs_inode_ops;
1da177e4 582
1da177e4
LT
583 return sock;
584}
f4a00aac 585EXPORT_SYMBOL(sock_alloc);
1da177e4 586
1da177e4
LT
587/**
588 * sock_release - close a socket
589 * @sock: socket to close
590 *
591 * The socket is released from the protocol stack if it has a release
592 * callback, and the inode is then released if the socket is bound to
89bddce5 593 * an inode not a file.
1da177e4 594 */
89bddce5 595
6d8c50dc 596static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
597{
598 if (sock->ops) {
599 struct module *owner = sock->ops->owner;
600
6d8c50dc
CW
601 if (inode)
602 inode_lock(inode);
1da177e4 603 sock->ops->release(sock);
6d8c50dc
CW
604 if (inode)
605 inode_unlock(inode);
1da177e4
LT
606 sock->ops = NULL;
607 module_put(owner);
608 }
609
eaefd110 610 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 611 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 612
1da177e4
LT
613 if (!sock->file) {
614 iput(SOCK_INODE(sock));
615 return;
616 }
89bddce5 617 sock->file = NULL;
1da177e4 618}
6d8c50dc
CW
619
620void sock_release(struct socket *sock)
621{
622 __sock_release(sock, NULL);
623}
c6d409cf 624EXPORT_SYMBOL(sock_release);
1da177e4 625
c14ac945 626void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 627{
140c55d4
ED
628 u8 flags = *tx_flags;
629
c14ac945 630 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
631 flags |= SKBTX_HW_TSTAMP;
632
c14ac945 633 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
634 flags |= SKBTX_SW_TSTAMP;
635
c14ac945 636 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
637 flags |= SKBTX_SCHED_TSTAMP;
638
140c55d4 639 *tx_flags = flags;
20d49473 640}
67cc0d40 641EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 642
d8725c86 643static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 644{
01e97e65 645 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
646 BUG_ON(ret == -EIOCBQUEUED);
647 return ret;
1da177e4
LT
648}
649
/*
 * Send @msg on @sock after consulting the security hook. A non-zero
 * result from the hook is returned as-is and nothing is sent.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
658
659int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
660 struct kvec *vec, size_t num, size_t size)
661{
6aa24814 662 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 663 return sock_sendmsg(sock, msg);
1da177e4 664}
c6d409cf 665EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 666
306b13eb
TH
667int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
668 struct kvec *vec, size_t num, size_t size)
669{
670 struct socket *sock = sk->sk_socket;
671
672 if (!sock->ops->sendmsg_locked)
db5980d8 673 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
674
675 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
676
677 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
678}
679EXPORT_SYMBOL(kernel_sendmsg_locked);
680
8605330a
SHY
681static bool skb_is_err_queue(const struct sk_buff *skb)
682{
683 /* pkt_type of skbs enqueued on the error queue are set to
684 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
685 * in recvmsg, since skbs received on a local socket will never
686 * have a pkt_type of PACKET_OUTGOING.
687 */
688 return skb->pkt_type == PACKET_OUTGOING;
689}
690
b50a5c70
ML
691/* On transmit, software and hardware timestamps are returned independently.
692 * As the two skb clones share the hardware timestamp, which may be updated
693 * before the software timestamp is received, a hardware TX timestamp may be
694 * returned only if there is no software TX timestamp. Ignore false software
695 * timestamps, which may be made in the __sock_recv_timestamp() call when the
696 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
697 * hardware timestamp.
698 */
699static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
700{
701 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
702}
703
aad9c8c4
ML
704static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
705{
706 struct scm_ts_pktinfo ts_pktinfo;
707 struct net_device *orig_dev;
708
709 if (!skb_mac_header_was_set(skb))
710 return;
711
712 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
713
714 rcu_read_lock();
715 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
716 if (orig_dev)
717 ts_pktinfo.if_index = orig_dev->ifindex;
718 rcu_read_unlock();
719
720 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
721 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
722 sizeof(ts_pktinfo), &ts_pktinfo);
723}
724
92f37fd2
ED
725/*
726 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
727 */
728void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
729 struct sk_buff *skb)
730{
20d49473 731 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 732 struct scm_timestamping tss;
b50a5c70 733 int empty = 1, false_tstamp = 0;
20d49473
PO
734 struct skb_shared_hwtstamps *shhwtstamps =
735 skb_hwtstamps(skb);
736
737 /* Race occurred between timestamp enabling and packet
738 receiving. Fill in the current time for now. */
b50a5c70 739 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 740 __net_timestamp(skb);
b50a5c70
ML
741 false_tstamp = 1;
742 }
20d49473
PO
743
744 if (need_software_tstamp) {
745 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
746 struct timeval tv;
747 skb_get_timestamp(skb, &tv);
748 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
749 sizeof(tv), &tv);
750 } else {
f24b9be5
WB
751 struct timespec ts;
752 skb_get_timestampns(skb, &ts);
20d49473 753 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 754 sizeof(ts), &ts);
20d49473
PO
755 }
756 }
757
f24b9be5 758 memset(&tss, 0, sizeof(tss));
c199105d 759 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 760 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 761 empty = 0;
4d276eb6 762 if (shhwtstamps &&
b9f40e21 763 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 764 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 765 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 766 empty = 0;
aad9c8c4
ML
767 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
768 !skb_is_err_queue(skb))
769 put_ts_pktinfo(msg, skb);
770 }
1c885808 771 if (!empty) {
20d49473 772 put_cmsg(msg, SOL_SOCKET,
f24b9be5 773 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 774
8605330a 775 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 776 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
777 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
778 skb->len, skb->data);
779 }
92f37fd2 780}
7c81fd8b
ACM
781EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
782
6e3e939f
JB
783void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
784 struct sk_buff *skb)
785{
786 int ack;
787
788 if (!sock_flag(sk, SOCK_WIFI_STATUS))
789 return;
790 if (!skb->wifi_acked_valid)
791 return;
792
793 ack = skb->wifi_acked;
794
795 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
796}
797EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
798
11165f14 799static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
800 struct sk_buff *skb)
3b885787 801{
744d5a3e 802 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 803 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 804 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
805}
806
/* Emit the timestamp cmsgs first, then the drop-count cmsg, for @skb. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 814
1b784140 815static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 816 int flags)
1da177e4 817{
2da62906 818 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
819}
820
/*
 * Receive into @msg from @sock after consulting the security hook. A
 * non-zero result from the hook is returned as-is and nothing is received.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 828
c1249c0a
ML
829/**
830 * kernel_recvmsg - Receive a message from a socket (kernel space)
831 * @sock: The socket to receive the message from
832 * @msg: Received message
833 * @vec: Input s/g array for message data
834 * @num: Size of input s/g array
835 * @size: Number of bytes to read
836 * @flags: Message flags (MSG_DONTWAIT, etc...)
837 *
838 * On return the msg structure contains the scatter/gather array passed in the
839 * vec argument. The array is modified so that it consists of the unfilled
840 * portion of the original array.
841 *
842 * The returned value is the total number of bytes received, or an error.
843 */
89bddce5
SH
844int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
845 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
846{
847 mm_segment_t oldfs = get_fs();
848 int result;
849
6aa24814 850 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 851 set_fs(KERNEL_DS);
2da62906 852 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
853 set_fs(oldfs);
854 return result;
855}
c6d409cf 856EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 857
ce1d4d3e
CH
858static ssize_t sock_sendpage(struct file *file, struct page *page,
859 int offset, size_t size, loff_t *ppos, int more)
1da177e4 860{
1da177e4
LT
861 struct socket *sock;
862 int flags;
863
ce1d4d3e
CH
864 sock = file->private_data;
865
35f9c09f
ED
866 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
867 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
868 flags |= more;
ce1d4d3e 869
e6949583 870 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 871}
1da177e4 872
9c55e01c 873static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 874 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
875 unsigned int flags)
876{
877 struct socket *sock = file->private_data;
878
997b37da
RDC
879 if (unlikely(!sock->ops->splice_read))
880 return -EINVAL;
881
9c55e01c
JA
882 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
883}
884
8ae5e030 885static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 886{
6d652330
AV
887 struct file *file = iocb->ki_filp;
888 struct socket *sock = file->private_data;
0345f931 889 struct msghdr msg = {.msg_iter = *to,
890 .msg_iocb = iocb};
8ae5e030 891 ssize_t res;
ce1d4d3e 892
8ae5e030
AV
893 if (file->f_flags & O_NONBLOCK)
894 msg.msg_flags = MSG_DONTWAIT;
895
896 if (iocb->ki_pos != 0)
1da177e4 897 return -ESPIPE;
027445c3 898
66ee59af 899 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
900 return 0;
901
2da62906 902 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
903 *to = msg.msg_iter;
904 return res;
1da177e4
LT
905}
906
8ae5e030 907static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 908{
6d652330
AV
909 struct file *file = iocb->ki_filp;
910 struct socket *sock = file->private_data;
0345f931 911 struct msghdr msg = {.msg_iter = *from,
912 .msg_iocb = iocb};
8ae5e030 913 ssize_t res;
1da177e4 914
8ae5e030 915 if (iocb->ki_pos != 0)
ce1d4d3e 916 return -ESPIPE;
027445c3 917
8ae5e030
AV
918 if (file->f_flags & O_NONBLOCK)
919 msg.msg_flags = MSG_DONTWAIT;
920
6d652330
AV
921 if (sock->type == SOCK_SEQPACKET)
922 msg.msg_flags |= MSG_EOR;
923
d8725c86 924 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
925 *from = msg.msg_iter;
926 return res;
1da177e4
LT
927}
928
1da177e4
LT
929/*
930 * Atomic setting of ioctl hooks to avoid race
931 * with module unload.
932 */
933
4a3e2f71 934static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 935static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 936
881d966b 937void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 938{
4a3e2f71 939 mutex_lock(&br_ioctl_mutex);
1da177e4 940 br_ioctl_hook = hook;
4a3e2f71 941 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
942}
943EXPORT_SYMBOL(brioctl_set);
944
4a3e2f71 945static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 946static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 947
881d966b 948void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 949{
4a3e2f71 950 mutex_lock(&vlan_ioctl_mutex);
1da177e4 951 vlan_ioctl_hook = hook;
4a3e2f71 952 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
953}
954EXPORT_SYMBOL(vlan_ioctl_set);
955
4a3e2f71 956static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 957static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 958
89bddce5 959void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 960{
4a3e2f71 961 mutex_lock(&dlci_ioctl_mutex);
1da177e4 962 dlci_ioctl_hook = hook;
4a3e2f71 963 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
964}
965EXPORT_SYMBOL(dlci_ioctl_set);
966
6b96018b
AB
967static long sock_do_ioctl(struct net *net, struct socket *sock,
968 unsigned int cmd, unsigned long arg)
969{
970 int err;
971 void __user *argp = (void __user *)arg;
972
973 err = sock->ops->ioctl(sock, cmd, arg);
974
975 /*
976 * If this ioctl is unknown try to hand it down
977 * to the NIC driver.
978 */
36fd633e
AV
979 if (err != -ENOIOCTLCMD)
980 return err;
6b96018b 981
36fd633e
AV
982 if (cmd == SIOCGIFCONF) {
983 struct ifconf ifc;
984 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
985 return -EFAULT;
986 rtnl_lock();
987 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
988 rtnl_unlock();
989 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
990 err = -EFAULT;
44c02a2c
AV
991 } else {
992 struct ifreq ifr;
993 bool need_copyout;
994 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
995 return -EFAULT;
996 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
997 if (!err && need_copyout)
998 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
999 return -EFAULT;
36fd633e 1000 }
6b96018b
AB
1001 return err;
1002}
1003
1da177e4
LT
/*
 *	With an ioctl, arg may well be a user mode pointer, but we don't know
 *	what to do with it - that's up to the protocol still.
 *	(This note describes sock_ioctl(), defined further below.)
 */
1008
d8d211a2 1009struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1010{
1011 return &get_net(container_of(ns, struct net, ns))->ns;
1012}
d8d211a2 1013EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1014
1da177e4
LT
1015static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1016{
1017 struct socket *sock;
881d966b 1018 struct sock *sk;
1da177e4
LT
1019 void __user *argp = (void __user *)arg;
1020 int pid, err;
881d966b 1021 struct net *net;
1da177e4 1022
b69aee04 1023 sock = file->private_data;
881d966b 1024 sk = sock->sk;
3b1e0a65 1025 net = sock_net(sk);
44c02a2c
AV
1026 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1027 struct ifreq ifr;
1028 bool need_copyout;
1029 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1030 return -EFAULT;
1031 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1032 if (!err && need_copyout)
1033 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1034 return -EFAULT;
1da177e4 1035 } else
3d23e349 1036#ifdef CONFIG_WEXT_CORE
1da177e4 1037 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1038 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1039 } else
3d23e349 1040#endif
89bddce5 1041 switch (cmd) {
1da177e4
LT
1042 case FIOSETOWN:
1043 case SIOCSPGRP:
1044 err = -EFAULT;
1045 if (get_user(pid, (int __user *)argp))
1046 break;
393cc3f5 1047 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1048 break;
1049 case FIOGETOWN:
1050 case SIOCGPGRP:
609d7fa9 1051 err = put_user(f_getown(sock->file),
89bddce5 1052 (int __user *)argp);
1da177e4
LT
1053 break;
1054 case SIOCGIFBR:
1055 case SIOCSIFBR:
1056 case SIOCBRADDBR:
1057 case SIOCBRDELBR:
1058 err = -ENOPKG;
1059 if (!br_ioctl_hook)
1060 request_module("bridge");
1061
4a3e2f71 1062 mutex_lock(&br_ioctl_mutex);
89bddce5 1063 if (br_ioctl_hook)
881d966b 1064 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1065 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1066 break;
1067 case SIOCGIFVLAN:
1068 case SIOCSIFVLAN:
1069 err = -ENOPKG;
1070 if (!vlan_ioctl_hook)
1071 request_module("8021q");
1072
4a3e2f71 1073 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1074 if (vlan_ioctl_hook)
881d966b 1075 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1076 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1077 break;
1da177e4
LT
1078 case SIOCADDDLCI:
1079 case SIOCDELDLCI:
1080 err = -ENOPKG;
1081 if (!dlci_ioctl_hook)
1082 request_module("dlci");
1083
7512cbf6
PE
1084 mutex_lock(&dlci_ioctl_mutex);
1085 if (dlci_ioctl_hook)
1da177e4 1086 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1087 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1088 break;
c62cce2c
AV
1089 case SIOCGSKNS:
1090 err = -EPERM;
1091 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1092 break;
1093
1094 err = open_related_ns(&net->ns, get_net_ns);
1095 break;
1da177e4 1096 default:
6b96018b 1097 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1098 break;
89bddce5 1099 }
1da177e4
LT
1100 return err;
1101}
1102
1103int sock_create_lite(int family, int type, int protocol, struct socket **res)
1104{
1105 int err;
1106 struct socket *sock = NULL;
89bddce5 1107
1da177e4
LT
1108 err = security_socket_create(family, type, protocol, 1);
1109 if (err)
1110 goto out;
1111
1112 sock = sock_alloc();
1113 if (!sock) {
1114 err = -ENOMEM;
1115 goto out;
1116 }
1117
1da177e4 1118 sock->type = type;
7420ed23
VY
1119 err = security_socket_post_create(sock, family, type, protocol, 1);
1120 if (err)
1121 goto out_release;
1122
1da177e4
LT
1123out:
1124 *res = sock;
1125 return err;
7420ed23
VY
1126out_release:
1127 sock_release(sock);
1128 sock = NULL;
1129 goto out;
1da177e4 1130}
c6d409cf 1131EXPORT_SYMBOL(sock_create_lite);
1da177e4 1132
15252423
CH
1133static struct wait_queue_head *sock_get_poll_head(struct file *file,
1134 __poll_t events)
1135{
1136 struct socket *sock = file->private_data;
1137
1138 if (!sock->ops->poll_mask)
1139 return NULL;
1140 sock_poll_busy_loop(sock, events);
1141 return sk_sleep(sock->sk);
1142}
1143
1144static __poll_t sock_poll_mask(struct file *file, __poll_t events)
1145{
1146 struct socket *sock = file->private_data;
1147
1148 /*
1149 * We need to be sure we are in sync with the socket flags modification.
1150 *
1151 * This memory barrier is paired in the wq_has_sleeper.
1152 */
1153 smp_mb();
1154
1155 /* this socket can poll_ll so tell the system call */
1156 return sock->ops->poll_mask(sock, events) |
1157 (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
1158}
1159
1da177e4 1160/* No kernel lock held - perfect */
ade994f4 1161static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1162{
3cafb376 1163 struct socket *sock = file->private_data;
15252423
CH
1164 __poll_t events = poll_requested_events(wait), mask = 0;
1165
1166 if (sock->ops->poll) {
1167 sock_poll_busy_loop(sock, events);
1168 mask = sock->ops->poll(file, sock, wait);
1169 } else if (sock->ops->poll_mask) {
1170 sock_poll_wait(file, sock_get_poll_head(file, events), wait);
1171 mask = sock->ops->poll_mask(sock, events);
1172 }
2d48d67f 1173
15252423 1174 return mask | sock_poll_busy_flag(sock);
1da177e4
LT
1175}
1176
89bddce5 1177static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1178{
b69aee04 1179 struct socket *sock = file->private_data;
1da177e4
LT
1180
1181 return sock->ops->mmap(file, sock, vma);
1182}
1183
/*
 * ->release() for socket files: releases the socket that lives in
 * @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1189
1190/*
1191 * Update the socket async list
1192 *
1193 * Fasync_list locking strategy.
1194 *
1195 * 1. fasync_list is modified only under process context socket lock
1196 * i.e. under semaphore.
1197 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1198 * or under socket lock
1da177e4
LT
1199 */
1200
1201static int sock_fasync(int fd, struct file *filp, int on)
1202{
989a2979
ED
1203 struct socket *sock = filp->private_data;
1204 struct sock *sk = sock->sk;
eaefd110 1205 struct socket_wq *wq;
1da177e4 1206
989a2979 1207 if (sk == NULL)
1da177e4 1208 return -EINVAL;
1da177e4
LT
1209
1210 lock_sock(sk);
1e1d04e6 1211 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1212 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1213
eaefd110 1214 if (!wq->fasync_list)
989a2979
ED
1215 sock_reset_flag(sk, SOCK_FASYNC);
1216 else
bcdce719 1217 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1218
989a2979 1219 release_sock(sk);
1da177e4
LT
1220 return 0;
1221}
1222
ceb5d58b 1223/* This function may be called only under rcu_lock */
1da177e4 1224
ceb5d58b 1225int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1226{
ceb5d58b 1227 if (!wq || !wq->fasync_list)
1da177e4 1228 return -1;
ceb5d58b 1229
89bddce5 1230 switch (how) {
8d8ad9d7 1231 case SOCK_WAKE_WAITD:
ceb5d58b 1232 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1233 break;
1234 goto call_kill;
8d8ad9d7 1235 case SOCK_WAKE_SPACE:
ceb5d58b 1236 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1237 break;
1238 /* fall through */
8d8ad9d7 1239 case SOCK_WAKE_IO:
89bddce5 1240call_kill:
43815482 1241 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1242 break;
8d8ad9d7 1243 case SOCK_WAKE_URG:
43815482 1244 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1245 }
ceb5d58b 1246
1da177e4
LT
1247 return 0;
1248}
c6d409cf 1249EXPORT_SYMBOL(sock_wake_async);
1da177e4 1250
721db93a 1251int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1252 struct socket **res, int kern)
1da177e4
LT
1253{
1254 int err;
1255 struct socket *sock;
55737fda 1256 const struct net_proto_family *pf;
1da177e4
LT
1257
1258 /*
89bddce5 1259 * Check protocol is in range
1da177e4
LT
1260 */
1261 if (family < 0 || family >= NPROTO)
1262 return -EAFNOSUPPORT;
1263 if (type < 0 || type >= SOCK_MAX)
1264 return -EINVAL;
1265
1266 /* Compatibility.
1267
1268 This uglymoron is moved from INET layer to here to avoid
1269 deadlock in module load.
1270 */
1271 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1272 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1273 current->comm);
1da177e4
LT
1274 family = PF_PACKET;
1275 }
1276
1277 err = security_socket_create(family, type, protocol, kern);
1278 if (err)
1279 return err;
89bddce5 1280
55737fda
SH
1281 /*
1282 * Allocate the socket and allow the family to set things up. if
1283 * the protocol is 0, the family is instructed to select an appropriate
1284 * default.
1285 */
1286 sock = sock_alloc();
1287 if (!sock) {
e87cc472 1288 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1289 return -ENFILE; /* Not exactly a match, but its the
1290 closest posix thing */
1291 }
1292
1293 sock->type = type;
1294
95a5afca 1295#ifdef CONFIG_MODULES
89bddce5
SH
1296 /* Attempt to load a protocol module if the find failed.
1297 *
1298 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1299 * requested real, full-featured networking support upon configuration.
1300 * Otherwise module support will break!
1301 */
190683a9 1302 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1303 request_module("net-pf-%d", family);
1da177e4
LT
1304#endif
1305
55737fda
SH
1306 rcu_read_lock();
1307 pf = rcu_dereference(net_families[family]);
1308 err = -EAFNOSUPPORT;
1309 if (!pf)
1310 goto out_release;
1da177e4
LT
1311
1312 /*
1313 * We will call the ->create function, that possibly is in a loadable
1314 * module, so we have to bump that loadable module refcnt first.
1315 */
55737fda 1316 if (!try_module_get(pf->owner))
1da177e4
LT
1317 goto out_release;
1318
55737fda
SH
1319 /* Now protected by module ref count */
1320 rcu_read_unlock();
1321
3f378b68 1322 err = pf->create(net, sock, protocol, kern);
55737fda 1323 if (err < 0)
1da177e4 1324 goto out_module_put;
a79af59e 1325
1da177e4
LT
1326 /*
1327 * Now to bump the refcnt of the [loadable] module that owns this
1328 * socket at sock_release time we decrement its refcnt.
1329 */
55737fda
SH
1330 if (!try_module_get(sock->ops->owner))
1331 goto out_module_busy;
1332
1da177e4
LT
1333 /*
1334 * Now that we're done with the ->create function, the [loadable]
1335 * module can have its refcnt decremented
1336 */
55737fda 1337 module_put(pf->owner);
7420ed23
VY
1338 err = security_socket_post_create(sock, family, type, protocol, kern);
1339 if (err)
3b185525 1340 goto out_sock_release;
55737fda 1341 *res = sock;
1da177e4 1342
55737fda
SH
1343 return 0;
1344
1345out_module_busy:
1346 err = -EAFNOSUPPORT;
1da177e4 1347out_module_put:
55737fda
SH
1348 sock->ops = NULL;
1349 module_put(pf->owner);
1350out_sock_release:
1da177e4 1351 sock_release(sock);
55737fda
SH
1352 return err;
1353
1354out_release:
1355 rcu_read_unlock();
1356 goto out_sock_release;
1da177e4 1357}
721db93a 1358EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1359
1360int sock_create(int family, int type, int protocol, struct socket **res)
1361{
1b8d7ae4 1362 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1363}
c6d409cf 1364EXPORT_SYMBOL(sock_create);
1da177e4 1365
/* Create a socket in @net.  Calls __sock_create() with kern == 1. */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1371
9d6a15c3 1372int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1373{
1374 int retval;
1375 struct socket *sock;
a677a039
UD
1376 int flags;
1377
e38b36f3
UD
1378 /* Check the SOCK_* constants for consistency. */
1379 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1380 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1381 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1382 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1383
a677a039 1384 flags = type & ~SOCK_TYPE_MASK;
77d27200 1385 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1386 return -EINVAL;
1387 type &= SOCK_TYPE_MASK;
1da177e4 1388
aaca0bdc
UD
1389 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1390 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1391
1da177e4
LT
1392 retval = sock_create(family, type, protocol, &sock);
1393 if (retval < 0)
8e1611e2 1394 return retval;
1da177e4 1395
8e1611e2 1396 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1397}
1398
9d6a15c3
DB
1399SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1400{
1401 return __sys_socket(family, type, protocol);
1402}
1403
1da177e4
LT
1404/*
1405 * Create a pair of connected sockets.
1406 */
1407
6debc8d8 1408int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1409{
1410 struct socket *sock1, *sock2;
1411 int fd1, fd2, err;
db349509 1412 struct file *newfile1, *newfile2;
a677a039
UD
1413 int flags;
1414
1415 flags = type & ~SOCK_TYPE_MASK;
77d27200 1416 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1417 return -EINVAL;
1418 type &= SOCK_TYPE_MASK;
1da177e4 1419
aaca0bdc
UD
1420 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1421 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1422
016a266b
AV
1423 /*
1424 * reserve descriptors and make sure we won't fail
1425 * to return them to userland.
1426 */
1427 fd1 = get_unused_fd_flags(flags);
1428 if (unlikely(fd1 < 0))
1429 return fd1;
1430
1431 fd2 = get_unused_fd_flags(flags);
1432 if (unlikely(fd2 < 0)) {
1433 put_unused_fd(fd1);
1434 return fd2;
1435 }
1436
1437 err = put_user(fd1, &usockvec[0]);
1438 if (err)
1439 goto out;
1440
1441 err = put_user(fd2, &usockvec[1]);
1442 if (err)
1443 goto out;
1444
1da177e4
LT
1445 /*
1446 * Obtain the first socket and check if the underlying protocol
1447 * supports the socketpair call.
1448 */
1449
1450 err = sock_create(family, type, protocol, &sock1);
016a266b 1451 if (unlikely(err < 0))
1da177e4
LT
1452 goto out;
1453
1454 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1455 if (unlikely(err < 0)) {
1456 sock_release(sock1);
1457 goto out;
bf3c23d1 1458 }
d73aa286 1459
d47cd945
DH
1460 err = security_socket_socketpair(sock1, sock2);
1461 if (unlikely(err)) {
1462 sock_release(sock2);
1463 sock_release(sock1);
1464 goto out;
1465 }
1466
016a266b
AV
1467 err = sock1->ops->socketpair(sock1, sock2);
1468 if (unlikely(err < 0)) {
1469 sock_release(sock2);
1470 sock_release(sock1);
1471 goto out;
28407630
AV
1472 }
1473
aab174f0 1474 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1475 if (IS_ERR(newfile1)) {
28407630 1476 err = PTR_ERR(newfile1);
016a266b
AV
1477 sock_release(sock2);
1478 goto out;
28407630
AV
1479 }
1480
aab174f0 1481 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1482 if (IS_ERR(newfile2)) {
1483 err = PTR_ERR(newfile2);
016a266b
AV
1484 fput(newfile1);
1485 goto out;
db349509
AV
1486 }
1487
157cf649 1488 audit_fd_pair(fd1, fd2);
d73aa286 1489
db349509
AV
1490 fd_install(fd1, newfile1);
1491 fd_install(fd2, newfile2);
d73aa286 1492 return 0;
1da177e4 1493
016a266b 1494out:
d73aa286 1495 put_unused_fd(fd2);
d73aa286 1496 put_unused_fd(fd1);
1da177e4
LT
1497 return err;
1498}
1499
6debc8d8
DB
1500SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1501 int __user *, usockvec)
1502{
1503 return __sys_socketpair(family, type, protocol, usockvec);
1504}
1505
1da177e4
LT
1506/*
1507 * Bind a name to a socket. Nothing much to do here since it's
1508 * the protocol's responsibility to handle the local address.
1509 *
1510 * We move the socket address to kernel space before we call
1511 * the protocol layer (having also checked the address is ok).
1512 */
1513
a87d35d8 1514int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1515{
1516 struct socket *sock;
230b1839 1517 struct sockaddr_storage address;
6cb153ca 1518 int err, fput_needed;
1da177e4 1519
89bddce5 1520 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1521 if (sock) {
43db362d 1522 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1523 if (err >= 0) {
1524 err = security_socket_bind(sock,
230b1839 1525 (struct sockaddr *)&address,
89bddce5 1526 addrlen);
6cb153ca
BL
1527 if (!err)
1528 err = sock->ops->bind(sock,
89bddce5 1529 (struct sockaddr *)
230b1839 1530 &address, addrlen);
1da177e4 1531 }
6cb153ca 1532 fput_light(sock->file, fput_needed);
89bddce5 1533 }
1da177e4
LT
1534 return err;
1535}
1536
a87d35d8
DB
1537SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1538{
1539 return __sys_bind(fd, umyaddr, addrlen);
1540}
1541
1da177e4
LT
1542/*
1543 * Perform a listen. Basically, we allow the protocol to do anything
1544 * necessary for a listen, and if that works, we mark the socket as
1545 * ready for listening.
1546 */
1547
25e290ee 1548int __sys_listen(int fd, int backlog)
1da177e4
LT
1549{
1550 struct socket *sock;
6cb153ca 1551 int err, fput_needed;
b8e1f9b5 1552 int somaxconn;
89bddce5
SH
1553
1554 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1555 if (sock) {
8efa6e93 1556 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1557 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1558 backlog = somaxconn;
1da177e4
LT
1559
1560 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1561 if (!err)
1562 err = sock->ops->listen(sock, backlog);
1da177e4 1563
6cb153ca 1564 fput_light(sock->file, fput_needed);
1da177e4
LT
1565 }
1566 return err;
1567}
1568
25e290ee
DB
1569SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1570{
1571 return __sys_listen(fd, backlog);
1572}
1573
1da177e4
LT
1574/*
1575 * For accept, we attempt to create a new socket, set up the link
1576 * with the client, wake up the client, then return the new
1577 * connected fd. We collect the address of the connector in kernel
1578 * space and move it to user at the very end. This is unclean because
1579 * we open the socket then return an error.
1580 *
1581 * 1003.1g adds the ability to recvmsg() to query connection pending
1582 * status to recvmsg. We need to add that support in a way thats
b903036a 1583 * clean when we restructure accept also.
1da177e4
LT
1584 */
1585
4541e805
DB
1586int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1587 int __user *upeer_addrlen, int flags)
1da177e4
LT
1588{
1589 struct socket *sock, *newsock;
39d8c1b6 1590 struct file *newfile;
6cb153ca 1591 int err, len, newfd, fput_needed;
230b1839 1592 struct sockaddr_storage address;
1da177e4 1593
77d27200 1594 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1595 return -EINVAL;
1596
1597 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1598 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1599
6cb153ca 1600 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1601 if (!sock)
1602 goto out;
1603
1604 err = -ENFILE;
c6d409cf
ED
1605 newsock = sock_alloc();
1606 if (!newsock)
1da177e4
LT
1607 goto out_put;
1608
1609 newsock->type = sock->type;
1610 newsock->ops = sock->ops;
1611
1da177e4
LT
1612 /*
1613 * We don't need try_module_get here, as the listening socket (sock)
1614 * has the protocol module (sock->ops->owner) held.
1615 */
1616 __module_get(newsock->ops->owner);
1617
28407630 1618 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1619 if (unlikely(newfd < 0)) {
1620 err = newfd;
9a1875e6
DM
1621 sock_release(newsock);
1622 goto out_put;
39d8c1b6 1623 }
aab174f0 1624 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1625 if (IS_ERR(newfile)) {
28407630
AV
1626 err = PTR_ERR(newfile);
1627 put_unused_fd(newfd);
28407630
AV
1628 goto out_put;
1629 }
39d8c1b6 1630
a79af59e
FF
1631 err = security_socket_accept(sock, newsock);
1632 if (err)
39d8c1b6 1633 goto out_fd;
a79af59e 1634
cdfbabfb 1635 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1636 if (err < 0)
39d8c1b6 1637 goto out_fd;
1da177e4
LT
1638
1639 if (upeer_sockaddr) {
9b2c45d4
DV
1640 len = newsock->ops->getname(newsock,
1641 (struct sockaddr *)&address, 2);
1642 if (len < 0) {
1da177e4 1643 err = -ECONNABORTED;
39d8c1b6 1644 goto out_fd;
1da177e4 1645 }
43db362d 1646 err = move_addr_to_user(&address,
230b1839 1647 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1648 if (err < 0)
39d8c1b6 1649 goto out_fd;
1da177e4
LT
1650 }
1651
1652 /* File flags are not inherited via accept() unlike another OSes. */
1653
39d8c1b6
DM
1654 fd_install(newfd, newfile);
1655 err = newfd;
1da177e4 1656
1da177e4 1657out_put:
6cb153ca 1658 fput_light(sock->file, fput_needed);
1da177e4
LT
1659out:
1660 return err;
39d8c1b6 1661out_fd:
9606a216 1662 fput(newfile);
39d8c1b6 1663 put_unused_fd(newfd);
1da177e4
LT
1664 goto out_put;
1665}
1666
4541e805
DB
1667SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1668 int __user *, upeer_addrlen, int, flags)
1669{
1670 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1671}
1672
20f37034
HC
1673SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1674 int __user *, upeer_addrlen)
aaca0bdc 1675{
4541e805 1676 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1677}
1678
1da177e4
LT
1679/*
1680 * Attempt to connect to a socket with the server address. The address
1681 * is in user space so we verify it is OK and move it to kernel space.
1682 *
1683 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1684 * break bindings
1685 *
1686 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1687 * other SEQPACKET protocols that take time to connect() as it doesn't
1688 * include the -EINPROGRESS status for such sockets.
1689 */
1690
1387c2c2 1691int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1692{
1693 struct socket *sock;
230b1839 1694 struct sockaddr_storage address;
6cb153ca 1695 int err, fput_needed;
1da177e4 1696
6cb153ca 1697 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1698 if (!sock)
1699 goto out;
43db362d 1700 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1701 if (err < 0)
1702 goto out_put;
1703
89bddce5 1704 err =
230b1839 1705 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1706 if (err)
1707 goto out_put;
1708
230b1839 1709 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1710 sock->file->f_flags);
1711out_put:
6cb153ca 1712 fput_light(sock->file, fput_needed);
1da177e4
LT
1713out:
1714 return err;
1715}
1716
1387c2c2
DB
1717SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1718 int, addrlen)
1719{
1720 return __sys_connect(fd, uservaddr, addrlen);
1721}
1722
1da177e4
LT
1723/*
1724 * Get the local address ('name') of a socket object. Move the obtained
1725 * name to user space.
1726 */
1727
8882a107
DB
1728int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1729 int __user *usockaddr_len)
1da177e4
LT
1730{
1731 struct socket *sock;
230b1839 1732 struct sockaddr_storage address;
9b2c45d4 1733 int err, fput_needed;
89bddce5 1734
6cb153ca 1735 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1736 if (!sock)
1737 goto out;
1738
1739 err = security_socket_getsockname(sock);
1740 if (err)
1741 goto out_put;
1742
9b2c45d4
DV
1743 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1744 if (err < 0)
1da177e4 1745 goto out_put;
9b2c45d4
DV
1746 /* "err" is actually length in this case */
1747 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1748
1749out_put:
6cb153ca 1750 fput_light(sock->file, fput_needed);
1da177e4
LT
1751out:
1752 return err;
1753}
1754
8882a107
DB
1755SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1756 int __user *, usockaddr_len)
1757{
1758 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1759}
1760
1da177e4
LT
1761/*
1762 * Get the remote address ('name') of a socket object. Move the obtained
1763 * name to user space.
1764 */
1765
b21c8f83
DB
1766int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1767 int __user *usockaddr_len)
1da177e4
LT
1768{
1769 struct socket *sock;
230b1839 1770 struct sockaddr_storage address;
9b2c45d4 1771 int err, fput_needed;
1da177e4 1772
89bddce5
SH
1773 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1774 if (sock != NULL) {
1da177e4
LT
1775 err = security_socket_getpeername(sock);
1776 if (err) {
6cb153ca 1777 fput_light(sock->file, fput_needed);
1da177e4
LT
1778 return err;
1779 }
1780
9b2c45d4
DV
1781 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1782 if (err >= 0)
1783 /* "err" is actually length in this case */
1784 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1785 usockaddr_len);
6cb153ca 1786 fput_light(sock->file, fput_needed);
1da177e4
LT
1787 }
1788 return err;
1789}
1790
b21c8f83
DB
1791SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1792 int __user *, usockaddr_len)
1793{
1794 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1795}
1796
1da177e4
LT
1797/*
1798 * Send a datagram to a given address. We move the address into kernel
1799 * space and check the user space data area is readable before invoking
1800 * the protocol.
1801 */
211b634b
DB
1802int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1803 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1804{
1805 struct socket *sock;
230b1839 1806 struct sockaddr_storage address;
1da177e4
LT
1807 int err;
1808 struct msghdr msg;
1809 struct iovec iov;
6cb153ca 1810 int fput_needed;
6cb153ca 1811
602bd0e9
AV
1812 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1813 if (unlikely(err))
1814 return err;
de0fa95c
PE
1815 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1816 if (!sock)
4387ff75 1817 goto out;
6cb153ca 1818
89bddce5 1819 msg.msg_name = NULL;
89bddce5
SH
1820 msg.msg_control = NULL;
1821 msg.msg_controllen = 0;
1822 msg.msg_namelen = 0;
6cb153ca 1823 if (addr) {
43db362d 1824 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1825 if (err < 0)
1826 goto out_put;
230b1839 1827 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1828 msg.msg_namelen = addr_len;
1da177e4
LT
1829 }
1830 if (sock->file->f_flags & O_NONBLOCK)
1831 flags |= MSG_DONTWAIT;
1832 msg.msg_flags = flags;
d8725c86 1833 err = sock_sendmsg(sock, &msg);
1da177e4 1834
89bddce5 1835out_put:
de0fa95c 1836 fput_light(sock->file, fput_needed);
4387ff75 1837out:
1da177e4
LT
1838 return err;
1839}
1840
211b634b
DB
1841SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1842 unsigned int, flags, struct sockaddr __user *, addr,
1843 int, addr_len)
1844{
1845 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1846}
1847
1da177e4 1848/*
89bddce5 1849 * Send a datagram down a socket.
1da177e4
LT
1850 */
1851
3e0fa65f 1852SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1853 unsigned int, flags)
1da177e4 1854{
211b634b 1855 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1856}
1857
1858/*
89bddce5 1859 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1860 * sender. We verify the buffers are writable and if needed move the
1861 * sender address from kernel to user space.
1862 */
7a09e1eb
DB
1863int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1864 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1865{
1866 struct socket *sock;
1867 struct iovec iov;
1868 struct msghdr msg;
230b1839 1869 struct sockaddr_storage address;
89bddce5 1870 int err, err2;
6cb153ca
BL
1871 int fput_needed;
1872
602bd0e9
AV
1873 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1874 if (unlikely(err))
1875 return err;
de0fa95c 1876 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1877 if (!sock)
de0fa95c 1878 goto out;
1da177e4 1879
89bddce5
SH
1880 msg.msg_control = NULL;
1881 msg.msg_controllen = 0;
f3d33426
HFS
1882 /* Save some cycles and don't copy the address if not needed */
1883 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1884 /* We assume all kernel code knows the size of sockaddr_storage */
1885 msg.msg_namelen = 0;
130ed5d1 1886 msg.msg_iocb = NULL;
9f138fa6 1887 msg.msg_flags = 0;
1da177e4
LT
1888 if (sock->file->f_flags & O_NONBLOCK)
1889 flags |= MSG_DONTWAIT;
2da62906 1890 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1891
89bddce5 1892 if (err >= 0 && addr != NULL) {
43db362d 1893 err2 = move_addr_to_user(&address,
230b1839 1894 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1895 if (err2 < 0)
1896 err = err2;
1da177e4 1897 }
de0fa95c
PE
1898
1899 fput_light(sock->file, fput_needed);
4387ff75 1900out:
1da177e4
LT
1901 return err;
1902}
1903
7a09e1eb
DB
1904SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1905 unsigned int, flags, struct sockaddr __user *, addr,
1906 int __user *, addr_len)
1907{
1908 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
1909}
1910
1da177e4 1911/*
89bddce5 1912 * Receive a datagram from a socket.
1da177e4
LT
1913 */
1914
b7c0ddf5
JG
1915SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1916 unsigned int, flags)
1da177e4 1917{
7a09e1eb 1918 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
1919}
1920
1921/*
1922 * Set a socket option. Because we don't know the option lengths we have
1923 * to pass the user mode parameter for the protocols to sort out.
1924 */
1925
cc36dca0
DB
1926static int __sys_setsockopt(int fd, int level, int optname,
1927 char __user *optval, int optlen)
1da177e4 1928{
6cb153ca 1929 int err, fput_needed;
1da177e4
LT
1930 struct socket *sock;
1931
1932 if (optlen < 0)
1933 return -EINVAL;
89bddce5
SH
1934
1935 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1936 if (sock != NULL) {
1937 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1938 if (err)
1939 goto out_put;
1da177e4
LT
1940
1941 if (level == SOL_SOCKET)
89bddce5
SH
1942 err =
1943 sock_setsockopt(sock, level, optname, optval,
1944 optlen);
1da177e4 1945 else
89bddce5
SH
1946 err =
1947 sock->ops->setsockopt(sock, level, optname, optval,
1948 optlen);
6cb153ca
BL
1949out_put:
1950 fput_light(sock->file, fput_needed);
1da177e4
LT
1951 }
1952 return err;
1953}
1954
cc36dca0
DB
1955SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1956 char __user *, optval, int, optlen)
1957{
1958 return __sys_setsockopt(fd, level, optname, optval, optlen);
1959}
1960
1da177e4
LT
1961/*
1962 * Get a socket option. Because we don't know the option lengths we have
1963 * to pass a user mode parameter for the protocols to sort out.
1964 */
1965
13a2d70e
DB
1966static int __sys_getsockopt(int fd, int level, int optname,
1967 char __user *optval, int __user *optlen)
1da177e4 1968{
6cb153ca 1969 int err, fput_needed;
1da177e4
LT
1970 struct socket *sock;
1971
89bddce5
SH
1972 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1973 if (sock != NULL) {
6cb153ca
BL
1974 err = security_socket_getsockopt(sock, level, optname);
1975 if (err)
1976 goto out_put;
1da177e4
LT
1977
1978 if (level == SOL_SOCKET)
89bddce5
SH
1979 err =
1980 sock_getsockopt(sock, level, optname, optval,
1981 optlen);
1da177e4 1982 else
89bddce5
SH
1983 err =
1984 sock->ops->getsockopt(sock, level, optname, optval,
1985 optlen);
6cb153ca
BL
1986out_put:
1987 fput_light(sock->file, fput_needed);
1da177e4
LT
1988 }
1989 return err;
1990}
1991
13a2d70e
DB
1992SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1993 char __user *, optval, int __user *, optlen)
1994{
1995 return __sys_getsockopt(fd, level, optname, optval, optlen);
1996}
1997
1da177e4
LT
1998/*
1999 * Shutdown a socket.
2000 */
2001
005a1aea 2002int __sys_shutdown(int fd, int how)
1da177e4 2003{
6cb153ca 2004 int err, fput_needed;
1da177e4
LT
2005 struct socket *sock;
2006
89bddce5
SH
2007 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2008 if (sock != NULL) {
1da177e4 2009 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2010 if (!err)
2011 err = sock->ops->shutdown(sock, how);
2012 fput_light(sock->file, fput_needed);
1da177e4
LT
2013 }
2014 return err;
2015}
2016
005a1aea
DB
2017SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2018{
2019 return __sys_shutdown(fd, how);
2020}
2021
/* Helpers that yield the address of a msghdr field in either the native or
 * the 32-bit compat layout.  They are only usable for fields that have the
 * same type (int / unsigned) in both layouts on our platforms.
 *
 * Note that the expansion is not self-contained: it reads a variable named
 * `flags` and, for an argument `msg`, a variable named `msg_compat`, so both
 * must be in scope wherever these macros are used.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2028
c71d8ebe
TH
2029struct used_address {
2030 struct sockaddr_storage name;
2031 unsigned int name_len;
2032};
2033
da184284
AV
2034static int copy_msghdr_from_user(struct msghdr *kmsg,
2035 struct user_msghdr __user *umsg,
2036 struct sockaddr __user **save_addr,
2037 struct iovec **iov)
1661bf36 2038{
ffb07550 2039 struct user_msghdr msg;
08adb7da
AV
2040 ssize_t err;
2041
ffb07550 2042 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2043 return -EFAULT;
dbb490b9 2044
864d9664 2045 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2046 kmsg->msg_controllen = msg.msg_controllen;
2047 kmsg->msg_flags = msg.msg_flags;
2048
2049 kmsg->msg_namelen = msg.msg_namelen;
2050 if (!msg.msg_name)
6a2a2b3a
AS
2051 kmsg->msg_namelen = 0;
2052
dbb490b9
ML
2053 if (kmsg->msg_namelen < 0)
2054 return -EINVAL;
2055
1661bf36 2056 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2057 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2058
2059 if (save_addr)
ffb07550 2060 *save_addr = msg.msg_name;
08adb7da 2061
ffb07550 2062 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2063 if (!save_addr) {
864d9664
PA
2064 err = move_addr_to_kernel(msg.msg_name,
2065 kmsg->msg_namelen,
08adb7da
AV
2066 kmsg->msg_name);
2067 if (err < 0)
2068 return err;
2069 }
2070 } else {
2071 kmsg->msg_name = NULL;
2072 kmsg->msg_namelen = 0;
2073 }
2074
ffb07550 2075 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2076 return -EMSGSIZE;
2077
0345f931 2078 kmsg->msg_iocb = NULL;
2079
ffb07550
AV
2080 return import_iovec(save_addr ? READ : WRITE,
2081 msg.msg_iov, msg.msg_iovlen,
da184284 2082 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2083}
2084
666547ff 2085static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2086 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2087 struct used_address *used_address,
2088 unsigned int allowed_msghdr_flags)
1da177e4 2089{
89bddce5
SH
2090 struct compat_msghdr __user *msg_compat =
2091 (struct compat_msghdr __user *)msg;
230b1839 2092 struct sockaddr_storage address;
1da177e4 2093 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2094 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2095 __aligned(sizeof(__kernel_size_t));
89bddce5 2096 /* 20 is size of ipv6_pktinfo */
1da177e4 2097 unsigned char *ctl_buf = ctl;
d8725c86 2098 int ctl_len;
08adb7da 2099 ssize_t err;
89bddce5 2100
08adb7da 2101 msg_sys->msg_name = &address;
1da177e4 2102
08449320 2103 if (MSG_CMSG_COMPAT & flags)
08adb7da 2104 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2105 else
08adb7da 2106 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2107 if (err < 0)
da184284 2108 return err;
1da177e4
LT
2109
2110 err = -ENOBUFS;
2111
228e548e 2112 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2113 goto out_freeiov;
28a94d8f 2114 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2115 ctl_len = msg_sys->msg_controllen;
1da177e4 2116 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2117 err =
228e548e 2118 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2119 sizeof(ctl));
1da177e4
LT
2120 if (err)
2121 goto out_freeiov;
228e548e
AB
2122 ctl_buf = msg_sys->msg_control;
2123 ctl_len = msg_sys->msg_controllen;
1da177e4 2124 } else if (ctl_len) {
ac4340fc
DM
2125 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2126 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2127 if (ctl_len > sizeof(ctl)) {
1da177e4 2128 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2129 if (ctl_buf == NULL)
1da177e4
LT
2130 goto out_freeiov;
2131 }
2132 err = -EFAULT;
2133 /*
228e548e 2134 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2135 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2136 * checking falls down on this.
2137 */
fb8621bb 2138 if (copy_from_user(ctl_buf,
228e548e 2139 (void __user __force *)msg_sys->msg_control,
89bddce5 2140 ctl_len))
1da177e4 2141 goto out_freectl;
228e548e 2142 msg_sys->msg_control = ctl_buf;
1da177e4 2143 }
228e548e 2144 msg_sys->msg_flags = flags;
1da177e4
LT
2145
2146 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2147 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2148 /*
2149 * If this is sendmmsg() and current destination address is same as
2150 * previously succeeded address, omit asking LSM's decision.
2151 * used_address->name_len is initialized to UINT_MAX so that the first
2152 * destination address never matches.
2153 */
bc909d9d
MD
2154 if (used_address && msg_sys->msg_name &&
2155 used_address->name_len == msg_sys->msg_namelen &&
2156 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2157 used_address->name_len)) {
d8725c86 2158 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2159 goto out_freectl;
2160 }
d8725c86 2161 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2162 /*
2163 * If this is sendmmsg() and sending to current destination address was
2164 * successful, remember it.
2165 */
2166 if (used_address && err >= 0) {
2167 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2168 if (msg_sys->msg_name)
2169 memcpy(&used_address->name, msg_sys->msg_name,
2170 used_address->name_len);
c71d8ebe 2171 }
1da177e4
LT
2172
2173out_freectl:
89bddce5 2174 if (ctl_buf != ctl)
1da177e4
LT
2175 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2176out_freeiov:
da184284 2177 kfree(iov);
228e548e
AB
2178 return err;
2179}
2180
2181/*
2182 * BSD sendmsg interface
2183 */
2184
e1834a32
DB
2185long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2186 bool forbid_cmsg_compat)
228e548e
AB
2187{
2188 int fput_needed, err;
2189 struct msghdr msg_sys;
1be374a0
AL
2190 struct socket *sock;
2191
e1834a32
DB
2192 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2193 return -EINVAL;
2194
1be374a0 2195 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2196 if (!sock)
2197 goto out;
2198
28a94d8f 2199 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2200
6cb153ca 2201 fput_light(sock->file, fput_needed);
89bddce5 2202out:
1da177e4
LT
2203 return err;
2204}
2205
666547ff 2206SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2207{
e1834a32 2208 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2209}
2210
228e548e
AB
2211/*
2212 * Linux sendmmsg interface
2213 */
2214
2215int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2216 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2217{
2218 int fput_needed, err, datagrams;
2219 struct socket *sock;
2220 struct mmsghdr __user *entry;
2221 struct compat_mmsghdr __user *compat_entry;
2222 struct msghdr msg_sys;
c71d8ebe 2223 struct used_address used_address;
f092276d 2224 unsigned int oflags = flags;
228e548e 2225
e1834a32
DB
2226 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2227 return -EINVAL;
2228
98382f41
AB
2229 if (vlen > UIO_MAXIOV)
2230 vlen = UIO_MAXIOV;
228e548e
AB
2231
2232 datagrams = 0;
2233
2234 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2235 if (!sock)
2236 return err;
2237
c71d8ebe 2238 used_address.name_len = UINT_MAX;
228e548e
AB
2239 entry = mmsg;
2240 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2241 err = 0;
f092276d 2242 flags |= MSG_BATCH;
228e548e
AB
2243
2244 while (datagrams < vlen) {
f092276d
TH
2245 if (datagrams == vlen - 1)
2246 flags = oflags;
2247
228e548e 2248 if (MSG_CMSG_COMPAT & flags) {
666547ff 2249 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2250 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2251 if (err < 0)
2252 break;
2253 err = __put_user(err, &compat_entry->msg_len);
2254 ++compat_entry;
2255 } else {
a7526eb5 2256 err = ___sys_sendmsg(sock,
666547ff 2257 (struct user_msghdr __user *)entry,
28a94d8f 2258 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2259 if (err < 0)
2260 break;
2261 err = put_user(err, &entry->msg_len);
2262 ++entry;
2263 }
2264
2265 if (err)
2266 break;
2267 ++datagrams;
3023898b
SHY
2268 if (msg_data_left(&msg_sys))
2269 break;
a78cb84c 2270 cond_resched();
228e548e
AB
2271 }
2272
228e548e
AB
2273 fput_light(sock->file, fput_needed);
2274
728ffb86
AB
2275 /* We only return an error if no datagrams were able to be sent */
2276 if (datagrams != 0)
228e548e
AB
2277 return datagrams;
2278
228e548e
AB
2279 return err;
2280}
2281
2282SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2283 unsigned int, vlen, unsigned int, flags)
2284{
e1834a32 2285 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2286}
2287
666547ff 2288static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2289 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2290{
89bddce5
SH
2291 struct compat_msghdr __user *msg_compat =
2292 (struct compat_msghdr __user *)msg;
1da177e4 2293 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2294 struct iovec *iov = iovstack;
1da177e4 2295 unsigned long cmsg_ptr;
2da62906 2296 int len;
08adb7da 2297 ssize_t err;
1da177e4
LT
2298
2299 /* kernel mode address */
230b1839 2300 struct sockaddr_storage addr;
1da177e4
LT
2301
2302 /* user mode address pointers */
2303 struct sockaddr __user *uaddr;
08adb7da 2304 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2305
08adb7da 2306 msg_sys->msg_name = &addr;
1da177e4 2307
f3d33426 2308 if (MSG_CMSG_COMPAT & flags)
08adb7da 2309 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2310 else
08adb7da 2311 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2312 if (err < 0)
da184284 2313 return err;
1da177e4 2314
a2e27255
ACM
2315 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2316 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2317
f3d33426
HFS
2318 /* We assume all kernel code knows the size of sockaddr_storage */
2319 msg_sys->msg_namelen = 0;
2320
1da177e4
LT
2321 if (sock->file->f_flags & O_NONBLOCK)
2322 flags |= MSG_DONTWAIT;
2da62906 2323 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2324 if (err < 0)
2325 goto out_freeiov;
2326 len = err;
2327
2328 if (uaddr != NULL) {
43db362d 2329 err = move_addr_to_user(&addr,
a2e27255 2330 msg_sys->msg_namelen, uaddr,
89bddce5 2331 uaddr_len);
1da177e4
LT
2332 if (err < 0)
2333 goto out_freeiov;
2334 }
a2e27255 2335 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2336 COMPAT_FLAGS(msg));
1da177e4
LT
2337 if (err)
2338 goto out_freeiov;
2339 if (MSG_CMSG_COMPAT & flags)
a2e27255 2340 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2341 &msg_compat->msg_controllen);
2342 else
a2e27255 2343 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2344 &msg->msg_controllen);
2345 if (err)
2346 goto out_freeiov;
2347 err = len;
2348
2349out_freeiov:
da184284 2350 kfree(iov);
a2e27255
ACM
2351 return err;
2352}
2353
2354/*
2355 * BSD recvmsg interface
2356 */
2357
e1834a32
DB
2358long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2359 bool forbid_cmsg_compat)
a2e27255
ACM
2360{
2361 int fput_needed, err;
2362 struct msghdr msg_sys;
1be374a0
AL
2363 struct socket *sock;
2364
e1834a32
DB
2365 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2366 return -EINVAL;
2367
1be374a0 2368 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2369 if (!sock)
2370 goto out;
2371
a7526eb5 2372 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2373
6cb153ca 2374 fput_light(sock->file, fput_needed);
1da177e4
LT
2375out:
2376 return err;
2377}
2378
666547ff 2379SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2380 unsigned int, flags)
2381{
e1834a32 2382 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2383}
2384
a2e27255
ACM
2385/*
2386 * Linux recvmmsg interface
2387 */
2388
2389int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2390 unsigned int flags, struct timespec *timeout)
2391{
2392 int fput_needed, err, datagrams;
2393 struct socket *sock;
2394 struct mmsghdr __user *entry;
d7256d0e 2395 struct compat_mmsghdr __user *compat_entry;
a2e27255 2396 struct msghdr msg_sys;
766b9f92
DD
2397 struct timespec64 end_time;
2398 struct timespec64 timeout64;
a2e27255
ACM
2399
2400 if (timeout &&
2401 poll_select_set_timeout(&end_time, timeout->tv_sec,
2402 timeout->tv_nsec))
2403 return -EINVAL;
2404
2405 datagrams = 0;
2406
2407 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2408 if (!sock)
2409 return err;
2410
7797dc41
SHY
2411 if (likely(!(flags & MSG_ERRQUEUE))) {
2412 err = sock_error(sock->sk);
2413 if (err) {
2414 datagrams = err;
2415 goto out_put;
2416 }
e623a9e9 2417 }
a2e27255
ACM
2418
2419 entry = mmsg;
d7256d0e 2420 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2421
2422 while (datagrams < vlen) {
2423 /*
2424 * No need to ask LSM for more than the first datagram.
2425 */
d7256d0e 2426 if (MSG_CMSG_COMPAT & flags) {
666547ff 2427 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2428 &msg_sys, flags & ~MSG_WAITFORONE,
2429 datagrams);
d7256d0e
JMG
2430 if (err < 0)
2431 break;
2432 err = __put_user(err, &compat_entry->msg_len);
2433 ++compat_entry;
2434 } else {
a7526eb5 2435 err = ___sys_recvmsg(sock,
666547ff 2436 (struct user_msghdr __user *)entry,
a7526eb5
AL
2437 &msg_sys, flags & ~MSG_WAITFORONE,
2438 datagrams);
d7256d0e
JMG
2439 if (err < 0)
2440 break;
2441 err = put_user(err, &entry->msg_len);
2442 ++entry;
2443 }
2444
a2e27255
ACM
2445 if (err)
2446 break;
a2e27255
ACM
2447 ++datagrams;
2448
71c5c159
BB
2449 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2450 if (flags & MSG_WAITFORONE)
2451 flags |= MSG_DONTWAIT;
2452
a2e27255 2453 if (timeout) {
766b9f92
DD
2454 ktime_get_ts64(&timeout64);
2455 *timeout = timespec64_to_timespec(
2456 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2457 if (timeout->tv_sec < 0) {
2458 timeout->tv_sec = timeout->tv_nsec = 0;
2459 break;
2460 }
2461
2462 /* Timeout, return less than vlen datagrams */
2463 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2464 break;
2465 }
2466
2467 /* Out of band data, return right away */
2468 if (msg_sys.msg_flags & MSG_OOB)
2469 break;
a78cb84c 2470 cond_resched();
a2e27255
ACM
2471 }
2472
a2e27255 2473 if (err == 0)
34b88a68
ACM
2474 goto out_put;
2475
2476 if (datagrams == 0) {
2477 datagrams = err;
2478 goto out_put;
2479 }
a2e27255 2480
34b88a68
ACM
2481 /*
2482 * We may return less entries than requested (vlen) if the
2483 * sock is non block and there aren't enough datagrams...
2484 */
2485 if (err != -EAGAIN) {
a2e27255 2486 /*
34b88a68
ACM
2487 * ... or if recvmsg returns an error after we
2488 * received some datagrams, where we record the
2489 * error to return on the next call or if the
2490 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2491 */
34b88a68 2492 sock->sk->sk_err = -err;
a2e27255 2493 }
34b88a68
ACM
2494out_put:
2495 fput_light(sock->file, fput_needed);
a2e27255 2496
34b88a68 2497 return datagrams;
a2e27255
ACM
2498}
2499
1255e269
DB
2500static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2501 unsigned int vlen, unsigned int flags,
2502 struct timespec __user *timeout)
a2e27255
ACM
2503{
2504 int datagrams;
2505 struct timespec timeout_sys;
2506
1be374a0
AL
2507 if (flags & MSG_CMSG_COMPAT)
2508 return -EINVAL;
2509
a2e27255
ACM
2510 if (!timeout)
2511 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2512
2513 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2514 return -EFAULT;
2515
2516 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2517
2518 if (datagrams > 0 &&
2519 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2520 datagrams = -EFAULT;
2521
2522 return datagrams;
2523}
2524
1255e269
DB
2525SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2526 unsigned int, vlen, unsigned int, flags,
2527 struct timespec __user *, timeout)
2528{
2529 return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
2530}
2531
a2e27255 2532#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number.  AL(n) is the size of n unsigned long arguments.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
2543
2544/*
89bddce5 2545 * System call vectors.
1da177e4
LT
2546 *
2547 * Argument checking cleaned up. Saved 20% in size.
2548 * This function doesn't need to set the kernel lock because
89bddce5 2549 * it is set by the callees.
1da177e4
LT
2550 */
2551
3e0fa65f 2552SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2553{
2950fa9d 2554 unsigned long a[AUDITSC_ARGS];
89bddce5 2555 unsigned long a0, a1;
1da177e4 2556 int err;
47379052 2557 unsigned int len;
1da177e4 2558
228e548e 2559 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2560 return -EINVAL;
2561
47379052
AV
2562 len = nargs[call];
2563 if (len > sizeof(a))
2564 return -EINVAL;
2565
1da177e4 2566 /* copy_from_user should be SMP safe. */
47379052 2567 if (copy_from_user(a, args, len))
1da177e4 2568 return -EFAULT;
3ec3b2fb 2569
2950fa9d
CG
2570 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2571 if (err)
2572 return err;
3ec3b2fb 2573
89bddce5
SH
2574 a0 = a[0];
2575 a1 = a[1];
2576
2577 switch (call) {
2578 case SYS_SOCKET:
9d6a15c3 2579 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2580 break;
2581 case SYS_BIND:
a87d35d8 2582 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2583 break;
2584 case SYS_CONNECT:
1387c2c2 2585 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2586 break;
2587 case SYS_LISTEN:
25e290ee 2588 err = __sys_listen(a0, a1);
89bddce5
SH
2589 break;
2590 case SYS_ACCEPT:
4541e805
DB
2591 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2592 (int __user *)a[2], 0);
89bddce5
SH
2593 break;
2594 case SYS_GETSOCKNAME:
2595 err =
8882a107
DB
2596 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2597 (int __user *)a[2]);
89bddce5
SH
2598 break;
2599 case SYS_GETPEERNAME:
2600 err =
b21c8f83
DB
2601 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2602 (int __user *)a[2]);
89bddce5
SH
2603 break;
2604 case SYS_SOCKETPAIR:
6debc8d8 2605 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2606 break;
2607 case SYS_SEND:
f3bf896b
DB
2608 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2609 NULL, 0);
89bddce5
SH
2610 break;
2611 case SYS_SENDTO:
211b634b
DB
2612 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2613 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2614 break;
2615 case SYS_RECV:
d27e9afc
DB
2616 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2617 NULL, NULL);
89bddce5
SH
2618 break;
2619 case SYS_RECVFROM:
7a09e1eb
DB
2620 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2621 (struct sockaddr __user *)a[4],
2622 (int __user *)a[5]);
89bddce5
SH
2623 break;
2624 case SYS_SHUTDOWN:
005a1aea 2625 err = __sys_shutdown(a0, a1);
89bddce5
SH
2626 break;
2627 case SYS_SETSOCKOPT:
cc36dca0
DB
2628 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2629 a[4]);
89bddce5
SH
2630 break;
2631 case SYS_GETSOCKOPT:
2632 err =
13a2d70e
DB
2633 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2634 (int __user *)a[4]);
89bddce5
SH
2635 break;
2636 case SYS_SENDMSG:
e1834a32
DB
2637 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2638 a[2], true);
89bddce5 2639 break;
228e548e 2640 case SYS_SENDMMSG:
e1834a32
DB
2641 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2642 a[3], true);
228e548e 2643 break;
89bddce5 2644 case SYS_RECVMSG:
e1834a32
DB
2645 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2646 a[2], true);
89bddce5 2647 break;
a2e27255 2648 case SYS_RECVMMSG:
1255e269
DB
2649 err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2650 a[3], (struct timespec __user *)a[4]);
a2e27255 2651 break;
de11defe 2652 case SYS_ACCEPT4:
4541e805
DB
2653 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2654 (int __user *)a[2], a[3]);
aaca0bdc 2655 break;
89bddce5
SH
2656 default:
2657 err = -EINVAL;
2658 break;
1da177e4
LT
2659 }
2660 return err;
2661}
2662
89bddce5 2663#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2664
55737fda
SH
2665/**
2666 * sock_register - add a socket protocol handler
2667 * @ops: description of protocol
2668 *
1da177e4
LT
2669 * This function is called by a protocol handler that wants to
2670 * advertise its address family, and have it linked into the
e793c0f7 2671 * socket interface. The value ops->family corresponds to the
55737fda 2672 * socket system call protocol family.
1da177e4 2673 */
f0fd27d4 2674int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2675{
2676 int err;
2677
2678 if (ops->family >= NPROTO) {
3410f22e 2679 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2680 return -ENOBUFS;
2681 }
55737fda
SH
2682
2683 spin_lock(&net_family_lock);
190683a9
ED
2684 if (rcu_dereference_protected(net_families[ops->family],
2685 lockdep_is_held(&net_family_lock)))
55737fda
SH
2686 err = -EEXIST;
2687 else {
cf778b00 2688 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2689 err = 0;
2690 }
55737fda
SH
2691 spin_unlock(&net_family_lock);
2692
3410f22e 2693 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2694 return err;
2695}
c6d409cf 2696EXPORT_SYMBOL(sock_register);
1da177e4 2697
55737fda
SH
2698/**
2699 * sock_unregister - remove a protocol handler
2700 * @family: protocol family to remove
2701 *
1da177e4
LT
2702 * This function is called by a protocol handler that wants to
2703 * remove its address family, and have it unlinked from the
55737fda
SH
2704 * new socket creation.
2705 *
2706 * If protocol handler is a module, then it can use module reference
2707 * counts to protect against new references. If protocol handler is not
2708 * a module then it needs to provide its own protection in
2709 * the ops->create routine.
1da177e4 2710 */
f0fd27d4 2711void sock_unregister(int family)
1da177e4 2712{
f0fd27d4 2713 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2714
55737fda 2715 spin_lock(&net_family_lock);
a9b3cd7f 2716 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2717 spin_unlock(&net_family_lock);
2718
2719 synchronize_rcu();
2720
3410f22e 2721 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2722}
c6d409cf 2723EXPORT_SYMBOL(sock_unregister);
1da177e4 2724
bf2ae2e4
XL
2725bool sock_is_registered(int family)
2726{
2727 return family < NPROTO && rcu_access_pointer(net_families[family]);
2728}
2729
77d76ea3 2730static int __init sock_init(void)
1da177e4 2731{
b3e19d92 2732 int err;
2ca794e5
EB
2733 /*
2734 * Initialize the network sysctl infrastructure.
2735 */
2736 err = net_sysctl_init();
2737 if (err)
2738 goto out;
b3e19d92 2739
1da177e4 2740 /*
89bddce5 2741 * Initialize skbuff SLAB cache
1da177e4
LT
2742 */
2743 skb_init();
1da177e4
LT
2744
2745 /*
89bddce5 2746 * Initialize the protocols module.
1da177e4
LT
2747 */
2748
2749 init_inodecache();
b3e19d92
NP
2750
2751 err = register_filesystem(&sock_fs_type);
2752 if (err)
2753 goto out_fs;
1da177e4 2754 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2755 if (IS_ERR(sock_mnt)) {
2756 err = PTR_ERR(sock_mnt);
2757 goto out_mount;
2758 }
77d76ea3
AK
2759
2760 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2761 */
2762
2763#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2764 err = netfilter_init();
2765 if (err)
2766 goto out;
1da177e4 2767#endif
cbeb321a 2768
408eccce 2769 ptp_classifier_init();
c1f19b51 2770
b3e19d92
NP
2771out:
2772 return err;
2773
2774out_mount:
2775 unregister_filesystem(&sock_fs_type);
2776out_fs:
2777 goto out;
1da177e4
LT
2778}
2779
77d76ea3
AK
2780core_initcall(sock_init); /* early initcall */
2781
1da177e4
LT
2782#ifdef CONFIG_PROC_FS
2783void socket_seq_show(struct seq_file *seq)
2784{
648845ab
TZ
2785 seq_printf(seq, "sockets: used %d\n",
2786 sock_inuse_get(seq->private));
1da177e4 2787}
89bddce5 2788#endif /* CONFIG_PROC_FS */
1da177e4 2789
89bbfc95 2790#ifdef CONFIG_COMPAT
6b96018b 2791static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2792 unsigned int cmd, void __user *up)
7a229387 2793{
7a229387
AB
2794 mm_segment_t old_fs = get_fs();
2795 struct timeval ktv;
2796 int err;
2797
2798 set_fs(KERNEL_DS);
6b96018b 2799 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2800 set_fs(old_fs);
644595f8 2801 if (!err)
ed6fe9d6 2802 err = compat_put_timeval(&ktv, up);
644595f8 2803
7a229387
AB
2804 return err;
2805}
2806
6b96018b 2807static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2808 unsigned int cmd, void __user *up)
7a229387 2809{
7a229387
AB
2810 mm_segment_t old_fs = get_fs();
2811 struct timespec kts;
2812 int err;
2813
2814 set_fs(KERNEL_DS);
6b96018b 2815 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2816 set_fs(old_fs);
644595f8 2817 if (!err)
ed6fe9d6 2818 err = compat_put_timespec(&kts, up);
644595f8 2819
7a229387
AB
2820 return err;
2821}
2822
36fd633e 2823static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2824{
6b96018b 2825 struct compat_ifconf ifc32;
7a229387 2826 struct ifconf ifc;
7a229387
AB
2827 int err;
2828
6b96018b 2829 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2830 return -EFAULT;
2831
36fd633e
AV
2832 ifc.ifc_len = ifc32.ifc_len;
2833 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2834
36fd633e
AV
2835 rtnl_lock();
2836 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2837 rtnl_unlock();
7a229387
AB
2838 if (err)
2839 return err;
2840
36fd633e 2841 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2842 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2843 return -EFAULT;
2844
2845 return 0;
2846}
2847
6b96018b 2848static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2849{
3a7da39d
BH
2850 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2851 bool convert_in = false, convert_out = false;
44c02a2c
AV
2852 size_t buf_size = 0;
2853 struct ethtool_rxnfc __user *rxnfc = NULL;
2854 struct ifreq ifr;
3a7da39d
BH
2855 u32 rule_cnt = 0, actual_rule_cnt;
2856 u32 ethcmd;
7a229387 2857 u32 data;
3a7da39d 2858 int ret;
7a229387 2859
3a7da39d
BH
2860 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2861 return -EFAULT;
7a229387 2862
3a7da39d
BH
2863 compat_rxnfc = compat_ptr(data);
2864
2865 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2866 return -EFAULT;
2867
3a7da39d
BH
2868 /* Most ethtool structures are defined without padding.
2869 * Unfortunately struct ethtool_rxnfc is an exception.
2870 */
2871 switch (ethcmd) {
2872 default:
2873 break;
2874 case ETHTOOL_GRXCLSRLALL:
2875 /* Buffer size is variable */
2876 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2877 return -EFAULT;
2878 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2879 return -ENOMEM;
2880 buf_size += rule_cnt * sizeof(u32);
2881 /* fall through */
2882 case ETHTOOL_GRXRINGS:
2883 case ETHTOOL_GRXCLSRLCNT:
2884 case ETHTOOL_GRXCLSRULE:
55664f32 2885 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2886 convert_out = true;
2887 /* fall through */
2888 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2889 buf_size += sizeof(struct ethtool_rxnfc);
2890 convert_in = true;
44c02a2c 2891 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
2892 break;
2893 }
2894
44c02a2c 2895 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2896 return -EFAULT;
2897
44c02a2c 2898 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 2899
3a7da39d 2900 if (convert_in) {
127fe533 2901 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2902 * fs.ring_cookie and at the end of fs, but nowhere else.
2903 */
127fe533
AD
2904 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2905 sizeof(compat_rxnfc->fs.m_ext) !=
2906 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2907 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2908 BUILD_BUG_ON(
2909 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2910 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2911 offsetof(struct ethtool_rxnfc, fs.location) -
2912 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2913
2914 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2915 (void __user *)(&rxnfc->fs.m_ext + 1) -
2916 (void __user *)rxnfc) ||
3a7da39d
BH
2917 copy_in_user(&rxnfc->fs.ring_cookie,
2918 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2919 (void __user *)(&rxnfc->fs.location + 1) -
2920 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2921 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2922 sizeof(rxnfc->rule_cnt)))
2923 return -EFAULT;
2924 }
2925
44c02a2c 2926 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
2927 if (ret)
2928 return ret;
2929
2930 if (convert_out) {
2931 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2932 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2933 (const void __user *)rxnfc) ||
3a7da39d
BH
2934 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2935 &rxnfc->fs.ring_cookie,
954b1244
SH
2936 (const void __user *)(&rxnfc->fs.location + 1) -
2937 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2938 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2939 sizeof(rxnfc->rule_cnt)))
2940 return -EFAULT;
2941
2942 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2943 /* As an optimisation, we only copy the actual
2944 * number of rules that the underlying
2945 * function returned. Since Mallory might
2946 * change the rule count in user memory, we
2947 * check that it is less than the rule count
2948 * originally given (as the user buffer size),
2949 * which has been range-checked.
2950 */
2951 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2952 return -EFAULT;
2953 if (actual_rule_cnt < rule_cnt)
2954 rule_cnt = actual_rule_cnt;
2955 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2956 &rxnfc->rule_locs[0],
2957 rule_cnt * sizeof(u32)))
2958 return -EFAULT;
2959 }
2960 }
2961
2962 return 0;
7a229387
AB
2963}
2964
7a50a240
AB
2965static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2966{
7a50a240 2967 compat_uptr_t uptr32;
44c02a2c
AV
2968 struct ifreq ifr;
2969 void __user *saved;
2970 int err;
7a50a240 2971
44c02a2c 2972 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
2973 return -EFAULT;
2974
2975 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2976 return -EFAULT;
2977
44c02a2c
AV
2978 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
2979 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 2980
44c02a2c
AV
2981 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
2982 if (!err) {
2983 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
2984 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
2985 err = -EFAULT;
ccbd6a5a 2986 }
44c02a2c 2987 return err;
7a229387
AB
2988}
2989
590d4693
BH
2990/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2991static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2992 struct compat_ifreq __user *u_ifreq32)
7a229387 2993{
44c02a2c 2994 struct ifreq ifreq;
7a229387
AB
2995 u32 data32;
2996
44c02a2c 2997 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 2998 return -EFAULT;
44c02a2c 2999 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3000 return -EFAULT;
44c02a2c 3001 ifreq.ifr_data = compat_ptr(data32);
7a229387 3002
44c02a2c 3003 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3004}
3005
a2116ed2
AB
3006static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3007 struct compat_ifreq __user *uifr32)
3008{
3009 struct ifreq ifr;
3010 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3011 int err;
3012
3013 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3014 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3015 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3016 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3017 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3018 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3019 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3020 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3021 if (err)
3022 return -EFAULT;
3023
44c02a2c 3024 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3025
3026 if (cmd == SIOCGIFMAP && !err) {
3027 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3028 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3029 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3030 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3031 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3032 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3033 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3034 if (err)
3035 err = -EFAULT;
3036 }
3037 return err;
3038}
3039
7a229387 3040struct rtentry32 {
c6d409cf 3041 u32 rt_pad1;
7a229387
AB
3042 struct sockaddr rt_dst; /* target address */
3043 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3044 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3045 unsigned short rt_flags;
3046 short rt_pad2;
3047 u32 rt_pad3;
3048 unsigned char rt_tos;
3049 unsigned char rt_class;
3050 short rt_pad4;
3051 short rt_metric; /* +1 for binary compatibility! */
7a229387 3052 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3053 u32 rt_mtu; /* per route MTU/Window */
3054 u32 rt_window; /* Window clamping */
7a229387
AB
3055 unsigned short rt_irtt; /* Initial RTT */
3056};
3057
3058struct in6_rtmsg32 {
3059 struct in6_addr rtmsg_dst;
3060 struct in6_addr rtmsg_src;
3061 struct in6_addr rtmsg_gateway;
3062 u32 rtmsg_type;
3063 u16 rtmsg_dst_len;
3064 u16 rtmsg_src_len;
3065 u32 rtmsg_metric;
3066 u32 rtmsg_info;
3067 u32 rtmsg_flags;
3068 s32 rtmsg_ifindex;
3069};
3070
6b96018b
AB
3071static int routing_ioctl(struct net *net, struct socket *sock,
3072 unsigned int cmd, void __user *argp)
7a229387
AB
3073{
3074 int ret;
3075 void *r = NULL;
3076 struct in6_rtmsg r6;
3077 struct rtentry r4;
3078 char devname[16];
3079 u32 rtdev;
3080 mm_segment_t old_fs = get_fs();
3081
6b96018b
AB
3082 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3083 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3084 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3085 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3086 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3087 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3088 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3089 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3090 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3091 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3092 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3093
3094 r = (void *) &r6;
3095 } else { /* ipv4 */
6b96018b 3096 struct rtentry32 __user *ur4 = argp;
c6d409cf 3097 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3098 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3099 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3100 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3101 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3102 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3103 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3104 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3105 if (rtdev) {
c6d409cf 3106 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3107 r4.rt_dev = (char __user __force *)devname;
3108 devname[15] = 0;
7a229387
AB
3109 } else
3110 r4.rt_dev = NULL;
3111
3112 r = (void *) &r4;
3113 }
3114
3115 if (ret) {
3116 ret = -EFAULT;
3117 goto out;
3118 }
3119
c6d409cf 3120 set_fs(KERNEL_DS);
6b96018b 3121 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3122 set_fs(old_fs);
7a229387
AB
3123
3124out:
7a229387
AB
3125 return ret;
3126}
3127
3128/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3129 * for some operations; this forces use of the newer bridge-utils that
25985edc 3130 * use compatible ioctls
7a229387 3131 */
6b96018b 3132static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3133{
6b96018b 3134 compat_ulong_t tmp;
7a229387 3135
6b96018b 3136 if (get_user(tmp, argp))
7a229387
AB
3137 return -EFAULT;
3138 if (tmp == BRCTL_GET_VERSION)
3139 return BRCTL_VERSION + 1;
3140 return -EINVAL;
3141}
3142
6b96018b
AB
3143static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3144 unsigned int cmd, unsigned long arg)
3145{
3146 void __user *argp = compat_ptr(arg);
3147 struct sock *sk = sock->sk;
3148 struct net *net = sock_net(sk);
7a229387 3149
6b96018b 3150 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3151 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3152
3153 switch (cmd) {
3154 case SIOCSIFBR:
3155 case SIOCGIFBR:
3156 return old_bridge_ioctl(argp);
6b96018b 3157 case SIOCGIFCONF:
36fd633e 3158 return compat_dev_ifconf(net, argp);
6b96018b
AB
3159 case SIOCETHTOOL:
3160 return ethtool_ioctl(net, argp);
7a50a240
AB
3161 case SIOCWANDEV:
3162 return compat_siocwandev(net, argp);
a2116ed2
AB
3163 case SIOCGIFMAP:
3164 case SIOCSIFMAP:
3165 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3166 case SIOCADDRT:
3167 case SIOCDELRT:
3168 return routing_ioctl(net, sock, cmd, argp);
3169 case SIOCGSTAMP:
3170 return do_siocgstamp(net, sock, cmd, argp);
3171 case SIOCGSTAMPNS:
3172 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3173 case SIOCBONDSLAVEINFOQUERY:
3174 case SIOCBONDINFOQUERY:
a2116ed2 3175 case SIOCSHWTSTAMP:
fd468c74 3176 case SIOCGHWTSTAMP:
590d4693 3177 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3178
3179 case FIOSETOWN:
3180 case SIOCSPGRP:
3181 case FIOGETOWN:
3182 case SIOCGPGRP:
3183 case SIOCBRADDBR:
3184 case SIOCBRDELBR:
3185 case SIOCGIFVLAN:
3186 case SIOCSIFVLAN:
3187 case SIOCADDDLCI:
3188 case SIOCDELDLCI:
c62cce2c 3189 case SIOCGSKNS:
6b96018b
AB
3190 return sock_ioctl(file, cmd, arg);
3191
3192 case SIOCGIFFLAGS:
3193 case SIOCSIFFLAGS:
3194 case SIOCGIFMETRIC:
3195 case SIOCSIFMETRIC:
3196 case SIOCGIFMTU:
3197 case SIOCSIFMTU:
3198 case SIOCGIFMEM:
3199 case SIOCSIFMEM:
3200 case SIOCGIFHWADDR:
3201 case SIOCSIFHWADDR:
3202 case SIOCADDMULTI:
3203 case SIOCDELMULTI:
3204 case SIOCGIFINDEX:
6b96018b
AB
3205 case SIOCGIFADDR:
3206 case SIOCSIFADDR:
3207 case SIOCSIFHWBROADCAST:
6b96018b 3208 case SIOCDIFADDR:
6b96018b
AB
3209 case SIOCGIFBRDADDR:
3210 case SIOCSIFBRDADDR:
3211 case SIOCGIFDSTADDR:
3212 case SIOCSIFDSTADDR:
3213 case SIOCGIFNETMASK:
3214 case SIOCSIFNETMASK:
3215 case SIOCSIFPFLAGS:
3216 case SIOCGIFPFLAGS:
3217 case SIOCGIFTXQLEN:
3218 case SIOCSIFTXQLEN:
3219 case SIOCBRADDIF:
3220 case SIOCBRDELIF:
9177efd3
AB
3221 case SIOCSIFNAME:
3222 case SIOCGMIIPHY:
3223 case SIOCGMIIREG:
3224 case SIOCSMIIREG:
6b96018b
AB
3225 case SIOCSARP:
3226 case SIOCGARP:
3227 case SIOCDARP:
6b96018b 3228 case SIOCATMARK:
f92d4fc9
AV
3229 case SIOCBONDENSLAVE:
3230 case SIOCBONDRELEASE:
3231 case SIOCBONDSETHWADDR:
3232 case SIOCBONDCHANGEACTIVE:
4cf808e7 3233 case SIOCGIFNAME:
9177efd3
AB
3234 return sock_do_ioctl(net, sock, cmd, arg);
3235 }
3236
6b96018b
AB
3237 return -ENOIOCTLCMD;
3238}
7a229387 3239
95c96174 3240static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3241 unsigned long arg)
89bbfc95
SP
3242{
3243 struct socket *sock = file->private_data;
3244 int ret = -ENOIOCTLCMD;
87de87d5
DM
3245 struct sock *sk;
3246 struct net *net;
3247
3248 sk = sock->sk;
3249 net = sock_net(sk);
89bbfc95
SP
3250
3251 if (sock->ops->compat_ioctl)
3252 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3253
87de87d5
DM
3254 if (ret == -ENOIOCTLCMD &&
3255 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3256 ret = compat_wext_handle_ioctl(net, cmd, arg);
3257
6b96018b
AB
3258 if (ret == -ENOIOCTLCMD)
3259 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3260
89bbfc95
SP
3261 return ret;
3262}
3263#endif
3264
ac5a488e
SS
3265int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3266{
3267 return sock->ops->bind(sock, addr, addrlen);
3268}
c6d409cf 3269EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3270
3271int kernel_listen(struct socket *sock, int backlog)
3272{
3273 return sock->ops->listen(sock, backlog);
3274}
c6d409cf 3275EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3276
3277int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3278{
3279 struct sock *sk = sock->sk;
3280 int err;
3281
3282 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3283 newsock);
3284 if (err < 0)
3285 goto done;
3286
cdfbabfb 3287 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3288 if (err < 0) {
3289 sock_release(*newsock);
fa8705b0 3290 *newsock = NULL;
ac5a488e
SS
3291 goto done;
3292 }
3293
3294 (*newsock)->ops = sock->ops;
1b08534e 3295 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3296
3297done:
3298 return err;
3299}
c6d409cf 3300EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3301
3302int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3303 int flags)
ac5a488e
SS
3304{
3305 return sock->ops->connect(sock, addr, addrlen, flags);
3306}
c6d409cf 3307EXPORT_SYMBOL(kernel_connect);
ac5a488e 3308
9b2c45d4 3309int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3310{
9b2c45d4 3311 return sock->ops->getname(sock, addr, 0);
ac5a488e 3312}
c6d409cf 3313EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3314
9b2c45d4 3315int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3316{
9b2c45d4 3317 return sock->ops->getname(sock, addr, 1);
ac5a488e 3318}
c6d409cf 3319EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3320
3321int kernel_getsockopt(struct socket *sock, int level, int optname,
3322 char *optval, int *optlen)
3323{
3324 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3325 char __user *uoptval;
3326 int __user *uoptlen;
ac5a488e
SS
3327 int err;
3328
fb8621bb
NK
3329 uoptval = (char __user __force *) optval;
3330 uoptlen = (int __user __force *) optlen;
3331
ac5a488e
SS
3332 set_fs(KERNEL_DS);
3333 if (level == SOL_SOCKET)
fb8621bb 3334 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3335 else
fb8621bb
NK
3336 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3337 uoptlen);
ac5a488e
SS
3338 set_fs(oldfs);
3339 return err;
3340}
c6d409cf 3341EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3342
3343int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3344 char *optval, unsigned int optlen)
ac5a488e
SS
3345{
3346 mm_segment_t oldfs = get_fs();
fb8621bb 3347 char __user *uoptval;
ac5a488e
SS
3348 int err;
3349
fb8621bb
NK
3350 uoptval = (char __user __force *) optval;
3351
ac5a488e
SS
3352 set_fs(KERNEL_DS);
3353 if (level == SOL_SOCKET)
fb8621bb 3354 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3355 else
fb8621bb 3356 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3357 optlen);
3358 set_fs(oldfs);
3359 return err;
3360}
c6d409cf 3361EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3362
3363int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3364 size_t size, int flags)
3365{
3366 if (sock->ops->sendpage)
3367 return sock->ops->sendpage(sock, page, offset, size, flags);
3368
3369 return sock_no_sendpage(sock, page, offset, size, flags);
3370}
c6d409cf 3371EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3372
306b13eb
TH
3373int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3374 size_t size, int flags)
3375{
3376 struct socket *sock = sk->sk_socket;
3377
3378 if (sock->ops->sendpage_locked)
3379 return sock->ops->sendpage_locked(sk, page, offset, size,
3380 flags);
3381
3382 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3383}
3384EXPORT_SYMBOL(kernel_sendpage_locked);
3385
91cf45f0
TM
3386int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3387{
3388 return sock->ops->shutdown(sock, how);
3389}
91cf45f0 3390EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3391
3392/* This routine returns the IP overhead imposed by a socket i.e.
3393 * the length of the underlying IP header, depending on whether
3394 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3395 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3396 */
3397u32 kernel_sock_ip_overhead(struct sock *sk)
3398{
3399 struct inet_sock *inet;
3400 struct ip_options_rcu *opt;
3401 u32 overhead = 0;
113c3075
P
3402#if IS_ENABLED(CONFIG_IPV6)
3403 struct ipv6_pinfo *np;
3404 struct ipv6_txoptions *optv6 = NULL;
3405#endif /* IS_ENABLED(CONFIG_IPV6) */
3406
3407 if (!sk)
3408 return overhead;
3409
113c3075
P
3410 switch (sk->sk_family) {
3411 case AF_INET:
3412 inet = inet_sk(sk);
3413 overhead += sizeof(struct iphdr);
3414 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3415 sock_owned_by_user(sk));
113c3075
P
3416 if (opt)
3417 overhead += opt->opt.optlen;
3418 return overhead;
3419#if IS_ENABLED(CONFIG_IPV6)
3420 case AF_INET6:
3421 np = inet6_sk(sk);
3422 overhead += sizeof(struct ipv6hdr);
3423 if (np)
3424 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3425 sock_owned_by_user(sk));
113c3075
P
3426 if (optv6)
3427 overhead += (optv6->opt_flen + optv6->opt_nflen);
3428 return overhead;
3429#endif /* IS_ENABLED(CONFIG_IPV6) */
3430 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3431 return overhead;
3432 }
3433}
3434EXPORT_SYMBOL(kernel_sock_ip_overhead);