]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/socket.c
UBUNTU: Ubuntu-4.15.0-96.97
[mirror_ubuntu-bionic-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
db3a93c7 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b
AB
107#include <linux/sockios.h>
108#include <linux/atalk.h>
076bb0c8 109#include <net/busy_poll.h>
f24b9be5 110#include <linux/errqueue.h>
06021292 111
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll settings; only compiled in when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 116
8ae5e030
AV
117static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
118static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 119static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
120
121static int sock_close(struct inode *inode, struct file *file);
122static unsigned int sock_poll(struct file *file,
123 struct poll_table_struct *wait);
89bddce5 124static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
125#ifdef CONFIG_COMPAT
126static long compat_sock_ioctl(struct file *file,
89bddce5 127 unsigned int cmd, unsigned long arg);
89bbfc95 128#endif
1da177e4 129static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
130static ssize_t sock_sendpage(struct file *file, struct page *page,
131 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 132static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 133 struct pipe_inode_info *pipe, size_t len,
9c55e01c 134 unsigned int flags);
1da177e4 135
1da177e4
LT
136/*
137 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
138 * in the operation structures but are done directly via the socketcall() multiplexor.
139 */
140
da7071d7 141static const struct file_operations socket_file_ops = {
1da177e4
LT
142 .owner = THIS_MODULE,
143 .llseek = no_llseek,
8ae5e030
AV
144 .read_iter = sock_read_iter,
145 .write_iter = sock_write_iter,
1da177e4
LT
146 .poll = sock_poll,
147 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
148#ifdef CONFIG_COMPAT
149 .compat_ioctl = compat_sock_ioctl,
150#endif
1da177e4 151 .mmap = sock_mmap,
1da177e4
LT
152 .release = sock_close,
153 .fasync = sock_fasync,
5274f052
JA
154 .sendpage = sock_sendpage,
155 .splice_write = generic_splice_sendpage,
9c55e01c 156 .splice_read = sock_splice_read,
1da177e4
LT
157};
158
159/*
160 * The protocol list. Each protocol is registered in here.
161 */
162
1da177e4 163static DEFINE_SPINLOCK(net_family_lock);
190683a9 164static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 165
1da177e4
LT
166/*
167 * Statistics counters of the socket lists
168 */
169
c6d409cf 170static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
171
172/*
89bddce5
SH
173 * Support routines.
174 * Move socket addresses back and forth across the kernel/user
175 * divide and look after the messy bits.
1da177e4
LT
176 */
177
1da177e4
LT
178/**
179 * move_addr_to_kernel - copy a socket address into kernel space
180 * @uaddr: Address in user space
181 * @kaddr: Address in kernel space
182 * @ulen: Length in user space
183 *
184 * The address is copied into kernel space. If the provided address is
185 * too long an error code of -EINVAL is returned. If the copy gives
186 * invalid addresses -EFAULT is returned. On a success 0 is returned.
187 */
188
43db362d 189int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 190{
230b1839 191 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 192 return -EINVAL;
89bddce5 193 if (ulen == 0)
1da177e4 194 return 0;
89bddce5 195 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 196 return -EFAULT;
3ec3b2fb 197 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
198}
199
200/**
201 * move_addr_to_user - copy an address to user space
202 * @kaddr: kernel space address
203 * @klen: length of address in kernel
204 * @uaddr: user space address
205 * @ulen: pointer to user length field
206 *
207 * The value pointed to by ulen on entry is the buffer length available.
208 * This is overwritten with the buffer space used. -EINVAL is returned
209 * if an overlong buffer is specified or a negative buffer size. -EFAULT
210 * is returned if either the buffer or the length field are not
211 * accessible.
212 * After copying the data up to the limit the user specifies, the true
213 * length of the data is written over the length limit the user
214 * specified. Zero is returned for a success.
215 */
89bddce5 216
43db362d 217static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 218 void __user *uaddr, int __user *ulen)
1da177e4
LT
219{
220 int err;
221 int len;
222
68c6beb3 223 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
224 err = get_user(len, ulen);
225 if (err)
1da177e4 226 return err;
89bddce5
SH
227 if (len > klen)
228 len = klen;
68c6beb3 229 if (len < 0)
1da177e4 230 return -EINVAL;
89bddce5 231 if (len) {
d6fe3945
SG
232 if (audit_sockaddr(klen, kaddr))
233 return -ENOMEM;
89bddce5 234 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
235 return -EFAULT;
236 }
237 /*
89bddce5
SH
238 * "fromlen shall refer to the value before truncation.."
239 * 1003.1g
1da177e4
LT
240 */
241 return __put_user(klen, ulen);
242}
243
e18b890b 244static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
245
246static struct inode *sock_alloc_inode(struct super_block *sb)
247{
248 struct socket_alloc *ei;
eaefd110 249 struct socket_wq *wq;
89bddce5 250
e94b1766 251 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
252 if (!ei)
253 return NULL;
eaefd110
ED
254 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
255 if (!wq) {
43815482
ED
256 kmem_cache_free(sock_inode_cachep, ei);
257 return NULL;
258 }
eaefd110
ED
259 init_waitqueue_head(&wq->wait);
260 wq->fasync_list = NULL;
574aab1e 261 wq->flags = 0;
eaefd110 262 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
273static void sock_destroy_inode(struct inode *inode)
274{
43815482 275 struct socket_alloc *ei;
eaefd110 276 struct socket_wq *wq;
43815482
ED
277
278 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 279 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 280 kfree_rcu(wq, rcu);
43815482 281 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
282}
283
51cc5068 284static void init_once(void *foo)
1da177e4 285{
89bddce5 286 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 287
a35afb83 288 inode_init_once(&ei->vfs_inode);
1da177e4 289}
89bddce5 290
1e911632 291static void init_inodecache(void)
1da177e4
LT
292{
293 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
294 sizeof(struct socket_alloc),
295 0,
296 (SLAB_HWCACHE_ALIGN |
297 SLAB_RECLAIM_ACCOUNT |
5d097056 298 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 299 init_once);
1e911632 300 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
301}
302
b87221de 303static const struct super_operations sockfs_ops = {
c6d409cf
ED
304 .alloc_inode = sock_alloc_inode,
305 .destroy_inode = sock_destroy_inode,
306 .statfs = simple_statfs,
1da177e4
LT
307};
308
c23fbb6b
ED
309/*
310 * sockfs_dname() is called from d_path().
311 */
312static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
313{
314 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 315 d_inode(dentry)->i_ino);
c23fbb6b
ED
316}
317
3ba13d17 318static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 319 .d_dname = sockfs_dname,
1da177e4
LT
320};
321
bba0bd31
AG
322static int sockfs_xattr_get(const struct xattr_handler *handler,
323 struct dentry *dentry, struct inode *inode,
324 const char *suffix, void *value, size_t size)
325{
326 if (value) {
327 if (dentry->d_name.len + 1 > size)
328 return -ERANGE;
329 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
330 }
331 return dentry->d_name.len + 1;
332}
333
334#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
335#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
336#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
337
338static const struct xattr_handler sockfs_xattr_handler = {
339 .name = XATTR_NAME_SOCKPROTONAME,
340 .get = sockfs_xattr_get,
341};
342
4a590153
AG
343static int sockfs_security_xattr_set(const struct xattr_handler *handler,
344 struct dentry *dentry, struct inode *inode,
345 const char *suffix, const void *value,
346 size_t size, int flags)
347{
348 /* Handled by LSM. */
349 return -EAGAIN;
350}
351
352static const struct xattr_handler sockfs_security_xattr_handler = {
353 .prefix = XATTR_SECURITY_PREFIX,
354 .set = sockfs_security_xattr_set,
355};
356
bba0bd31
AG
357static const struct xattr_handler *sockfs_xattr_handlers[] = {
358 &sockfs_xattr_handler,
4a590153 359 &sockfs_security_xattr_handler,
bba0bd31
AG
360 NULL
361};
362
c74a1cbb
AV
363static struct dentry *sockfs_mount(struct file_system_type *fs_type,
364 int flags, const char *dev_name, void *data)
365{
bba0bd31
AG
366 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
367 sockfs_xattr_handlers,
368 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
369}
370
371static struct vfsmount *sock_mnt __read_mostly;
372
373static struct file_system_type sock_fs_type = {
374 .name = "sockfs",
375 .mount = sockfs_mount,
376 .kill_sb = kill_anon_super,
377};
378
1da177e4
LT
379/*
380 * Obtains the first available file descriptor and sets it up for use.
381 *
39d8c1b6
DM
382 * These functions create file structures and maps them to fd space
383 * of the current process. On success it returns file descriptor
1da177e4
LT
384 * and file struct implicitly stored in sock->file.
385 * Note that another thread may close file descriptor before we return
386 * from this function. We use the fact that now we do not refer
387 * to socket after mapping. If one day we will need it, this
388 * function will increment ref. count on file by 1.
389 *
390 * In any case returned fd MAY BE not valid!
391 * This race condition is unavoidable
392 * with shared fd spaces, we cannot solve it inside kernel,
393 * but we take care of internal coherence yet.
394 */
395
aab174f0 396struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 397{
7cbe66b6 398 struct qstr name = { .name = "" };
2c48b9c4 399 struct path path;
7cbe66b6 400 struct file *file;
1da177e4 401
600e1779
MY
402 if (dname) {
403 name.name = dname;
404 name.len = strlen(name.name);
405 } else if (sock->sk) {
406 name.name = sock->sk->sk_prot_creator->name;
407 name.len = strlen(name.name);
408 }
4b936885 409 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
8e1611e2
AV
410 if (unlikely(!path.dentry)) {
411 sock_release(sock);
28407630 412 return ERR_PTR(-ENOMEM);
8e1611e2 413 }
2c48b9c4 414 path.mnt = mntget(sock_mnt);
39d8c1b6 415
2c48b9c4 416 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 417
2c48b9c4 418 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 419 &socket_file_ops);
b5ffe634 420 if (IS_ERR(file)) {
8e1611e2 421 /* drop dentry, keep inode for a bit */
c5ef6035 422 ihold(d_inode(path.dentry));
2c48b9c4 423 path_put(&path);
8e1611e2
AV
424 /* ... and now kill it properly */
425 sock_release(sock);
39b65252 426 return file;
cc3808f8
AV
427 }
428
429 sock->file = file;
77d27200 430 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 431 file->private_data = sock;
28407630 432 return file;
39d8c1b6 433}
56b31d1c 434EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 435
56b31d1c 436static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
437{
438 struct file *newfile;
28407630 439 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
440 if (unlikely(fd < 0)) {
441 sock_release(sock);
28407630 442 return fd;
ce4bb04c 443 }
39d8c1b6 444
aab174f0 445 newfile = sock_alloc_file(sock, flags, NULL);
28407630 446 if (likely(!IS_ERR(newfile))) {
39d8c1b6 447 fd_install(fd, newfile);
28407630
AV
448 return fd;
449 }
7cbe66b6 450
28407630
AV
451 put_unused_fd(fd);
452 return PTR_ERR(newfile);
1da177e4
LT
453}
454
406a3c63 455struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 456{
6cb153ca
BL
457 if (file->f_op == &socket_file_ops)
458 return file->private_data; /* set in sock_map_fd */
459
23bb80d2
ED
460 *err = -ENOTSOCK;
461 return NULL;
6cb153ca 462}
406a3c63 463EXPORT_SYMBOL(sock_from_file);
6cb153ca 464
1da177e4 465/**
c6d409cf 466 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
467 * @fd: file handle
468 * @err: pointer to an error code return
469 *
470 * The file handle passed in is locked and the socket it is bound
241c4667 471 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
472 * with a negative errno code and NULL is returned. The function checks
473 * for both invalid handles and passing a handle which is not a socket.
474 *
475 * On a success the socket object pointer is returned.
476 */
477
478struct socket *sockfd_lookup(int fd, int *err)
479{
480 struct file *file;
1da177e4
LT
481 struct socket *sock;
482
89bddce5
SH
483 file = fget(fd);
484 if (!file) {
1da177e4
LT
485 *err = -EBADF;
486 return NULL;
487 }
89bddce5 488
6cb153ca
BL
489 sock = sock_from_file(file, err);
490 if (!sock)
1da177e4 491 fput(file);
6cb153ca
BL
492 return sock;
493}
c6d409cf 494EXPORT_SYMBOL(sockfd_lookup);
1da177e4 495
6cb153ca
BL
496static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
497{
00e188ef 498 struct fd f = fdget(fd);
6cb153ca
BL
499 struct socket *sock;
500
3672558c 501 *err = -EBADF;
00e188ef
AV
502 if (f.file) {
503 sock = sock_from_file(f.file, err);
504 if (likely(sock)) {
505 *fput_needed = f.flags;
6cb153ca 506 return sock;
00e188ef
AV
507 }
508 fdput(f);
1da177e4 509 }
6cb153ca 510 return NULL;
1da177e4
LT
511}
512
600e1779
MY
513static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
514 size_t size)
515{
516 ssize_t len;
517 ssize_t used = 0;
518
c5ef6035 519 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
520 if (len < 0)
521 return len;
522 used += len;
523 if (buffer) {
524 if (size < used)
525 return -ERANGE;
526 buffer += len;
527 }
528
529 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
530 used += len;
531 if (buffer) {
532 if (size < used)
533 return -ERANGE;
534 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
535 buffer += len;
536 }
537
538 return used;
539}
540
dc647ec8 541static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
542{
543 int err = simple_setattr(dentry, iattr);
544
e1a3a60a 545 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
546 struct socket *sock = SOCKET_I(d_inode(dentry));
547
4ba6b27d
CW
548 if (sock->sk)
549 sock->sk->sk_uid = iattr->ia_uid;
550 else
551 err = -ENOENT;
86741ec2
LC
552 }
553
554 return err;
555}
556
600e1779 557static const struct inode_operations sockfs_inode_ops = {
600e1779 558 .listxattr = sockfs_listxattr,
86741ec2 559 .setattr = sockfs_setattr,
600e1779
MY
560};
561
1da177e4
LT
562/**
563 * sock_alloc - allocate a socket
89bddce5 564 *
1da177e4
LT
565 * Allocate a new inode and socket object. The two are bound together
566 * and initialised. The socket is then returned. If we are out of inodes
567 * NULL is returned.
568 */
569
f4a00aac 570struct socket *sock_alloc(void)
1da177e4 571{
89bddce5
SH
572 struct inode *inode;
573 struct socket *sock;
1da177e4 574
a209dfc7 575 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
576 if (!inode)
577 return NULL;
578
579 sock = SOCKET_I(inode);
580
85fe4025 581 inode->i_ino = get_next_ino();
89bddce5 582 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
583 inode->i_uid = current_fsuid();
584 inode->i_gid = current_fsgid();
600e1779 585 inode->i_op = &sockfs_inode_ops;
1da177e4 586
19e8d69c 587 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
588 return sock;
589}
f4a00aac 590EXPORT_SYMBOL(sock_alloc);
1da177e4 591
1da177e4
LT
592/**
593 * sock_release - close a socket
594 * @sock: socket to close
595 *
596 * The socket is released from the protocol stack if it has a release
597 * callback, and the inode is then released if the socket is bound to
89bddce5 598 * an inode not a file.
1da177e4 599 */
89bddce5 600
4ba6b27d 601static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
602{
603 if (sock->ops) {
604 struct module *owner = sock->ops->owner;
605
4ba6b27d
CW
606 if (inode)
607 inode_lock(inode);
1da177e4 608 sock->ops->release(sock);
54cdde8d 609 sock->sk = NULL;
4ba6b27d
CW
610 if (inode)
611 inode_unlock(inode);
1da177e4
LT
612 sock->ops = NULL;
613 module_put(owner);
614 }
615
eaefd110 616 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 617 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 618
19e8d69c 619 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
620 if (!sock->file) {
621 iput(SOCK_INODE(sock));
622 return;
623 }
89bddce5 624 sock->file = NULL;
1da177e4 625}
4ba6b27d
CW
626
627void sock_release(struct socket *sock)
628{
629 __sock_release(sock, NULL);
630}
c6d409cf 631EXPORT_SYMBOL(sock_release);
1da177e4 632
c14ac945 633void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 634{
140c55d4
ED
635 u8 flags = *tx_flags;
636
c14ac945 637 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
638 flags |= SKBTX_HW_TSTAMP;
639
c14ac945 640 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
641 flags |= SKBTX_SW_TSTAMP;
642
c14ac945 643 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
644 flags |= SKBTX_SCHED_TSTAMP;
645
140c55d4 646 *tx_flags = flags;
20d49473 647}
67cc0d40 648EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 649
d8725c86 650static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 651{
01e97e65 652 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
653 BUG_ON(ret == -EIOCBQUEUED);
654 return ret;
1da177e4
LT
655}
656
/*
 * Send @msg on @sock after consulting the security hook.  If the hook
 * returns non-zero that value is returned and nothing is sent.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
665
666int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
667 struct kvec *vec, size_t num, size_t size)
668{
6aa24814 669 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 670 return sock_sendmsg(sock, msg);
1da177e4 671}
c6d409cf 672EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 673
306b13eb
TH
674int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
675 struct kvec *vec, size_t num, size_t size)
676{
677 struct socket *sock = sk->sk_socket;
678
679 if (!sock->ops->sendmsg_locked)
db5980d8 680 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb
TH
681
682 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
683
684 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
685}
686EXPORT_SYMBOL(kernel_sendmsg_locked);
687
8605330a
SHY
688static bool skb_is_err_queue(const struct sk_buff *skb)
689{
690 /* pkt_type of skbs enqueued on the error queue are set to
691 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
692 * in recvmsg, since skbs received on a local socket will never
693 * have a pkt_type of PACKET_OUTGOING.
694 */
695 return skb->pkt_type == PACKET_OUTGOING;
696}
697
b50a5c70
ML
698/* On transmit, software and hardware timestamps are returned independently.
699 * As the two skb clones share the hardware timestamp, which may be updated
700 * before the software timestamp is received, a hardware TX timestamp may be
701 * returned only if there is no software TX timestamp. Ignore false software
702 * timestamps, which may be made in the __sock_recv_timestamp() call when the
703 * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
704 * hardware timestamp.
705 */
706static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
707{
708 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
709}
710
aad9c8c4
ML
711static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
712{
713 struct scm_ts_pktinfo ts_pktinfo;
714 struct net_device *orig_dev;
715
716 if (!skb_mac_header_was_set(skb))
717 return;
718
719 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
720
721 rcu_read_lock();
722 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
723 if (orig_dev)
724 ts_pktinfo.if_index = orig_dev->ifindex;
725 rcu_read_unlock();
726
727 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
728 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
729 sizeof(ts_pktinfo), &ts_pktinfo);
730}
731
92f37fd2
ED
732/*
733 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
734 */
735void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
736 struct sk_buff *skb)
737{
20d49473 738 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 739 struct scm_timestamping tss;
b50a5c70 740 int empty = 1, false_tstamp = 0;
20d49473
PO
741 struct skb_shared_hwtstamps *shhwtstamps =
742 skb_hwtstamps(skb);
743
744 /* Race occurred between timestamp enabling and packet
745 receiving. Fill in the current time for now. */
b50a5c70 746 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 747 __net_timestamp(skb);
b50a5c70
ML
748 false_tstamp = 1;
749 }
20d49473
PO
750
751 if (need_software_tstamp) {
752 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
753 struct timeval tv;
754 skb_get_timestamp(skb, &tv);
755 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
756 sizeof(tv), &tv);
757 } else {
f24b9be5
WB
758 struct timespec ts;
759 skb_get_timestampns(skb, &ts);
20d49473 760 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 761 sizeof(ts), &ts);
20d49473
PO
762 }
763 }
764
f24b9be5 765 memset(&tss, 0, sizeof(tss));
c199105d 766 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 767 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 768 empty = 0;
4d276eb6 769 if (shhwtstamps &&
b9f40e21 770 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 771 !skb_is_swtx_tstamp(skb, false_tstamp) &&
aad9c8c4 772 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 773 empty = 0;
aad9c8c4
ML
774 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
775 !skb_is_err_queue(skb))
776 put_ts_pktinfo(msg, skb);
777 }
1c885808 778 if (!empty) {
20d49473 779 put_cmsg(msg, SOL_SOCKET,
f24b9be5 780 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 781
8605330a 782 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 783 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
784 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
785 skb->len, skb->data);
786 }
92f37fd2 787}
7c81fd8b
ACM
788EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
789
6e3e939f
JB
790void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
791 struct sk_buff *skb)
792{
793 int ack;
794
795 if (!sock_flag(sk, SOCK_WIFI_STATUS))
796 return;
797 if (!skb->wifi_acked_valid)
798 return;
799
800 ack = skb->wifi_acked;
801
802 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
803}
804EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
805
11165f14 806static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
807 struct sk_buff *skb)
3b885787 808{
744d5a3e 809 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 810 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 811 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
812}
813
/*
 * Attach receive-side ancillary data for @skb to @msg: timestamps first,
 * then the receive-queue drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);	/* timestamp cmsgs */
	sock_recv_drops(msg, sk, skb);		/* SO_RXQ_OVFL cmsg */
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 821
1b784140 822static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 823 int flags)
1da177e4 824{
2da62906 825 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
826}
827
/*
 * Receive into @msg from @sock after consulting the security hook.  If
 * the hook returns non-zero that value is returned and nothing is read.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 835
c1249c0a
ML
836/**
837 * kernel_recvmsg - Receive a message from a socket (kernel space)
838 * @sock: The socket to receive the message from
839 * @msg: Received message
840 * @vec: Input s/g array for message data
841 * @num: Size of input s/g array
842 * @size: Number of bytes to read
843 * @flags: Message flags (MSG_DONTWAIT, etc...)
844 *
845 * On return the msg structure contains the scatter/gather array passed in the
846 * vec argument. The array is modified so that it consists of the unfilled
847 * portion of the original array.
848 *
849 * The returned value is the total number of bytes received, or an error.
850 */
89bddce5
SH
851int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
852 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
853{
854 mm_segment_t oldfs = get_fs();
855 int result;
856
6aa24814 857 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 858 set_fs(KERNEL_DS);
2da62906 859 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
860 set_fs(oldfs);
861 return result;
862}
c6d409cf 863EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 864
ce1d4d3e
CH
865static ssize_t sock_sendpage(struct file *file, struct page *page,
866 int offset, size_t size, loff_t *ppos, int more)
1da177e4 867{
1da177e4
LT
868 struct socket *sock;
869 int flags;
870
ce1d4d3e
CH
871 sock = file->private_data;
872
35f9c09f
ED
873 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
874 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
875 flags |= more;
ce1d4d3e 876
e6949583 877 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 878}
1da177e4 879
9c55e01c 880static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 881 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
882 unsigned int flags)
883{
884 struct socket *sock = file->private_data;
885
997b37da
RDC
886 if (unlikely(!sock->ops->splice_read))
887 return -EINVAL;
888
9c55e01c
JA
889 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
890}
891
8ae5e030 892static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 893{
6d652330
AV
894 struct file *file = iocb->ki_filp;
895 struct socket *sock = file->private_data;
0345f931 896 struct msghdr msg = {.msg_iter = *to,
897 .msg_iocb = iocb};
8ae5e030 898 ssize_t res;
ce1d4d3e 899
b35a5552 900 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
901 msg.msg_flags = MSG_DONTWAIT;
902
903 if (iocb->ki_pos != 0)
1da177e4 904 return -ESPIPE;
027445c3 905
66ee59af 906 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
907 return 0;
908
2da62906 909 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
910 *to = msg.msg_iter;
911 return res;
1da177e4
LT
912}
913
8ae5e030 914static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 915{
6d652330
AV
916 struct file *file = iocb->ki_filp;
917 struct socket *sock = file->private_data;
0345f931 918 struct msghdr msg = {.msg_iter = *from,
919 .msg_iocb = iocb};
8ae5e030 920 ssize_t res;
1da177e4 921
8ae5e030 922 if (iocb->ki_pos != 0)
ce1d4d3e 923 return -ESPIPE;
027445c3 924
b35a5552 925 if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
8ae5e030
AV
926 msg.msg_flags = MSG_DONTWAIT;
927
6d652330
AV
928 if (sock->type == SOCK_SEQPACKET)
929 msg.msg_flags |= MSG_EOR;
930
d8725c86 931 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
932 *from = msg.msg_iter;
933 return res;
1da177e4
LT
934}
935
1da177e4
LT
936/*
937 * Atomic setting of ioctl hooks to avoid race
938 * with module unload.
939 */
940
4a3e2f71 941static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 942static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 943
881d966b 944void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 945{
4a3e2f71 946 mutex_lock(&br_ioctl_mutex);
1da177e4 947 br_ioctl_hook = hook;
4a3e2f71 948 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
949}
950EXPORT_SYMBOL(brioctl_set);
951
4a3e2f71 952static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 953static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 954
881d966b 955void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 956{
4a3e2f71 957 mutex_lock(&vlan_ioctl_mutex);
1da177e4 958 vlan_ioctl_hook = hook;
4a3e2f71 959 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
960}
961EXPORT_SYMBOL(vlan_ioctl_set);
962
4a3e2f71 963static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 964static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 965
89bddce5 966void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 967{
4a3e2f71 968 mutex_lock(&dlci_ioctl_mutex);
1da177e4 969 dlci_ioctl_hook = hook;
4a3e2f71 970 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
971}
972EXPORT_SYMBOL(dlci_ioctl_set);
973
6b96018b
AB
974static long sock_do_ioctl(struct net *net, struct socket *sock,
975 unsigned int cmd, unsigned long arg)
976{
977 int err;
978 void __user *argp = (void __user *)arg;
979
980 err = sock->ops->ioctl(sock, cmd, arg);
981
982 /*
983 * If this ioctl is unknown try to hand it down
984 * to the NIC driver.
985 */
986 if (err == -ENOIOCTLCMD)
987 err = dev_ioctl(net, cmd, argp);
988
989 return err;
990}
991
1da177e4
LT
992/*
993 * With an ioctl, arg may well be a user mode pointer, but we don't know
994 * what to do with it - that's up to the protocol still.
995 */
996
c62cce2c
AV
997static struct ns_common *get_net_ns(struct ns_common *ns)
998{
999 return &get_net(container_of(ns, struct net, ns))->ns;
1000}
1001
1da177e4
LT
1002static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1003{
1004 struct socket *sock;
881d966b 1005 struct sock *sk;
1da177e4
LT
1006 void __user *argp = (void __user *)arg;
1007 int pid, err;
881d966b 1008 struct net *net;
1da177e4 1009
b69aee04 1010 sock = file->private_data;
881d966b 1011 sk = sock->sk;
3b1e0a65 1012 net = sock_net(sk);
1da177e4 1013 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1014 err = dev_ioctl(net, cmd, argp);
1da177e4 1015 } else
3d23e349 1016#ifdef CONFIG_WEXT_CORE
1da177e4 1017 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1018 err = dev_ioctl(net, cmd, argp);
1da177e4 1019 } else
3d23e349 1020#endif
89bddce5 1021 switch (cmd) {
1da177e4
LT
1022 case FIOSETOWN:
1023 case SIOCSPGRP:
1024 err = -EFAULT;
1025 if (get_user(pid, (int __user *)argp))
1026 break;
393cc3f5 1027 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1028 break;
1029 case FIOGETOWN:
1030 case SIOCGPGRP:
609d7fa9 1031 err = put_user(f_getown(sock->file),
89bddce5 1032 (int __user *)argp);
1da177e4
LT
1033 break;
1034 case SIOCGIFBR:
1035 case SIOCSIFBR:
1036 case SIOCBRADDBR:
1037 case SIOCBRDELBR:
1038 err = -ENOPKG;
1039 if (!br_ioctl_hook)
1040 request_module("bridge");
1041
4a3e2f71 1042 mutex_lock(&br_ioctl_mutex);
89bddce5 1043 if (br_ioctl_hook)
881d966b 1044 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1045 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1046 break;
1047 case SIOCGIFVLAN:
1048 case SIOCSIFVLAN:
1049 err = -ENOPKG;
1050 if (!vlan_ioctl_hook)
1051 request_module("8021q");
1052
4a3e2f71 1053 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1054 if (vlan_ioctl_hook)
881d966b 1055 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1056 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1057 break;
1da177e4
LT
1058 case SIOCADDDLCI:
1059 case SIOCDELDLCI:
1060 err = -ENOPKG;
1061 if (!dlci_ioctl_hook)
1062 request_module("dlci");
1063
7512cbf6
PE
1064 mutex_lock(&dlci_ioctl_mutex);
1065 if (dlci_ioctl_hook)
1da177e4 1066 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1067 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1068 break;
c62cce2c
AV
1069 case SIOCGSKNS:
1070 err = -EPERM;
1071 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1072 break;
1073
1074 err = open_related_ns(&net->ns, get_net_ns);
1075 break;
1da177e4 1076 default:
6b96018b 1077 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1078 break;
89bddce5 1079 }
1da177e4
LT
1080 return err;
1081}
1082
1083int sock_create_lite(int family, int type, int protocol, struct socket **res)
1084{
1085 int err;
1086 struct socket *sock = NULL;
89bddce5 1087
1da177e4
LT
1088 err = security_socket_create(family, type, protocol, 1);
1089 if (err)
1090 goto out;
1091
1092 sock = sock_alloc();
1093 if (!sock) {
1094 err = -ENOMEM;
1095 goto out;
1096 }
1097
1da177e4 1098 sock->type = type;
7420ed23
VY
1099 err = security_socket_post_create(sock, family, type, protocol, 1);
1100 if (err)
1101 goto out_release;
1102
1da177e4
LT
1103out:
1104 *res = sock;
1105 return err;
7420ed23
VY
1106out_release:
1107 sock_release(sock);
1108 sock = NULL;
1109 goto out;
1da177e4 1110}
c6d409cf 1111EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1112
1113/* No kernel lock held - perfect */
89bddce5 1114static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1115{
cbf55001 1116 unsigned int busy_flag = 0;
1da177e4
LT
1117 struct socket *sock;
1118
1119 /*
89bddce5 1120 * We can't return errors to poll, so it's either yes or no.
1da177e4 1121 */
b69aee04 1122 sock = file->private_data;
2d48d67f 1123
cbf55001 1124 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1125 /* this socket can poll_ll so tell the system call */
cbf55001 1126 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1127
1128 /* once, only if requested by syscall */
cbf55001
ET
1129 if (wait && (wait->_key & POLL_BUSY_LOOP))
1130 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1131 }
1132
cbf55001 1133 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1134}
1135
89bddce5 1136static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1137{
b69aee04 1138 struct socket *sock = file->private_data;
1da177e4
LT
1139
1140 return sock->ops->mmap(file, sock, vma);
1141}
1142
/*
 * release file operation for sockets: release the socket embedded in
 * @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
1148
1149/*
1150 * Update the socket async list
1151 *
1152 * Fasync_list locking strategy.
1153 *
1154 * 1. fasync_list is modified only under process context socket lock
1155 * i.e. under semaphore.
1156 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1157 * or under socket lock
1da177e4
LT
1158 */
1159
1160static int sock_fasync(int fd, struct file *filp, int on)
1161{
989a2979
ED
1162 struct socket *sock = filp->private_data;
1163 struct sock *sk = sock->sk;
eaefd110 1164 struct socket_wq *wq;
1da177e4 1165
989a2979 1166 if (sk == NULL)
1da177e4 1167 return -EINVAL;
1da177e4
LT
1168
1169 lock_sock(sk);
1e1d04e6 1170 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1171 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1172
eaefd110 1173 if (!wq->fasync_list)
989a2979
ED
1174 sock_reset_flag(sk, SOCK_FASYNC);
1175 else
bcdce719 1176 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1177
989a2979 1178 release_sock(sk);
1da177e4
LT
1179 return 0;
1180}
1181
ceb5d58b 1182/* This function may be called only under rcu_lock */
1da177e4 1183
ceb5d58b 1184int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1185{
ceb5d58b 1186 if (!wq || !wq->fasync_list)
1da177e4 1187 return -1;
ceb5d58b 1188
89bddce5 1189 switch (how) {
8d8ad9d7 1190 case SOCK_WAKE_WAITD:
ceb5d58b 1191 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1192 break;
1193 goto call_kill;
8d8ad9d7 1194 case SOCK_WAKE_SPACE:
ceb5d58b 1195 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1196 break;
1197 /* fall through */
8d8ad9d7 1198 case SOCK_WAKE_IO:
89bddce5 1199call_kill:
43815482 1200 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1201 break;
8d8ad9d7 1202 case SOCK_WAKE_URG:
43815482 1203 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1204 }
ceb5d58b 1205
1da177e4
LT
1206 return 0;
1207}
c6d409cf 1208EXPORT_SYMBOL(sock_wake_async);
1da177e4 1209
721db93a 1210int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1211 struct socket **res, int kern)
1da177e4
LT
1212{
1213 int err;
1214 struct socket *sock;
55737fda 1215 const struct net_proto_family *pf;
1da177e4
LT
1216
1217 /*
89bddce5 1218 * Check protocol is in range
1da177e4
LT
1219 */
1220 if (family < 0 || family >= NPROTO)
1221 return -EAFNOSUPPORT;
1222 if (type < 0 || type >= SOCK_MAX)
1223 return -EINVAL;
1224
1225 /* Compatibility.
1226
1227 This uglymoron is moved from INET layer to here to avoid
1228 deadlock in module load.
1229 */
1230 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1231 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1232 current->comm);
1da177e4
LT
1233 family = PF_PACKET;
1234 }
1235
1236 err = security_socket_create(family, type, protocol, kern);
1237 if (err)
1238 return err;
89bddce5 1239
55737fda
SH
1240 /*
1241 * Allocate the socket and allow the family to set things up. if
1242 * the protocol is 0, the family is instructed to select an appropriate
1243 * default.
1244 */
1245 sock = sock_alloc();
1246 if (!sock) {
e87cc472 1247 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1248 return -ENFILE; /* Not exactly a match, but its the
1249 closest posix thing */
1250 }
1251
1252 sock->type = type;
1253
95a5afca 1254#ifdef CONFIG_MODULES
89bddce5
SH
1255 /* Attempt to load a protocol module if the find failed.
1256 *
1257 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1258 * requested real, full-featured networking support upon configuration.
1259 * Otherwise module support will break!
1260 */
190683a9 1261 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1262 request_module("net-pf-%d", family);
1da177e4
LT
1263#endif
1264
55737fda
SH
1265 rcu_read_lock();
1266 pf = rcu_dereference(net_families[family]);
1267 err = -EAFNOSUPPORT;
1268 if (!pf)
1269 goto out_release;
1da177e4
LT
1270
1271 /*
1272 * We will call the ->create function, that possibly is in a loadable
1273 * module, so we have to bump that loadable module refcnt first.
1274 */
55737fda 1275 if (!try_module_get(pf->owner))
1da177e4
LT
1276 goto out_release;
1277
55737fda
SH
1278 /* Now protected by module ref count */
1279 rcu_read_unlock();
1280
3f378b68 1281 err = pf->create(net, sock, protocol, kern);
55737fda 1282 if (err < 0)
1da177e4 1283 goto out_module_put;
a79af59e 1284
1da177e4
LT
1285 /*
1286 * Now to bump the refcnt of the [loadable] module that owns this
1287 * socket at sock_release time we decrement its refcnt.
1288 */
55737fda
SH
1289 if (!try_module_get(sock->ops->owner))
1290 goto out_module_busy;
1291
1da177e4
LT
1292 /*
1293 * Now that we're done with the ->create function, the [loadable]
1294 * module can have its refcnt decremented
1295 */
55737fda 1296 module_put(pf->owner);
7420ed23
VY
1297 err = security_socket_post_create(sock, family, type, protocol, kern);
1298 if (err)
3b185525 1299 goto out_sock_release;
55737fda 1300 *res = sock;
1da177e4 1301
55737fda
SH
1302 return 0;
1303
1304out_module_busy:
1305 err = -EAFNOSUPPORT;
1da177e4 1306out_module_put:
55737fda
SH
1307 sock->ops = NULL;
1308 module_put(pf->owner);
1309out_sock_release:
1da177e4 1310 sock_release(sock);
55737fda
SH
1311 return err;
1312
1313out_release:
1314 rcu_read_unlock();
1315 goto out_sock_release;
1da177e4 1316}
721db93a 1317EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1318
1319int sock_create(int family, int type, int protocol, struct socket **res)
1320{
1b8d7ae4 1321 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1322}
c6d409cf 1323EXPORT_SYMBOL(sock_create);
1da177e4 1324
/*
 * Create a socket in namespace @net, with the kern argument of
 * __sock_create() set to 1.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1330
3e0fa65f 1331SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1332{
1333 int retval;
1334 struct socket *sock;
a677a039
UD
1335 int flags;
1336
e38b36f3
UD
1337 /* Check the SOCK_* constants for consistency. */
1338 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1339 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1340 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1341 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1342
a677a039 1343 flags = type & ~SOCK_TYPE_MASK;
77d27200 1344 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1345 return -EINVAL;
1346 type &= SOCK_TYPE_MASK;
1da177e4 1347
aaca0bdc
UD
1348 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1349 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1350
1da177e4
LT
1351 retval = sock_create(family, type, protocol, &sock);
1352 if (retval < 0)
8e1611e2 1353 return retval;
1da177e4 1354
8e1611e2 1355 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1356}
1357
1358/*
1359 * Create a pair of connected sockets.
1360 */
1361
3e0fa65f
HC
1362SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1363 int __user *, usockvec)
1da177e4
LT
1364{
1365 struct socket *sock1, *sock2;
1366 int fd1, fd2, err;
db349509 1367 struct file *newfile1, *newfile2;
a677a039
UD
1368 int flags;
1369
1370 flags = type & ~SOCK_TYPE_MASK;
77d27200 1371 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1372 return -EINVAL;
1373 type &= SOCK_TYPE_MASK;
1da177e4 1374
aaca0bdc
UD
1375 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1376 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1377
016a266b
AV
1378 /*
1379 * reserve descriptors and make sure we won't fail
1380 * to return them to userland.
1381 */
1382 fd1 = get_unused_fd_flags(flags);
1383 if (unlikely(fd1 < 0))
1384 return fd1;
1385
1386 fd2 = get_unused_fd_flags(flags);
1387 if (unlikely(fd2 < 0)) {
1388 put_unused_fd(fd1);
1389 return fd2;
1390 }
1391
1392 err = put_user(fd1, &usockvec[0]);
1393 if (err)
1394 goto out;
1395
1396 err = put_user(fd2, &usockvec[1]);
1397 if (err)
1398 goto out;
1399
1da177e4
LT
1400 /*
1401 * Obtain the first socket and check if the underlying protocol
1402 * supports the socketpair call.
1403 */
1404
1405 err = sock_create(family, type, protocol, &sock1);
016a266b 1406 if (unlikely(err < 0))
1da177e4
LT
1407 goto out;
1408
1409 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1410 if (unlikely(err < 0)) {
1411 sock_release(sock1);
1412 goto out;
bf3c23d1 1413 }
d73aa286 1414
016a266b
AV
1415 err = sock1->ops->socketpair(sock1, sock2);
1416 if (unlikely(err < 0)) {
1417 sock_release(sock2);
1418 sock_release(sock1);
1419 goto out;
28407630
AV
1420 }
1421
aab174f0 1422 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1423 if (IS_ERR(newfile1)) {
28407630 1424 err = PTR_ERR(newfile1);
016a266b
AV
1425 sock_release(sock2);
1426 goto out;
28407630
AV
1427 }
1428
aab174f0 1429 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1430 if (IS_ERR(newfile2)) {
1431 err = PTR_ERR(newfile2);
016a266b
AV
1432 fput(newfile1);
1433 goto out;
db349509
AV
1434 }
1435
157cf649 1436 audit_fd_pair(fd1, fd2);
d73aa286 1437
db349509
AV
1438 fd_install(fd1, newfile1);
1439 fd_install(fd2, newfile2);
d73aa286 1440 return 0;
1da177e4 1441
016a266b 1442out:
d73aa286 1443 put_unused_fd(fd2);
d73aa286 1444 put_unused_fd(fd1);
1da177e4
LT
1445 return err;
1446}
1447
1da177e4
LT
1448/*
1449 * Bind a name to a socket. Nothing much to do here since it's
1450 * the protocol's responsibility to handle the local address.
1451 *
1452 * We move the socket address to kernel space before we call
1453 * the protocol layer (having also checked the address is ok).
1454 */
1455
20f37034 1456SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1457{
1458 struct socket *sock;
230b1839 1459 struct sockaddr_storage address;
6cb153ca 1460 int err, fput_needed;
1da177e4 1461
89bddce5 1462 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1463 if (sock) {
43db362d 1464 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1465 if (err >= 0) {
1466 err = security_socket_bind(sock,
230b1839 1467 (struct sockaddr *)&address,
89bddce5 1468 addrlen);
6cb153ca
BL
1469 if (!err)
1470 err = sock->ops->bind(sock,
89bddce5 1471 (struct sockaddr *)
230b1839 1472 &address, addrlen);
1da177e4 1473 }
6cb153ca 1474 fput_light(sock->file, fput_needed);
89bddce5 1475 }
1da177e4
LT
1476 return err;
1477}
1478
1da177e4
LT
1479/*
1480 * Perform a listen. Basically, we allow the protocol to do anything
1481 * necessary for a listen, and if that works, we mark the socket as
1482 * ready for listening.
1483 */
1484
3e0fa65f 1485SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1486{
1487 struct socket *sock;
6cb153ca 1488 int err, fput_needed;
b8e1f9b5 1489 int somaxconn;
89bddce5
SH
1490
1491 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1492 if (sock) {
8efa6e93 1493 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1494 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1495 backlog = somaxconn;
1da177e4
LT
1496
1497 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1498 if (!err)
1499 err = sock->ops->listen(sock, backlog);
1da177e4 1500
6cb153ca 1501 fput_light(sock->file, fput_needed);
1da177e4
LT
1502 }
1503 return err;
1504}
1505
1da177e4
LT
1506/*
1507 * For accept, we attempt to create a new socket, set up the link
1508 * with the client, wake up the client, then return the new
1509 * connected fd. We collect the address of the connector in kernel
1510 * space and move it to user at the very end. This is unclean because
1511 * we open the socket then return an error.
1512 *
1513 * 1003.1g adds the ability to recvmsg() to query connection pending
1514 * status to recvmsg. We need to add that support in a way that's
1515 * clean when we restructure accept also.
1516 */
1517
20f37034
HC
1518SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1519 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1520{
1521 struct socket *sock, *newsock;
39d8c1b6 1522 struct file *newfile;
6cb153ca 1523 int err, len, newfd, fput_needed;
230b1839 1524 struct sockaddr_storage address;
1da177e4 1525
77d27200 1526 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1527 return -EINVAL;
1528
1529 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1530 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1531
6cb153ca 1532 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1533 if (!sock)
1534 goto out;
1535
1536 err = -ENFILE;
c6d409cf
ED
1537 newsock = sock_alloc();
1538 if (!newsock)
1da177e4
LT
1539 goto out_put;
1540
1541 newsock->type = sock->type;
1542 newsock->ops = sock->ops;
1543
1da177e4
LT
1544 /*
1545 * We don't need try_module_get here, as the listening socket (sock)
1546 * has the protocol module (sock->ops->owner) held.
1547 */
1548 __module_get(newsock->ops->owner);
1549
28407630 1550 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1551 if (unlikely(newfd < 0)) {
1552 err = newfd;
9a1875e6
DM
1553 sock_release(newsock);
1554 goto out_put;
39d8c1b6 1555 }
aab174f0 1556 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1557 if (IS_ERR(newfile)) {
28407630
AV
1558 err = PTR_ERR(newfile);
1559 put_unused_fd(newfd);
28407630
AV
1560 goto out_put;
1561 }
39d8c1b6 1562
a79af59e
FF
1563 err = security_socket_accept(sock, newsock);
1564 if (err)
39d8c1b6 1565 goto out_fd;
a79af59e 1566
cdfbabfb 1567 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1568 if (err < 0)
39d8c1b6 1569 goto out_fd;
1da177e4
LT
1570
1571 if (upeer_sockaddr) {
230b1839 1572 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1573 &len, 2) < 0) {
1da177e4 1574 err = -ECONNABORTED;
39d8c1b6 1575 goto out_fd;
1da177e4 1576 }
43db362d 1577 err = move_addr_to_user(&address,
230b1839 1578 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1579 if (err < 0)
39d8c1b6 1580 goto out_fd;
1da177e4
LT
1581 }
1582
1583 /* File flags are not inherited via accept() unlike another OSes. */
1584
39d8c1b6
DM
1585 fd_install(newfd, newfile);
1586 err = newfd;
1da177e4 1587
1da177e4 1588out_put:
6cb153ca 1589 fput_light(sock->file, fput_needed);
1da177e4
LT
1590out:
1591 return err;
39d8c1b6 1592out_fd:
9606a216 1593 fput(newfile);
39d8c1b6 1594 put_unused_fd(newfd);
1da177e4
LT
1595 goto out_put;
1596}
1597
20f37034
HC
1598SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1599 int __user *, upeer_addrlen)
aaca0bdc 1600{
de11defe 1601 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1602}
1603
1da177e4
LT
1604/*
1605 * Attempt to connect to a socket with the server address. The address
1606 * is in user space so we verify it is OK and move it to kernel space.
1607 *
1608 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1609 * break bindings
1610 *
1611 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1612 * other SEQPACKET protocols that take time to connect() as it doesn't
1613 * include the -EINPROGRESS status for such sockets.
1614 */
1615
20f37034
HC
1616SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1617 int, addrlen)
1da177e4
LT
1618{
1619 struct socket *sock;
230b1839 1620 struct sockaddr_storage address;
6cb153ca 1621 int err, fput_needed;
1da177e4 1622
6cb153ca 1623 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1624 if (!sock)
1625 goto out;
43db362d 1626 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1627 if (err < 0)
1628 goto out_put;
1629
89bddce5 1630 err =
230b1839 1631 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1632 if (err)
1633 goto out_put;
1634
230b1839 1635 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1636 sock->file->f_flags);
1637out_put:
6cb153ca 1638 fput_light(sock->file, fput_needed);
1da177e4
LT
1639out:
1640 return err;
1641}
1642
1643/*
1644 * Get the local address ('name') of a socket object. Move the obtained
1645 * name to user space.
1646 */
1647
20f37034
HC
1648SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1649 int __user *, usockaddr_len)
1da177e4
LT
1650{
1651 struct socket *sock;
230b1839 1652 struct sockaddr_storage address;
6cb153ca 1653 int len, err, fput_needed;
89bddce5 1654
6cb153ca 1655 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1656 if (!sock)
1657 goto out;
1658
1659 err = security_socket_getsockname(sock);
1660 if (err)
1661 goto out_put;
1662
230b1839 1663 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1664 if (err)
1665 goto out_put;
43db362d 1666 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1667
1668out_put:
6cb153ca 1669 fput_light(sock->file, fput_needed);
1da177e4
LT
1670out:
1671 return err;
1672}
1673
1674/*
1675 * Get the remote address ('name') of a socket object. Move the obtained
1676 * name to user space.
1677 */
1678
20f37034
HC
1679SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1680 int __user *, usockaddr_len)
1da177e4
LT
1681{
1682 struct socket *sock;
230b1839 1683 struct sockaddr_storage address;
6cb153ca 1684 int len, err, fput_needed;
1da177e4 1685
89bddce5
SH
1686 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1687 if (sock != NULL) {
1da177e4
LT
1688 err = security_socket_getpeername(sock);
1689 if (err) {
6cb153ca 1690 fput_light(sock->file, fput_needed);
1da177e4
LT
1691 return err;
1692 }
1693
89bddce5 1694 err =
230b1839 1695 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1696 1);
1da177e4 1697 if (!err)
43db362d 1698 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1699 usockaddr_len);
6cb153ca 1700 fput_light(sock->file, fput_needed);
1da177e4
LT
1701 }
1702 return err;
1703}
1704
1705/*
1706 * Send a datagram to a given address. We move the address into kernel
1707 * space and check the user space data area is readable before invoking
1708 * the protocol.
1709 */
1710
3e0fa65f 1711SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1712 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1713 int, addr_len)
1da177e4
LT
1714{
1715 struct socket *sock;
230b1839 1716 struct sockaddr_storage address;
1da177e4
LT
1717 int err;
1718 struct msghdr msg;
1719 struct iovec iov;
6cb153ca 1720 int fput_needed;
6cb153ca 1721
602bd0e9
AV
1722 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1723 if (unlikely(err))
1724 return err;
de0fa95c
PE
1725 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1726 if (!sock)
4387ff75 1727 goto out;
6cb153ca 1728
89bddce5 1729 msg.msg_name = NULL;
89bddce5
SH
1730 msg.msg_control = NULL;
1731 msg.msg_controllen = 0;
1732 msg.msg_namelen = 0;
6cb153ca 1733 if (addr) {
43db362d 1734 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1735 if (err < 0)
1736 goto out_put;
230b1839 1737 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1738 msg.msg_namelen = addr_len;
1da177e4
LT
1739 }
1740 if (sock->file->f_flags & O_NONBLOCK)
1741 flags |= MSG_DONTWAIT;
1742 msg.msg_flags = flags;
d8725c86 1743 err = sock_sendmsg(sock, &msg);
1da177e4 1744
89bddce5 1745out_put:
de0fa95c 1746 fput_light(sock->file, fput_needed);
4387ff75 1747out:
1da177e4
LT
1748 return err;
1749}
1750
1751/*
89bddce5 1752 * Send a datagram down a socket.
1da177e4
LT
1753 */
1754
3e0fa65f 1755SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1756 unsigned int, flags)
1da177e4
LT
1757{
1758 return sys_sendto(fd, buff, len, flags, NULL, 0);
1759}
1760
1761/*
89bddce5 1762 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1763 * sender. We verify the buffers are writable and if needed move the
1764 * sender address from kernel to user space.
1765 */
1766
3e0fa65f 1767SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1768 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1769 int __user *, addr_len)
1da177e4
LT
1770{
1771 struct socket *sock;
1772 struct iovec iov;
1773 struct msghdr msg;
230b1839 1774 struct sockaddr_storage address;
89bddce5 1775 int err, err2;
6cb153ca
BL
1776 int fput_needed;
1777
602bd0e9
AV
1778 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1779 if (unlikely(err))
1780 return err;
de0fa95c 1781 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1782 if (!sock)
de0fa95c 1783 goto out;
1da177e4 1784
89bddce5
SH
1785 msg.msg_control = NULL;
1786 msg.msg_controllen = 0;
f3d33426
HFS
1787 /* Save some cycles and don't copy the address if not needed */
1788 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1789 /* We assume all kernel code knows the size of sockaddr_storage */
1790 msg.msg_namelen = 0;
130ed5d1 1791 msg.msg_iocb = NULL;
9f138fa6 1792 msg.msg_flags = 0;
1da177e4
LT
1793 if (sock->file->f_flags & O_NONBLOCK)
1794 flags |= MSG_DONTWAIT;
2da62906 1795 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1796
89bddce5 1797 if (err >= 0 && addr != NULL) {
43db362d 1798 err2 = move_addr_to_user(&address,
230b1839 1799 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1800 if (err2 < 0)
1801 err = err2;
1da177e4 1802 }
de0fa95c
PE
1803
1804 fput_light(sock->file, fput_needed);
4387ff75 1805out:
1da177e4
LT
1806 return err;
1807}
1808
1809/*
89bddce5 1810 * Receive a datagram from a socket.
1da177e4
LT
1811 */
1812
b7c0ddf5
JG
1813SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1814 unsigned int, flags)
1da177e4
LT
1815{
1816 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1817}
1818
1819/*
1820 * Set a socket option. Because we don't know the option lengths we have
1821 * to pass the user mode parameter for the protocols to sort out.
1822 */
1823
20f37034
HC
1824SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1825 char __user *, optval, int, optlen)
1da177e4 1826{
6cb153ca 1827 int err, fput_needed;
1da177e4
LT
1828 struct socket *sock;
1829
1830 if (optlen < 0)
1831 return -EINVAL;
89bddce5
SH
1832
1833 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1834 if (sock != NULL) {
1835 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1836 if (err)
1837 goto out_put;
1da177e4
LT
1838
1839 if (level == SOL_SOCKET)
89bddce5
SH
1840 err =
1841 sock_setsockopt(sock, level, optname, optval,
1842 optlen);
1da177e4 1843 else
89bddce5
SH
1844 err =
1845 sock->ops->setsockopt(sock, level, optname, optval,
1846 optlen);
6cb153ca
BL
1847out_put:
1848 fput_light(sock->file, fput_needed);
1da177e4
LT
1849 }
1850 return err;
1851}
1852
1853/*
1854 * Get a socket option. Because we don't know the option lengths we have
1855 * to pass a user mode parameter for the protocols to sort out.
1856 */
1857
20f37034
HC
1858SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1859 char __user *, optval, int __user *, optlen)
1da177e4 1860{
6cb153ca 1861 int err, fput_needed;
1da177e4
LT
1862 struct socket *sock;
1863
89bddce5
SH
1864 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1865 if (sock != NULL) {
6cb153ca
BL
1866 err = security_socket_getsockopt(sock, level, optname);
1867 if (err)
1868 goto out_put;
1da177e4
LT
1869
1870 if (level == SOL_SOCKET)
89bddce5
SH
1871 err =
1872 sock_getsockopt(sock, level, optname, optval,
1873 optlen);
1da177e4 1874 else
89bddce5
SH
1875 err =
1876 sock->ops->getsockopt(sock, level, optname, optval,
1877 optlen);
6cb153ca
BL
1878out_put:
1879 fput_light(sock->file, fput_needed);
1da177e4
LT
1880 }
1881 return err;
1882}
1883
1da177e4
LT
1884/*
1885 * Shutdown a socket.
1886 */
1887
754fe8d2 1888SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1889{
6cb153ca 1890 int err, fput_needed;
1da177e4
LT
1891 struct socket *sock;
1892
89bddce5
SH
1893 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1894 if (sock != NULL) {
1da177e4 1895 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1896 if (!err)
1897 err = sock->ops->shutdown(sock, how);
1898 fput_light(sock->file, fput_needed);
1da177e4
LT
1899 }
1900 return err;
1901}
1902
/*
 * Helpers that yield the address of a message-header field from either
 * the native header (msg) or its compat twin (msg##_compat), chosen by
 * the MSG_CMSG_COMPAT bit.  They only work for fields that have the same
 * type (int / unsigned) in the 32 and 64 bit layouts, and they expect a
 * variable named `flags` to be in scope at the point of use.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1909
c71d8ebe
TH
1910struct used_address {
1911 struct sockaddr_storage name;
1912 unsigned int name_len;
1913};
1914
da184284
AV
1915static int copy_msghdr_from_user(struct msghdr *kmsg,
1916 struct user_msghdr __user *umsg,
1917 struct sockaddr __user **save_addr,
1918 struct iovec **iov)
1661bf36 1919{
ffb07550 1920 struct user_msghdr msg;
08adb7da
AV
1921 ssize_t err;
1922
ffb07550 1923 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 1924 return -EFAULT;
dbb490b9 1925
864d9664 1926 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
1927 kmsg->msg_controllen = msg.msg_controllen;
1928 kmsg->msg_flags = msg.msg_flags;
1929
1930 kmsg->msg_namelen = msg.msg_namelen;
1931 if (!msg.msg_name)
6a2a2b3a
AS
1932 kmsg->msg_namelen = 0;
1933
dbb490b9
ML
1934 if (kmsg->msg_namelen < 0)
1935 return -EINVAL;
1936
1661bf36 1937 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1938 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1939
1940 if (save_addr)
ffb07550 1941 *save_addr = msg.msg_name;
08adb7da 1942
ffb07550 1943 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 1944 if (!save_addr) {
864d9664
PA
1945 err = move_addr_to_kernel(msg.msg_name,
1946 kmsg->msg_namelen,
08adb7da
AV
1947 kmsg->msg_name);
1948 if (err < 0)
1949 return err;
1950 }
1951 } else {
1952 kmsg->msg_name = NULL;
1953 kmsg->msg_namelen = 0;
1954 }
1955
ffb07550 1956 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
1957 return -EMSGSIZE;
1958
0345f931 1959 kmsg->msg_iocb = NULL;
1960
ffb07550
AV
1961 return import_iovec(save_addr ? READ : WRITE,
1962 msg.msg_iov, msg.msg_iovlen,
da184284 1963 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1964}
1965
666547ff 1966static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1967 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1968 struct used_address *used_address,
1969 unsigned int allowed_msghdr_flags)
1da177e4 1970{
89bddce5
SH
1971 struct compat_msghdr __user *msg_compat =
1972 (struct compat_msghdr __user *)msg;
230b1839 1973 struct sockaddr_storage address;
1da177e4 1974 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1975 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1976 __aligned(sizeof(__kernel_size_t));
89bddce5 1977 /* 20 is size of ipv6_pktinfo */
1da177e4 1978 unsigned char *ctl_buf = ctl;
d8725c86 1979 int ctl_len;
08adb7da 1980 ssize_t err;
89bddce5 1981
08adb7da 1982 msg_sys->msg_name = &address;
1da177e4 1983
08449320 1984 if (MSG_CMSG_COMPAT & flags)
08adb7da 1985 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1986 else
08adb7da 1987 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1988 if (err < 0)
da184284 1989 return err;
1da177e4
LT
1990
1991 err = -ENOBUFS;
1992
228e548e 1993 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1994 goto out_freeiov;
28a94d8f 1995 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1996 ctl_len = msg_sys->msg_controllen;
1da177e4 1997 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1998 err =
228e548e 1999 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2000 sizeof(ctl));
1da177e4
LT
2001 if (err)
2002 goto out_freeiov;
228e548e
AB
2003 ctl_buf = msg_sys->msg_control;
2004 ctl_len = msg_sys->msg_controllen;
1da177e4 2005 } else if (ctl_len) {
ac4340fc
DM
2006 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2007 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2008 if (ctl_len > sizeof(ctl)) {
1da177e4 2009 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2010 if (ctl_buf == NULL)
1da177e4
LT
2011 goto out_freeiov;
2012 }
2013 err = -EFAULT;
2014 /*
228e548e 2015 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2016 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2017 * checking falls down on this.
2018 */
fb8621bb 2019 if (copy_from_user(ctl_buf,
228e548e 2020 (void __user __force *)msg_sys->msg_control,
89bddce5 2021 ctl_len))
1da177e4 2022 goto out_freectl;
228e548e 2023 msg_sys->msg_control = ctl_buf;
1da177e4 2024 }
228e548e 2025 msg_sys->msg_flags = flags;
1da177e4
LT
2026
2027 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2028 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2029 /*
2030 * If this is sendmmsg() and current destination address is same as
2031 * previously succeeded address, omit asking LSM's decision.
2032 * used_address->name_len is initialized to UINT_MAX so that the first
2033 * destination address never matches.
2034 */
bc909d9d
MD
2035 if (used_address && msg_sys->msg_name &&
2036 used_address->name_len == msg_sys->msg_namelen &&
2037 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2038 used_address->name_len)) {
d8725c86 2039 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2040 goto out_freectl;
2041 }
d8725c86 2042 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2043 /*
2044 * If this is sendmmsg() and sending to current destination address was
2045 * successful, remember it.
2046 */
2047 if (used_address && err >= 0) {
2048 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2049 if (msg_sys->msg_name)
2050 memcpy(&used_address->name, msg_sys->msg_name,
2051 used_address->name_len);
c71d8ebe 2052 }
1da177e4
LT
2053
2054out_freectl:
89bddce5 2055 if (ctl_buf != ctl)
1da177e4
LT
2056 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2057out_freeiov:
da184284 2058 kfree(iov);
228e548e
AB
2059 return err;
2060}
2061
2062/*
2063 * BSD sendmsg interface
2064 */
2065
666547ff 2066long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2067{
2068 int fput_needed, err;
2069 struct msghdr msg_sys;
1be374a0
AL
2070 struct socket *sock;
2071
1be374a0 2072 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2073 if (!sock)
2074 goto out;
2075
28a94d8f 2076 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2077
6cb153ca 2078 fput_light(sock->file, fput_needed);
89bddce5 2079out:
1da177e4
LT
2080 return err;
2081}
2082
666547ff 2083SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2084{
2085 if (flags & MSG_CMSG_COMPAT)
2086 return -EINVAL;
2087 return __sys_sendmsg(fd, msg, flags);
2088}
2089
228e548e
AB
2090/*
2091 * Linux sendmmsg interface
2092 */
2093
2094int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2095 unsigned int flags)
2096{
2097 int fput_needed, err, datagrams;
2098 struct socket *sock;
2099 struct mmsghdr __user *entry;
2100 struct compat_mmsghdr __user *compat_entry;
2101 struct msghdr msg_sys;
c71d8ebe 2102 struct used_address used_address;
f092276d 2103 unsigned int oflags = flags;
228e548e 2104
98382f41
AB
2105 if (vlen > UIO_MAXIOV)
2106 vlen = UIO_MAXIOV;
228e548e
AB
2107
2108 datagrams = 0;
2109
2110 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2111 if (!sock)
2112 return err;
2113
c71d8ebe 2114 used_address.name_len = UINT_MAX;
228e548e
AB
2115 entry = mmsg;
2116 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2117 err = 0;
f092276d 2118 flags |= MSG_BATCH;
228e548e
AB
2119
2120 while (datagrams < vlen) {
f092276d
TH
2121 if (datagrams == vlen - 1)
2122 flags = oflags;
2123
228e548e 2124 if (MSG_CMSG_COMPAT & flags) {
666547ff 2125 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2126 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2127 if (err < 0)
2128 break;
2129 err = __put_user(err, &compat_entry->msg_len);
2130 ++compat_entry;
2131 } else {
a7526eb5 2132 err = ___sys_sendmsg(sock,
666547ff 2133 (struct user_msghdr __user *)entry,
28a94d8f 2134 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2135 if (err < 0)
2136 break;
2137 err = put_user(err, &entry->msg_len);
2138 ++entry;
2139 }
2140
2141 if (err)
2142 break;
2143 ++datagrams;
3023898b
SHY
2144 if (msg_data_left(&msg_sys))
2145 break;
a78cb84c 2146 cond_resched();
228e548e
AB
2147 }
2148
228e548e
AB
2149 fput_light(sock->file, fput_needed);
2150
728ffb86
AB
2151 /* We only return an error if no datagrams were able to be sent */
2152 if (datagrams != 0)
228e548e
AB
2153 return datagrams;
2154
228e548e
AB
2155 return err;
2156}
2157
2158SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2159 unsigned int, vlen, unsigned int, flags)
2160{
1be374a0
AL
2161 if (flags & MSG_CMSG_COMPAT)
2162 return -EINVAL;
228e548e
AB
2163 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2164}
2165
666547ff 2166static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2167 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2168{
89bddce5
SH
2169 struct compat_msghdr __user *msg_compat =
2170 (struct compat_msghdr __user *)msg;
1da177e4 2171 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2172 struct iovec *iov = iovstack;
1da177e4 2173 unsigned long cmsg_ptr;
2da62906 2174 int len;
08adb7da 2175 ssize_t err;
1da177e4
LT
2176
2177 /* kernel mode address */
230b1839 2178 struct sockaddr_storage addr;
1da177e4
LT
2179
2180 /* user mode address pointers */
2181 struct sockaddr __user *uaddr;
08adb7da 2182 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2183
08adb7da 2184 msg_sys->msg_name = &addr;
1da177e4 2185
f3d33426 2186 if (MSG_CMSG_COMPAT & flags)
08adb7da 2187 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2188 else
08adb7da 2189 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2190 if (err < 0)
da184284 2191 return err;
1da177e4 2192
a2e27255
ACM
2193 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2194 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2195
f3d33426
HFS
2196 /* We assume all kernel code knows the size of sockaddr_storage */
2197 msg_sys->msg_namelen = 0;
2198
1da177e4
LT
2199 if (sock->file->f_flags & O_NONBLOCK)
2200 flags |= MSG_DONTWAIT;
2da62906 2201 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2202 if (err < 0)
2203 goto out_freeiov;
2204 len = err;
2205
2206 if (uaddr != NULL) {
43db362d 2207 err = move_addr_to_user(&addr,
a2e27255 2208 msg_sys->msg_namelen, uaddr,
89bddce5 2209 uaddr_len);
1da177e4
LT
2210 if (err < 0)
2211 goto out_freeiov;
2212 }
a2e27255 2213 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2214 COMPAT_FLAGS(msg));
1da177e4
LT
2215 if (err)
2216 goto out_freeiov;
2217 if (MSG_CMSG_COMPAT & flags)
a2e27255 2218 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2219 &msg_compat->msg_controllen);
2220 else
a2e27255 2221 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2222 &msg->msg_controllen);
2223 if (err)
2224 goto out_freeiov;
2225 err = len;
2226
2227out_freeiov:
da184284 2228 kfree(iov);
a2e27255
ACM
2229 return err;
2230}
2231
2232/*
2233 * BSD recvmsg interface
2234 */
2235
666547ff 2236long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2237{
2238 int fput_needed, err;
2239 struct msghdr msg_sys;
1be374a0
AL
2240 struct socket *sock;
2241
1be374a0 2242 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2243 if (!sock)
2244 goto out;
2245
a7526eb5 2246 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2247
6cb153ca 2248 fput_light(sock->file, fput_needed);
1da177e4
LT
2249out:
2250 return err;
2251}
2252
666547ff 2253SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2254 unsigned int, flags)
2255{
2256 if (flags & MSG_CMSG_COMPAT)
2257 return -EINVAL;
2258 return __sys_recvmsg(fd, msg, flags);
2259}
2260
a2e27255
ACM
2261/*
2262 * Linux recvmmsg interface
2263 */
2264
2265int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2266 unsigned int flags, struct timespec *timeout)
2267{
2268 int fput_needed, err, datagrams;
2269 struct socket *sock;
2270 struct mmsghdr __user *entry;
d7256d0e 2271 struct compat_mmsghdr __user *compat_entry;
a2e27255 2272 struct msghdr msg_sys;
766b9f92
DD
2273 struct timespec64 end_time;
2274 struct timespec64 timeout64;
a2e27255
ACM
2275
2276 if (timeout &&
2277 poll_select_set_timeout(&end_time, timeout->tv_sec,
2278 timeout->tv_nsec))
2279 return -EINVAL;
2280
2281 datagrams = 0;
2282
2283 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2284 if (!sock)
2285 return err;
2286
2287 err = sock_error(sock->sk);
e623a9e9
MJ
2288 if (err) {
2289 datagrams = err;
a2e27255 2290 goto out_put;
e623a9e9 2291 }
a2e27255
ACM
2292
2293 entry = mmsg;
d7256d0e 2294 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2295
2296 while (datagrams < vlen) {
2297 /*
2298 * No need to ask LSM for more than the first datagram.
2299 */
d7256d0e 2300 if (MSG_CMSG_COMPAT & flags) {
666547ff 2301 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2302 &msg_sys, flags & ~MSG_WAITFORONE,
2303 datagrams);
d7256d0e
JMG
2304 if (err < 0)
2305 break;
2306 err = __put_user(err, &compat_entry->msg_len);
2307 ++compat_entry;
2308 } else {
a7526eb5 2309 err = ___sys_recvmsg(sock,
666547ff 2310 (struct user_msghdr __user *)entry,
a7526eb5
AL
2311 &msg_sys, flags & ~MSG_WAITFORONE,
2312 datagrams);
d7256d0e
JMG
2313 if (err < 0)
2314 break;
2315 err = put_user(err, &entry->msg_len);
2316 ++entry;
2317 }
2318
a2e27255
ACM
2319 if (err)
2320 break;
a2e27255
ACM
2321 ++datagrams;
2322
71c5c159
BB
2323 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2324 if (flags & MSG_WAITFORONE)
2325 flags |= MSG_DONTWAIT;
2326
a2e27255 2327 if (timeout) {
766b9f92
DD
2328 ktime_get_ts64(&timeout64);
2329 *timeout = timespec64_to_timespec(
2330 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2331 if (timeout->tv_sec < 0) {
2332 timeout->tv_sec = timeout->tv_nsec = 0;
2333 break;
2334 }
2335
2336 /* Timeout, return less than vlen datagrams */
2337 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2338 break;
2339 }
2340
2341 /* Out of band data, return right away */
2342 if (msg_sys.msg_flags & MSG_OOB)
2343 break;
a78cb84c 2344 cond_resched();
a2e27255
ACM
2345 }
2346
a2e27255 2347 if (err == 0)
34b88a68
ACM
2348 goto out_put;
2349
2350 if (datagrams == 0) {
2351 datagrams = err;
2352 goto out_put;
2353 }
a2e27255 2354
34b88a68
ACM
2355 /*
2356 * We may return less entries than requested (vlen) if the
2357 * sock is non block and there aren't enough datagrams...
2358 */
2359 if (err != -EAGAIN) {
a2e27255 2360 /*
34b88a68
ACM
2361 * ... or if recvmsg returns an error after we
2362 * received some datagrams, where we record the
2363 * error to return on the next call or if the
2364 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2365 */
34b88a68 2366 sock->sk->sk_err = -err;
a2e27255 2367 }
34b88a68
ACM
2368out_put:
2369 fput_light(sock->file, fput_needed);
a2e27255 2370
34b88a68 2371 return datagrams;
a2e27255
ACM
2372}
2373
2374SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2375 unsigned int, vlen, unsigned int, flags,
2376 struct timespec __user *, timeout)
2377{
2378 int datagrams;
2379 struct timespec timeout_sys;
2380
1be374a0
AL
2381 if (flags & MSG_CMSG_COMPAT)
2382 return -EINVAL;
2383
a2e27255
ACM
2384 if (!timeout)
2385 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2386
2387 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2388 return -EFAULT;
2389
2390 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2391
2392 if (datagrams > 0 &&
2393 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2394 datagrams = -EFAULT;
2395
2396 return datagrams;
2397}
2398
2399#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/* Argument list sizes for sys_socketcall */
/*
 * nargs[] is indexed by the socketcall number; each entry is the size in
 * bytes of that call's argument block (AL(n) == n unsigned longs).
 * Entry 0 is not a valid call.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5), AL(4)
};

#undef AL
2410
2411/*
89bddce5 2412 * System call vectors.
1da177e4
LT
2413 *
2414 * Argument checking cleaned up. Saved 20% in size.
2415 * This function doesn't need to set the kernel lock because
89bddce5 2416 * it is set by the callees.
1da177e4
LT
2417 */
2418
3e0fa65f 2419SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2420{
2950fa9d 2421 unsigned long a[AUDITSC_ARGS];
89bddce5 2422 unsigned long a0, a1;
1da177e4 2423 int err;
47379052 2424 unsigned int len;
1da177e4 2425
228e548e 2426 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2427 return -EINVAL;
db3a93c7 2428 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2429
47379052
AV
2430 len = nargs[call];
2431 if (len > sizeof(a))
2432 return -EINVAL;
2433
1da177e4 2434 /* copy_from_user should be SMP safe. */
47379052 2435 if (copy_from_user(a, args, len))
1da177e4 2436 return -EFAULT;
3ec3b2fb 2437
2950fa9d
CG
2438 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2439 if (err)
2440 return err;
3ec3b2fb 2441
89bddce5
SH
2442 a0 = a[0];
2443 a1 = a[1];
2444
2445 switch (call) {
2446 case SYS_SOCKET:
2447 err = sys_socket(a0, a1, a[2]);
2448 break;
2449 case SYS_BIND:
2450 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2451 break;
2452 case SYS_CONNECT:
2453 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2454 break;
2455 case SYS_LISTEN:
2456 err = sys_listen(a0, a1);
2457 break;
2458 case SYS_ACCEPT:
de11defe
UD
2459 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2460 (int __user *)a[2], 0);
89bddce5
SH
2461 break;
2462 case SYS_GETSOCKNAME:
2463 err =
2464 sys_getsockname(a0, (struct sockaddr __user *)a1,
2465 (int __user *)a[2]);
2466 break;
2467 case SYS_GETPEERNAME:
2468 err =
2469 sys_getpeername(a0, (struct sockaddr __user *)a1,
2470 (int __user *)a[2]);
2471 break;
2472 case SYS_SOCKETPAIR:
2473 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2474 break;
2475 case SYS_SEND:
2476 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2477 break;
2478 case SYS_SENDTO:
2479 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2480 (struct sockaddr __user *)a[4], a[5]);
2481 break;
2482 case SYS_RECV:
2483 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2484 break;
2485 case SYS_RECVFROM:
2486 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2487 (struct sockaddr __user *)a[4],
2488 (int __user *)a[5]);
2489 break;
2490 case SYS_SHUTDOWN:
2491 err = sys_shutdown(a0, a1);
2492 break;
2493 case SYS_SETSOCKOPT:
2494 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2495 break;
2496 case SYS_GETSOCKOPT:
2497 err =
2498 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2499 (int __user *)a[4]);
2500 break;
2501 case SYS_SENDMSG:
666547ff 2502 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2503 break;
228e548e
AB
2504 case SYS_SENDMMSG:
2505 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2506 break;
89bddce5 2507 case SYS_RECVMSG:
666547ff 2508 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2509 break;
a2e27255
ACM
2510 case SYS_RECVMMSG:
2511 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2512 (struct timespec __user *)a[4]);
2513 break;
de11defe
UD
2514 case SYS_ACCEPT4:
2515 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2516 (int __user *)a[2], a[3]);
aaca0bdc 2517 break;
89bddce5
SH
2518 default:
2519 err = -EINVAL;
2520 break;
1da177e4
LT
2521 }
2522 return err;
2523}
2524
89bddce5 2525#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2526
55737fda
SH
2527/**
2528 * sock_register - add a socket protocol handler
2529 * @ops: description of protocol
2530 *
1da177e4
LT
2531 * This function is called by a protocol handler that wants to
2532 * advertise its address family, and have it linked into the
e793c0f7 2533 * socket interface. The value ops->family corresponds to the
55737fda 2534 * socket system call protocol family.
1da177e4 2535 */
f0fd27d4 2536int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2537{
2538 int err;
2539
2540 if (ops->family >= NPROTO) {
3410f22e 2541 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2542 return -ENOBUFS;
2543 }
55737fda
SH
2544
2545 spin_lock(&net_family_lock);
5ee68f99
JC
2546 if (rcu_dereference_protected(net_families[ops->family],
2547 lockdep_is_held(&net_family_lock)))
55737fda
SH
2548 err = -EEXIST;
2549 else {
5ee68f99 2550 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2551 err = 0;
2552 }
55737fda
SH
2553 spin_unlock(&net_family_lock);
2554
3410f22e 2555 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2556 return err;
2557}
c6d409cf 2558EXPORT_SYMBOL(sock_register);
1da177e4 2559
55737fda
SH
/**
 *	sock_unregister - remove a protocol handler
 *	@family: protocol family to remove
 *
 *	This function is called by a protocol handler that wants to
 *	remove its address family, and have it unlinked from the
 *	new socket creation.
 *
 *	If protocol handler is a module, then it can use module reference
 *	counts to protect against new references. If protocol handler is not
 *	a module then it needs to provide its own protection in
 *	the ops->create routine.
 *
 *	An out-of-range @family triggers BUG_ON().
 */
void sock_unregister(int family)
{
	BUG_ON(family < 0 || family >= NPROTO);

	/* Clear the slot under the same lock that sock_register() takes. */
	spin_lock(&net_family_lock);
	RCU_INIT_POINTER(net_families[family], NULL);
	spin_unlock(&net_family_lock);

	/* Wait for an RCU grace period before reporting the removal. */
	synchronize_rcu();

	pr_info("NET: Unregistered protocol family %d\n", family);
}
EXPORT_SYMBOL(sock_unregister);
1da177e4 2586
77d76ea3 2587static int __init sock_init(void)
1da177e4 2588{
b3e19d92 2589 int err;
2ca794e5
EB
2590 /*
2591 * Initialize the network sysctl infrastructure.
2592 */
2593 err = net_sysctl_init();
2594 if (err)
2595 goto out;
b3e19d92 2596
1da177e4 2597 /*
89bddce5 2598 * Initialize skbuff SLAB cache
1da177e4
LT
2599 */
2600 skb_init();
1da177e4
LT
2601
2602 /*
89bddce5 2603 * Initialize the protocols module.
1da177e4
LT
2604 */
2605
2606 init_inodecache();
b3e19d92
NP
2607
2608 err = register_filesystem(&sock_fs_type);
2609 if (err)
2610 goto out_fs;
1da177e4 2611 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2612 if (IS_ERR(sock_mnt)) {
2613 err = PTR_ERR(sock_mnt);
2614 goto out_mount;
2615 }
77d76ea3
AK
2616
2617 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2618 */
2619
2620#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2621 err = netfilter_init();
2622 if (err)
2623 goto out;
1da177e4 2624#endif
cbeb321a 2625
408eccce 2626 ptp_classifier_init();
c1f19b51 2627
b3e19d92
NP
2628out:
2629 return err;
2630
2631out_mount:
2632 unregister_filesystem(&sock_fs_type);
2633out_fs:
2634 goto out;
1da177e4
LT
2635}
2636
77d76ea3
AK
2637core_initcall(sock_init); /* early initcall */
2638
1da177e4
LT
2639#ifdef CONFIG_PROC_FS
2640void socket_seq_show(struct seq_file *seq)
2641{
2642 int cpu;
2643 int counter = 0;
2644
6f912042 2645 for_each_possible_cpu(cpu)
89bddce5 2646 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2647
2648 /* It can be negative, by the way. 8) */
2649 if (counter < 0)
2650 counter = 0;
2651
2652 seq_printf(seq, "sockets: used %d\n", counter);
2653}
89bddce5 2654#endif /* CONFIG_PROC_FS */
1da177e4 2655
89bbfc95 2656#ifdef CONFIG_COMPAT
6b96018b 2657static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2658 unsigned int cmd, void __user *up)
7a229387 2659{
7a229387
AB
2660 mm_segment_t old_fs = get_fs();
2661 struct timeval ktv;
2662 int err;
2663
2664 set_fs(KERNEL_DS);
6b96018b 2665 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2666 set_fs(old_fs);
644595f8 2667 if (!err)
ed6fe9d6 2668 err = compat_put_timeval(&ktv, up);
644595f8 2669
7a229387
AB
2670 return err;
2671}
2672
6b96018b 2673static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2674 unsigned int cmd, void __user *up)
7a229387 2675{
7a229387
AB
2676 mm_segment_t old_fs = get_fs();
2677 struct timespec kts;
2678 int err;
2679
2680 set_fs(KERNEL_DS);
6b96018b 2681 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2682 set_fs(old_fs);
644595f8 2683 if (!err)
ed6fe9d6 2684 err = compat_put_timespec(&kts, up);
644595f8 2685
7a229387
AB
2686 return err;
2687}
2688
6b96018b 2689static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2690{
2691 struct ifreq __user *uifr;
2692 int err;
2693
2694 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2695 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2696 return -EFAULT;
2697
6b96018b 2698 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2699 if (err)
2700 return err;
2701
6b96018b 2702 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2703 return -EFAULT;
2704
2705 return 0;
2706}
2707
/*
 * Compat SIOCGIFCONF.
 *
 * Builds a native struct ifconf, followed (when the caller supplied a
 * buffer) by an array of native ifreqs, in space obtained from
 * compat_alloc_user_space(), runs the native ioctl on it and converts
 * the result back into the caller's compat structures.
 *
 * Returns 0 on success, -EFAULT if a user copy fails, or the error from
 * dev_ioctl().
 */
static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
{
	struct compat_ifconf ifc32;
	struct ifconf ifc;
	struct ifconf __user *uifc;
	struct compat_ifreq __user *ifr32;
	struct ifreq __user *ifr;
	unsigned int i, j;
	int err;

	if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
		return -EFAULT;

	memset(&ifc, 0, sizeof(ifc));
	if (ifc32.ifcbuf == 0) {
		/* No caller buffer: only the header is passed down. */
		ifc32.ifc_len = 0;
		ifc.ifc_len = 0;
		ifc.ifc_req = NULL;
		uifc = compat_alloc_user_space(sizeof(struct ifconf));
	} else {
		/* Room for one native ifreq per compat entry, plus one. */
		size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
			sizeof(struct ifreq);
		uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
		ifc.ifc_len = len;
		/* The native ifreq array sits directly after the header. */
		ifr = ifc.ifc_req = (void __user *)(uifc + 1);
		ifr32 = compat_ptr(ifc32.ifcbuf);
		for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
			if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
				return -EFAULT;
			ifr++;
			ifr32++;
		}
	}
	if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
		return -EFAULT;

	err = dev_ioctl(net, SIOCGIFCONF, uifc);
	if (err)
		return err;

	/* Read the header back after the ioctl has run on it. */
	if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
		return -EFAULT;

	/*
	 * Copy entries back while a whole compat entry still fits in the
	 * caller's buffer (i) and native data remains (j).  With no caller
	 * buffer ifc32.ifc_len is 0, so nothing is copied.
	 */
	ifr = ifc.ifc_req;
	ifr32 = compat_ptr(ifc32.ifcbuf);
	for (i = 0, j = 0;
	     i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
	     i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
		if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
			return -EFAULT;
		ifr32++;
		ifr++;
	}

	if (ifc32.ifcbuf == 0) {
		/* Translate from 64-bit structure multiple to
		 * a 32-bit one.
		 */
		i = ifc.ifc_len;
		i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
		ifc32.ifc_len = i;
	} else {
		/* Report the number of compat bytes written by the loop. */
		ifc32.ifc_len = i;
	}
	if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
		return -EFAULT;

	return 0;
}
2777
/*
 * Compat SIOCETHTOOL.
 *
 * Builds a native ifreq in space obtained from compat_alloc_user_space().
 * For the rxnfc commands listed in the switch below, struct
 * ethtool_rxnfc is additionally converted between the compat and the
 * native layout on the way in (convert_in) and/or out (convert_out); for
 * every other command the caller's data pointer is passed through
 * unchanged.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, -ENOMEM if the
 * requested rule count is too large, or the error from dev_ioctl().
 */
static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
{
	struct compat_ethtool_rxnfc __user *compat_rxnfc;
	bool convert_in = false, convert_out = false;
	size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
	struct ethtool_rxnfc __user *rxnfc;
	struct ifreq __user *ifr;
	u32 rule_cnt = 0, actual_rule_cnt;
	u32 ethcmd;
	u32 data;
	int ret;

	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
		return -EFAULT;

	compat_rxnfc = compat_ptr(data);

	if (get_user(ethcmd, &compat_rxnfc->cmd))
		return -EFAULT;

	/* Most ethtool structures are defined without padding.
	 * Unfortunately struct ethtool_rxnfc is an exception.
	 */
	switch (ethcmd) {
	default:
		break;
	case ETHTOOL_GRXCLSRLALL:
		/* Buffer size is variable */
		if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
			return -EFAULT;
		if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
			return -ENOMEM;
		buf_size += rule_cnt * sizeof(u32);
		/* fall through */
	case ETHTOOL_GRXRINGS:
	case ETHTOOL_GRXCLSRLCNT:
	case ETHTOOL_GRXCLSRULE:
	case ETHTOOL_SRXCLSRLINS:
		convert_out = true;
		/* fall through */
	case ETHTOOL_SRXCLSRLDEL:
		buf_size += sizeof(struct ethtool_rxnfc);
		convert_in = true;
		break;
	}

	/* The native rxnfc, if any, follows the 8-byte aligned ifreq. */
	ifr = compat_alloc_user_space(buf_size);
	rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);

	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
		return -EFAULT;

	/* Point at the converted copy, or pass the caller's pointer on. */
	if (put_user(convert_in ? rxnfc : compat_ptr(data),
		     &ifr->ifr_ifru.ifru_data))
		return -EFAULT;

	if (convert_in) {
		/* We expect there to be holes between fs.m_ext and
		 * fs.ring_cookie and at the end of fs, but nowhere else.
		 */
		BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
			     sizeof(compat_rxnfc->fs.m_ext) !=
			     offsetof(struct ethtool_rxnfc, fs.m_ext) +
			     sizeof(rxnfc->fs.m_ext));
		BUILD_BUG_ON(
			offsetof(struct compat_ethtool_rxnfc, fs.location) -
			offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
			offsetof(struct ethtool_rxnfc, fs.location) -
			offsetof(struct ethtool_rxnfc, fs.ring_cookie));

		/*
		 * Two pieces: start of struct through fs.m_ext, then
		 * fs.ring_cookie through fs.location.
		 */
		if (copy_in_user(rxnfc, compat_rxnfc,
				 (void __user *)(&rxnfc->fs.m_ext + 1) -
				 (void __user *)rxnfc) ||
		    copy_in_user(&rxnfc->fs.ring_cookie,
				 &compat_rxnfc->fs.ring_cookie,
				 (void __user *)(&rxnfc->fs.location + 1) -
				 (void __user *)&rxnfc->fs.ring_cookie))
			return -EFAULT;
		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
			/* Use the count already range-checked above. */
			if (put_user(rule_cnt, &rxnfc->rule_cnt))
				return -EFAULT;
		} else if (copy_in_user(&rxnfc->rule_cnt,
					&compat_rxnfc->rule_cnt,
					sizeof(rxnfc->rule_cnt)))
			return -EFAULT;
	}

	ret = dev_ioctl(net, SIOCETHTOOL, ifr);
	if (ret)
		return ret;

	if (convert_out) {
		/* Same two pieces as on the way in, plus rule_cnt. */
		if (copy_in_user(compat_rxnfc, rxnfc,
				 (const void __user *)(&rxnfc->fs.m_ext + 1) -
				 (const void __user *)rxnfc) ||
		    copy_in_user(&compat_rxnfc->fs.ring_cookie,
				 &rxnfc->fs.ring_cookie,
				 (const void __user *)(&rxnfc->fs.location + 1) -
				 (const void __user *)&rxnfc->fs.ring_cookie) ||
		    copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
				 sizeof(rxnfc->rule_cnt)))
			return -EFAULT;

		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
			/* As an optimisation, we only copy the actual
			 * number of rules that the underlying
			 * function returned.  Since Mallory might
			 * change the rule count in user memory, we
			 * check that it is less than the rule count
			 * originally given (as the user buffer size),
			 * which has been range-checked.
			 */
			if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
				return -EFAULT;
			if (actual_rule_cnt < rule_cnt)
				rule_cnt = actual_rule_cnt;
			if (copy_in_user(&compat_rxnfc->rule_locs[0],
					 &rxnfc->rule_locs[0],
					 rule_cnt * sizeof(u32)))
				return -EFAULT;
		}
	}

	return 0;
}
2903
7a50a240
AB
2904static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2905{
2906 void __user *uptr;
2907 compat_uptr_t uptr32;
2908 struct ifreq __user *uifr;
2909
c6d409cf 2910 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2911 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2912 return -EFAULT;
2913
2914 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2915 return -EFAULT;
2916
2917 uptr = compat_ptr(uptr32);
2918
2919 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2920 return -EFAULT;
2921
2922 return dev_ioctl(net, SIOCWANDEV, uifr);
2923}
2924
6b96018b
AB
2925static int bond_ioctl(struct net *net, unsigned int cmd,
2926 struct compat_ifreq __user *ifr32)
7a229387
AB
2927{
2928 struct ifreq kifr;
7a229387
AB
2929 mm_segment_t old_fs;
2930 int err;
7a229387
AB
2931
2932 switch (cmd) {
2933 case SIOCBONDENSLAVE:
2934 case SIOCBONDRELEASE:
2935 case SIOCBONDSETHWADDR:
2936 case SIOCBONDCHANGEACTIVE:
6b96018b 2937 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2938 return -EFAULT;
2939
2940 old_fs = get_fs();
c6d409cf 2941 set_fs(KERNEL_DS);
c3f52ae6 2942 err = dev_ioctl(net, cmd,
2943 (struct ifreq __user __force *) &kifr);
c6d409cf 2944 set_fs(old_fs);
7a229387
AB
2945
2946 return err;
7a229387 2947 default:
07d106d0 2948 return -ENOIOCTLCMD;
ccbd6a5a 2949 }
7a229387
AB
2950}
2951
590d4693
BH
2952/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2953static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2954 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2955{
2956 struct ifreq __user *u_ifreq64;
7a229387
AB
2957 char tmp_buf[IFNAMSIZ];
2958 void __user *data64;
2959 u32 data32;
2960
2961 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2962 IFNAMSIZ))
2963 return -EFAULT;
417c3522 2964 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2965 return -EFAULT;
2966 data64 = compat_ptr(data32);
2967
2968 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2969
7a229387
AB
2970 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2971 IFNAMSIZ))
2972 return -EFAULT;
417c3522 2973 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2974 return -EFAULT;
2975
6b96018b 2976 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2977}
2978
6b96018b
AB
2979static int dev_ifsioc(struct net *net, struct socket *sock,
2980 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2981{
a2116ed2 2982 struct ifreq __user *uifr;
7a229387
AB
2983 int err;
2984
a2116ed2
AB
2985 uifr = compat_alloc_user_space(sizeof(*uifr));
2986 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2987 return -EFAULT;
2988
2989 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2990
7a229387
AB
2991 if (!err) {
2992 switch (cmd) {
2993 case SIOCGIFFLAGS:
2994 case SIOCGIFMETRIC:
2995 case SIOCGIFMTU:
2996 case SIOCGIFMEM:
2997 case SIOCGIFHWADDR:
2998 case SIOCGIFINDEX:
2999 case SIOCGIFADDR:
3000 case SIOCGIFBRDADDR:
3001 case SIOCGIFDSTADDR:
3002 case SIOCGIFNETMASK:
fab2532b 3003 case SIOCGIFPFLAGS:
7a229387 3004 case SIOCGIFTXQLEN:
fab2532b
AB
3005 case SIOCGMIIPHY:
3006 case SIOCGMIIREG:
a2116ed2 3007 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3008 err = -EFAULT;
3009 break;
3010 }
3011 }
3012 return err;
3013}
3014
a2116ed2
AB
3015static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3016 struct compat_ifreq __user *uifr32)
3017{
3018 struct ifreq ifr;
3019 struct compat_ifmap __user *uifmap32;
3020 mm_segment_t old_fs;
3021 int err;
3022
3023 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3024 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3025 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3026 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3027 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3028 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3029 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3030 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3031 if (err)
3032 return -EFAULT;
3033
3034 old_fs = get_fs();
c6d409cf 3035 set_fs(KERNEL_DS);
c3f52ae6 3036 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3037 set_fs(old_fs);
a2116ed2
AB
3038
3039 if (cmd == SIOCGIFMAP && !err) {
3040 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3041 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3042 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3043 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3044 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3045 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3046 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3047 if (err)
3048 err = -EFAULT;
3049 }
3050 return err;
3051}
3052
7a229387 3053struct rtentry32 {
c6d409cf 3054 u32 rt_pad1;
7a229387
AB
3055 struct sockaddr rt_dst; /* target address */
3056 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3057 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3058 unsigned short rt_flags;
3059 short rt_pad2;
3060 u32 rt_pad3;
3061 unsigned char rt_tos;
3062 unsigned char rt_class;
3063 short rt_pad4;
3064 short rt_metric; /* +1 for binary compatibility! */
7a229387 3065 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3066 u32 rt_mtu; /* per route MTU/Window */
3067 u32 rt_window; /* Window clamping */
7a229387
AB
3068 unsigned short rt_irtt; /* Initial RTT */
3069};
3070
3071struct in6_rtmsg32 {
3072 struct in6_addr rtmsg_dst;
3073 struct in6_addr rtmsg_src;
3074 struct in6_addr rtmsg_gateway;
3075 u32 rtmsg_type;
3076 u16 rtmsg_dst_len;
3077 u16 rtmsg_src_len;
3078 u32 rtmsg_metric;
3079 u32 rtmsg_info;
3080 u32 rtmsg_flags;
3081 s32 rtmsg_ifindex;
3082};
3083
6b96018b
AB
3084static int routing_ioctl(struct net *net, struct socket *sock,
3085 unsigned int cmd, void __user *argp)
7a229387
AB
3086{
3087 int ret;
3088 void *r = NULL;
3089 struct in6_rtmsg r6;
3090 struct rtentry r4;
3091 char devname[16];
3092 u32 rtdev;
3093 mm_segment_t old_fs = get_fs();
3094
6b96018b
AB
3095 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3096 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3097 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3098 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3099 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3100 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3101 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3102 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3103 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3104 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3105 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3106
3107 r = (void *) &r6;
3108 } else { /* ipv4 */
6b96018b 3109 struct rtentry32 __user *ur4 = argp;
c6d409cf 3110 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3111 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3112 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3113 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3114 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3115 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3116 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3117 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3118 if (rtdev) {
c6d409cf 3119 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3120 r4.rt_dev = (char __user __force *)devname;
3121 devname[15] = 0;
7a229387
AB
3122 } else
3123 r4.rt_dev = NULL;
3124
3125 r = (void *) &r4;
3126 }
3127
3128 if (ret) {
3129 ret = -EFAULT;
3130 goto out;
3131 }
3132
c6d409cf 3133 set_fs(KERNEL_DS);
6b96018b 3134 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3135 set_fs(old_fs);
7a229387
AB
3136
3137out:
7a229387
AB
3138 return ret;
3139}
3140
3141/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3142 * for some operations; this forces use of the newer bridge-utils that
25985edc 3143 * use compatible ioctls
7a229387 3144 */
6b96018b 3145static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3146{
6b96018b 3147 compat_ulong_t tmp;
7a229387 3148
6b96018b 3149 if (get_user(tmp, argp))
7a229387
AB
3150 return -EFAULT;
3151 if (tmp == BRCTL_GET_VERSION)
3152 return BRCTL_VERSION + 1;
3153 return -EINVAL;
3154}
3155
6b96018b
AB
3156static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3157 unsigned int cmd, unsigned long arg)
3158{
3159 void __user *argp = compat_ptr(arg);
3160 struct sock *sk = sock->sk;
3161 struct net *net = sock_net(sk);
7a229387 3162
6b96018b 3163 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3164 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3165
3166 switch (cmd) {
3167 case SIOCSIFBR:
3168 case SIOCGIFBR:
3169 return old_bridge_ioctl(argp);
3170 case SIOCGIFNAME:
3171 return dev_ifname32(net, argp);
3172 case SIOCGIFCONF:
3173 return dev_ifconf(net, argp);
3174 case SIOCETHTOOL:
3175 return ethtool_ioctl(net, argp);
7a50a240
AB
3176 case SIOCWANDEV:
3177 return compat_siocwandev(net, argp);
a2116ed2
AB
3178 case SIOCGIFMAP:
3179 case SIOCSIFMAP:
3180 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3181 case SIOCBONDENSLAVE:
3182 case SIOCBONDRELEASE:
3183 case SIOCBONDSETHWADDR:
6b96018b
AB
3184 case SIOCBONDCHANGEACTIVE:
3185 return bond_ioctl(net, cmd, argp);
3186 case SIOCADDRT:
3187 case SIOCDELRT:
3188 return routing_ioctl(net, sock, cmd, argp);
3189 case SIOCGSTAMP:
3190 return do_siocgstamp(net, sock, cmd, argp);
3191 case SIOCGSTAMPNS:
3192 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3193 case SIOCBONDSLAVEINFOQUERY:
3194 case SIOCBONDINFOQUERY:
a2116ed2 3195 case SIOCSHWTSTAMP:
fd468c74 3196 case SIOCGHWTSTAMP:
590d4693 3197 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3198
3199 case FIOSETOWN:
3200 case SIOCSPGRP:
3201 case FIOGETOWN:
3202 case SIOCGPGRP:
3203 case SIOCBRADDBR:
3204 case SIOCBRDELBR:
3205 case SIOCGIFVLAN:
3206 case SIOCSIFVLAN:
3207 case SIOCADDDLCI:
3208 case SIOCDELDLCI:
c62cce2c 3209 case SIOCGSKNS:
6b96018b
AB
3210 return sock_ioctl(file, cmd, arg);
3211
3212 case SIOCGIFFLAGS:
3213 case SIOCSIFFLAGS:
3214 case SIOCGIFMETRIC:
3215 case SIOCSIFMETRIC:
3216 case SIOCGIFMTU:
3217 case SIOCSIFMTU:
3218 case SIOCGIFMEM:
3219 case SIOCSIFMEM:
3220 case SIOCGIFHWADDR:
3221 case SIOCSIFHWADDR:
3222 case SIOCADDMULTI:
3223 case SIOCDELMULTI:
3224 case SIOCGIFINDEX:
6b96018b
AB
3225 case SIOCGIFADDR:
3226 case SIOCSIFADDR:
3227 case SIOCSIFHWBROADCAST:
6b96018b 3228 case SIOCDIFADDR:
6b96018b
AB
3229 case SIOCGIFBRDADDR:
3230 case SIOCSIFBRDADDR:
3231 case SIOCGIFDSTADDR:
3232 case SIOCSIFDSTADDR:
3233 case SIOCGIFNETMASK:
3234 case SIOCSIFNETMASK:
3235 case SIOCSIFPFLAGS:
3236 case SIOCGIFPFLAGS:
3237 case SIOCGIFTXQLEN:
3238 case SIOCSIFTXQLEN:
3239 case SIOCBRADDIF:
3240 case SIOCBRDELIF:
9177efd3
AB
3241 case SIOCSIFNAME:
3242 case SIOCGMIIPHY:
3243 case SIOCGMIIREG:
3244 case SIOCSMIIREG:
6b96018b 3245 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3246
6b96018b
AB
3247 case SIOCSARP:
3248 case SIOCGARP:
3249 case SIOCDARP:
b55d6fce 3250 case SIOCOUTQNSD:
6b96018b 3251 case SIOCATMARK:
9177efd3
AB
3252 return sock_do_ioctl(net, sock, cmd, arg);
3253 }
3254
6b96018b
AB
3255 return -ENOIOCTLCMD;
3256}
7a229387 3257
95c96174 3258static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3259 unsigned long arg)
89bbfc95
SP
3260{
3261 struct socket *sock = file->private_data;
3262 int ret = -ENOIOCTLCMD;
87de87d5
DM
3263 struct sock *sk;
3264 struct net *net;
3265
3266 sk = sock->sk;
3267 net = sock_net(sk);
89bbfc95
SP
3268
3269 if (sock->ops->compat_ioctl)
3270 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3271
87de87d5
DM
3272 if (ret == -ENOIOCTLCMD &&
3273 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3274 ret = compat_wext_handle_ioctl(net, cmd, arg);
3275
6b96018b
AB
3276 if (ret == -ENOIOCTLCMD)
3277 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3278
89bbfc95
SP
3279 return ret;
3280}
3281#endif
3282
ac5a488e
SS
3283int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3284{
3285 return sock->ops->bind(sock, addr, addrlen);
3286}
c6d409cf 3287EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3288
3289int kernel_listen(struct socket *sock, int backlog)
3290{
3291 return sock->ops->listen(sock, backlog);
3292}
c6d409cf 3293EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3294
3295int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3296{
3297 struct sock *sk = sock->sk;
3298 int err;
3299
3300 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3301 newsock);
3302 if (err < 0)
3303 goto done;
3304
cdfbabfb 3305 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3306 if (err < 0) {
3307 sock_release(*newsock);
fa8705b0 3308 *newsock = NULL;
ac5a488e
SS
3309 goto done;
3310 }
3311
3312 (*newsock)->ops = sock->ops;
1b08534e 3313 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3314
3315done:
3316 return err;
3317}
c6d409cf 3318EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3319
3320int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3321 int flags)
ac5a488e
SS
3322{
3323 return sock->ops->connect(sock, addr, addrlen, flags);
3324}
c6d409cf 3325EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3326
3327int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3328 int *addrlen)
3329{
3330 return sock->ops->getname(sock, addr, addrlen, 0);
3331}
c6d409cf 3332EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3333
3334int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3335 int *addrlen)
3336{
3337 return sock->ops->getname(sock, addr, addrlen, 1);
3338}
c6d409cf 3339EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3340
3341int kernel_getsockopt(struct socket *sock, int level, int optname,
3342 char *optval, int *optlen)
3343{
3344 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3345 char __user *uoptval;
3346 int __user *uoptlen;
ac5a488e
SS
3347 int err;
3348
fb8621bb
NK
3349 uoptval = (char __user __force *) optval;
3350 uoptlen = (int __user __force *) optlen;
3351
ac5a488e
SS
3352 set_fs(KERNEL_DS);
3353 if (level == SOL_SOCKET)
fb8621bb 3354 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3355 else
fb8621bb
NK
3356 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3357 uoptlen);
ac5a488e
SS
3358 set_fs(oldfs);
3359 return err;
3360}
c6d409cf 3361EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3362
3363int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3364 char *optval, unsigned int optlen)
ac5a488e
SS
3365{
3366 mm_segment_t oldfs = get_fs();
fb8621bb 3367 char __user *uoptval;
ac5a488e
SS
3368 int err;
3369
fb8621bb
NK
3370 uoptval = (char __user __force *) optval;
3371
ac5a488e
SS
3372 set_fs(KERNEL_DS);
3373 if (level == SOL_SOCKET)
fb8621bb 3374 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3375 else
fb8621bb 3376 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3377 optlen);
3378 set_fs(oldfs);
3379 return err;
3380}
c6d409cf 3381EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3382
3383int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3384 size_t size, int flags)
3385{
3386 if (sock->ops->sendpage)
3387 return sock->ops->sendpage(sock, page, offset, size, flags);
3388
3389 return sock_no_sendpage(sock, page, offset, size, flags);
3390}
c6d409cf 3391EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3392
306b13eb
TH
3393int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3394 size_t size, int flags)
3395{
3396 struct socket *sock = sk->sk_socket;
3397
3398 if (sock->ops->sendpage_locked)
3399 return sock->ops->sendpage_locked(sk, page, offset, size,
3400 flags);
3401
3402 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3403}
3404EXPORT_SYMBOL(kernel_sendpage_locked);
3405
ac5a488e
SS
3406int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3407{
3408 mm_segment_t oldfs = get_fs();
3409 int err;
3410
3411 set_fs(KERNEL_DS);
3412 err = sock->ops->ioctl(sock, cmd, arg);
3413 set_fs(oldfs);
3414
3415 return err;
3416}
c6d409cf 3417EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3418
91cf45f0
TM
3419int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3420{
3421 return sock->ops->shutdown(sock, how);
3422}
91cf45f0 3423EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3424
3425/* This routine returns the IP overhead imposed by a socket i.e.
3426 * the length of the underlying IP header, depending on whether
3427 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3428 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3429 */
3430u32 kernel_sock_ip_overhead(struct sock *sk)
3431{
3432 struct inet_sock *inet;
3433 struct ip_options_rcu *opt;
3434 u32 overhead = 0;
113c3075
P
3435#if IS_ENABLED(CONFIG_IPV6)
3436 struct ipv6_pinfo *np;
3437 struct ipv6_txoptions *optv6 = NULL;
3438#endif /* IS_ENABLED(CONFIG_IPV6) */
3439
3440 if (!sk)
3441 return overhead;
3442
113c3075
P
3443 switch (sk->sk_family) {
3444 case AF_INET:
3445 inet = inet_sk(sk);
3446 overhead += sizeof(struct iphdr);
3447 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3448 sock_owned_by_user(sk));
113c3075
P
3449 if (opt)
3450 overhead += opt->opt.optlen;
3451 return overhead;
3452#if IS_ENABLED(CONFIG_IPV6)
3453 case AF_INET6:
3454 np = inet6_sk(sk);
3455 overhead += sizeof(struct ipv6hdr);
3456 if (np)
3457 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3458 sock_owned_by_user(sk));
113c3075
P
3459 if (optv6)
3460 overhead += (optv6->opt_flen + optv6->opt_nflen);
3461 return overhead;
3462#endif /* IS_ENABLED(CONFIG_IPV6) */
3463 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3464 return overhead;
3465 }
3466}
3467EXPORT_SYMBOL(kernel_sock_ip_overhead);