]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
Revert "sit: reload iphdr in ipip6_rcv"
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is enabled. */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1e911632 290static void init_inodecache(void)
1da177e4
LT
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1e911632 299 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
300}
301
b87221de 302static const struct super_operations sockfs_ops = {
c6d409cf
ED
303 .alloc_inode = sock_alloc_inode,
304 .destroy_inode = sock_destroy_inode,
305 .statfs = simple_statfs,
1da177e4
LT
306};
307
c23fbb6b
ED
308/*
309 * sockfs_dname() is called from d_path().
310 */
311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
312{
313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 314 d_inode(dentry)->i_ino);
c23fbb6b
ED
315}
316
3ba13d17 317static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 318 .d_dname = sockfs_dname,
1da177e4
LT
319};
320
bba0bd31
AG
321static int sockfs_xattr_get(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, void *value, size_t size)
324{
325 if (value) {
326 if (dentry->d_name.len + 1 > size)
327 return -ERANGE;
328 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
329 }
330 return dentry->d_name.len + 1;
331}
332
333#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
334#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
335#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
336
337static const struct xattr_handler sockfs_xattr_handler = {
338 .name = XATTR_NAME_SOCKPROTONAME,
339 .get = sockfs_xattr_get,
340};
341
4a590153
AG
342static int sockfs_security_xattr_set(const struct xattr_handler *handler,
343 struct dentry *dentry, struct inode *inode,
344 const char *suffix, const void *value,
345 size_t size, int flags)
346{
347 /* Handled by LSM. */
348 return -EAGAIN;
349}
350
351static const struct xattr_handler sockfs_security_xattr_handler = {
352 .prefix = XATTR_SECURITY_PREFIX,
353 .set = sockfs_security_xattr_set,
354};
355
bba0bd31
AG
356static const struct xattr_handler *sockfs_xattr_handlers[] = {
357 &sockfs_xattr_handler,
4a590153 358 &sockfs_security_xattr_handler,
bba0bd31
AG
359 NULL
360};
361
c74a1cbb
AV
362static struct dentry *sockfs_mount(struct file_system_type *fs_type,
363 int flags, const char *dev_name, void *data)
364{
bba0bd31
AG
365 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
366 sockfs_xattr_handlers,
367 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
368}
369
370static struct vfsmount *sock_mnt __read_mostly;
371
372static struct file_system_type sock_fs_type = {
373 .name = "sockfs",
374 .mount = sockfs_mount,
375 .kill_sb = kill_anon_super,
376};
377
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
394
aab174f0 395struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 396{
7cbe66b6 397 struct qstr name = { .name = "" };
2c48b9c4 398 struct path path;
7cbe66b6 399 struct file *file;
1da177e4 400
600e1779
MY
401 if (dname) {
402 name.name = dname;
403 name.len = strlen(name.name);
404 } else if (sock->sk) {
405 name.name = sock->sk->sk_prot_creator->name;
406 name.len = strlen(name.name);
407 }
4b936885 408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
409 if (unlikely(!path.dentry))
410 return ERR_PTR(-ENOMEM);
2c48b9c4 411 path.mnt = mntget(sock_mnt);
39d8c1b6 412
2c48b9c4 413 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 414
2c48b9c4 415 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 416 &socket_file_ops);
b5ffe634 417 if (IS_ERR(file)) {
cc3808f8 418 /* drop dentry, keep inode */
c5ef6035 419 ihold(d_inode(path.dentry));
2c48b9c4 420 path_put(&path);
39b65252 421 return file;
cc3808f8
AV
422 }
423
424 sock->file = file;
77d27200 425 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 426 file->private_data = sock;
28407630 427 return file;
39d8c1b6 428}
56b31d1c 429EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 430
56b31d1c 431static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
432{
433 struct file *newfile;
28407630
AV
434 int fd = get_unused_fd_flags(flags);
435 if (unlikely(fd < 0))
436 return fd;
39d8c1b6 437
aab174f0 438 newfile = sock_alloc_file(sock, flags, NULL);
28407630 439 if (likely(!IS_ERR(newfile))) {
39d8c1b6 440 fd_install(fd, newfile);
28407630
AV
441 return fd;
442 }
7cbe66b6 443
28407630
AV
444 put_unused_fd(fd);
445 return PTR_ERR(newfile);
1da177e4
LT
446}
447
406a3c63 448struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 449{
6cb153ca
BL
450 if (file->f_op == &socket_file_ops)
451 return file->private_data; /* set in sock_map_fd */
452
23bb80d2
ED
453 *err = -ENOTSOCK;
454 return NULL;
6cb153ca 455}
406a3c63 456EXPORT_SYMBOL(sock_from_file);
6cb153ca 457
1da177e4 458/**
c6d409cf 459 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
460 * @fd: file handle
461 * @err: pointer to an error code return
462 *
463 * The file handle passed in is locked and the socket it is bound
464 * too is returned. If an error occurs the err pointer is overwritten
465 * with a negative errno code and NULL is returned. The function checks
466 * for both invalid handles and passing a handle which is not a socket.
467 *
468 * On a success the socket object pointer is returned.
469 */
470
471struct socket *sockfd_lookup(int fd, int *err)
472{
473 struct file *file;
1da177e4
LT
474 struct socket *sock;
475
89bddce5
SH
476 file = fget(fd);
477 if (!file) {
1da177e4
LT
478 *err = -EBADF;
479 return NULL;
480 }
89bddce5 481
6cb153ca
BL
482 sock = sock_from_file(file, err);
483 if (!sock)
1da177e4 484 fput(file);
6cb153ca
BL
485 return sock;
486}
c6d409cf 487EXPORT_SYMBOL(sockfd_lookup);
1da177e4 488
6cb153ca
BL
489static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
490{
00e188ef 491 struct fd f = fdget(fd);
6cb153ca
BL
492 struct socket *sock;
493
3672558c 494 *err = -EBADF;
00e188ef
AV
495 if (f.file) {
496 sock = sock_from_file(f.file, err);
497 if (likely(sock)) {
498 *fput_needed = f.flags;
6cb153ca 499 return sock;
00e188ef
AV
500 }
501 fdput(f);
1da177e4 502 }
6cb153ca 503 return NULL;
1da177e4
LT
504}
505
600e1779
MY
506static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
507 size_t size)
508{
509 ssize_t len;
510 ssize_t used = 0;
511
c5ef6035 512 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
513 if (len < 0)
514 return len;
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 buffer += len;
520 }
521
522 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
523 used += len;
524 if (buffer) {
525 if (size < used)
526 return -ERANGE;
527 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
528 buffer += len;
529 }
530
531 return used;
532}
533
dc647ec8 534static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
535{
536 int err = simple_setattr(dentry, iattr);
537
e1a3a60a 538 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
539 struct socket *sock = SOCKET_I(d_inode(dentry));
540
541 sock->sk->sk_uid = iattr->ia_uid;
542 }
543
544 return err;
545}
546
600e1779 547static const struct inode_operations sockfs_inode_ops = {
600e1779 548 .listxattr = sockfs_listxattr,
86741ec2 549 .setattr = sockfs_setattr,
600e1779
MY
550};
551
1da177e4
LT
552/**
553 * sock_alloc - allocate a socket
89bddce5 554 *
1da177e4
LT
555 * Allocate a new inode and socket object. The two are bound together
556 * and initialised. The socket is then returned. If we are out of inodes
557 * NULL is returned.
558 */
559
f4a00aac 560struct socket *sock_alloc(void)
1da177e4 561{
89bddce5
SH
562 struct inode *inode;
563 struct socket *sock;
1da177e4 564
a209dfc7 565 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
566 if (!inode)
567 return NULL;
568
569 sock = SOCKET_I(inode);
570
29a020d3 571 kmemcheck_annotate_bitfield(sock, type);
85fe4025 572 inode->i_ino = get_next_ino();
89bddce5 573 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
574 inode->i_uid = current_fsuid();
575 inode->i_gid = current_fsgid();
600e1779 576 inode->i_op = &sockfs_inode_ops;
1da177e4 577
19e8d69c 578 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
579 return sock;
580}
f4a00aac 581EXPORT_SYMBOL(sock_alloc);
1da177e4 582
1da177e4
LT
583/**
584 * sock_release - close a socket
585 * @sock: socket to close
586 *
587 * The socket is released from the protocol stack if it has a release
588 * callback, and the inode is then released if the socket is bound to
89bddce5 589 * an inode not a file.
1da177e4 590 */
89bddce5 591
1da177e4
LT
592void sock_release(struct socket *sock)
593{
594 if (sock->ops) {
595 struct module *owner = sock->ops->owner;
596
597 sock->ops->release(sock);
598 sock->ops = NULL;
599 module_put(owner);
600 }
601
eaefd110 602 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 603 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 604
19e8d69c 605 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
606 if (!sock->file) {
607 iput(SOCK_INODE(sock));
608 return;
609 }
89bddce5 610 sock->file = NULL;
1da177e4 611}
c6d409cf 612EXPORT_SYMBOL(sock_release);
1da177e4 613
c14ac945 614void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 615{
140c55d4
ED
616 u8 flags = *tx_flags;
617
c14ac945 618 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
619 flags |= SKBTX_HW_TSTAMP;
620
c14ac945 621 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
622 flags |= SKBTX_SW_TSTAMP;
623
c14ac945 624 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
625 flags |= SKBTX_SCHED_TSTAMP;
626
140c55d4 627 *tx_flags = flags;
20d49473 628}
67cc0d40 629EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 630
d8725c86 631static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 632{
01e97e65 633 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
634 BUG_ON(ret == -EIOCBQUEUED);
635 return ret;
1da177e4
LT
636}
637
/*
 * Send @msg on @sock: run the security_socket_sendmsg() hook first and only
 * call into the protocol when it returns 0; otherwise return its error.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
646
647int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
648 struct kvec *vec, size_t num, size_t size)
649{
6aa24814 650 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 651 return sock_sendmsg(sock, msg);
1da177e4 652}
c6d409cf 653EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 654
8605330a
SHY
655static bool skb_is_err_queue(const struct sk_buff *skb)
656{
657 /* pkt_type of skbs enqueued on the error queue are set to
658 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
659 * in recvmsg, since skbs received on a local socket will never
660 * have a pkt_type of PACKET_OUTGOING.
661 */
662 return skb->pkt_type == PACKET_OUTGOING;
663}
664
92f37fd2
ED
665/*
666 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
667 */
668void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
669 struct sk_buff *skb)
670{
20d49473 671 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 672 struct scm_timestamping tss;
20d49473
PO
673 int empty = 1;
674 struct skb_shared_hwtstamps *shhwtstamps =
675 skb_hwtstamps(skb);
676
677 /* Race occurred between timestamp enabling and packet
678 receiving. Fill in the current time for now. */
2456e855 679 if (need_software_tstamp && skb->tstamp == 0)
20d49473
PO
680 __net_timestamp(skb);
681
682 if (need_software_tstamp) {
683 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
684 struct timeval tv;
685 skb_get_timestamp(skb, &tv);
686 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
687 sizeof(tv), &tv);
688 } else {
f24b9be5
WB
689 struct timespec ts;
690 skb_get_timestampns(skb, &ts);
20d49473 691 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 692 sizeof(ts), &ts);
20d49473
PO
693 }
694 }
695
f24b9be5 696 memset(&tss, 0, sizeof(tss));
c199105d 697 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 698 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 699 empty = 0;
4d276eb6 700 if (shhwtstamps &&
b9f40e21 701 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 702 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 703 empty = 0;
1c885808 704 if (!empty) {
20d49473 705 put_cmsg(msg, SOL_SOCKET,
f24b9be5 706 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 707
8605330a 708 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 709 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
710 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
711 skb->len, skb->data);
712 }
92f37fd2 713}
7c81fd8b
ACM
714EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
715
6e3e939f
JB
716void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
717 struct sk_buff *skb)
718{
719 int ack;
720
721 if (!sock_flag(sk, SOCK_WIFI_STATUS))
722 return;
723 if (!skb->wifi_acked_valid)
724 return;
725
726 ack = skb->wifi_acked;
727
728 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
729}
730EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
731
11165f14 732static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
733 struct sk_buff *skb)
3b885787 734{
744d5a3e 735 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 736 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 737 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
738}
739
/* Attach timestamp and then drop-count control messages for @skb to @msg. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	/* Timestamps first, drop counter second. */
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 747
1b784140 748static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 749 int flags)
1da177e4 750{
2da62906 751 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
752}
753
/*
 * Receive into @msg: run the security_socket_recvmsg() hook first and only
 * call into the protocol when it returns 0; otherwise return its error.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 761
c1249c0a
ML
762/**
763 * kernel_recvmsg - Receive a message from a socket (kernel space)
764 * @sock: The socket to receive the message from
765 * @msg: Received message
766 * @vec: Input s/g array for message data
767 * @num: Size of input s/g array
768 * @size: Number of bytes to read
769 * @flags: Message flags (MSG_DONTWAIT, etc...)
770 *
771 * On return the msg structure contains the scatter/gather array passed in the
772 * vec argument. The array is modified so that it consists of the unfilled
773 * portion of the original array.
774 *
775 * The returned value is the total number of bytes received, or an error.
776 */
89bddce5
SH
777int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
778 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
779{
780 mm_segment_t oldfs = get_fs();
781 int result;
782
6aa24814 783 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 784 set_fs(KERNEL_DS);
2da62906 785 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
786 set_fs(oldfs);
787 return result;
788}
c6d409cf 789EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 790
ce1d4d3e
CH
791static ssize_t sock_sendpage(struct file *file, struct page *page,
792 int offset, size_t size, loff_t *ppos, int more)
1da177e4 793{
1da177e4
LT
794 struct socket *sock;
795 int flags;
796
ce1d4d3e
CH
797 sock = file->private_data;
798
35f9c09f
ED
799 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
800 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
801 flags |= more;
ce1d4d3e 802
e6949583 803 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 804}
1da177e4 805
9c55e01c 806static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 807 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
808 unsigned int flags)
809{
810 struct socket *sock = file->private_data;
811
997b37da
RDC
812 if (unlikely(!sock->ops->splice_read))
813 return -EINVAL;
814
9c55e01c
JA
815 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
816}
817
8ae5e030 818static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 819{
6d652330
AV
820 struct file *file = iocb->ki_filp;
821 struct socket *sock = file->private_data;
0345f931 822 struct msghdr msg = {.msg_iter = *to,
823 .msg_iocb = iocb};
8ae5e030 824 ssize_t res;
ce1d4d3e 825
8ae5e030
AV
826 if (file->f_flags & O_NONBLOCK)
827 msg.msg_flags = MSG_DONTWAIT;
828
829 if (iocb->ki_pos != 0)
1da177e4 830 return -ESPIPE;
027445c3 831
66ee59af 832 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
833 return 0;
834
2da62906 835 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
836 *to = msg.msg_iter;
837 return res;
1da177e4
LT
838}
839
8ae5e030 840static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 841{
6d652330
AV
842 struct file *file = iocb->ki_filp;
843 struct socket *sock = file->private_data;
0345f931 844 struct msghdr msg = {.msg_iter = *from,
845 .msg_iocb = iocb};
8ae5e030 846 ssize_t res;
1da177e4 847
8ae5e030 848 if (iocb->ki_pos != 0)
ce1d4d3e 849 return -ESPIPE;
027445c3 850
8ae5e030
AV
851 if (file->f_flags & O_NONBLOCK)
852 msg.msg_flags = MSG_DONTWAIT;
853
6d652330
AV
854 if (sock->type == SOCK_SEQPACKET)
855 msg.msg_flags |= MSG_EOR;
856
d8725c86 857 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
858 *from = msg.msg_iter;
859 return res;
1da177e4
LT
860}
861
1da177e4
LT
862/*
863 * Atomic setting of ioctl hooks to avoid race
864 * with module unload.
865 */
866
4a3e2f71 867static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 868static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 869
881d966b 870void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 871{
4a3e2f71 872 mutex_lock(&br_ioctl_mutex);
1da177e4 873 br_ioctl_hook = hook;
4a3e2f71 874 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
875}
876EXPORT_SYMBOL(brioctl_set);
877
4a3e2f71 878static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 879static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 880
881d966b 881void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 882{
4a3e2f71 883 mutex_lock(&vlan_ioctl_mutex);
1da177e4 884 vlan_ioctl_hook = hook;
4a3e2f71 885 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
886}
887EXPORT_SYMBOL(vlan_ioctl_set);
888
4a3e2f71 889static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 890static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 891
89bddce5 892void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 893{
4a3e2f71 894 mutex_lock(&dlci_ioctl_mutex);
1da177e4 895 dlci_ioctl_hook = hook;
4a3e2f71 896 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
897}
898EXPORT_SYMBOL(dlci_ioctl_set);
899
6b96018b
AB
900static long sock_do_ioctl(struct net *net, struct socket *sock,
901 unsigned int cmd, unsigned long arg)
902{
903 int err;
904 void __user *argp = (void __user *)arg;
905
906 err = sock->ops->ioctl(sock, cmd, arg);
907
908 /*
909 * If this ioctl is unknown try to hand it down
910 * to the NIC driver.
911 */
912 if (err == -ENOIOCTLCMD)
913 err = dev_ioctl(net, cmd, argp);
914
915 return err;
916}
917
1da177e4
LT
918/*
919 * With an ioctl, arg may well be a user mode pointer, but we don't know
920 * what to do with it - that's up to the protocol still.
921 */
922
c62cce2c
AV
923static struct ns_common *get_net_ns(struct ns_common *ns)
924{
925 return &get_net(container_of(ns, struct net, ns))->ns;
926}
927
1da177e4
LT
928static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
929{
930 struct socket *sock;
881d966b 931 struct sock *sk;
1da177e4
LT
932 void __user *argp = (void __user *)arg;
933 int pid, err;
881d966b 934 struct net *net;
1da177e4 935
b69aee04 936 sock = file->private_data;
881d966b 937 sk = sock->sk;
3b1e0a65 938 net = sock_net(sk);
1da177e4 939 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 940 err = dev_ioctl(net, cmd, argp);
1da177e4 941 } else
3d23e349 942#ifdef CONFIG_WEXT_CORE
1da177e4 943 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 944 err = dev_ioctl(net, cmd, argp);
1da177e4 945 } else
3d23e349 946#endif
89bddce5 947 switch (cmd) {
1da177e4
LT
948 case FIOSETOWN:
949 case SIOCSPGRP:
950 err = -EFAULT;
951 if (get_user(pid, (int __user *)argp))
952 break;
e0b93edd
JL
953 f_setown(sock->file, pid, 1);
954 err = 0;
1da177e4
LT
955 break;
956 case FIOGETOWN:
957 case SIOCGPGRP:
609d7fa9 958 err = put_user(f_getown(sock->file),
89bddce5 959 (int __user *)argp);
1da177e4
LT
960 break;
961 case SIOCGIFBR:
962 case SIOCSIFBR:
963 case SIOCBRADDBR:
964 case SIOCBRDELBR:
965 err = -ENOPKG;
966 if (!br_ioctl_hook)
967 request_module("bridge");
968
4a3e2f71 969 mutex_lock(&br_ioctl_mutex);
89bddce5 970 if (br_ioctl_hook)
881d966b 971 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 972 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
973 break;
974 case SIOCGIFVLAN:
975 case SIOCSIFVLAN:
976 err = -ENOPKG;
977 if (!vlan_ioctl_hook)
978 request_module("8021q");
979
4a3e2f71 980 mutex_lock(&vlan_ioctl_mutex);
1da177e4 981 if (vlan_ioctl_hook)
881d966b 982 err = vlan_ioctl_hook(net, argp);
4a3e2f71 983 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 984 break;
1da177e4
LT
985 case SIOCADDDLCI:
986 case SIOCDELDLCI:
987 err = -ENOPKG;
988 if (!dlci_ioctl_hook)
989 request_module("dlci");
990
7512cbf6
PE
991 mutex_lock(&dlci_ioctl_mutex);
992 if (dlci_ioctl_hook)
1da177e4 993 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 994 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 995 break;
c62cce2c
AV
996 case SIOCGSKNS:
997 err = -EPERM;
998 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
999 break;
1000
1001 err = open_related_ns(&net->ns, get_net_ns);
1002 break;
1da177e4 1003 default:
6b96018b 1004 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1005 break;
89bddce5 1006 }
1da177e4
LT
1007 return err;
1008}
1009
1010int sock_create_lite(int family, int type, int protocol, struct socket **res)
1011{
1012 int err;
1013 struct socket *sock = NULL;
89bddce5 1014
1da177e4
LT
1015 err = security_socket_create(family, type, protocol, 1);
1016 if (err)
1017 goto out;
1018
1019 sock = sock_alloc();
1020 if (!sock) {
1021 err = -ENOMEM;
1022 goto out;
1023 }
1024
1da177e4 1025 sock->type = type;
7420ed23
VY
1026 err = security_socket_post_create(sock, family, type, protocol, 1);
1027 if (err)
1028 goto out_release;
1029
1da177e4
LT
1030out:
1031 *res = sock;
1032 return err;
7420ed23
VY
1033out_release:
1034 sock_release(sock);
1035 sock = NULL;
1036 goto out;
1da177e4 1037}
c6d409cf 1038EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1039
1040/* No kernel lock held - perfect */
89bddce5 1041static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1042{
cbf55001 1043 unsigned int busy_flag = 0;
1da177e4
LT
1044 struct socket *sock;
1045
1046 /*
89bddce5 1047 * We can't return errors to poll, so it's either yes or no.
1da177e4 1048 */
b69aee04 1049 sock = file->private_data;
2d48d67f 1050
cbf55001 1051 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1052 /* this socket can poll_ll so tell the system call */
cbf55001 1053 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1054
1055 /* once, only if requested by syscall */
cbf55001
ET
1056 if (wait && (wait->_key & POLL_BUSY_LOOP))
1057 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1058 }
1059
cbf55001 1060 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1061}
1062
89bddce5 1063static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1064{
b69aee04 1065 struct socket *sock = file->private_data;
1da177e4
LT
1066
1067 return sock->ops->mmap(file, sock, vma);
1068}
1069
/* release handler for socket files: drop the socket living in @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1075
1076/*
1077 * Update the socket async list
1078 *
1079 * Fasync_list locking strategy.
1080 *
1081 * 1. fasync_list is modified only under process context socket lock
1082 * i.e. under semaphore.
1083 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1084 * or under socket lock
1da177e4
LT
1085 */
1086
1087static int sock_fasync(int fd, struct file *filp, int on)
1088{
989a2979
ED
1089 struct socket *sock = filp->private_data;
1090 struct sock *sk = sock->sk;
eaefd110 1091 struct socket_wq *wq;
1da177e4 1092
989a2979 1093 if (sk == NULL)
1da177e4 1094 return -EINVAL;
1da177e4
LT
1095
1096 lock_sock(sk);
1e1d04e6 1097 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1098 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1099
eaefd110 1100 if (!wq->fasync_list)
989a2979
ED
1101 sock_reset_flag(sk, SOCK_FASYNC);
1102 else
bcdce719 1103 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1104
989a2979 1105 release_sock(sk);
1da177e4
LT
1106 return 0;
1107}
1108
ceb5d58b 1109/* This function may be called only under rcu_lock */
1da177e4 1110
ceb5d58b 1111int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1112{
ceb5d58b 1113 if (!wq || !wq->fasync_list)
1da177e4 1114 return -1;
ceb5d58b 1115
89bddce5 1116 switch (how) {
8d8ad9d7 1117 case SOCK_WAKE_WAITD:
ceb5d58b 1118 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1119 break;
1120 goto call_kill;
8d8ad9d7 1121 case SOCK_WAKE_SPACE:
ceb5d58b 1122 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1123 break;
1124 /* fall through */
8d8ad9d7 1125 case SOCK_WAKE_IO:
89bddce5 1126call_kill:
43815482 1127 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1128 break;
8d8ad9d7 1129 case SOCK_WAKE_URG:
43815482 1130 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1131 }
ceb5d58b 1132
1da177e4
LT
1133 return 0;
1134}
c6d409cf 1135EXPORT_SYMBOL(sock_wake_async);
1da177e4 1136
721db93a 1137int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1138 struct socket **res, int kern)
1da177e4
LT
1139{
1140 int err;
1141 struct socket *sock;
55737fda 1142 const struct net_proto_family *pf;
1da177e4
LT
1143
1144 /*
89bddce5 1145 * Check protocol is in range
1da177e4
LT
1146 */
1147 if (family < 0 || family >= NPROTO)
1148 return -EAFNOSUPPORT;
1149 if (type < 0 || type >= SOCK_MAX)
1150 return -EINVAL;
1151
1152 /* Compatibility.
1153
1154 This uglymoron is moved from INET layer to here to avoid
1155 deadlock in module load.
1156 */
1157 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1158 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1159 current->comm);
1da177e4
LT
1160 family = PF_PACKET;
1161 }
1162
1163 err = security_socket_create(family, type, protocol, kern);
1164 if (err)
1165 return err;
89bddce5 1166
55737fda
SH
1167 /*
1168 * Allocate the socket and allow the family to set things up. if
1169 * the protocol is 0, the family is instructed to select an appropriate
1170 * default.
1171 */
1172 sock = sock_alloc();
1173 if (!sock) {
e87cc472 1174 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1175 return -ENFILE; /* Not exactly a match, but its the
1176 closest posix thing */
1177 }
1178
1179 sock->type = type;
1180
95a5afca 1181#ifdef CONFIG_MODULES
89bddce5
SH
1182 /* Attempt to load a protocol module if the find failed.
1183 *
1184 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1185 * requested real, full-featured networking support upon configuration.
1186 * Otherwise module support will break!
1187 */
190683a9 1188 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1189 request_module("net-pf-%d", family);
1da177e4
LT
1190#endif
1191
55737fda
SH
1192 rcu_read_lock();
1193 pf = rcu_dereference(net_families[family]);
1194 err = -EAFNOSUPPORT;
1195 if (!pf)
1196 goto out_release;
1da177e4
LT
1197
1198 /*
1199 * We will call the ->create function, that possibly is in a loadable
1200 * module, so we have to bump that loadable module refcnt first.
1201 */
55737fda 1202 if (!try_module_get(pf->owner))
1da177e4
LT
1203 goto out_release;
1204
55737fda
SH
1205 /* Now protected by module ref count */
1206 rcu_read_unlock();
1207
3f378b68 1208 err = pf->create(net, sock, protocol, kern);
55737fda 1209 if (err < 0)
1da177e4 1210 goto out_module_put;
a79af59e 1211
1da177e4
LT
1212 /*
1213 * Now to bump the refcnt of the [loadable] module that owns this
1214 * socket at sock_release time we decrement its refcnt.
1215 */
55737fda
SH
1216 if (!try_module_get(sock->ops->owner))
1217 goto out_module_busy;
1218
1da177e4
LT
1219 /*
1220 * Now that we're done with the ->create function, the [loadable]
1221 * module can have its refcnt decremented
1222 */
55737fda 1223 module_put(pf->owner);
7420ed23
VY
1224 err = security_socket_post_create(sock, family, type, protocol, kern);
1225 if (err)
3b185525 1226 goto out_sock_release;
55737fda 1227 *res = sock;
1da177e4 1228
55737fda
SH
1229 return 0;
1230
1231out_module_busy:
1232 err = -EAFNOSUPPORT;
1da177e4 1233out_module_put:
55737fda
SH
1234 sock->ops = NULL;
1235 module_put(pf->owner);
1236out_sock_release:
1da177e4 1237 sock_release(sock);
55737fda
SH
1238 return err;
1239
1240out_release:
1241 rcu_read_unlock();
1242 goto out_sock_release;
1da177e4 1243}
721db93a 1244EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1245
1246int sock_create(int family, int type, int protocol, struct socket **res)
1247{
1b8d7ae4 1248 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1249}
c6d409cf 1250EXPORT_SYMBOL(sock_create);
1da177e4 1251
/*
 * Create a socket in namespace @net, with the kern argument of
 * __sock_create() set to 1.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1257
3e0fa65f 1258SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1259{
1260 int retval;
1261 struct socket *sock;
a677a039
UD
1262 int flags;
1263
e38b36f3
UD
1264 /* Check the SOCK_* constants for consistency. */
1265 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1266 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1267 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1268 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1269
a677a039 1270 flags = type & ~SOCK_TYPE_MASK;
77d27200 1271 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1272 return -EINVAL;
1273 type &= SOCK_TYPE_MASK;
1da177e4 1274
aaca0bdc
UD
1275 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1276 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1277
1da177e4
LT
1278 retval = sock_create(family, type, protocol, &sock);
1279 if (retval < 0)
1280 goto out;
1281
77d27200 1282 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1283 if (retval < 0)
1284 goto out_release;
1285
1286out:
1287 /* It may be already another descriptor 8) Not kernel problem. */
1288 return retval;
1289
1290out_release:
1291 sock_release(sock);
1292 return retval;
1293}
1294
1295/*
1296 * Create a pair of connected sockets.
1297 */
1298
3e0fa65f
HC
1299SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1300 int __user *, usockvec)
1da177e4
LT
1301{
1302 struct socket *sock1, *sock2;
1303 int fd1, fd2, err;
db349509 1304 struct file *newfile1, *newfile2;
a677a039
UD
1305 int flags;
1306
1307 flags = type & ~SOCK_TYPE_MASK;
77d27200 1308 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1309 return -EINVAL;
1310 type &= SOCK_TYPE_MASK;
1da177e4 1311
aaca0bdc
UD
1312 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1313 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1314
1da177e4
LT
1315 /*
1316 * Obtain the first socket and check if the underlying protocol
1317 * supports the socketpair call.
1318 */
1319
1320 err = sock_create(family, type, protocol, &sock1);
1321 if (err < 0)
1322 goto out;
1323
1324 err = sock_create(family, type, protocol, &sock2);
1325 if (err < 0)
1326 goto out_release_1;
1327
1328 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1329 if (err < 0)
1da177e4
LT
1330 goto out_release_both;
1331
28407630 1332 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1333 if (unlikely(fd1 < 0)) {
1334 err = fd1;
db349509 1335 goto out_release_both;
bf3c23d1 1336 }
d73aa286 1337
28407630 1338 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1339 if (unlikely(fd2 < 0)) {
1340 err = fd2;
d73aa286 1341 goto out_put_unused_1;
28407630
AV
1342 }
1343
aab174f0 1344 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1345 if (IS_ERR(newfile1)) {
28407630 1346 err = PTR_ERR(newfile1);
d73aa286 1347 goto out_put_unused_both;
28407630
AV
1348 }
1349
aab174f0 1350 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1351 if (IS_ERR(newfile2)) {
1352 err = PTR_ERR(newfile2);
d73aa286 1353 goto out_fput_1;
db349509
AV
1354 }
1355
d73aa286
YD
1356 err = put_user(fd1, &usockvec[0]);
1357 if (err)
1358 goto out_fput_both;
1359
1360 err = put_user(fd2, &usockvec[1]);
1361 if (err)
1362 goto out_fput_both;
1363
157cf649 1364 audit_fd_pair(fd1, fd2);
d73aa286 1365
db349509
AV
1366 fd_install(fd1, newfile1);
1367 fd_install(fd2, newfile2);
1da177e4
LT
1368 /* fd1 and fd2 may be already another descriptors.
1369 * Not kernel problem.
1370 */
1371
d73aa286 1372 return 0;
1da177e4 1373
d73aa286
YD
1374out_fput_both:
1375 fput(newfile2);
1376 fput(newfile1);
1377 put_unused_fd(fd2);
1378 put_unused_fd(fd1);
1379 goto out;
1380
1381out_fput_1:
1382 fput(newfile1);
1383 put_unused_fd(fd2);
1384 put_unused_fd(fd1);
1385 sock_release(sock2);
1386 goto out;
1da177e4 1387
d73aa286
YD
1388out_put_unused_both:
1389 put_unused_fd(fd2);
1390out_put_unused_1:
1391 put_unused_fd(fd1);
1da177e4 1392out_release_both:
89bddce5 1393 sock_release(sock2);
1da177e4 1394out_release_1:
89bddce5 1395 sock_release(sock1);
1da177e4
LT
1396out:
1397 return err;
1398}
1399
1da177e4
LT
1400/*
1401 * Bind a name to a socket. Nothing much to do here since it's
1402 * the protocol's responsibility to handle the local address.
1403 *
1404 * We move the socket address to kernel space before we call
1405 * the protocol layer (having also checked the address is ok).
1406 */
1407
20f37034 1408SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1409{
1410 struct socket *sock;
230b1839 1411 struct sockaddr_storage address;
6cb153ca 1412 int err, fput_needed;
1da177e4 1413
89bddce5 1414 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1415 if (sock) {
43db362d 1416 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1417 if (err >= 0) {
1418 err = security_socket_bind(sock,
230b1839 1419 (struct sockaddr *)&address,
89bddce5 1420 addrlen);
6cb153ca
BL
1421 if (!err)
1422 err = sock->ops->bind(sock,
89bddce5 1423 (struct sockaddr *)
230b1839 1424 &address, addrlen);
1da177e4 1425 }
6cb153ca 1426 fput_light(sock->file, fput_needed);
89bddce5 1427 }
1da177e4
LT
1428 return err;
1429}
1430
1da177e4
LT
1431/*
1432 * Perform a listen. Basically, we allow the protocol to do anything
1433 * necessary for a listen, and if that works, we mark the socket as
1434 * ready for listening.
1435 */
1436
3e0fa65f 1437SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1438{
1439 struct socket *sock;
6cb153ca 1440 int err, fput_needed;
b8e1f9b5 1441 int somaxconn;
89bddce5
SH
1442
1443 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1444 if (sock) {
8efa6e93 1445 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1446 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1447 backlog = somaxconn;
1da177e4
LT
1448
1449 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1450 if (!err)
1451 err = sock->ops->listen(sock, backlog);
1da177e4 1452
6cb153ca 1453 fput_light(sock->file, fput_needed);
1da177e4
LT
1454 }
1455 return err;
1456}
1457
1da177e4
LT
1458/*
1459 * For accept, we attempt to create a new socket, set up the link
1460 * with the client, wake up the client, then return the new
1461 * connected fd. We collect the address of the connector in kernel
1462 * space and move it to user at the very end. This is unclean because
1463 * we open the socket then return an error.
1464 *
1465 * 1003.1g adds the ability to recvmsg() to query connection pending
1466 * status to recvmsg. We need to add that support in a way thats
1467 * clean when we restucture accept also.
1468 */
1469
20f37034
HC
1470SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1471 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1472{
1473 struct socket *sock, *newsock;
39d8c1b6 1474 struct file *newfile;
6cb153ca 1475 int err, len, newfd, fput_needed;
230b1839 1476 struct sockaddr_storage address;
1da177e4 1477
77d27200 1478 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1479 return -EINVAL;
1480
1481 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1482 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1483
6cb153ca 1484 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1485 if (!sock)
1486 goto out;
1487
1488 err = -ENFILE;
c6d409cf
ED
1489 newsock = sock_alloc();
1490 if (!newsock)
1da177e4
LT
1491 goto out_put;
1492
1493 newsock->type = sock->type;
1494 newsock->ops = sock->ops;
1495
1da177e4
LT
1496 /*
1497 * We don't need try_module_get here, as the listening socket (sock)
1498 * has the protocol module (sock->ops->owner) held.
1499 */
1500 __module_get(newsock->ops->owner);
1501
28407630 1502 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1503 if (unlikely(newfd < 0)) {
1504 err = newfd;
9a1875e6
DM
1505 sock_release(newsock);
1506 goto out_put;
39d8c1b6 1507 }
aab174f0 1508 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1509 if (IS_ERR(newfile)) {
28407630
AV
1510 err = PTR_ERR(newfile);
1511 put_unused_fd(newfd);
1512 sock_release(newsock);
1513 goto out_put;
1514 }
39d8c1b6 1515
a79af59e
FF
1516 err = security_socket_accept(sock, newsock);
1517 if (err)
39d8c1b6 1518 goto out_fd;
a79af59e 1519
cdfbabfb 1520 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1521 if (err < 0)
39d8c1b6 1522 goto out_fd;
1da177e4
LT
1523
1524 if (upeer_sockaddr) {
230b1839 1525 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1526 &len, 2) < 0) {
1da177e4 1527 err = -ECONNABORTED;
39d8c1b6 1528 goto out_fd;
1da177e4 1529 }
43db362d 1530 err = move_addr_to_user(&address,
230b1839 1531 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1532 if (err < 0)
39d8c1b6 1533 goto out_fd;
1da177e4
LT
1534 }
1535
1536 /* File flags are not inherited via accept() unlike another OSes. */
1537
39d8c1b6
DM
1538 fd_install(newfd, newfile);
1539 err = newfd;
1da177e4 1540
1da177e4 1541out_put:
6cb153ca 1542 fput_light(sock->file, fput_needed);
1da177e4
LT
1543out:
1544 return err;
39d8c1b6 1545out_fd:
9606a216 1546 fput(newfile);
39d8c1b6 1547 put_unused_fd(newfd);
1da177e4
LT
1548 goto out_put;
1549}
1550
20f37034
HC
1551SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1552 int __user *, upeer_addrlen)
aaca0bdc 1553{
de11defe 1554 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1555}
1556
1da177e4
LT
1557/*
1558 * Attempt to connect to a socket with the server address. The address
1559 * is in user space so we verify it is OK and move it to kernel space.
1560 *
1561 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1562 * break bindings
1563 *
1564 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1565 * other SEQPACKET protocols that take time to connect() as it doesn't
1566 * include the -EINPROGRESS status for such sockets.
1567 */
1568
20f37034
HC
1569SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1570 int, addrlen)
1da177e4
LT
1571{
1572 struct socket *sock;
230b1839 1573 struct sockaddr_storage address;
6cb153ca 1574 int err, fput_needed;
1da177e4 1575
6cb153ca 1576 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1577 if (!sock)
1578 goto out;
43db362d 1579 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1580 if (err < 0)
1581 goto out_put;
1582
89bddce5 1583 err =
230b1839 1584 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1585 if (err)
1586 goto out_put;
1587
230b1839 1588 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1589 sock->file->f_flags);
1590out_put:
6cb153ca 1591 fput_light(sock->file, fput_needed);
1da177e4
LT
1592out:
1593 return err;
1594}
1595
1596/*
1597 * Get the local address ('name') of a socket object. Move the obtained
1598 * name to user space.
1599 */
1600
20f37034
HC
1601SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1602 int __user *, usockaddr_len)
1da177e4
LT
1603{
1604 struct socket *sock;
230b1839 1605 struct sockaddr_storage address;
6cb153ca 1606 int len, err, fput_needed;
89bddce5 1607
6cb153ca 1608 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1609 if (!sock)
1610 goto out;
1611
1612 err = security_socket_getsockname(sock);
1613 if (err)
1614 goto out_put;
1615
230b1839 1616 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1617 if (err)
1618 goto out_put;
43db362d 1619 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1620
1621out_put:
6cb153ca 1622 fput_light(sock->file, fput_needed);
1da177e4
LT
1623out:
1624 return err;
1625}
1626
1627/*
1628 * Get the remote address ('name') of a socket object. Move the obtained
1629 * name to user space.
1630 */
1631
20f37034
HC
1632SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1633 int __user *, usockaddr_len)
1da177e4
LT
1634{
1635 struct socket *sock;
230b1839 1636 struct sockaddr_storage address;
6cb153ca 1637 int len, err, fput_needed;
1da177e4 1638
89bddce5
SH
1639 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1640 if (sock != NULL) {
1da177e4
LT
1641 err = security_socket_getpeername(sock);
1642 if (err) {
6cb153ca 1643 fput_light(sock->file, fput_needed);
1da177e4
LT
1644 return err;
1645 }
1646
89bddce5 1647 err =
230b1839 1648 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1649 1);
1da177e4 1650 if (!err)
43db362d 1651 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1652 usockaddr_len);
6cb153ca 1653 fput_light(sock->file, fput_needed);
1da177e4
LT
1654 }
1655 return err;
1656}
1657
1658/*
1659 * Send a datagram to a given address. We move the address into kernel
1660 * space and check the user space data area is readable before invoking
1661 * the protocol.
1662 */
1663
3e0fa65f 1664SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1665 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1666 int, addr_len)
1da177e4
LT
1667{
1668 struct socket *sock;
230b1839 1669 struct sockaddr_storage address;
1da177e4
LT
1670 int err;
1671 struct msghdr msg;
1672 struct iovec iov;
6cb153ca 1673 int fput_needed;
6cb153ca 1674
602bd0e9
AV
1675 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1676 if (unlikely(err))
1677 return err;
de0fa95c
PE
1678 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1679 if (!sock)
4387ff75 1680 goto out;
6cb153ca 1681
89bddce5 1682 msg.msg_name = NULL;
89bddce5
SH
1683 msg.msg_control = NULL;
1684 msg.msg_controllen = 0;
1685 msg.msg_namelen = 0;
6cb153ca 1686 if (addr) {
43db362d 1687 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1688 if (err < 0)
1689 goto out_put;
230b1839 1690 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1691 msg.msg_namelen = addr_len;
1da177e4
LT
1692 }
1693 if (sock->file->f_flags & O_NONBLOCK)
1694 flags |= MSG_DONTWAIT;
1695 msg.msg_flags = flags;
d8725c86 1696 err = sock_sendmsg(sock, &msg);
1da177e4 1697
89bddce5 1698out_put:
de0fa95c 1699 fput_light(sock->file, fput_needed);
4387ff75 1700out:
1da177e4
LT
1701 return err;
1702}
1703
1704/*
89bddce5 1705 * Send a datagram down a socket.
1da177e4
LT
1706 */
1707
3e0fa65f 1708SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1709 unsigned int, flags)
1da177e4
LT
1710{
1711 return sys_sendto(fd, buff, len, flags, NULL, 0);
1712}
1713
1714/*
89bddce5 1715 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1716 * sender. We verify the buffers are writable and if needed move the
1717 * sender address from kernel to user space.
1718 */
1719
3e0fa65f 1720SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1721 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1722 int __user *, addr_len)
1da177e4
LT
1723{
1724 struct socket *sock;
1725 struct iovec iov;
1726 struct msghdr msg;
230b1839 1727 struct sockaddr_storage address;
89bddce5 1728 int err, err2;
6cb153ca
BL
1729 int fput_needed;
1730
602bd0e9
AV
1731 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1732 if (unlikely(err))
1733 return err;
de0fa95c 1734 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1735 if (!sock)
de0fa95c 1736 goto out;
1da177e4 1737
89bddce5
SH
1738 msg.msg_control = NULL;
1739 msg.msg_controllen = 0;
f3d33426
HFS
1740 /* Save some cycles and don't copy the address if not needed */
1741 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1742 /* We assume all kernel code knows the size of sockaddr_storage */
1743 msg.msg_namelen = 0;
130ed5d1 1744 msg.msg_iocb = NULL;
9f138fa6 1745 msg.msg_flags = 0;
1da177e4
LT
1746 if (sock->file->f_flags & O_NONBLOCK)
1747 flags |= MSG_DONTWAIT;
2da62906 1748 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1749
89bddce5 1750 if (err >= 0 && addr != NULL) {
43db362d 1751 err2 = move_addr_to_user(&address,
230b1839 1752 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1753 if (err2 < 0)
1754 err = err2;
1da177e4 1755 }
de0fa95c
PE
1756
1757 fput_light(sock->file, fput_needed);
4387ff75 1758out:
1da177e4
LT
1759 return err;
1760}
1761
1762/*
89bddce5 1763 * Receive a datagram from a socket.
1da177e4
LT
1764 */
1765
b7c0ddf5
JG
1766SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1767 unsigned int, flags)
1da177e4
LT
1768{
1769 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1770}
1771
1772/*
1773 * Set a socket option. Because we don't know the option lengths we have
1774 * to pass the user mode parameter for the protocols to sort out.
1775 */
1776
20f37034
HC
1777SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1778 char __user *, optval, int, optlen)
1da177e4 1779{
6cb153ca 1780 int err, fput_needed;
1da177e4
LT
1781 struct socket *sock;
1782
1783 if (optlen < 0)
1784 return -EINVAL;
89bddce5
SH
1785
1786 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1787 if (sock != NULL) {
1788 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1789 if (err)
1790 goto out_put;
1da177e4
LT
1791
1792 if (level == SOL_SOCKET)
89bddce5
SH
1793 err =
1794 sock_setsockopt(sock, level, optname, optval,
1795 optlen);
1da177e4 1796 else
89bddce5
SH
1797 err =
1798 sock->ops->setsockopt(sock, level, optname, optval,
1799 optlen);
6cb153ca
BL
1800out_put:
1801 fput_light(sock->file, fput_needed);
1da177e4
LT
1802 }
1803 return err;
1804}
1805
1806/*
1807 * Get a socket option. Because we don't know the option lengths we have
1808 * to pass a user mode parameter for the protocols to sort out.
1809 */
1810
20f37034
HC
1811SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1812 char __user *, optval, int __user *, optlen)
1da177e4 1813{
6cb153ca 1814 int err, fput_needed;
1da177e4
LT
1815 struct socket *sock;
1816
89bddce5
SH
1817 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1818 if (sock != NULL) {
6cb153ca
BL
1819 err = security_socket_getsockopt(sock, level, optname);
1820 if (err)
1821 goto out_put;
1da177e4
LT
1822
1823 if (level == SOL_SOCKET)
89bddce5
SH
1824 err =
1825 sock_getsockopt(sock, level, optname, optval,
1826 optlen);
1da177e4 1827 else
89bddce5
SH
1828 err =
1829 sock->ops->getsockopt(sock, level, optname, optval,
1830 optlen);
6cb153ca
BL
1831out_put:
1832 fput_light(sock->file, fput_needed);
1da177e4
LT
1833 }
1834 return err;
1835}
1836
1da177e4
LT
1837/*
1838 * Shutdown a socket.
1839 */
1840
754fe8d2 1841SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1842{
6cb153ca 1843 int err, fput_needed;
1da177e4
LT
1844 struct socket *sock;
1845
89bddce5
SH
1846 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1847 if (sock != NULL) {
1da177e4 1848 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1849 if (!err)
1850 err = sock->ops->shutdown(sock, how);
1851 fput_light(sock->file, fput_needed);
1da177e4
LT
1852 }
1853 return err;
1854}
1855
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() expects two things in scope at the point of use: a
 * variable named `flags`, and, next to `msg`, a pointer named
 * `msg_compat` (the argument with `_compat` pasted on).  It yields the
 * address of @member in whichever of the two MSG_CMSG_COMPAT selects.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1862
c71d8ebe
TH
1863struct used_address {
1864 struct sockaddr_storage name;
1865 unsigned int name_len;
1866};
1867
da184284
AV
1868static int copy_msghdr_from_user(struct msghdr *kmsg,
1869 struct user_msghdr __user *umsg,
1870 struct sockaddr __user **save_addr,
1871 struct iovec **iov)
1661bf36 1872{
08adb7da
AV
1873 struct sockaddr __user *uaddr;
1874 struct iovec __user *uiov;
c0371da6 1875 size_t nr_segs;
08adb7da
AV
1876 ssize_t err;
1877
1878 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1879 __get_user(uaddr, &umsg->msg_name) ||
1880 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1881 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1882 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1883 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1884 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1885 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1886 return -EFAULT;
dbb490b9 1887
08adb7da 1888 if (!uaddr)
6a2a2b3a
AS
1889 kmsg->msg_namelen = 0;
1890
dbb490b9
ML
1891 if (kmsg->msg_namelen < 0)
1892 return -EINVAL;
1893
1661bf36 1894 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1895 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1896
1897 if (save_addr)
1898 *save_addr = uaddr;
1899
1900 if (uaddr && kmsg->msg_namelen) {
1901 if (!save_addr) {
1902 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1903 kmsg->msg_name);
1904 if (err < 0)
1905 return err;
1906 }
1907 } else {
1908 kmsg->msg_name = NULL;
1909 kmsg->msg_namelen = 0;
1910 }
1911
c0371da6 1912 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1913 return -EMSGSIZE;
1914
0345f931 1915 kmsg->msg_iocb = NULL;
1916
da184284
AV
1917 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1918 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1919}
1920
666547ff 1921static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1922 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1923 struct used_address *used_address,
1924 unsigned int allowed_msghdr_flags)
1da177e4 1925{
89bddce5
SH
1926 struct compat_msghdr __user *msg_compat =
1927 (struct compat_msghdr __user *)msg;
230b1839 1928 struct sockaddr_storage address;
1da177e4 1929 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1930 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1931 __aligned(sizeof(__kernel_size_t));
89bddce5 1932 /* 20 is size of ipv6_pktinfo */
1da177e4 1933 unsigned char *ctl_buf = ctl;
d8725c86 1934 int ctl_len;
08adb7da 1935 ssize_t err;
89bddce5 1936
08adb7da 1937 msg_sys->msg_name = &address;
1da177e4 1938
08449320 1939 if (MSG_CMSG_COMPAT & flags)
08adb7da 1940 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1941 else
08adb7da 1942 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1943 if (err < 0)
da184284 1944 return err;
1da177e4
LT
1945
1946 err = -ENOBUFS;
1947
228e548e 1948 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1949 goto out_freeiov;
28a94d8f 1950 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1951 ctl_len = msg_sys->msg_controllen;
1da177e4 1952 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1953 err =
228e548e 1954 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1955 sizeof(ctl));
1da177e4
LT
1956 if (err)
1957 goto out_freeiov;
228e548e
AB
1958 ctl_buf = msg_sys->msg_control;
1959 ctl_len = msg_sys->msg_controllen;
1da177e4 1960 } else if (ctl_len) {
ac4340fc
DM
1961 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
1962 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 1963 if (ctl_len > sizeof(ctl)) {
1da177e4 1964 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1965 if (ctl_buf == NULL)
1da177e4
LT
1966 goto out_freeiov;
1967 }
1968 err = -EFAULT;
1969 /*
228e548e 1970 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1971 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1972 * checking falls down on this.
1973 */
fb8621bb 1974 if (copy_from_user(ctl_buf,
228e548e 1975 (void __user __force *)msg_sys->msg_control,
89bddce5 1976 ctl_len))
1da177e4 1977 goto out_freectl;
228e548e 1978 msg_sys->msg_control = ctl_buf;
1da177e4 1979 }
228e548e 1980 msg_sys->msg_flags = flags;
1da177e4
LT
1981
1982 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1983 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1984 /*
1985 * If this is sendmmsg() and current destination address is same as
1986 * previously succeeded address, omit asking LSM's decision.
1987 * used_address->name_len is initialized to UINT_MAX so that the first
1988 * destination address never matches.
1989 */
bc909d9d
MD
1990 if (used_address && msg_sys->msg_name &&
1991 used_address->name_len == msg_sys->msg_namelen &&
1992 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1993 used_address->name_len)) {
d8725c86 1994 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1995 goto out_freectl;
1996 }
d8725c86 1997 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1998 /*
1999 * If this is sendmmsg() and sending to current destination address was
2000 * successful, remember it.
2001 */
2002 if (used_address && err >= 0) {
2003 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2004 if (msg_sys->msg_name)
2005 memcpy(&used_address->name, msg_sys->msg_name,
2006 used_address->name_len);
c71d8ebe 2007 }
1da177e4
LT
2008
2009out_freectl:
89bddce5 2010 if (ctl_buf != ctl)
1da177e4
LT
2011 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2012out_freeiov:
da184284 2013 kfree(iov);
228e548e
AB
2014 return err;
2015}
2016
2017/*
2018 * BSD sendmsg interface
2019 */
2020
666547ff 2021long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2022{
2023 int fput_needed, err;
2024 struct msghdr msg_sys;
1be374a0
AL
2025 struct socket *sock;
2026
1be374a0 2027 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2028 if (!sock)
2029 goto out;
2030
28a94d8f 2031 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2032
6cb153ca 2033 fput_light(sock->file, fput_needed);
89bddce5 2034out:
1da177e4
LT
2035 return err;
2036}
2037
666547ff 2038SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2039{
2040 if (flags & MSG_CMSG_COMPAT)
2041 return -EINVAL;
2042 return __sys_sendmsg(fd, msg, flags);
2043}
2044
228e548e
AB
2045/*
2046 * Linux sendmmsg interface
2047 */
2048
2049int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2050 unsigned int flags)
2051{
2052 int fput_needed, err, datagrams;
2053 struct socket *sock;
2054 struct mmsghdr __user *entry;
2055 struct compat_mmsghdr __user *compat_entry;
2056 struct msghdr msg_sys;
c71d8ebe 2057 struct used_address used_address;
f092276d 2058 unsigned int oflags = flags;
228e548e 2059
98382f41
AB
2060 if (vlen > UIO_MAXIOV)
2061 vlen = UIO_MAXIOV;
228e548e
AB
2062
2063 datagrams = 0;
2064
2065 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2066 if (!sock)
2067 return err;
2068
c71d8ebe 2069 used_address.name_len = UINT_MAX;
228e548e
AB
2070 entry = mmsg;
2071 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2072 err = 0;
f092276d 2073 flags |= MSG_BATCH;
228e548e
AB
2074
2075 while (datagrams < vlen) {
f092276d
TH
2076 if (datagrams == vlen - 1)
2077 flags = oflags;
2078
228e548e 2079 if (MSG_CMSG_COMPAT & flags) {
666547ff 2080 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2081 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2082 if (err < 0)
2083 break;
2084 err = __put_user(err, &compat_entry->msg_len);
2085 ++compat_entry;
2086 } else {
a7526eb5 2087 err = ___sys_sendmsg(sock,
666547ff 2088 (struct user_msghdr __user *)entry,
28a94d8f 2089 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2090 if (err < 0)
2091 break;
2092 err = put_user(err, &entry->msg_len);
2093 ++entry;
2094 }
2095
2096 if (err)
2097 break;
2098 ++datagrams;
3023898b
SHY
2099 if (msg_data_left(&msg_sys))
2100 break;
a78cb84c 2101 cond_resched();
228e548e
AB
2102 }
2103
228e548e
AB
2104 fput_light(sock->file, fput_needed);
2105
728ffb86
AB
2106 /* We only return an error if no datagrams were able to be sent */
2107 if (datagrams != 0)
228e548e
AB
2108 return datagrams;
2109
228e548e
AB
2110 return err;
2111}
2112
2113SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2114 unsigned int, vlen, unsigned int, flags)
2115{
1be374a0
AL
2116 if (flags & MSG_CMSG_COMPAT)
2117 return -EINVAL;
228e548e
AB
2118 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2119}
2120
666547ff 2121static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2122 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2123{
89bddce5
SH
2124 struct compat_msghdr __user *msg_compat =
2125 (struct compat_msghdr __user *)msg;
1da177e4 2126 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2127 struct iovec *iov = iovstack;
1da177e4 2128 unsigned long cmsg_ptr;
2da62906 2129 int len;
08adb7da 2130 ssize_t err;
1da177e4
LT
2131
2132 /* kernel mode address */
230b1839 2133 struct sockaddr_storage addr;
1da177e4
LT
2134
2135 /* user mode address pointers */
2136 struct sockaddr __user *uaddr;
08adb7da 2137 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2138
08adb7da 2139 msg_sys->msg_name = &addr;
1da177e4 2140
f3d33426 2141 if (MSG_CMSG_COMPAT & flags)
08adb7da 2142 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2143 else
08adb7da 2144 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2145 if (err < 0)
da184284 2146 return err;
1da177e4 2147
a2e27255
ACM
2148 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2149 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2150
f3d33426
HFS
2151 /* We assume all kernel code knows the size of sockaddr_storage */
2152 msg_sys->msg_namelen = 0;
2153
1da177e4
LT
2154 if (sock->file->f_flags & O_NONBLOCK)
2155 flags |= MSG_DONTWAIT;
2da62906 2156 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2157 if (err < 0)
2158 goto out_freeiov;
2159 len = err;
2160
2161 if (uaddr != NULL) {
43db362d 2162 err = move_addr_to_user(&addr,
a2e27255 2163 msg_sys->msg_namelen, uaddr,
89bddce5 2164 uaddr_len);
1da177e4
LT
2165 if (err < 0)
2166 goto out_freeiov;
2167 }
a2e27255 2168 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2169 COMPAT_FLAGS(msg));
1da177e4
LT
2170 if (err)
2171 goto out_freeiov;
2172 if (MSG_CMSG_COMPAT & flags)
a2e27255 2173 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2174 &msg_compat->msg_controllen);
2175 else
a2e27255 2176 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2177 &msg->msg_controllen);
2178 if (err)
2179 goto out_freeiov;
2180 err = len;
2181
2182out_freeiov:
da184284 2183 kfree(iov);
a2e27255
ACM
2184 return err;
2185}
2186
2187/*
2188 * BSD recvmsg interface
2189 */
2190
666547ff 2191long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2192{
2193 int fput_needed, err;
2194 struct msghdr msg_sys;
1be374a0
AL
2195 struct socket *sock;
2196
1be374a0 2197 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2198 if (!sock)
2199 goto out;
2200
a7526eb5 2201 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2202
6cb153ca 2203 fput_light(sock->file, fput_needed);
1da177e4
LT
2204out:
2205 return err;
2206}
2207
666547ff 2208SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2209 unsigned int, flags)
2210{
2211 if (flags & MSG_CMSG_COMPAT)
2212 return -EINVAL;
2213 return __sys_recvmsg(fd, msg, flags);
2214}
2215
a2e27255
ACM
2216/*
2217 * Linux recvmmsg interface
2218 */
2219
2220int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2221 unsigned int flags, struct timespec *timeout)
2222{
2223 int fput_needed, err, datagrams;
2224 struct socket *sock;
2225 struct mmsghdr __user *entry;
d7256d0e 2226 struct compat_mmsghdr __user *compat_entry;
a2e27255 2227 struct msghdr msg_sys;
766b9f92
DD
2228 struct timespec64 end_time;
2229 struct timespec64 timeout64;
a2e27255
ACM
2230
2231 if (timeout &&
2232 poll_select_set_timeout(&end_time, timeout->tv_sec,
2233 timeout->tv_nsec))
2234 return -EINVAL;
2235
2236 datagrams = 0;
2237
2238 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2239 if (!sock)
2240 return err;
2241
2242 err = sock_error(sock->sk);
e623a9e9
MJ
2243 if (err) {
2244 datagrams = err;
a2e27255 2245 goto out_put;
e623a9e9 2246 }
a2e27255
ACM
2247
2248 entry = mmsg;
d7256d0e 2249 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2250
2251 while (datagrams < vlen) {
2252 /*
2253 * No need to ask LSM for more than the first datagram.
2254 */
d7256d0e 2255 if (MSG_CMSG_COMPAT & flags) {
666547ff 2256 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2257 &msg_sys, flags & ~MSG_WAITFORONE,
2258 datagrams);
d7256d0e
JMG
2259 if (err < 0)
2260 break;
2261 err = __put_user(err, &compat_entry->msg_len);
2262 ++compat_entry;
2263 } else {
a7526eb5 2264 err = ___sys_recvmsg(sock,
666547ff 2265 (struct user_msghdr __user *)entry,
a7526eb5
AL
2266 &msg_sys, flags & ~MSG_WAITFORONE,
2267 datagrams);
d7256d0e
JMG
2268 if (err < 0)
2269 break;
2270 err = put_user(err, &entry->msg_len);
2271 ++entry;
2272 }
2273
a2e27255
ACM
2274 if (err)
2275 break;
a2e27255
ACM
2276 ++datagrams;
2277
71c5c159
BB
2278 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2279 if (flags & MSG_WAITFORONE)
2280 flags |= MSG_DONTWAIT;
2281
a2e27255 2282 if (timeout) {
766b9f92
DD
2283 ktime_get_ts64(&timeout64);
2284 *timeout = timespec64_to_timespec(
2285 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2286 if (timeout->tv_sec < 0) {
2287 timeout->tv_sec = timeout->tv_nsec = 0;
2288 break;
2289 }
2290
2291 /* Timeout, return less than vlen datagrams */
2292 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2293 break;
2294 }
2295
2296 /* Out of band data, return right away */
2297 if (msg_sys.msg_flags & MSG_OOB)
2298 break;
a78cb84c 2299 cond_resched();
a2e27255
ACM
2300 }
2301
a2e27255 2302 if (err == 0)
34b88a68
ACM
2303 goto out_put;
2304
2305 if (datagrams == 0) {
2306 datagrams = err;
2307 goto out_put;
2308 }
a2e27255 2309
34b88a68
ACM
2310 /*
2311 * We may return less entries than requested (vlen) if the
2312 * sock is non block and there aren't enough datagrams...
2313 */
2314 if (err != -EAGAIN) {
a2e27255 2315 /*
34b88a68
ACM
2316 * ... or if recvmsg returns an error after we
2317 * received some datagrams, where we record the
2318 * error to return on the next call or if the
2319 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2320 */
34b88a68 2321 sock->sk->sk_err = -err;
a2e27255 2322 }
34b88a68
ACM
2323out_put:
2324 fput_light(sock->file, fput_needed);
a2e27255 2325
34b88a68 2326 return datagrams;
a2e27255
ACM
2327}
2328
2329SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2330 unsigned int, vlen, unsigned int, flags,
2331 struct timespec __user *, timeout)
2332{
2333 int datagrams;
2334 struct timespec timeout_sys;
2335
1be374a0
AL
2336 if (flags & MSG_CMSG_COMPAT)
2337 return -EINVAL;
2338
a2e27255
ACM
2339 if (!timeout)
2340 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2341
2342 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2343 return -EFAULT;
2344
2345 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2346
2347 if (datagrams > 0 &&
2348 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2349 datagrams = -EFAULT;
2350
2351 return datagrams;
2352}
2353
2354#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2355/* Argument list sizes for sys_socketcall */
2356#define AL(x) ((x) * sizeof(unsigned long))
228e548e 2357static const unsigned char nargs[21] = {
c6d409cf
ED
2358 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2359 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2360 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
228e548e 2361 AL(4), AL(5), AL(4)
89bddce5
SH
2362};
2363
1da177e4
LT
2364#undef AL
2365
2366/*
89bddce5 2367 * System call vectors.
1da177e4
LT
2368 *
2369 * Argument checking cleaned up. Saved 20% in size.
2370 * This function doesn't need to set the kernel lock because
89bddce5 2371 * it is set by the callees.
1da177e4
LT
2372 */
2373
3e0fa65f 2374SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2375{
2950fa9d 2376 unsigned long a[AUDITSC_ARGS];
89bddce5 2377 unsigned long a0, a1;
1da177e4 2378 int err;
47379052 2379 unsigned int len;
1da177e4 2380
228e548e 2381 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2382 return -EINVAL;
2383
47379052
AV
2384 len = nargs[call];
2385 if (len > sizeof(a))
2386 return -EINVAL;
2387
1da177e4 2388 /* copy_from_user should be SMP safe. */
47379052 2389 if (copy_from_user(a, args, len))
1da177e4 2390 return -EFAULT;
3ec3b2fb 2391
2950fa9d
CG
2392 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2393 if (err)
2394 return err;
3ec3b2fb 2395
89bddce5
SH
2396 a0 = a[0];
2397 a1 = a[1];
2398
2399 switch (call) {
2400 case SYS_SOCKET:
2401 err = sys_socket(a0, a1, a[2]);
2402 break;
2403 case SYS_BIND:
2404 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2405 break;
2406 case SYS_CONNECT:
2407 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2408 break;
2409 case SYS_LISTEN:
2410 err = sys_listen(a0, a1);
2411 break;
2412 case SYS_ACCEPT:
de11defe
UD
2413 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2414 (int __user *)a[2], 0);
89bddce5
SH
2415 break;
2416 case SYS_GETSOCKNAME:
2417 err =
2418 sys_getsockname(a0, (struct sockaddr __user *)a1,
2419 (int __user *)a[2]);
2420 break;
2421 case SYS_GETPEERNAME:
2422 err =
2423 sys_getpeername(a0, (struct sockaddr __user *)a1,
2424 (int __user *)a[2]);
2425 break;
2426 case SYS_SOCKETPAIR:
2427 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2428 break;
2429 case SYS_SEND:
2430 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2431 break;
2432 case SYS_SENDTO:
2433 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2434 (struct sockaddr __user *)a[4], a[5]);
2435 break;
2436 case SYS_RECV:
2437 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2438 break;
2439 case SYS_RECVFROM:
2440 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2441 (struct sockaddr __user *)a[4],
2442 (int __user *)a[5]);
2443 break;
2444 case SYS_SHUTDOWN:
2445 err = sys_shutdown(a0, a1);
2446 break;
2447 case SYS_SETSOCKOPT:
2448 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2449 break;
2450 case SYS_GETSOCKOPT:
2451 err =
2452 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2453 (int __user *)a[4]);
2454 break;
2455 case SYS_SENDMSG:
666547ff 2456 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2457 break;
228e548e
AB
2458 case SYS_SENDMMSG:
2459 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2460 break;
89bddce5 2461 case SYS_RECVMSG:
666547ff 2462 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2463 break;
a2e27255
ACM
2464 case SYS_RECVMMSG:
2465 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2466 (struct timespec __user *)a[4]);
2467 break;
de11defe
UD
2468 case SYS_ACCEPT4:
2469 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2470 (int __user *)a[2], a[3]);
aaca0bdc 2471 break;
89bddce5
SH
2472 default:
2473 err = -EINVAL;
2474 break;
1da177e4
LT
2475 }
2476 return err;
2477}
2478
89bddce5 2479#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2480
55737fda
SH
2481/**
2482 * sock_register - add a socket protocol handler
2483 * @ops: description of protocol
2484 *
1da177e4
LT
2485 * This function is called by a protocol handler that wants to
2486 * advertise its address family, and have it linked into the
e793c0f7 2487 * socket interface. The value ops->family corresponds to the
55737fda 2488 * socket system call protocol family.
1da177e4 2489 */
f0fd27d4 2490int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2491{
2492 int err;
2493
2494 if (ops->family >= NPROTO) {
3410f22e 2495 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2496 return -ENOBUFS;
2497 }
55737fda
SH
2498
2499 spin_lock(&net_family_lock);
190683a9
ED
2500 if (rcu_dereference_protected(net_families[ops->family],
2501 lockdep_is_held(&net_family_lock)))
55737fda
SH
2502 err = -EEXIST;
2503 else {
cf778b00 2504 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2505 err = 0;
2506 }
55737fda
SH
2507 spin_unlock(&net_family_lock);
2508
3410f22e 2509 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2510 return err;
2511}
c6d409cf 2512EXPORT_SYMBOL(sock_register);
1da177e4 2513
55737fda
SH
2514/**
2515 * sock_unregister - remove a protocol handler
2516 * @family: protocol family to remove
2517 *
1da177e4
LT
2518 * This function is called by a protocol handler that wants to
2519 * remove its address family, and have it unlinked from the
55737fda
SH
2520 * new socket creation.
2521 *
2522 * If protocol handler is a module, then it can use module reference
2523 * counts to protect against new references. If protocol handler is not
2524 * a module then it needs to provide its own protection in
2525 * the ops->create routine.
1da177e4 2526 */
f0fd27d4 2527void sock_unregister(int family)
1da177e4 2528{
f0fd27d4 2529 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2530
55737fda 2531 spin_lock(&net_family_lock);
a9b3cd7f 2532 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2533 spin_unlock(&net_family_lock);
2534
2535 synchronize_rcu();
2536
3410f22e 2537 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2538}
c6d409cf 2539EXPORT_SYMBOL(sock_unregister);
1da177e4 2540
77d76ea3 2541static int __init sock_init(void)
1da177e4 2542{
b3e19d92 2543 int err;
2ca794e5
EB
2544 /*
2545 * Initialize the network sysctl infrastructure.
2546 */
2547 err = net_sysctl_init();
2548 if (err)
2549 goto out;
b3e19d92 2550
1da177e4 2551 /*
89bddce5 2552 * Initialize skbuff SLAB cache
1da177e4
LT
2553 */
2554 skb_init();
1da177e4
LT
2555
2556 /*
89bddce5 2557 * Initialize the protocols module.
1da177e4
LT
2558 */
2559
2560 init_inodecache();
b3e19d92
NP
2561
2562 err = register_filesystem(&sock_fs_type);
2563 if (err)
2564 goto out_fs;
1da177e4 2565 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2566 if (IS_ERR(sock_mnt)) {
2567 err = PTR_ERR(sock_mnt);
2568 goto out_mount;
2569 }
77d76ea3
AK
2570
2571 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2572 */
2573
2574#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2575 err = netfilter_init();
2576 if (err)
2577 goto out;
1da177e4 2578#endif
cbeb321a 2579
408eccce 2580 ptp_classifier_init();
c1f19b51 2581
b3e19d92
NP
2582out:
2583 return err;
2584
2585out_mount:
2586 unregister_filesystem(&sock_fs_type);
2587out_fs:
2588 goto out;
1da177e4
LT
2589}
2590
77d76ea3
AK
2591core_initcall(sock_init); /* early initcall */
2592
1da177e4
LT
2593#ifdef CONFIG_PROC_FS
2594void socket_seq_show(struct seq_file *seq)
2595{
2596 int cpu;
2597 int counter = 0;
2598
6f912042 2599 for_each_possible_cpu(cpu)
89bddce5 2600 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2601
2602 /* It can be negative, by the way. 8) */
2603 if (counter < 0)
2604 counter = 0;
2605
2606 seq_printf(seq, "sockets: used %d\n", counter);
2607}
89bddce5 2608#endif /* CONFIG_PROC_FS */
1da177e4 2609
89bbfc95 2610#ifdef CONFIG_COMPAT
6b96018b 2611static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2612 unsigned int cmd, void __user *up)
7a229387 2613{
7a229387
AB
2614 mm_segment_t old_fs = get_fs();
2615 struct timeval ktv;
2616 int err;
2617
2618 set_fs(KERNEL_DS);
6b96018b 2619 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2620 set_fs(old_fs);
644595f8 2621 if (!err)
ed6fe9d6 2622 err = compat_put_timeval(&ktv, up);
644595f8 2623
7a229387
AB
2624 return err;
2625}
2626
6b96018b 2627static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2628 unsigned int cmd, void __user *up)
7a229387 2629{
7a229387
AB
2630 mm_segment_t old_fs = get_fs();
2631 struct timespec kts;
2632 int err;
2633
2634 set_fs(KERNEL_DS);
6b96018b 2635 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2636 set_fs(old_fs);
644595f8 2637 if (!err)
ed6fe9d6 2638 err = compat_put_timespec(&kts, up);
644595f8 2639
7a229387
AB
2640 return err;
2641}
2642
6b96018b 2643static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2644{
2645 struct ifreq __user *uifr;
2646 int err;
2647
2648 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2649 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2650 return -EFAULT;
2651
6b96018b 2652 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2653 if (err)
2654 return err;
2655
6b96018b 2656 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2657 return -EFAULT;
2658
2659 return 0;
2660}
2661
6b96018b 2662static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2663{
6b96018b 2664 struct compat_ifconf ifc32;
7a229387
AB
2665 struct ifconf ifc;
2666 struct ifconf __user *uifc;
6b96018b 2667 struct compat_ifreq __user *ifr32;
7a229387
AB
2668 struct ifreq __user *ifr;
2669 unsigned int i, j;
2670 int err;
2671
6b96018b 2672 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2673 return -EFAULT;
2674
43da5f2e 2675 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2676 if (ifc32.ifcbuf == 0) {
2677 ifc32.ifc_len = 0;
2678 ifc.ifc_len = 0;
2679 ifc.ifc_req = NULL;
2680 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2681 } else {
c6d409cf
ED
2682 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2683 sizeof(struct ifreq);
7a229387
AB
2684 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2685 ifc.ifc_len = len;
2686 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2687 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2688 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2689 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2690 return -EFAULT;
2691 ifr++;
2692 ifr32++;
2693 }
2694 }
2695 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2696 return -EFAULT;
2697
6b96018b 2698 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2699 if (err)
2700 return err;
2701
2702 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2703 return -EFAULT;
2704
2705 ifr = ifc.ifc_req;
2706 ifr32 = compat_ptr(ifc32.ifcbuf);
2707 for (i = 0, j = 0;
c6d409cf
ED
2708 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2709 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2710 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2711 return -EFAULT;
2712 ifr32++;
2713 ifr++;
2714 }
2715
2716 if (ifc32.ifcbuf == 0) {
2717 /* Translate from 64-bit structure multiple to
2718 * a 32-bit one.
2719 */
2720 i = ifc.ifc_len;
6b96018b 2721 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2722 ifc32.ifc_len = i;
2723 } else {
2724 ifc32.ifc_len = i;
2725 }
6b96018b 2726 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2727 return -EFAULT;
2728
2729 return 0;
2730}
2731
6b96018b 2732static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2733{
3a7da39d
BH
2734 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2735 bool convert_in = false, convert_out = false;
2736 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2737 struct ethtool_rxnfc __user *rxnfc;
7a229387 2738 struct ifreq __user *ifr;
3a7da39d
BH
2739 u32 rule_cnt = 0, actual_rule_cnt;
2740 u32 ethcmd;
7a229387 2741 u32 data;
3a7da39d 2742 int ret;
7a229387 2743
3a7da39d
BH
2744 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2745 return -EFAULT;
7a229387 2746
3a7da39d
BH
2747 compat_rxnfc = compat_ptr(data);
2748
2749 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2750 return -EFAULT;
2751
3a7da39d
BH
2752 /* Most ethtool structures are defined without padding.
2753 * Unfortunately struct ethtool_rxnfc is an exception.
2754 */
2755 switch (ethcmd) {
2756 default:
2757 break;
2758 case ETHTOOL_GRXCLSRLALL:
2759 /* Buffer size is variable */
2760 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2761 return -EFAULT;
2762 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2763 return -ENOMEM;
2764 buf_size += rule_cnt * sizeof(u32);
2765 /* fall through */
2766 case ETHTOOL_GRXRINGS:
2767 case ETHTOOL_GRXCLSRLCNT:
2768 case ETHTOOL_GRXCLSRULE:
55664f32 2769 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2770 convert_out = true;
2771 /* fall through */
2772 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2773 buf_size += sizeof(struct ethtool_rxnfc);
2774 convert_in = true;
2775 break;
2776 }
2777
2778 ifr = compat_alloc_user_space(buf_size);
954b1244 2779 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2780
2781 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2782 return -EFAULT;
2783
3a7da39d
BH
2784 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2785 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2786 return -EFAULT;
2787
3a7da39d 2788 if (convert_in) {
127fe533 2789 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2790 * fs.ring_cookie and at the end of fs, but nowhere else.
2791 */
127fe533
AD
2792 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2793 sizeof(compat_rxnfc->fs.m_ext) !=
2794 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2795 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2796 BUILD_BUG_ON(
2797 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2798 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2799 offsetof(struct ethtool_rxnfc, fs.location) -
2800 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2801
2802 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2803 (void __user *)(&rxnfc->fs.m_ext + 1) -
2804 (void __user *)rxnfc) ||
3a7da39d
BH
2805 copy_in_user(&rxnfc->fs.ring_cookie,
2806 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2807 (void __user *)(&rxnfc->fs.location + 1) -
2808 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2809 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2810 sizeof(rxnfc->rule_cnt)))
2811 return -EFAULT;
2812 }
2813
2814 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2815 if (ret)
2816 return ret;
2817
2818 if (convert_out) {
2819 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2820 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2821 (const void __user *)rxnfc) ||
3a7da39d
BH
2822 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2823 &rxnfc->fs.ring_cookie,
954b1244
SH
2824 (const void __user *)(&rxnfc->fs.location + 1) -
2825 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2826 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2827 sizeof(rxnfc->rule_cnt)))
2828 return -EFAULT;
2829
2830 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2831 /* As an optimisation, we only copy the actual
2832 * number of rules that the underlying
2833 * function returned. Since Mallory might
2834 * change the rule count in user memory, we
2835 * check that it is less than the rule count
2836 * originally given (as the user buffer size),
2837 * which has been range-checked.
2838 */
2839 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2840 return -EFAULT;
2841 if (actual_rule_cnt < rule_cnt)
2842 rule_cnt = actual_rule_cnt;
2843 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2844 &rxnfc->rule_locs[0],
2845 rule_cnt * sizeof(u32)))
2846 return -EFAULT;
2847 }
2848 }
2849
2850 return 0;
7a229387
AB
2851}
2852
7a50a240
AB
2853static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2854{
2855 void __user *uptr;
2856 compat_uptr_t uptr32;
2857 struct ifreq __user *uifr;
2858
c6d409cf 2859 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2860 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2861 return -EFAULT;
2862
2863 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2864 return -EFAULT;
2865
2866 uptr = compat_ptr(uptr32);
2867
2868 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2869 return -EFAULT;
2870
2871 return dev_ioctl(net, SIOCWANDEV, uifr);
2872}
2873
6b96018b
AB
2874static int bond_ioctl(struct net *net, unsigned int cmd,
2875 struct compat_ifreq __user *ifr32)
7a229387
AB
2876{
2877 struct ifreq kifr;
7a229387
AB
2878 mm_segment_t old_fs;
2879 int err;
7a229387
AB
2880
2881 switch (cmd) {
2882 case SIOCBONDENSLAVE:
2883 case SIOCBONDRELEASE:
2884 case SIOCBONDSETHWADDR:
2885 case SIOCBONDCHANGEACTIVE:
6b96018b 2886 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2887 return -EFAULT;
2888
2889 old_fs = get_fs();
c6d409cf 2890 set_fs(KERNEL_DS);
c3f52ae6 2891 err = dev_ioctl(net, cmd,
2892 (struct ifreq __user __force *) &kifr);
c6d409cf 2893 set_fs(old_fs);
7a229387
AB
2894
2895 return err;
7a229387 2896 default:
07d106d0 2897 return -ENOIOCTLCMD;
ccbd6a5a 2898 }
7a229387
AB
2899}
2900
590d4693
BH
2901/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2902static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2903 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2904{
2905 struct ifreq __user *u_ifreq64;
7a229387
AB
2906 char tmp_buf[IFNAMSIZ];
2907 void __user *data64;
2908 u32 data32;
2909
2910 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2911 IFNAMSIZ))
2912 return -EFAULT;
417c3522 2913 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2914 return -EFAULT;
2915 data64 = compat_ptr(data32);
2916
2917 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2918
7a229387
AB
2919 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2920 IFNAMSIZ))
2921 return -EFAULT;
417c3522 2922 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2923 return -EFAULT;
2924
6b96018b 2925 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2926}
2927
6b96018b
AB
2928static int dev_ifsioc(struct net *net, struct socket *sock,
2929 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2930{
a2116ed2 2931 struct ifreq __user *uifr;
7a229387
AB
2932 int err;
2933
a2116ed2
AB
2934 uifr = compat_alloc_user_space(sizeof(*uifr));
2935 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2936 return -EFAULT;
2937
2938 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2939
7a229387
AB
2940 if (!err) {
2941 switch (cmd) {
2942 case SIOCGIFFLAGS:
2943 case SIOCGIFMETRIC:
2944 case SIOCGIFMTU:
2945 case SIOCGIFMEM:
2946 case SIOCGIFHWADDR:
2947 case SIOCGIFINDEX:
2948 case SIOCGIFADDR:
2949 case SIOCGIFBRDADDR:
2950 case SIOCGIFDSTADDR:
2951 case SIOCGIFNETMASK:
fab2532b 2952 case SIOCGIFPFLAGS:
7a229387 2953 case SIOCGIFTXQLEN:
fab2532b
AB
2954 case SIOCGMIIPHY:
2955 case SIOCGMIIREG:
a2116ed2 2956 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2957 err = -EFAULT;
2958 break;
2959 }
2960 }
2961 return err;
2962}
2963
a2116ed2
AB
2964static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2965 struct compat_ifreq __user *uifr32)
2966{
2967 struct ifreq ifr;
2968 struct compat_ifmap __user *uifmap32;
2969 mm_segment_t old_fs;
2970 int err;
2971
2972 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2973 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2974 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2975 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2976 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2977 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2978 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2979 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2980 if (err)
2981 return -EFAULT;
2982
2983 old_fs = get_fs();
c6d409cf 2984 set_fs(KERNEL_DS);
c3f52ae6 2985 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2986 set_fs(old_fs);
a2116ed2
AB
2987
2988 if (cmd == SIOCGIFMAP && !err) {
2989 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2990 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2991 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2992 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2993 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2994 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2995 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2996 if (err)
2997 err = -EFAULT;
2998 }
2999 return err;
3000}
3001
7a229387 3002struct rtentry32 {
c6d409cf 3003 u32 rt_pad1;
7a229387
AB
3004 struct sockaddr rt_dst; /* target address */
3005 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3006 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3007 unsigned short rt_flags;
3008 short rt_pad2;
3009 u32 rt_pad3;
3010 unsigned char rt_tos;
3011 unsigned char rt_class;
3012 short rt_pad4;
3013 short rt_metric; /* +1 for binary compatibility! */
7a229387 3014 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3015 u32 rt_mtu; /* per route MTU/Window */
3016 u32 rt_window; /* Window clamping */
7a229387
AB
3017 unsigned short rt_irtt; /* Initial RTT */
3018};
3019
3020struct in6_rtmsg32 {
3021 struct in6_addr rtmsg_dst;
3022 struct in6_addr rtmsg_src;
3023 struct in6_addr rtmsg_gateway;
3024 u32 rtmsg_type;
3025 u16 rtmsg_dst_len;
3026 u16 rtmsg_src_len;
3027 u32 rtmsg_metric;
3028 u32 rtmsg_info;
3029 u32 rtmsg_flags;
3030 s32 rtmsg_ifindex;
3031};
3032
6b96018b
AB
3033static int routing_ioctl(struct net *net, struct socket *sock,
3034 unsigned int cmd, void __user *argp)
7a229387
AB
3035{
3036 int ret;
3037 void *r = NULL;
3038 struct in6_rtmsg r6;
3039 struct rtentry r4;
3040 char devname[16];
3041 u32 rtdev;
3042 mm_segment_t old_fs = get_fs();
3043
6b96018b
AB
3044 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3045 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3046 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3047 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3048 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3049 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3050 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3051 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3052 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3053 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3054 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3055
3056 r = (void *) &r6;
3057 } else { /* ipv4 */
6b96018b 3058 struct rtentry32 __user *ur4 = argp;
c6d409cf 3059 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3060 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3061 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3062 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3063 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3064 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3065 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3066 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3067 if (rtdev) {
c6d409cf 3068 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3069 r4.rt_dev = (char __user __force *)devname;
3070 devname[15] = 0;
7a229387
AB
3071 } else
3072 r4.rt_dev = NULL;
3073
3074 r = (void *) &r4;
3075 }
3076
3077 if (ret) {
3078 ret = -EFAULT;
3079 goto out;
3080 }
3081
c6d409cf 3082 set_fs(KERNEL_DS);
6b96018b 3083 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3084 set_fs(old_fs);
7a229387
AB
3085
3086out:
7a229387
AB
3087 return ret;
3088}
3089
3090/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3091 * for some operations; this forces use of the newer bridge-utils that
25985edc 3092 * use compatible ioctls
7a229387 3093 */
6b96018b 3094static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3095{
6b96018b 3096 compat_ulong_t tmp;
7a229387 3097
6b96018b 3098 if (get_user(tmp, argp))
7a229387
AB
3099 return -EFAULT;
3100 if (tmp == BRCTL_GET_VERSION)
3101 return BRCTL_VERSION + 1;
3102 return -EINVAL;
3103}
3104
6b96018b
AB
3105static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3106 unsigned int cmd, unsigned long arg)
3107{
3108 void __user *argp = compat_ptr(arg);
3109 struct sock *sk = sock->sk;
3110 struct net *net = sock_net(sk);
7a229387 3111
6b96018b 3112 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3113 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3114
3115 switch (cmd) {
3116 case SIOCSIFBR:
3117 case SIOCGIFBR:
3118 return old_bridge_ioctl(argp);
3119 case SIOCGIFNAME:
3120 return dev_ifname32(net, argp);
3121 case SIOCGIFCONF:
3122 return dev_ifconf(net, argp);
3123 case SIOCETHTOOL:
3124 return ethtool_ioctl(net, argp);
7a50a240
AB
3125 case SIOCWANDEV:
3126 return compat_siocwandev(net, argp);
a2116ed2
AB
3127 case SIOCGIFMAP:
3128 case SIOCSIFMAP:
3129 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3130 case SIOCBONDENSLAVE:
3131 case SIOCBONDRELEASE:
3132 case SIOCBONDSETHWADDR:
6b96018b
AB
3133 case SIOCBONDCHANGEACTIVE:
3134 return bond_ioctl(net, cmd, argp);
3135 case SIOCADDRT:
3136 case SIOCDELRT:
3137 return routing_ioctl(net, sock, cmd, argp);
3138 case SIOCGSTAMP:
3139 return do_siocgstamp(net, sock, cmd, argp);
3140 case SIOCGSTAMPNS:
3141 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3142 case SIOCBONDSLAVEINFOQUERY:
3143 case SIOCBONDINFOQUERY:
a2116ed2 3144 case SIOCSHWTSTAMP:
fd468c74 3145 case SIOCGHWTSTAMP:
590d4693 3146 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3147
3148 case FIOSETOWN:
3149 case SIOCSPGRP:
3150 case FIOGETOWN:
3151 case SIOCGPGRP:
3152 case SIOCBRADDBR:
3153 case SIOCBRDELBR:
3154 case SIOCGIFVLAN:
3155 case SIOCSIFVLAN:
3156 case SIOCADDDLCI:
3157 case SIOCDELDLCI:
c62cce2c 3158 case SIOCGSKNS:
6b96018b
AB
3159 return sock_ioctl(file, cmd, arg);
3160
3161 case SIOCGIFFLAGS:
3162 case SIOCSIFFLAGS:
3163 case SIOCGIFMETRIC:
3164 case SIOCSIFMETRIC:
3165 case SIOCGIFMTU:
3166 case SIOCSIFMTU:
3167 case SIOCGIFMEM:
3168 case SIOCSIFMEM:
3169 case SIOCGIFHWADDR:
3170 case SIOCSIFHWADDR:
3171 case SIOCADDMULTI:
3172 case SIOCDELMULTI:
3173 case SIOCGIFINDEX:
6b96018b
AB
3174 case SIOCGIFADDR:
3175 case SIOCSIFADDR:
3176 case SIOCSIFHWBROADCAST:
6b96018b 3177 case SIOCDIFADDR:
6b96018b
AB
3178 case SIOCGIFBRDADDR:
3179 case SIOCSIFBRDADDR:
3180 case SIOCGIFDSTADDR:
3181 case SIOCSIFDSTADDR:
3182 case SIOCGIFNETMASK:
3183 case SIOCSIFNETMASK:
3184 case SIOCSIFPFLAGS:
3185 case SIOCGIFPFLAGS:
3186 case SIOCGIFTXQLEN:
3187 case SIOCSIFTXQLEN:
3188 case SIOCBRADDIF:
3189 case SIOCBRDELIF:
9177efd3
AB
3190 case SIOCSIFNAME:
3191 case SIOCGMIIPHY:
3192 case SIOCGMIIREG:
3193 case SIOCSMIIREG:
6b96018b 3194 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3195
6b96018b
AB
3196 case SIOCSARP:
3197 case SIOCGARP:
3198 case SIOCDARP:
6b96018b 3199 case SIOCATMARK:
9177efd3
AB
3200 return sock_do_ioctl(net, sock, cmd, arg);
3201 }
3202
6b96018b
AB
3203 return -ENOIOCTLCMD;
3204}
7a229387 3205
95c96174 3206static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3207 unsigned long arg)
89bbfc95
SP
3208{
3209 struct socket *sock = file->private_data;
3210 int ret = -ENOIOCTLCMD;
87de87d5
DM
3211 struct sock *sk;
3212 struct net *net;
3213
3214 sk = sock->sk;
3215 net = sock_net(sk);
89bbfc95
SP
3216
3217 if (sock->ops->compat_ioctl)
3218 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3219
87de87d5
DM
3220 if (ret == -ENOIOCTLCMD &&
3221 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3222 ret = compat_wext_handle_ioctl(net, cmd, arg);
3223
6b96018b
AB
3224 if (ret == -ENOIOCTLCMD)
3225 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3226
89bbfc95
SP
3227 return ret;
3228}
3229#endif
3230
ac5a488e
SS
3231int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3232{
3233 return sock->ops->bind(sock, addr, addrlen);
3234}
c6d409cf 3235EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3236
3237int kernel_listen(struct socket *sock, int backlog)
3238{
3239 return sock->ops->listen(sock, backlog);
3240}
c6d409cf 3241EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3242
3243int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3244{
3245 struct sock *sk = sock->sk;
3246 int err;
3247
3248 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3249 newsock);
3250 if (err < 0)
3251 goto done;
3252
cdfbabfb 3253 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3254 if (err < 0) {
3255 sock_release(*newsock);
fa8705b0 3256 *newsock = NULL;
ac5a488e
SS
3257 goto done;
3258 }
3259
3260 (*newsock)->ops = sock->ops;
1b08534e 3261 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3262
3263done:
3264 return err;
3265}
c6d409cf 3266EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3267
3268int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3269 int flags)
ac5a488e
SS
3270{
3271 return sock->ops->connect(sock, addr, addrlen, flags);
3272}
c6d409cf 3273EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3274
3275int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3276 int *addrlen)
3277{
3278 return sock->ops->getname(sock, addr, addrlen, 0);
3279}
c6d409cf 3280EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3281
3282int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3283 int *addrlen)
3284{
3285 return sock->ops->getname(sock, addr, addrlen, 1);
3286}
c6d409cf 3287EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3288
3289int kernel_getsockopt(struct socket *sock, int level, int optname,
3290 char *optval, int *optlen)
3291{
3292 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3293 char __user *uoptval;
3294 int __user *uoptlen;
ac5a488e
SS
3295 int err;
3296
fb8621bb
NK
3297 uoptval = (char __user __force *) optval;
3298 uoptlen = (int __user __force *) optlen;
3299
ac5a488e
SS
3300 set_fs(KERNEL_DS);
3301 if (level == SOL_SOCKET)
fb8621bb 3302 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3303 else
fb8621bb
NK
3304 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3305 uoptlen);
ac5a488e
SS
3306 set_fs(oldfs);
3307 return err;
3308}
c6d409cf 3309EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3310
3311int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3312 char *optval, unsigned int optlen)
ac5a488e
SS
3313{
3314 mm_segment_t oldfs = get_fs();
fb8621bb 3315 char __user *uoptval;
ac5a488e
SS
3316 int err;
3317
fb8621bb
NK
3318 uoptval = (char __user __force *) optval;
3319
ac5a488e
SS
3320 set_fs(KERNEL_DS);
3321 if (level == SOL_SOCKET)
fb8621bb 3322 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3323 else
fb8621bb 3324 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3325 optlen);
3326 set_fs(oldfs);
3327 return err;
3328}
c6d409cf 3329EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3330
3331int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3332 size_t size, int flags)
3333{
3334 if (sock->ops->sendpage)
3335 return sock->ops->sendpage(sock, page, offset, size, flags);
3336
3337 return sock_no_sendpage(sock, page, offset, size, flags);
3338}
c6d409cf 3339EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3340
3341int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3342{
3343 mm_segment_t oldfs = get_fs();
3344 int err;
3345
3346 set_fs(KERNEL_DS);
3347 err = sock->ops->ioctl(sock, cmd, arg);
3348 set_fs(oldfs);
3349
3350 return err;
3351}
c6d409cf 3352EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3353
91cf45f0
TM
3354int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3355{
3356 return sock->ops->shutdown(sock, how);
3357}
91cf45f0 3358EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075
P
3359
3360/* This routine returns the IP overhead imposed by a socket i.e.
3361 * the length of the underlying IP header, depending on whether
3362 * this is an IPv4 or IPv6 socket and the length from IP options turned
57240d00 3363 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075
P
3364 */
3365u32 kernel_sock_ip_overhead(struct sock *sk)
3366{
3367 struct inet_sock *inet;
3368 struct ip_options_rcu *opt;
3369 u32 overhead = 0;
3370 bool owned_by_user;
3371#if IS_ENABLED(CONFIG_IPV6)
3372 struct ipv6_pinfo *np;
3373 struct ipv6_txoptions *optv6 = NULL;
3374#endif /* IS_ENABLED(CONFIG_IPV6) */
3375
3376 if (!sk)
3377 return overhead;
3378
3379 owned_by_user = sock_owned_by_user(sk);
3380 switch (sk->sk_family) {
3381 case AF_INET:
3382 inet = inet_sk(sk);
3383 overhead += sizeof(struct iphdr);
3384 opt = rcu_dereference_protected(inet->inet_opt,
3385 owned_by_user);
3386 if (opt)
3387 overhead += opt->opt.optlen;
3388 return overhead;
3389#if IS_ENABLED(CONFIG_IPV6)
3390 case AF_INET6:
3391 np = inet6_sk(sk);
3392 overhead += sizeof(struct ipv6hdr);
3393 if (np)
3394 optv6 = rcu_dereference_protected(np->opt,
3395 owned_by_user);
3396 if (optv6)
3397 overhead += (optv6->opt_flen + optv6->opt_nflen);
3398 return overhead;
3399#endif /* IS_ENABLED(CONFIG_IPV6) */
3400 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3401 return overhead;
3402 }
3403}
3404EXPORT_SYMBOL(kernel_sock_ip_overhead);