]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
perf report: Introduce --inline option
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1e911632 290static void init_inodecache(void)
1da177e4
LT
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1e911632 299 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
300}
301
b87221de 302static const struct super_operations sockfs_ops = {
c6d409cf
ED
303 .alloc_inode = sock_alloc_inode,
304 .destroy_inode = sock_destroy_inode,
305 .statfs = simple_statfs,
1da177e4
LT
306};
307
c23fbb6b
ED
308/*
309 * sockfs_dname() is called from d_path().
310 */
311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
312{
313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 314 d_inode(dentry)->i_ino);
c23fbb6b
ED
315}
316
3ba13d17 317static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 318 .d_dname = sockfs_dname,
1da177e4
LT
319};
320
bba0bd31
AG
321static int sockfs_xattr_get(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, void *value, size_t size)
324{
325 if (value) {
326 if (dentry->d_name.len + 1 > size)
327 return -ERANGE;
328 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
329 }
330 return dentry->d_name.len + 1;
331}
332
333#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
334#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
335#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
336
337static const struct xattr_handler sockfs_xattr_handler = {
338 .name = XATTR_NAME_SOCKPROTONAME,
339 .get = sockfs_xattr_get,
340};
341
4a590153
AG
342static int sockfs_security_xattr_set(const struct xattr_handler *handler,
343 struct dentry *dentry, struct inode *inode,
344 const char *suffix, const void *value,
345 size_t size, int flags)
346{
347 /* Handled by LSM. */
348 return -EAGAIN;
349}
350
351static const struct xattr_handler sockfs_security_xattr_handler = {
352 .prefix = XATTR_SECURITY_PREFIX,
353 .set = sockfs_security_xattr_set,
354};
355
bba0bd31
AG
356static const struct xattr_handler *sockfs_xattr_handlers[] = {
357 &sockfs_xattr_handler,
4a590153 358 &sockfs_security_xattr_handler,
bba0bd31
AG
359 NULL
360};
361
c74a1cbb
AV
362static struct dentry *sockfs_mount(struct file_system_type *fs_type,
363 int flags, const char *dev_name, void *data)
364{
bba0bd31
AG
365 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
366 sockfs_xattr_handlers,
367 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
368}
369
370static struct vfsmount *sock_mnt __read_mostly;
371
372static struct file_system_type sock_fs_type = {
373 .name = "sockfs",
374 .mount = sockfs_mount,
375 .kill_sb = kill_anon_super,
376};
377
1da177e4
LT
378/*
379 * Obtains the first available file descriptor and sets it up for use.
380 *
39d8c1b6
DM
381 * These functions create file structures and map them to fd space
382 * of the current process. On success it returns file descriptor
1da177e4
LT
383 * and file struct implicitly stored in sock->file.
384 * Note that another thread may close file descriptor before we return
385 * from this function. We use the fact that now we do not refer
386 * to socket after mapping. If one day we will need it, this
387 * function will increment ref. count on file by 1.
388 *
389 * In any case returned fd MAY BE not valid!
390 * This race condition is unavoidable
391 * with shared fd spaces, we cannot solve it inside kernel,
392 * but we take care of internal coherence yet.
393 */
394
aab174f0 395struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 396{
7cbe66b6 397 struct qstr name = { .name = "" };
2c48b9c4 398 struct path path;
7cbe66b6 399 struct file *file;
1da177e4 400
600e1779
MY
401 if (dname) {
402 name.name = dname;
403 name.len = strlen(name.name);
404 } else if (sock->sk) {
405 name.name = sock->sk->sk_prot_creator->name;
406 name.len = strlen(name.name);
407 }
4b936885 408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
409 if (unlikely(!path.dentry))
410 return ERR_PTR(-ENOMEM);
2c48b9c4 411 path.mnt = mntget(sock_mnt);
39d8c1b6 412
2c48b9c4 413 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 414
2c48b9c4 415 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 416 &socket_file_ops);
b5ffe634 417 if (IS_ERR(file)) {
cc3808f8 418 /* drop dentry, keep inode */
c5ef6035 419 ihold(d_inode(path.dentry));
2c48b9c4 420 path_put(&path);
39b65252 421 return file;
cc3808f8
AV
422 }
423
424 sock->file = file;
77d27200 425 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 426 file->private_data = sock;
28407630 427 return file;
39d8c1b6 428}
56b31d1c 429EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 430
56b31d1c 431static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
432{
433 struct file *newfile;
28407630
AV
434 int fd = get_unused_fd_flags(flags);
435 if (unlikely(fd < 0))
436 return fd;
39d8c1b6 437
aab174f0 438 newfile = sock_alloc_file(sock, flags, NULL);
28407630 439 if (likely(!IS_ERR(newfile))) {
39d8c1b6 440 fd_install(fd, newfile);
28407630
AV
441 return fd;
442 }
7cbe66b6 443
28407630
AV
444 put_unused_fd(fd);
445 return PTR_ERR(newfile);
1da177e4
LT
446}
447
406a3c63 448struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 449{
6cb153ca
BL
450 if (file->f_op == &socket_file_ops)
451 return file->private_data; /* set in sock_map_fd */
452
23bb80d2
ED
453 *err = -ENOTSOCK;
454 return NULL;
6cb153ca 455}
406a3c63 456EXPORT_SYMBOL(sock_from_file);
6cb153ca 457
1da177e4 458/**
c6d409cf 459 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
460 * @fd: file handle
461 * @err: pointer to an error code return
462 *
463 * The file handle passed in is locked and the socket it is bound
464 * to is returned. If an error occurs the err pointer is overwritten
465 * with a negative errno code and NULL is returned. The function checks
466 * for both invalid handles and passing a handle which is not a socket.
467 *
468 * On a success the socket object pointer is returned.
469 */
470
471struct socket *sockfd_lookup(int fd, int *err)
472{
473 struct file *file;
1da177e4
LT
474 struct socket *sock;
475
89bddce5
SH
476 file = fget(fd);
477 if (!file) {
1da177e4
LT
478 *err = -EBADF;
479 return NULL;
480 }
89bddce5 481
6cb153ca
BL
482 sock = sock_from_file(file, err);
483 if (!sock)
1da177e4 484 fput(file);
6cb153ca
BL
485 return sock;
486}
c6d409cf 487EXPORT_SYMBOL(sockfd_lookup);
1da177e4 488
6cb153ca
BL
489static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
490{
00e188ef 491 struct fd f = fdget(fd);
6cb153ca
BL
492 struct socket *sock;
493
3672558c 494 *err = -EBADF;
00e188ef
AV
495 if (f.file) {
496 sock = sock_from_file(f.file, err);
497 if (likely(sock)) {
498 *fput_needed = f.flags;
6cb153ca 499 return sock;
00e188ef
AV
500 }
501 fdput(f);
1da177e4 502 }
6cb153ca 503 return NULL;
1da177e4
LT
504}
505
600e1779
MY
506static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
507 size_t size)
508{
509 ssize_t len;
510 ssize_t used = 0;
511
c5ef6035 512 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
513 if (len < 0)
514 return len;
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 buffer += len;
520 }
521
522 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
523 used += len;
524 if (buffer) {
525 if (size < used)
526 return -ERANGE;
527 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
528 buffer += len;
529 }
530
531 return used;
532}
533
dc647ec8 534static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
535{
536 int err = simple_setattr(dentry, iattr);
537
e1a3a60a 538 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
539 struct socket *sock = SOCKET_I(d_inode(dentry));
540
541 sock->sk->sk_uid = iattr->ia_uid;
542 }
543
544 return err;
545}
546
600e1779 547static const struct inode_operations sockfs_inode_ops = {
600e1779 548 .listxattr = sockfs_listxattr,
86741ec2 549 .setattr = sockfs_setattr,
600e1779
MY
550};
551
1da177e4
LT
552/**
553 * sock_alloc - allocate a socket
89bddce5 554 *
1da177e4
LT
555 * Allocate a new inode and socket object. The two are bound together
556 * and initialised. The socket is then returned. If we are out of inodes
557 * NULL is returned.
558 */
559
f4a00aac 560struct socket *sock_alloc(void)
1da177e4 561{
89bddce5
SH
562 struct inode *inode;
563 struct socket *sock;
1da177e4 564
a209dfc7 565 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
566 if (!inode)
567 return NULL;
568
569 sock = SOCKET_I(inode);
570
29a020d3 571 kmemcheck_annotate_bitfield(sock, type);
85fe4025 572 inode->i_ino = get_next_ino();
89bddce5 573 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
574 inode->i_uid = current_fsuid();
575 inode->i_gid = current_fsgid();
600e1779 576 inode->i_op = &sockfs_inode_ops;
1da177e4 577
19e8d69c 578 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
579 return sock;
580}
f4a00aac 581EXPORT_SYMBOL(sock_alloc);
1da177e4 582
1da177e4
LT
583/**
584 * sock_release - close a socket
585 * @sock: socket to close
586 *
587 * The socket is released from the protocol stack if it has a release
588 * callback, and the inode is then released if the socket is bound to
89bddce5 589 * an inode not a file.
1da177e4 590 */
89bddce5 591
1da177e4
LT
592void sock_release(struct socket *sock)
593{
594 if (sock->ops) {
595 struct module *owner = sock->ops->owner;
596
597 sock->ops->release(sock);
598 sock->ops = NULL;
599 module_put(owner);
600 }
601
eaefd110 602 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 603 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 604
19e8d69c 605 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
606 if (!sock->file) {
607 iput(SOCK_INODE(sock));
608 return;
609 }
89bddce5 610 sock->file = NULL;
1da177e4 611}
c6d409cf 612EXPORT_SYMBOL(sock_release);
1da177e4 613
c14ac945 614void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 615{
140c55d4
ED
616 u8 flags = *tx_flags;
617
c14ac945 618 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
619 flags |= SKBTX_HW_TSTAMP;
620
c14ac945 621 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
622 flags |= SKBTX_SW_TSTAMP;
623
c14ac945 624 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
625 flags |= SKBTX_SCHED_TSTAMP;
626
140c55d4 627 *tx_flags = flags;
20d49473 628}
67cc0d40 629EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 630
d8725c86 631static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 632{
01e97e65 633 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
634 BUG_ON(ret == -EIOCBQUEUED);
635 return ret;
1da177e4
LT
636}
637
/*
 * Send @msg on @sock after consulting the security hook.  Returns the
 * hook's error if it is non-zero, otherwise the protocol's result.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
646
647int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
648 struct kvec *vec, size_t num, size_t size)
649{
6aa24814 650 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 651 return sock_sendmsg(sock, msg);
1da177e4 652}
c6d409cf 653EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 654
92f37fd2
ED
655/*
656 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
657 */
658void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
659 struct sk_buff *skb)
660{
20d49473 661 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 662 struct scm_timestamping tss;
20d49473
PO
663 int empty = 1;
664 struct skb_shared_hwtstamps *shhwtstamps =
665 skb_hwtstamps(skb);
666
667 /* Race occurred between timestamp enabling and packet
668 receiving. Fill in the current time for now. */
2456e855 669 if (need_software_tstamp && skb->tstamp == 0)
20d49473
PO
670 __net_timestamp(skb);
671
672 if (need_software_tstamp) {
673 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
674 struct timeval tv;
675 skb_get_timestamp(skb, &tv);
676 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
677 sizeof(tv), &tv);
678 } else {
f24b9be5
WB
679 struct timespec ts;
680 skb_get_timestampns(skb, &ts);
20d49473 681 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 682 sizeof(ts), &ts);
20d49473
PO
683 }
684 }
685
f24b9be5 686 memset(&tss, 0, sizeof(tss));
c199105d 687 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 688 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 689 empty = 0;
4d276eb6 690 if (shhwtstamps &&
b9f40e21 691 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 692 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 693 empty = 0;
1c885808 694 if (!empty) {
20d49473 695 put_cmsg(msg, SOL_SOCKET,
f24b9be5 696 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808
FY
697
698 if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
699 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
700 skb->len, skb->data);
701 }
92f37fd2 702}
7c81fd8b
ACM
703EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
704
6e3e939f
JB
705void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
706 struct sk_buff *skb)
707{
708 int ack;
709
710 if (!sock_flag(sk, SOCK_WIFI_STATUS))
711 return;
712 if (!skb->wifi_acked_valid)
713 return;
714
715 ack = skb->wifi_acked;
716
717 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
718}
719EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
720
11165f14 721static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
722 struct sk_buff *skb)
3b885787 723{
744d5a3e 724 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 725 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 726 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
727}
728
/* Emit the timestamp control messages first, then the drop count. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 736
1b784140 737static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 738 int flags)
1da177e4 739{
2da62906 740 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
741}
742
/*
 * Receive into @msg after consulting the security hook.  Returns the
 * hook's error if it is non-zero, otherwise the protocol's result.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 750
c1249c0a
ML
751/**
752 * kernel_recvmsg - Receive a message from a socket (kernel space)
753 * @sock: The socket to receive the message from
754 * @msg: Received message
755 * @vec: Input s/g array for message data
756 * @num: Size of input s/g array
757 * @size: Number of bytes to read
758 * @flags: Message flags (MSG_DONTWAIT, etc...)
759 *
760 * On return the msg structure contains the scatter/gather array passed in the
761 * vec argument. The array is modified so that it consists of the unfilled
762 * portion of the original array.
763 *
764 * The returned value is the total number of bytes received, or an error.
765 */
89bddce5
SH
766int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
767 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
768{
769 mm_segment_t oldfs = get_fs();
770 int result;
771
6aa24814 772 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 773 set_fs(KERNEL_DS);
2da62906 774 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
775 set_fs(oldfs);
776 return result;
777}
c6d409cf 778EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 779
ce1d4d3e
CH
780static ssize_t sock_sendpage(struct file *file, struct page *page,
781 int offset, size_t size, loff_t *ppos, int more)
1da177e4 782{
1da177e4
LT
783 struct socket *sock;
784 int flags;
785
ce1d4d3e
CH
786 sock = file->private_data;
787
35f9c09f
ED
788 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
789 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
790 flags |= more;
ce1d4d3e 791
e6949583 792 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 793}
1da177e4 794
9c55e01c 795static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 796 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
797 unsigned int flags)
798{
799 struct socket *sock = file->private_data;
800
997b37da
RDC
801 if (unlikely(!sock->ops->splice_read))
802 return -EINVAL;
803
9c55e01c
JA
804 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
805}
806
8ae5e030 807static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 808{
6d652330
AV
809 struct file *file = iocb->ki_filp;
810 struct socket *sock = file->private_data;
0345f931 811 struct msghdr msg = {.msg_iter = *to,
812 .msg_iocb = iocb};
8ae5e030 813 ssize_t res;
ce1d4d3e 814
8ae5e030
AV
815 if (file->f_flags & O_NONBLOCK)
816 msg.msg_flags = MSG_DONTWAIT;
817
818 if (iocb->ki_pos != 0)
1da177e4 819 return -ESPIPE;
027445c3 820
66ee59af 821 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
822 return 0;
823
2da62906 824 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
825 *to = msg.msg_iter;
826 return res;
1da177e4
LT
827}
828
8ae5e030 829static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 830{
6d652330
AV
831 struct file *file = iocb->ki_filp;
832 struct socket *sock = file->private_data;
0345f931 833 struct msghdr msg = {.msg_iter = *from,
834 .msg_iocb = iocb};
8ae5e030 835 ssize_t res;
1da177e4 836
8ae5e030 837 if (iocb->ki_pos != 0)
ce1d4d3e 838 return -ESPIPE;
027445c3 839
8ae5e030
AV
840 if (file->f_flags & O_NONBLOCK)
841 msg.msg_flags = MSG_DONTWAIT;
842
6d652330
AV
843 if (sock->type == SOCK_SEQPACKET)
844 msg.msg_flags |= MSG_EOR;
845
d8725c86 846 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
847 *from = msg.msg_iter;
848 return res;
1da177e4
LT
849}
850
1da177e4
LT
851/*
852 * Atomic setting of ioctl hooks to avoid race
853 * with module unload.
854 */
855
4a3e2f71 856static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 857static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 858
881d966b 859void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 860{
4a3e2f71 861 mutex_lock(&br_ioctl_mutex);
1da177e4 862 br_ioctl_hook = hook;
4a3e2f71 863 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
864}
865EXPORT_SYMBOL(brioctl_set);
866
4a3e2f71 867static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 868static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 869
881d966b 870void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 871{
4a3e2f71 872 mutex_lock(&vlan_ioctl_mutex);
1da177e4 873 vlan_ioctl_hook = hook;
4a3e2f71 874 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
875}
876EXPORT_SYMBOL(vlan_ioctl_set);
877
4a3e2f71 878static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 879static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 880
89bddce5 881void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 882{
4a3e2f71 883 mutex_lock(&dlci_ioctl_mutex);
1da177e4 884 dlci_ioctl_hook = hook;
4a3e2f71 885 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
886}
887EXPORT_SYMBOL(dlci_ioctl_set);
888
6b96018b
AB
889static long sock_do_ioctl(struct net *net, struct socket *sock,
890 unsigned int cmd, unsigned long arg)
891{
892 int err;
893 void __user *argp = (void __user *)arg;
894
895 err = sock->ops->ioctl(sock, cmd, arg);
896
897 /*
898 * If this ioctl is unknown try to hand it down
899 * to the NIC driver.
900 */
901 if (err == -ENOIOCTLCMD)
902 err = dev_ioctl(net, cmd, argp);
903
904 return err;
905}
906
1da177e4
LT
907/*
908 * With an ioctl, arg may well be a user mode pointer, but we don't know
909 * what to do with it - that's up to the protocol still.
910 */
911
c62cce2c
AV
912static struct ns_common *get_net_ns(struct ns_common *ns)
913{
914 return &get_net(container_of(ns, struct net, ns))->ns;
915}
916
1da177e4
LT
917static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
918{
919 struct socket *sock;
881d966b 920 struct sock *sk;
1da177e4
LT
921 void __user *argp = (void __user *)arg;
922 int pid, err;
881d966b 923 struct net *net;
1da177e4 924
b69aee04 925 sock = file->private_data;
881d966b 926 sk = sock->sk;
3b1e0a65 927 net = sock_net(sk);
1da177e4 928 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 929 err = dev_ioctl(net, cmd, argp);
1da177e4 930 } else
3d23e349 931#ifdef CONFIG_WEXT_CORE
1da177e4 932 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 933 err = dev_ioctl(net, cmd, argp);
1da177e4 934 } else
3d23e349 935#endif
89bddce5 936 switch (cmd) {
1da177e4
LT
937 case FIOSETOWN:
938 case SIOCSPGRP:
939 err = -EFAULT;
940 if (get_user(pid, (int __user *)argp))
941 break;
e0b93edd
JL
942 f_setown(sock->file, pid, 1);
943 err = 0;
1da177e4
LT
944 break;
945 case FIOGETOWN:
946 case SIOCGPGRP:
609d7fa9 947 err = put_user(f_getown(sock->file),
89bddce5 948 (int __user *)argp);
1da177e4
LT
949 break;
950 case SIOCGIFBR:
951 case SIOCSIFBR:
952 case SIOCBRADDBR:
953 case SIOCBRDELBR:
954 err = -ENOPKG;
955 if (!br_ioctl_hook)
956 request_module("bridge");
957
4a3e2f71 958 mutex_lock(&br_ioctl_mutex);
89bddce5 959 if (br_ioctl_hook)
881d966b 960 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 961 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
962 break;
963 case SIOCGIFVLAN:
964 case SIOCSIFVLAN:
965 err = -ENOPKG;
966 if (!vlan_ioctl_hook)
967 request_module("8021q");
968
4a3e2f71 969 mutex_lock(&vlan_ioctl_mutex);
1da177e4 970 if (vlan_ioctl_hook)
881d966b 971 err = vlan_ioctl_hook(net, argp);
4a3e2f71 972 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 973 break;
1da177e4
LT
974 case SIOCADDDLCI:
975 case SIOCDELDLCI:
976 err = -ENOPKG;
977 if (!dlci_ioctl_hook)
978 request_module("dlci");
979
7512cbf6
PE
980 mutex_lock(&dlci_ioctl_mutex);
981 if (dlci_ioctl_hook)
1da177e4 982 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 983 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 984 break;
c62cce2c
AV
985 case SIOCGSKNS:
986 err = -EPERM;
987 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
988 break;
989
990 err = open_related_ns(&net->ns, get_net_ns);
991 break;
1da177e4 992 default:
6b96018b 993 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 994 break;
89bddce5 995 }
1da177e4
LT
996 return err;
997}
998
999int sock_create_lite(int family, int type, int protocol, struct socket **res)
1000{
1001 int err;
1002 struct socket *sock = NULL;
89bddce5 1003
1da177e4
LT
1004 err = security_socket_create(family, type, protocol, 1);
1005 if (err)
1006 goto out;
1007
1008 sock = sock_alloc();
1009 if (!sock) {
1010 err = -ENOMEM;
1011 goto out;
1012 }
1013
1da177e4 1014 sock->type = type;
7420ed23
VY
1015 err = security_socket_post_create(sock, family, type, protocol, 1);
1016 if (err)
1017 goto out_release;
1018
1da177e4
LT
1019out:
1020 *res = sock;
1021 return err;
7420ed23
VY
1022out_release:
1023 sock_release(sock);
1024 sock = NULL;
1025 goto out;
1da177e4 1026}
c6d409cf 1027EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1028
1029/* No kernel lock held - perfect */
89bddce5 1030static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1031{
cbf55001 1032 unsigned int busy_flag = 0;
1da177e4
LT
1033 struct socket *sock;
1034
1035 /*
89bddce5 1036 * We can't return errors to poll, so it's either yes or no.
1da177e4 1037 */
b69aee04 1038 sock = file->private_data;
2d48d67f 1039
cbf55001 1040 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1041 /* this socket can poll_ll so tell the system call */
cbf55001 1042 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1043
1044 /* once, only if requested by syscall */
cbf55001
ET
1045 if (wait && (wait->_key & POLL_BUSY_LOOP))
1046 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1047 }
1048
cbf55001 1049 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1050}
1051
89bddce5 1052static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1053{
b69aee04 1054 struct socket *sock = file->private_data;
1da177e4
LT
1055
1056 return sock->ops->mmap(file, sock, vma);
1057}
1058
/* Release hook for socket files: releases the socket behind @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1064
1065/*
1066 * Update the socket async list
1067 *
1068 * Fasync_list locking strategy.
1069 *
1070 * 1. fasync_list is modified only under process context socket lock
1071 * i.e. under semaphore.
1072 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1073 * or under socket lock
1da177e4
LT
1074 */
1075
1076static int sock_fasync(int fd, struct file *filp, int on)
1077{
989a2979
ED
1078 struct socket *sock = filp->private_data;
1079 struct sock *sk = sock->sk;
eaefd110 1080 struct socket_wq *wq;
1da177e4 1081
989a2979 1082 if (sk == NULL)
1da177e4 1083 return -EINVAL;
1da177e4
LT
1084
1085 lock_sock(sk);
1e1d04e6 1086 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1087 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1088
eaefd110 1089 if (!wq->fasync_list)
989a2979
ED
1090 sock_reset_flag(sk, SOCK_FASYNC);
1091 else
bcdce719 1092 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1093
989a2979 1094 release_sock(sk);
1da177e4
LT
1095 return 0;
1096}
1097
ceb5d58b 1098/* This function may be called only under rcu_lock */
1da177e4 1099
ceb5d58b 1100int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1101{
ceb5d58b 1102 if (!wq || !wq->fasync_list)
1da177e4 1103 return -1;
ceb5d58b 1104
89bddce5 1105 switch (how) {
8d8ad9d7 1106 case SOCK_WAKE_WAITD:
ceb5d58b 1107 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1108 break;
1109 goto call_kill;
8d8ad9d7 1110 case SOCK_WAKE_SPACE:
ceb5d58b 1111 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1112 break;
1113 /* fall through */
8d8ad9d7 1114 case SOCK_WAKE_IO:
89bddce5 1115call_kill:
43815482 1116 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1117 break;
8d8ad9d7 1118 case SOCK_WAKE_URG:
43815482 1119 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1120 }
ceb5d58b 1121
1da177e4
LT
1122 return 0;
1123}
c6d409cf 1124EXPORT_SYMBOL(sock_wake_async);
1da177e4 1125
/*
 * __sock_create - allocate a socket and let the protocol family set it up
 * @net:      network namespace handed to the family's ->create()
 * @family:   protocol family; must lie in [0, NPROTO)
 * @type:     socket type; must lie in [0, SOCK_MAX)
 * @protocol: protocol number, passed through to ->create()
 * @res:      receives the new socket on success; not written on failure
 * @kern:     passed unchanged to the security hooks and to ->create()
 *
 * (PF_INET, SOCK_PACKET) is rewritten to PF_PACKET before anything else
 * looks at @family.
 *
 * Returns 0 on success or a negative errno: -EAFNOSUPPORT for an
 * out-of-range or unregistered family or when a module reference cannot be
 * taken, -EINVAL for an out-of-range type, -ENFILE when sock_alloc() fails,
 * otherwise whatever a security hook or ->create() reported.
 */
721db93a 1126int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1127 struct socket **res, int kern)
1da177e4
LT
1128{
1129 int err;
1130 struct socket *sock;
55737fda 1131 const struct net_proto_family *pf;
1da177e4
LT
1132
1133 /*
89bddce5 1134 * Check protocol is in range
1da177e4
LT
1135 */
1136 if (family < 0 || family >= NPROTO)
1137 return -EAFNOSUPPORT;
1138 if (type < 0 || type >= SOCK_MAX)
1139 return -EINVAL;
1140
1141 /* Compatibility.
1142
1143 This uglymoron is moved from INET layer to here to avoid
1144 deadlock in module load.
1145 */
1146 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1147 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1148 current->comm);
1da177e4
LT
1149 family = PF_PACKET;
1150 }
1151
1152 err = security_socket_create(family, type, protocol, kern);
1153 if (err)
1154 return err;
89bddce5 1155
55737fda
SH
1156 /*
1157 * Allocate the socket and allow the family to set things up. if
1158 * the protocol is 0, the family is instructed to select an appropriate
1159 * default.
1160 */
1161 sock = sock_alloc();
1162 if (!sock) {
e87cc472 1163 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1164 return -ENFILE; /* Not exactly a match, but its the
1165 closest posix thing */
1166 }
1167
1168 sock->type = type;
1169
95a5afca 1170#ifdef CONFIG_MODULES
89bddce5
SH
1171 /* Attempt to load a protocol module if the find failed.
1172 *
1173 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1174 * requested real, full-featured networking support upon configuration.
1175 * Otherwise module support will break!
1176 */
190683a9 1177 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1178 request_module("net-pf-%d", family);
1da177e4
LT
1179#endif
1180
55737fda
SH
1181 rcu_read_lock();
1182 pf = rcu_dereference(net_families[family]);
1183 err = -EAFNOSUPPORT;
1184 if (!pf)
1185 goto out_release;
1da177e4
LT
1186
1187 /*
1188 * We will call the ->create function, that possibly is in a loadable
1189 * module, so we have to bump that loadable module refcnt first.
1190 */
55737fda 1191 if (!try_module_get(pf->owner))
1da177e4
LT
1192 goto out_release;
1193
55737fda
SH
1194 /* Now protected by module ref count */
1195 rcu_read_unlock();
1196
3f378b68 1197 err = pf->create(net, sock, protocol, kern);
55737fda 1198 if (err < 0)
1da177e4 1199 goto out_module_put;
a79af59e 1200
1da177e4
LT
1201 /*
1202 * Now to bump the refcnt of the [loadable] module that owns this
1203 * socket at sock_release time we decrement its refcnt.
1204 */
55737fda
SH
1205 if (!try_module_get(sock->ops->owner))
1206 goto out_module_busy;
1207
1da177e4
LT
1208 /*
1209 * Now that we're done with the ->create function, the [loadable]
1210 * module can have its refcnt decremented
1211 */
55737fda 1212 module_put(pf->owner);
7420ed23
VY
1213 err = security_socket_post_create(sock, family, type, protocol, kern);
1214 if (err)
3b185525 1215 goto out_sock_release;
55737fda 1216 *res = sock;
1da177e4 1217
55737fda
SH
1218 return 0;
1219
1220out_module_busy:
1221 err = -EAFNOSUPPORT;
1da177e4 1222out_module_put:
55737fda
SH
/* NOTE(review): ops is cleared before sock_release() on this path, presumably so the release code does not touch the protocol ops (no reference on ops->owner is held here) - confirm against sock_release(). */
1223 sock->ops = NULL;
1224 module_put(pf->owner);
1225out_sock_release:
1da177e4 1226 sock_release(sock);
55737fda
SH
1227 return err;
1228
/* Reached only from inside the rcu_read_lock() section above. */
1229out_release:
1230 rcu_read_unlock();
1231 goto out_sock_release;
1da177e4 1232}
721db93a 1233EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1234
1235int sock_create(int family, int type, int protocol, struct socket **res)
1236{
1b8d7ae4 1237 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1238}
c6d409cf 1239EXPORT_SYMBOL(sock_create);
1da177e4 1240
/* Create a socket in @net via __sock_create() with kern == 1. */
int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	int err;

	err = __sock_create(net, family, type, protocol, res, 1);
	return err;
}
c6d409cf 1245EXPORT_SYMBOL(sock_create_kern);
1da177e4 1246
3e0fa65f 1247SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1248{
1249 int retval;
1250 struct socket *sock;
a677a039
UD
1251 int flags;
1252
e38b36f3
UD
1253 /* Check the SOCK_* constants for consistency. */
1254 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1255 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1256 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1257 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1258
a677a039 1259 flags = type & ~SOCK_TYPE_MASK;
77d27200 1260 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1261 return -EINVAL;
1262 type &= SOCK_TYPE_MASK;
1da177e4 1263
aaca0bdc
UD
1264 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1265 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1266
1da177e4
LT
1267 retval = sock_create(family, type, protocol, &sock);
1268 if (retval < 0)
1269 goto out;
1270
77d27200 1271 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1272 if (retval < 0)
1273 goto out_release;
1274
1275out:
1276 /* It may be already another descriptor 8) Not kernel problem. */
1277 return retval;
1278
1279out_release:
1280 sock_release(sock);
1281 return retval;
1282}
1283
1284/*
1285 * Create a pair of connected sockets.
1286 */
1287
3e0fa65f
HC
/*
 * socketpair(2): create two sockets of the requested family/type/protocol,
 * join them through the first socket's ->socketpair() operation, wrap each in
 * a file and store the two new descriptors in usockvec[0] and usockvec[1].
 *
 * Bits of @type outside SOCK_TYPE_MASK are flags; only SOCK_CLOEXEC and
 * SOCK_NONBLOCK are accepted, anything else gives -EINVAL.
 *
 * Returns 0 on success or a negative errno. The descriptors are installed
 * with fd_install() only after both put_user() calls have succeeded. Every
 * failure path gives back the descriptors reserved so far and drops each
 * socket either directly (sock_release()) or through its file (fput()).
 */
1288SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1289 int __user *, usockvec)
1da177e4
LT
1290{
1291 struct socket *sock1, *sock2;
1292 int fd1, fd2, err;
db349509 1293 struct file *newfile1, *newfile2;
a677a039
UD
1294 int flags;
1295
1296 flags = type & ~SOCK_TYPE_MASK;
77d27200 1297 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1298 return -EINVAL;
1299 type &= SOCK_TYPE_MASK;
1da177e4 1300
aaca0bdc
UD
1301 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1302 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1303
1da177e4
LT
1304 /*
1305 * Obtain the first socket and check if the underlying protocol
1306 * supports the socketpair call.
1307 */
1308
1309 err = sock_create(family, type, protocol, &sock1);
1310 if (err < 0)
1311 goto out;
1312
1313 err = sock_create(family, type, protocol, &sock2);
1314 if (err < 0)
1315 goto out_release_1;
1316
1317 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1318 if (err < 0)
1da177e4
LT
1319 goto out_release_both;
1320
28407630 1321 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1322 if (unlikely(fd1 < 0)) {
1323 err = fd1;
db349509 1324 goto out_release_both;
bf3c23d1 1325 }
d73aa286 1326
28407630 1327 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1328 if (unlikely(fd2 < 0)) {
1329 err = fd2;
d73aa286 1330 goto out_put_unused_1;
28407630
AV
1331 }
1332
aab174f0 1333 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1334 if (IS_ERR(newfile1)) {
28407630 1335 err = PTR_ERR(newfile1);
d73aa286 1336 goto out_put_unused_both;
28407630
AV
1337 }
1338
aab174f0 1339 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1340 if (IS_ERR(newfile2)) {
1341 err = PTR_ERR(newfile2);
d73aa286 1342 goto out_fput_1;
db349509
AV
1343 }
1344
d73aa286
YD
1345 err = put_user(fd1, &usockvec[0]);
1346 if (err)
1347 goto out_fput_both;
1348
1349 err = put_user(fd2, &usockvec[1]);
1350 if (err)
1351 goto out_fput_both;
1352
157cf649 1353 audit_fd_pair(fd1, fd2);
d73aa286 1354
db349509
AV
1355 fd_install(fd1, newfile1);
1356 fd_install(fd2, newfile2);
1da177e4
LT
1357 /* fd1 and fd2 may be already another descriptors.
1358 * Not kernel problem.
1359 */
1360
d73aa286 1361 return 0;
1da177e4 1362
d73aa286
YD
/* Both files exist: each socket is dropped via fput() on its file, not sock_release(). */
1363out_fput_both:
1364 fput(newfile2);
1365 fput(newfile1);
1366 put_unused_fd(fd2);
1367 put_unused_fd(fd1);
1368 goto out;
1369
/* Only newfile1 exists: sock1 goes away with its file, sock2 is released directly. */
1370out_fput_1:
1371 fput(newfile1);
1372 put_unused_fd(fd2);
1373 put_unused_fd(fd1);
1374 sock_release(sock2);
1375 goto out;
1da177e4 1376
d73aa286
YD
1377out_put_unused_both:
1378 put_unused_fd(fd2);
1379out_put_unused_1:
1380 put_unused_fd(fd1);
1da177e4 1381out_release_both:
89bddce5 1382 sock_release(sock2);
1da177e4 1383out_release_1:
89bddce5 1384 sock_release(sock1);
1da177e4
LT
1385out:
1386 return err;
1387}
1388
1da177e4
LT
1389/*
1390 * Bind a name to a socket. Nothing much to do here since it's
1391 * the protocol's responsibility to handle the local address.
1392 *
1393 * We move the socket address to kernel space before we call
1394 * the protocol layer (having also checked the address is ok).
1395 */
1396
20f37034 1397SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1398{
1399 struct socket *sock;
230b1839 1400 struct sockaddr_storage address;
6cb153ca 1401 int err, fput_needed;
1da177e4 1402
89bddce5 1403 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1404 if (sock) {
43db362d 1405 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1406 if (err >= 0) {
1407 err = security_socket_bind(sock,
230b1839 1408 (struct sockaddr *)&address,
89bddce5 1409 addrlen);
6cb153ca
BL
1410 if (!err)
1411 err = sock->ops->bind(sock,
89bddce5 1412 (struct sockaddr *)
230b1839 1413 &address, addrlen);
1da177e4 1414 }
6cb153ca 1415 fput_light(sock->file, fput_needed);
89bddce5 1416 }
1da177e4
LT
1417 return err;
1418}
1419
1da177e4
LT
1420/*
1421 * Perform a listen. Basically, we allow the protocol to do anything
1422 * necessary for a listen, and if that works, we mark the socket as
1423 * ready for listening.
1424 */
1425
3e0fa65f 1426SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1427{
1428 struct socket *sock;
6cb153ca 1429 int err, fput_needed;
b8e1f9b5 1430 int somaxconn;
89bddce5
SH
1431
1432 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1433 if (sock) {
8efa6e93 1434 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1435 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1436 backlog = somaxconn;
1da177e4
LT
1437
1438 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1439 if (!err)
1440 err = sock->ops->listen(sock, backlog);
1da177e4 1441
6cb153ca 1442 fput_light(sock->file, fput_needed);
1da177e4
LT
1443 }
1444 return err;
1445}
1446
1da177e4
LT
1447/*
1448 * For accept, we attempt to create a new socket, set up the link
1449 * with the client, wake up the client, then return the new
1450 * connected fd. We collect the address of the connector in kernel
1451 * space and move it to user at the very end. This is unclean because
1452 * we open the socket then return an error.
1453 *
1454 * 1003.1g adds the ability to recvmsg() to query connection pending
1455 * status to recvmsg. We need to add that support in a way that's
1456 * clean when we restructure accept also.
1457 */
1458
20f37034
HC
/*
 * accept4(2): accept a connection on the listening socket @fd.
 *
 * A new socket is allocated and given the listener's type and ops, then a
 * descriptor and a file are reserved for it before the protocol's ->accept()
 * is called with the listener's file flags. When @upeer_sockaddr is non-NULL
 * the peer address is obtained with ->getname() and copied out through
 * move_addr_to_user().
 *
 * @flags may contain only SOCK_CLOEXEC and SOCK_NONBLOCK; anything else
 * gives -EINVAL.
 *
 * Returns the new descriptor on success or a negative errno, notably
 * -ENFILE when sock_alloc() fails and -ECONNABORTED when ->getname() fails.
 */
1459SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1460 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1461{
1462 struct socket *sock, *newsock;
39d8c1b6 1463 struct file *newfile;
6cb153ca 1464 int err, len, newfd, fput_needed;
230b1839 1465 struct sockaddr_storage address;
1da177e4 1466
77d27200 1467 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1468 return -EINVAL;
1469
1470 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1471 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1472
6cb153ca 1473 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1474 if (!sock)
1475 goto out;
1476
1477 err = -ENFILE;
c6d409cf
ED
1478 newsock = sock_alloc();
1479 if (!newsock)
1da177e4
LT
1480 goto out_put;
1481
1482 newsock->type = sock->type;
1483 newsock->ops = sock->ops;
1484
1da177e4
LT
1485 /*
1486 * We don't need try_module_get here, as the listening socket (sock)
1487 * has the protocol module (sock->ops->owner) held.
1488 */
1489 __module_get(newsock->ops->owner);
1490
28407630 1491 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1492 if (unlikely(newfd < 0)) {
1493 err = newfd;
9a1875e6
DM
1494 sock_release(newsock);
1495 goto out_put;
39d8c1b6 1496 }
aab174f0 1497 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1498 if (IS_ERR(newfile)) {
28407630
AV
1499 err = PTR_ERR(newfile);
1500 put_unused_fd(newfd);
1501 sock_release(newsock);
1502 goto out_put;
1503 }
39d8c1b6 1504
a79af59e
FF
1505 err = security_socket_accept(sock, newsock);
1506 if (err)
39d8c1b6 1507 goto out_fd;
a79af59e 1508
cdfbabfb 1509 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1510 if (err < 0)
39d8c1b6 1511 goto out_fd;
1da177e4
LT
1512
1513 if (upeer_sockaddr) {
230b1839 1514 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1515 &len, 2) < 0) {
1da177e4 1516 err = -ECONNABORTED;
39d8c1b6 1517 goto out_fd;
1da177e4 1518 }
43db362d 1519 err = move_addr_to_user(&address,
230b1839 1520 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1521 if (err < 0)
39d8c1b6 1522 goto out_fd;
1da177e4
LT
1523 }
1524
1525 /* File flags are not inherited via accept() unlike another OSes. */
1526
39d8c1b6
DM
1527 fd_install(newfd, newfile);
1528 err = newfd;
1da177e4 1529
1da177e4 1530out_put:
6cb153ca 1531 fput_light(sock->file, fput_needed);
1da177e4
LT
1532out:
1533 return err;
/* newfile exists on this path, so it is dropped with fput() instead of sock_release(newsock). */
39d8c1b6 1534out_fd:
9606a216 1535 fput(newfile);
39d8c1b6 1536 put_unused_fd(newfd);
1da177e4
LT
1537 goto out_put;
1538}
1539
20f37034
HC
1540SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1541 int __user *, upeer_addrlen)
aaca0bdc 1542{
de11defe 1543 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1544}
1545
1da177e4
LT
1546/*
1547 * Attempt to connect to a socket with the server address. The address
1548 * is in user space so we verify it is OK and move it to kernel space.
1549 *
1550 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1551 * break bindings
1552 *
1553 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1554 * other SEQPACKET protocols that take time to connect() as it doesn't
1555 * include the -EINPROGRESS status for such sockets.
1556 */
1557
20f37034
HC
1558SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1559 int, addrlen)
1da177e4
LT
1560{
1561 struct socket *sock;
230b1839 1562 struct sockaddr_storage address;
6cb153ca 1563 int err, fput_needed;
1da177e4 1564
6cb153ca 1565 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1566 if (!sock)
1567 goto out;
43db362d 1568 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1569 if (err < 0)
1570 goto out_put;
1571
89bddce5 1572 err =
230b1839 1573 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1574 if (err)
1575 goto out_put;
1576
230b1839 1577 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1578 sock->file->f_flags);
1579out_put:
6cb153ca 1580 fput_light(sock->file, fput_needed);
1da177e4
LT
1581out:
1582 return err;
1583}
1584
1585/*
1586 * Get the local address ('name') of a socket object. Move the obtained
1587 * name to user space.
1588 */
1589
20f37034
HC
1590SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1591 int __user *, usockaddr_len)
1da177e4
LT
1592{
1593 struct socket *sock;
230b1839 1594 struct sockaddr_storage address;
6cb153ca 1595 int len, err, fput_needed;
89bddce5 1596
6cb153ca 1597 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1598 if (!sock)
1599 goto out;
1600
1601 err = security_socket_getsockname(sock);
1602 if (err)
1603 goto out_put;
1604
230b1839 1605 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1606 if (err)
1607 goto out_put;
43db362d 1608 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1609
1610out_put:
6cb153ca 1611 fput_light(sock->file, fput_needed);
1da177e4
LT
1612out:
1613 return err;
1614}
1615
1616/*
1617 * Get the remote address ('name') of a socket object. Move the obtained
1618 * name to user space.
1619 */
1620
20f37034
HC
1621SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1622 int __user *, usockaddr_len)
1da177e4
LT
1623{
1624 struct socket *sock;
230b1839 1625 struct sockaddr_storage address;
6cb153ca 1626 int len, err, fput_needed;
1da177e4 1627
89bddce5
SH
1628 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1629 if (sock != NULL) {
1da177e4
LT
1630 err = security_socket_getpeername(sock);
1631 if (err) {
6cb153ca 1632 fput_light(sock->file, fput_needed);
1da177e4
LT
1633 return err;
1634 }
1635
89bddce5 1636 err =
230b1839 1637 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1638 1);
1da177e4 1639 if (!err)
43db362d 1640 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1641 usockaddr_len);
6cb153ca 1642 fput_light(sock->file, fput_needed);
1da177e4
LT
1643 }
1644 return err;
1645}
1646
1647/*
1648 * Send a datagram to a given address. We move the address into kernel
1649 * space and check the user space data area is readable before invoking
1650 * the protocol.
1651 */
1652
3e0fa65f 1653SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1654 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1655 int, addr_len)
1da177e4
LT
1656{
1657 struct socket *sock;
230b1839 1658 struct sockaddr_storage address;
1da177e4
LT
1659 int err;
1660 struct msghdr msg;
1661 struct iovec iov;
6cb153ca 1662 int fput_needed;
6cb153ca 1663
602bd0e9
AV
1664 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1665 if (unlikely(err))
1666 return err;
de0fa95c
PE
1667 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1668 if (!sock)
4387ff75 1669 goto out;
6cb153ca 1670
89bddce5 1671 msg.msg_name = NULL;
89bddce5
SH
1672 msg.msg_control = NULL;
1673 msg.msg_controllen = 0;
1674 msg.msg_namelen = 0;
6cb153ca 1675 if (addr) {
43db362d 1676 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1677 if (err < 0)
1678 goto out_put;
230b1839 1679 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1680 msg.msg_namelen = addr_len;
1da177e4
LT
1681 }
1682 if (sock->file->f_flags & O_NONBLOCK)
1683 flags |= MSG_DONTWAIT;
1684 msg.msg_flags = flags;
d8725c86 1685 err = sock_sendmsg(sock, &msg);
1da177e4 1686
89bddce5 1687out_put:
de0fa95c 1688 fput_light(sock->file, fput_needed);
4387ff75 1689out:
1da177e4
LT
1690 return err;
1691}
1692
1693/*
89bddce5 1694 * Send a datagram down a socket.
1da177e4
LT
1695 */
1696
3e0fa65f 1697SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1698 unsigned int, flags)
1da177e4
LT
1699{
1700 return sys_sendto(fd, buff, len, flags, NULL, 0);
1701}
1702
1703/*
89bddce5 1704 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1705 * sender. We verify the buffers are writable and if needed move the
1706 * sender address from kernel to user space.
1707 */
1708
3e0fa65f 1709SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1710 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1711 int __user *, addr_len)
1da177e4
LT
1712{
1713 struct socket *sock;
1714 struct iovec iov;
1715 struct msghdr msg;
230b1839 1716 struct sockaddr_storage address;
89bddce5 1717 int err, err2;
6cb153ca
BL
1718 int fput_needed;
1719
602bd0e9
AV
1720 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1721 if (unlikely(err))
1722 return err;
de0fa95c 1723 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1724 if (!sock)
de0fa95c 1725 goto out;
1da177e4 1726
89bddce5
SH
1727 msg.msg_control = NULL;
1728 msg.msg_controllen = 0;
f3d33426
HFS
1729 /* Save some cycles and don't copy the address if not needed */
1730 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1731 /* We assume all kernel code knows the size of sockaddr_storage */
1732 msg.msg_namelen = 0;
130ed5d1 1733 msg.msg_iocb = NULL;
9f138fa6 1734 msg.msg_flags = 0;
1da177e4
LT
1735 if (sock->file->f_flags & O_NONBLOCK)
1736 flags |= MSG_DONTWAIT;
2da62906 1737 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1738
89bddce5 1739 if (err >= 0 && addr != NULL) {
43db362d 1740 err2 = move_addr_to_user(&address,
230b1839 1741 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1742 if (err2 < 0)
1743 err = err2;
1da177e4 1744 }
de0fa95c
PE
1745
1746 fput_light(sock->file, fput_needed);
4387ff75 1747out:
1da177e4
LT
1748 return err;
1749}
1750
1751/*
89bddce5 1752 * Receive a datagram from a socket.
1da177e4
LT
1753 */
1754
b7c0ddf5
JG
1755SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1756 unsigned int, flags)
1da177e4
LT
1757{
1758 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1759}
1760
1761/*
1762 * Set a socket option. Because we don't know the option lengths we have
1763 * to pass the user mode parameter for the protocols to sort out.
1764 */
1765
20f37034
HC
1766SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1767 char __user *, optval, int, optlen)
1da177e4 1768{
6cb153ca 1769 int err, fput_needed;
1da177e4
LT
1770 struct socket *sock;
1771
1772 if (optlen < 0)
1773 return -EINVAL;
89bddce5
SH
1774
1775 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1776 if (sock != NULL) {
1777 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1778 if (err)
1779 goto out_put;
1da177e4
LT
1780
1781 if (level == SOL_SOCKET)
89bddce5
SH
1782 err =
1783 sock_setsockopt(sock, level, optname, optval,
1784 optlen);
1da177e4 1785 else
89bddce5
SH
1786 err =
1787 sock->ops->setsockopt(sock, level, optname, optval,
1788 optlen);
6cb153ca
BL
1789out_put:
1790 fput_light(sock->file, fput_needed);
1da177e4
LT
1791 }
1792 return err;
1793}
1794
1795/*
1796 * Get a socket option. Because we don't know the option lengths we have
1797 * to pass a user mode parameter for the protocols to sort out.
1798 */
1799
20f37034
HC
1800SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1801 char __user *, optval, int __user *, optlen)
1da177e4 1802{
6cb153ca 1803 int err, fput_needed;
1da177e4
LT
1804 struct socket *sock;
1805
89bddce5
SH
1806 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1807 if (sock != NULL) {
6cb153ca
BL
1808 err = security_socket_getsockopt(sock, level, optname);
1809 if (err)
1810 goto out_put;
1da177e4
LT
1811
1812 if (level == SOL_SOCKET)
89bddce5
SH
1813 err =
1814 sock_getsockopt(sock, level, optname, optval,
1815 optlen);
1da177e4 1816 else
89bddce5
SH
1817 err =
1818 sock->ops->getsockopt(sock, level, optname, optval,
1819 optlen);
6cb153ca
BL
1820out_put:
1821 fput_light(sock->file, fput_needed);
1da177e4
LT
1822 }
1823 return err;
1824}
1825
1da177e4
LT
1826/*
1827 * Shutdown a socket.
1828 */
1829
754fe8d2 1830SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1831{
6cb153ca 1832 int err, fput_needed;
1da177e4
LT
1833 struct socket *sock;
1834
89bddce5
SH
1835 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1836 if (sock != NULL) {
1da177e4 1837 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1838 if (!err)
1839 err = sock->ops->shutdown(sock, how);
1840 fput_light(sock->file, fput_needed);
1da177e4
LT
1841 }
1842 return err;
1843}
1844
89bddce5 1845/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1846 * fields which are the same type (int / unsigned) on our platforms.
1847 */
/*
 * NOTE: COMPAT_MSG() is not self-contained. It reads a variable named
 * "flags" and a pointer named <msg>_compat from the scope in which it is
 * expanded, and yields the address of @member in whichever header layout
 * MSG_CMSG_COMPAT selects.
 */
1848#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1849#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1850#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1851
c71d8ebe
TH
/*
 * Destination address of the last successful send. ___sys_sendmsg() compares
 * the current destination against it and, on a match, sends through
 * sock_sendmsg_nosec() instead of sock_sendmsg().
 */
1852struct used_address {
/* Copy of the remembered destination address. */
1853 struct sockaddr_storage name;
/* Number of valid bytes in name; compared against msg_namelen. */
1854 unsigned int name_len;
1855};
1856
da184284
AV
/*
 * Copy a user-space msghdr into @kmsg and import its iovec array.
 *
 * @kmsg:      kernel msghdr to fill. When @save_addr is NULL the caller must
 *             already have pointed kmsg->msg_name at a kernel buffer, since
 *             it is used as the destination of move_addr_to_kernel().
 * @umsg:      user-space header to read.
 * @save_addr: if non-NULL, receives the raw user msg_name pointer and the
 *             address itself is not copied in; if NULL, the address is
 *             copied into kmsg->msg_name.
 * @iov:       iovec array pointer handed to import_iovec().
 *
 * msg_namelen is forced to 0 when the user msg_name is NULL, rejected with
 * -EINVAL when negative, and clamped to sizeof(struct sockaddr_storage).
 * A fault while reading the header gives -EFAULT; more than UIO_MAXIOV
 * segments gives -EMSGSIZE. Otherwise the return value is that of
 * import_iovec(), called with READ when @save_addr is set and WRITE when
 * it is not.
 */
1857static int copy_msghdr_from_user(struct msghdr *kmsg,
1858 struct user_msghdr __user *umsg,
1859 struct sockaddr __user **save_addr,
1860 struct iovec **iov)
1661bf36 1861{
08adb7da
AV
1862 struct sockaddr __user *uaddr;
1863 struct iovec __user *uiov;
c0371da6 1864 size_t nr_segs;
08adb7da
AV
1865 ssize_t err;
1866
1867 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1868 __get_user(uaddr, &umsg->msg_name) ||
1869 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1870 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1871 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1872 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1873 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1874 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1875 return -EFAULT;
dbb490b9 1876
08adb7da 1877 if (!uaddr)
6a2a2b3a
AS
1878 kmsg->msg_namelen = 0;
1879
dbb490b9
ML
1880 if (kmsg->msg_namelen < 0)
1881 return -EINVAL;
1882
1661bf36 1883 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1884 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1885
1886 if (save_addr)
1887 *save_addr = uaddr;
1888
1889 if (uaddr && kmsg->msg_namelen) {
1890 if (!save_addr) {
1891 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1892 kmsg->msg_name);
1893 if (err < 0)
1894 return err;
1895 }
1896 } else {
1897 kmsg->msg_name = NULL;
1898 kmsg->msg_namelen = 0;
1899 }
1900
c0371da6 1901 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1902 return -EMSGSIZE;
1903
0345f931 1904 kmsg->msg_iocb = NULL;
1905
da184284
AV
1906 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1907 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1908}
1909
/*
 * Common body of the sendmsg path: import the user msghdr, bring any control
 * data into kernel memory, then send.
 *
 * @sock:                 socket to send on.
 * @msg:                  user-space header (read as a compat_msghdr when
 *                        MSG_CMSG_COMPAT is set in @flags).
 * @msg_sys:              kernel msghdr that is filled in and sent.
 * @flags:                send flags.
 * @used_address:         optional cache of the last destination a send
 *                        succeeded to. When the current destination matches
 *                        it, sock_sendmsg_nosec() is used in place of
 *                        sock_sendmsg(); after a successful send it is
 *                        updated.
 * @allowed_msghdr_flags: mask of user-supplied msg_flags bits that are or-ed
 *                        into @flags.
 *
 * Control data up to sizeof(ctl) bytes is staged in the on-stack ctl buffer;
 * larger non-compat control data is staged in a sock_kmalloc() buffer that is
 * freed again before returning.
 *
 * Returns the result of the send, or a negative errno: the header import
 * error, -ENOBUFS when msg_controllen exceeds INT_MAX or the control buffer
 * cannot be allocated, -EFAULT when copying the control data fails.
 */
666547ff 1910static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1911 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1912 struct used_address *used_address,
1913 unsigned int allowed_msghdr_flags)
1da177e4 1914{
89bddce5
SH
1915 struct compat_msghdr __user *msg_compat =
1916 (struct compat_msghdr __user *)msg;
230b1839 1917 struct sockaddr_storage address;
1da177e4 1918 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1919 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1920 __aligned(sizeof(__kernel_size_t));
89bddce5 1921 /* 20 is size of ipv6_pktinfo */
1da177e4 1922 unsigned char *ctl_buf = ctl;
d8725c86 1923 int ctl_len;
08adb7da 1924 ssize_t err;
89bddce5 1925
08adb7da 1926 msg_sys->msg_name = &address;
1da177e4 1927
08449320 1928 if (MSG_CMSG_COMPAT & flags)
08adb7da 1929 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1930 else
08adb7da 1931 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1932 if (err < 0)
da184284 1933 return err;
1da177e4
LT
1934
1935 err = -ENOBUFS;
1936
228e548e 1937 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1938 goto out_freeiov;
28a94d8f 1939 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1940 ctl_len = msg_sys->msg_controllen;
1da177e4 1941 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1942 err =
228e548e 1943 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1944 sizeof(ctl));
1da177e4
LT
1945 if (err)
1946 goto out_freeiov;
228e548e
AB
1947 ctl_buf = msg_sys->msg_control;
1948 ctl_len = msg_sys->msg_controllen;
1da177e4 1949 } else if (ctl_len) {
ac4340fc
DM
1950 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
1951 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 1952 if (ctl_len > sizeof(ctl)) {
1da177e4 1953 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1954 if (ctl_buf == NULL)
1da177e4
LT
1955 goto out_freeiov;
1956 }
1957 err = -EFAULT;
1958 /*
228e548e 1959 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1960 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1961 * checking falls down on this.
1962 */
fb8621bb 1963 if (copy_from_user(ctl_buf,
228e548e 1964 (void __user __force *)msg_sys->msg_control,
89bddce5 1965 ctl_len))
1da177e4 1966 goto out_freectl;
228e548e 1967 msg_sys->msg_control = ctl_buf;
1da177e4 1968 }
228e548e 1969 msg_sys->msg_flags = flags;
1da177e4
LT
1970
1971 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1972 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1973 /*
1974 * If this is sendmmsg() and current destination address is same as
1975 * previously succeeded address, omit asking LSM's decision.
1976 * used_address->name_len is initialized to UINT_MAX so that the first
1977 * destination address never matches.
1978 */
bc909d9d
MD
1979 if (used_address && msg_sys->msg_name &&
1980 used_address->name_len == msg_sys->msg_namelen &&
1981 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1982 used_address->name_len)) {
d8725c86 1983 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1984 goto out_freectl;
1985 }
d8725c86 1986 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1987 /*
1988 * If this is sendmmsg() and sending to current destination address was
1989 * successful, remember it.
1990 */
1991 if (used_address && err >= 0) {
1992 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1993 if (msg_sys->msg_name)
1994 memcpy(&used_address->name, msg_sys->msg_name,
1995 used_address->name_len);
c71d8ebe 1996 }
1da177e4
LT
1997
1998out_freectl:
89bddce5 1999 if (ctl_buf != ctl)
1da177e4
LT
2000 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2001out_freeiov:
da184284 2002 kfree(iov);
228e548e
AB
2003 return err;
2004}
2005
2006/*
2007 * BSD sendmsg interface
2008 */
2009
666547ff 2010long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2011{
2012 int fput_needed, err;
2013 struct msghdr msg_sys;
1be374a0
AL
2014 struct socket *sock;
2015
1be374a0 2016 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2017 if (!sock)
2018 goto out;
2019
28a94d8f 2020 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2021
6cb153ca 2022 fput_light(sock->file, fput_needed);
89bddce5 2023out:
1da177e4
LT
2024 return err;
2025}
2026
666547ff 2027SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2028{
2029 if (flags & MSG_CMSG_COMPAT)
2030 return -EINVAL;
2031 return __sys_sendmsg(fd, msg, flags);
2032}
2033
228e548e
AB
2034/*
2035 * Linux sendmmsg interface
2036 */
2037
2038int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2039 unsigned int flags)
2040{
2041 int fput_needed, err, datagrams;
2042 struct socket *sock;
2043 struct mmsghdr __user *entry;
2044 struct compat_mmsghdr __user *compat_entry;
2045 struct msghdr msg_sys;
c71d8ebe 2046 struct used_address used_address;
f092276d 2047 unsigned int oflags = flags;
228e548e 2048
98382f41
AB
2049 if (vlen > UIO_MAXIOV)
2050 vlen = UIO_MAXIOV;
228e548e
AB
2051
2052 datagrams = 0;
2053
2054 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2055 if (!sock)
2056 return err;
2057
c71d8ebe 2058 used_address.name_len = UINT_MAX;
228e548e
AB
2059 entry = mmsg;
2060 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2061 err = 0;
f092276d 2062 flags |= MSG_BATCH;
228e548e
AB
2063
2064 while (datagrams < vlen) {
f092276d
TH
2065 if (datagrams == vlen - 1)
2066 flags = oflags;
2067
228e548e 2068 if (MSG_CMSG_COMPAT & flags) {
666547ff 2069 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2070 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2071 if (err < 0)
2072 break;
2073 err = __put_user(err, &compat_entry->msg_len);
2074 ++compat_entry;
2075 } else {
a7526eb5 2076 err = ___sys_sendmsg(sock,
666547ff 2077 (struct user_msghdr __user *)entry,
28a94d8f 2078 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2079 if (err < 0)
2080 break;
2081 err = put_user(err, &entry->msg_len);
2082 ++entry;
2083 }
2084
2085 if (err)
2086 break;
2087 ++datagrams;
3023898b
SHY
2088 if (msg_data_left(&msg_sys))
2089 break;
a78cb84c 2090 cond_resched();
228e548e
AB
2091 }
2092
228e548e
AB
2093 fput_light(sock->file, fput_needed);
2094
728ffb86
AB
2095 /* We only return an error if no datagrams were able to be sent */
2096 if (datagrams != 0)
228e548e
AB
2097 return datagrams;
2098
228e548e
AB
2099 return err;
2100}
2101
2102SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2103 unsigned int, vlen, unsigned int, flags)
2104{
1be374a0
AL
2105 if (flags & MSG_CMSG_COMPAT)
2106 return -EINVAL;
228e548e
AB
2107 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2108}
2109
666547ff 2110static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2111 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2112{
89bddce5
SH
2113 struct compat_msghdr __user *msg_compat =
2114 (struct compat_msghdr __user *)msg;
1da177e4 2115 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2116 struct iovec *iov = iovstack;
1da177e4 2117 unsigned long cmsg_ptr;
2da62906 2118 int len;
08adb7da 2119 ssize_t err;
1da177e4
LT
2120
2121 /* kernel mode address */
230b1839 2122 struct sockaddr_storage addr;
1da177e4
LT
2123
2124 /* user mode address pointers */
2125 struct sockaddr __user *uaddr;
08adb7da 2126 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2127
08adb7da 2128 msg_sys->msg_name = &addr;
1da177e4 2129
f3d33426 2130 if (MSG_CMSG_COMPAT & flags)
08adb7da 2131 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2132 else
08adb7da 2133 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2134 if (err < 0)
da184284 2135 return err;
1da177e4 2136
a2e27255
ACM
2137 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2138 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2139
f3d33426
HFS
2140 /* We assume all kernel code knows the size of sockaddr_storage */
2141 msg_sys->msg_namelen = 0;
2142
1da177e4
LT
2143 if (sock->file->f_flags & O_NONBLOCK)
2144 flags |= MSG_DONTWAIT;
2da62906 2145 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2146 if (err < 0)
2147 goto out_freeiov;
2148 len = err;
2149
2150 if (uaddr != NULL) {
43db362d 2151 err = move_addr_to_user(&addr,
a2e27255 2152 msg_sys->msg_namelen, uaddr,
89bddce5 2153 uaddr_len);
1da177e4
LT
2154 if (err < 0)
2155 goto out_freeiov;
2156 }
a2e27255 2157 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2158 COMPAT_FLAGS(msg));
1da177e4
LT
2159 if (err)
2160 goto out_freeiov;
2161 if (MSG_CMSG_COMPAT & flags)
a2e27255 2162 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2163 &msg_compat->msg_controllen);
2164 else
a2e27255 2165 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2166 &msg->msg_controllen);
2167 if (err)
2168 goto out_freeiov;
2169 err = len;
2170
2171out_freeiov:
da184284 2172 kfree(iov);
a2e27255
ACM
2173 return err;
2174}
2175
2176/*
2177 * BSD recvmsg interface
2178 */
2179
666547ff 2180long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2181{
2182 int fput_needed, err;
2183 struct msghdr msg_sys;
1be374a0
AL
2184 struct socket *sock;
2185
1be374a0 2186 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2187 if (!sock)
2188 goto out;
2189
a7526eb5 2190 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2191
6cb153ca 2192 fput_light(sock->file, fput_needed);
1da177e4
LT
2193out:
2194 return err;
2195}
2196
666547ff 2197SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2198 unsigned int, flags)
2199{
2200 if (flags & MSG_CMSG_COMPAT)
2201 return -EINVAL;
2202 return __sys_recvmsg(fd, msg, flags);
2203}
2204
a2e27255
ACM
2205/*
2206 * Linux recvmmsg interface
2207 */
2208
2209int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2210 unsigned int flags, struct timespec *timeout)
2211{
2212 int fput_needed, err, datagrams;
2213 struct socket *sock;
2214 struct mmsghdr __user *entry;
d7256d0e 2215 struct compat_mmsghdr __user *compat_entry;
a2e27255 2216 struct msghdr msg_sys;
766b9f92
DD
2217 struct timespec64 end_time;
2218 struct timespec64 timeout64;
a2e27255
ACM
2219
2220 if (timeout &&
2221 poll_select_set_timeout(&end_time, timeout->tv_sec,
2222 timeout->tv_nsec))
2223 return -EINVAL;
2224
2225 datagrams = 0;
2226
2227 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2228 if (!sock)
2229 return err;
2230
2231 err = sock_error(sock->sk);
e623a9e9
MJ
2232 if (err) {
2233 datagrams = err;
a2e27255 2234 goto out_put;
e623a9e9 2235 }
a2e27255
ACM
2236
2237 entry = mmsg;
d7256d0e 2238 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2239
2240 while (datagrams < vlen) {
2241 /*
2242 * No need to ask LSM for more than the first datagram.
2243 */
d7256d0e 2244 if (MSG_CMSG_COMPAT & flags) {
666547ff 2245 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2246 &msg_sys, flags & ~MSG_WAITFORONE,
2247 datagrams);
d7256d0e
JMG
2248 if (err < 0)
2249 break;
2250 err = __put_user(err, &compat_entry->msg_len);
2251 ++compat_entry;
2252 } else {
a7526eb5 2253 err = ___sys_recvmsg(sock,
666547ff 2254 (struct user_msghdr __user *)entry,
a7526eb5
AL
2255 &msg_sys, flags & ~MSG_WAITFORONE,
2256 datagrams);
d7256d0e
JMG
2257 if (err < 0)
2258 break;
2259 err = put_user(err, &entry->msg_len);
2260 ++entry;
2261 }
2262
a2e27255
ACM
2263 if (err)
2264 break;
a2e27255
ACM
2265 ++datagrams;
2266
71c5c159
BB
2267 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2268 if (flags & MSG_WAITFORONE)
2269 flags |= MSG_DONTWAIT;
2270
a2e27255 2271 if (timeout) {
766b9f92
DD
2272 ktime_get_ts64(&timeout64);
2273 *timeout = timespec64_to_timespec(
2274 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2275 if (timeout->tv_sec < 0) {
2276 timeout->tv_sec = timeout->tv_nsec = 0;
2277 break;
2278 }
2279
2280 /* Timeout, return less than vlen datagrams */
2281 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2282 break;
2283 }
2284
2285 /* Out of band data, return right away */
2286 if (msg_sys.msg_flags & MSG_OOB)
2287 break;
a78cb84c 2288 cond_resched();
a2e27255
ACM
2289 }
2290
a2e27255 2291 if (err == 0)
34b88a68
ACM
2292 goto out_put;
2293
2294 if (datagrams == 0) {
2295 datagrams = err;
2296 goto out_put;
2297 }
a2e27255 2298
34b88a68
ACM
2299 /*
2300 * We may return less entries than requested (vlen) if the
2301 * sock is non block and there aren't enough datagrams...
2302 */
2303 if (err != -EAGAIN) {
a2e27255 2304 /*
34b88a68
ACM
2305 * ... or if recvmsg returns an error after we
2306 * received some datagrams, where we record the
2307 * error to return on the next call or if the
2308 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2309 */
34b88a68 2310 sock->sk->sk_err = -err;
a2e27255 2311 }
34b88a68
ACM
2312out_put:
2313 fput_light(sock->file, fput_needed);
a2e27255 2314
34b88a68 2315 return datagrams;
a2e27255
ACM
2316}
2317
2318SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2319 unsigned int, vlen, unsigned int, flags,
2320 struct timespec __user *, timeout)
2321{
2322 int datagrams;
2323 struct timespec timeout_sys;
2324
1be374a0
AL
2325 if (flags & MSG_CMSG_COMPAT)
2326 return -EINVAL;
2327
a2e27255
ACM
2328 if (!timeout)
2329 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2330
2331 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2332 return -EFAULT;
2333
2334 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2335
2336 if (datagrams > 0 &&
2337 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2338 datagrams = -EFAULT;
2339
2340 return datagrams;
2341}
2342
2343#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument block for each sys_socketcall sub-call,
 * indexed by call number. AL(n) is the size of n unsigned longs.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 ..  3 */ AL(0), AL(3), AL(3), AL(3),
	/*  4 ..  7 */ AL(2), AL(3), AL(3), AL(3),
	/*  8 .. 11 */ AL(4), AL(4), AL(4), AL(6),
	/* 12 .. 15 */ AL(6), AL(2), AL(5), AL(5),
	/* 16 .. 19 */ AL(3), AL(3), AL(4), AL(5),
	/* 20       */ AL(4)
};

#undef AL
2354
2355/*
89bddce5 2356 * System call vectors.
1da177e4
LT
2357 *
2358 * Argument checking cleaned up. Saved 20% in size.
2359 * This function doesn't need to set the kernel lock because
89bddce5 2360 * it is set by the callees.
1da177e4
LT
2361 */
2362
3e0fa65f 2363SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2364{
2950fa9d 2365 unsigned long a[AUDITSC_ARGS];
89bddce5 2366 unsigned long a0, a1;
1da177e4 2367 int err;
47379052 2368 unsigned int len;
1da177e4 2369
228e548e 2370 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2371 return -EINVAL;
2372
47379052
AV
2373 len = nargs[call];
2374 if (len > sizeof(a))
2375 return -EINVAL;
2376
1da177e4 2377 /* copy_from_user should be SMP safe. */
47379052 2378 if (copy_from_user(a, args, len))
1da177e4 2379 return -EFAULT;
3ec3b2fb 2380
2950fa9d
CG
2381 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2382 if (err)
2383 return err;
3ec3b2fb 2384
89bddce5
SH
2385 a0 = a[0];
2386 a1 = a[1];
2387
2388 switch (call) {
2389 case SYS_SOCKET:
2390 err = sys_socket(a0, a1, a[2]);
2391 break;
2392 case SYS_BIND:
2393 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2394 break;
2395 case SYS_CONNECT:
2396 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2397 break;
2398 case SYS_LISTEN:
2399 err = sys_listen(a0, a1);
2400 break;
2401 case SYS_ACCEPT:
de11defe
UD
2402 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2403 (int __user *)a[2], 0);
89bddce5
SH
2404 break;
2405 case SYS_GETSOCKNAME:
2406 err =
2407 sys_getsockname(a0, (struct sockaddr __user *)a1,
2408 (int __user *)a[2]);
2409 break;
2410 case SYS_GETPEERNAME:
2411 err =
2412 sys_getpeername(a0, (struct sockaddr __user *)a1,
2413 (int __user *)a[2]);
2414 break;
2415 case SYS_SOCKETPAIR:
2416 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2417 break;
2418 case SYS_SEND:
2419 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2420 break;
2421 case SYS_SENDTO:
2422 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2423 (struct sockaddr __user *)a[4], a[5]);
2424 break;
2425 case SYS_RECV:
2426 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2427 break;
2428 case SYS_RECVFROM:
2429 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2430 (struct sockaddr __user *)a[4],
2431 (int __user *)a[5]);
2432 break;
2433 case SYS_SHUTDOWN:
2434 err = sys_shutdown(a0, a1);
2435 break;
2436 case SYS_SETSOCKOPT:
2437 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2438 break;
2439 case SYS_GETSOCKOPT:
2440 err =
2441 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2442 (int __user *)a[4]);
2443 break;
2444 case SYS_SENDMSG:
666547ff 2445 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2446 break;
228e548e
AB
2447 case SYS_SENDMMSG:
2448 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2449 break;
89bddce5 2450 case SYS_RECVMSG:
666547ff 2451 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2452 break;
a2e27255
ACM
2453 case SYS_RECVMMSG:
2454 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2455 (struct timespec __user *)a[4]);
2456 break;
de11defe
UD
2457 case SYS_ACCEPT4:
2458 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2459 (int __user *)a[2], a[3]);
aaca0bdc 2460 break;
89bddce5
SH
2461 default:
2462 err = -EINVAL;
2463 break;
1da177e4
LT
2464 }
2465 return err;
2466}
2467
89bddce5 2468#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2469
55737fda
SH
2470/**
2471 * sock_register - add a socket protocol handler
2472 * @ops: description of protocol
2473 *
1da177e4
LT
2474 * This function is called by a protocol handler that wants to
2475 * advertise its address family, and have it linked into the
e793c0f7 2476 * socket interface. The value ops->family corresponds to the
55737fda 2477 * socket system call protocol family.
1da177e4 2478 */
f0fd27d4 2479int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2480{
2481 int err;
2482
2483 if (ops->family >= NPROTO) {
3410f22e 2484 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2485 return -ENOBUFS;
2486 }
55737fda
SH
2487
2488 spin_lock(&net_family_lock);
190683a9
ED
2489 if (rcu_dereference_protected(net_families[ops->family],
2490 lockdep_is_held(&net_family_lock)))
55737fda
SH
2491 err = -EEXIST;
2492 else {
cf778b00 2493 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2494 err = 0;
2495 }
55737fda
SH
2496 spin_unlock(&net_family_lock);
2497
3410f22e 2498 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2499 return err;
2500}
c6d409cf 2501EXPORT_SYMBOL(sock_register);
1da177e4 2502
55737fda
SH
2503/**
2504 * sock_unregister - remove a protocol handler
2505 * @family: protocol family to remove
2506 *
1da177e4
LT
2507 * This function is called by a protocol handler that wants to
2508 * remove its address family, and have it unlinked from the
55737fda
SH
2509 * new socket creation.
2510 *
2511 * If protocol handler is a module, then it can use module reference
2512 * counts to protect against new references. If protocol handler is not
2513 * a module then it needs to provide its own protection in
2514 * the ops->create routine.
1da177e4 2515 */
f0fd27d4 2516void sock_unregister(int family)
1da177e4 2517{
f0fd27d4 2518 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2519
55737fda 2520 spin_lock(&net_family_lock);
a9b3cd7f 2521 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2522 spin_unlock(&net_family_lock);
2523
2524 synchronize_rcu();
2525
3410f22e 2526 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2527}
c6d409cf 2528EXPORT_SYMBOL(sock_unregister);
1da177e4 2529
77d76ea3 2530static int __init sock_init(void)
1da177e4 2531{
b3e19d92 2532 int err;
2ca794e5
EB
2533 /*
2534 * Initialize the network sysctl infrastructure.
2535 */
2536 err = net_sysctl_init();
2537 if (err)
2538 goto out;
b3e19d92 2539
1da177e4 2540 /*
89bddce5 2541 * Initialize skbuff SLAB cache
1da177e4
LT
2542 */
2543 skb_init();
1da177e4
LT
2544
2545 /*
89bddce5 2546 * Initialize the protocols module.
1da177e4
LT
2547 */
2548
2549 init_inodecache();
b3e19d92
NP
2550
2551 err = register_filesystem(&sock_fs_type);
2552 if (err)
2553 goto out_fs;
1da177e4 2554 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2555 if (IS_ERR(sock_mnt)) {
2556 err = PTR_ERR(sock_mnt);
2557 goto out_mount;
2558 }
77d76ea3
AK
2559
2560 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2561 */
2562
2563#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2564 err = netfilter_init();
2565 if (err)
2566 goto out;
1da177e4 2567#endif
cbeb321a 2568
408eccce 2569 ptp_classifier_init();
c1f19b51 2570
b3e19d92
NP
2571out:
2572 return err;
2573
2574out_mount:
2575 unregister_filesystem(&sock_fs_type);
2576out_fs:
2577 goto out;
1da177e4
LT
2578}
2579
77d76ea3
AK
2580core_initcall(sock_init); /* early initcall */
2581
1da177e4
LT
2582#ifdef CONFIG_PROC_FS
2583void socket_seq_show(struct seq_file *seq)
2584{
2585 int cpu;
2586 int counter = 0;
2587
6f912042 2588 for_each_possible_cpu(cpu)
89bddce5 2589 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2590
2591 /* It can be negative, by the way. 8) */
2592 if (counter < 0)
2593 counter = 0;
2594
2595 seq_printf(seq, "sockets: used %d\n", counter);
2596}
89bddce5 2597#endif /* CONFIG_PROC_FS */
1da177e4 2598
89bbfc95 2599#ifdef CONFIG_COMPAT
6b96018b 2600static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2601 unsigned int cmd, void __user *up)
7a229387 2602{
7a229387
AB
2603 mm_segment_t old_fs = get_fs();
2604 struct timeval ktv;
2605 int err;
2606
2607 set_fs(KERNEL_DS);
6b96018b 2608 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2609 set_fs(old_fs);
644595f8 2610 if (!err)
ed6fe9d6 2611 err = compat_put_timeval(&ktv, up);
644595f8 2612
7a229387
AB
2613 return err;
2614}
2615
6b96018b 2616static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2617 unsigned int cmd, void __user *up)
7a229387 2618{
7a229387
AB
2619 mm_segment_t old_fs = get_fs();
2620 struct timespec kts;
2621 int err;
2622
2623 set_fs(KERNEL_DS);
6b96018b 2624 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2625 set_fs(old_fs);
644595f8 2626 if (!err)
ed6fe9d6 2627 err = compat_put_timespec(&kts, up);
644595f8 2628
7a229387
AB
2629 return err;
2630}
2631
6b96018b 2632static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2633{
2634 struct ifreq __user *uifr;
2635 int err;
2636
2637 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2638 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2639 return -EFAULT;
2640
6b96018b 2641 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2642 if (err)
2643 return err;
2644
6b96018b 2645 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2646 return -EFAULT;
2647
2648 return 0;
2649}
2650
6b96018b 2651static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2652{
6b96018b 2653 struct compat_ifconf ifc32;
7a229387
AB
2654 struct ifconf ifc;
2655 struct ifconf __user *uifc;
6b96018b 2656 struct compat_ifreq __user *ifr32;
7a229387
AB
2657 struct ifreq __user *ifr;
2658 unsigned int i, j;
2659 int err;
2660
6b96018b 2661 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2662 return -EFAULT;
2663
43da5f2e 2664 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2665 if (ifc32.ifcbuf == 0) {
2666 ifc32.ifc_len = 0;
2667 ifc.ifc_len = 0;
2668 ifc.ifc_req = NULL;
2669 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2670 } else {
c6d409cf
ED
2671 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2672 sizeof(struct ifreq);
7a229387
AB
2673 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2674 ifc.ifc_len = len;
2675 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2676 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2677 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2678 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2679 return -EFAULT;
2680 ifr++;
2681 ifr32++;
2682 }
2683 }
2684 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2685 return -EFAULT;
2686
6b96018b 2687 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2688 if (err)
2689 return err;
2690
2691 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2692 return -EFAULT;
2693
2694 ifr = ifc.ifc_req;
2695 ifr32 = compat_ptr(ifc32.ifcbuf);
2696 for (i = 0, j = 0;
c6d409cf
ED
2697 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2698 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2699 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2700 return -EFAULT;
2701 ifr32++;
2702 ifr++;
2703 }
2704
2705 if (ifc32.ifcbuf == 0) {
2706 /* Translate from 64-bit structure multiple to
2707 * a 32-bit one.
2708 */
2709 i = ifc.ifc_len;
6b96018b 2710 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2711 ifc32.ifc_len = i;
2712 } else {
2713 ifc32.ifc_len = i;
2714 }
6b96018b 2715 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2716 return -EFAULT;
2717
2718 return 0;
2719}
2720
6b96018b 2721static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2722{
3a7da39d
BH
2723 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2724 bool convert_in = false, convert_out = false;
2725 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2726 struct ethtool_rxnfc __user *rxnfc;
7a229387 2727 struct ifreq __user *ifr;
3a7da39d
BH
2728 u32 rule_cnt = 0, actual_rule_cnt;
2729 u32 ethcmd;
7a229387 2730 u32 data;
3a7da39d 2731 int ret;
7a229387 2732
3a7da39d
BH
2733 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2734 return -EFAULT;
7a229387 2735
3a7da39d
BH
2736 compat_rxnfc = compat_ptr(data);
2737
2738 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2739 return -EFAULT;
2740
3a7da39d
BH
2741 /* Most ethtool structures are defined without padding.
2742 * Unfortunately struct ethtool_rxnfc is an exception.
2743 */
2744 switch (ethcmd) {
2745 default:
2746 break;
2747 case ETHTOOL_GRXCLSRLALL:
2748 /* Buffer size is variable */
2749 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2750 return -EFAULT;
2751 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2752 return -ENOMEM;
2753 buf_size += rule_cnt * sizeof(u32);
2754 /* fall through */
2755 case ETHTOOL_GRXRINGS:
2756 case ETHTOOL_GRXCLSRLCNT:
2757 case ETHTOOL_GRXCLSRULE:
55664f32 2758 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2759 convert_out = true;
2760 /* fall through */
2761 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2762 buf_size += sizeof(struct ethtool_rxnfc);
2763 convert_in = true;
2764 break;
2765 }
2766
2767 ifr = compat_alloc_user_space(buf_size);
954b1244 2768 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2769
2770 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2771 return -EFAULT;
2772
3a7da39d
BH
2773 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2774 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2775 return -EFAULT;
2776
3a7da39d 2777 if (convert_in) {
127fe533 2778 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2779 * fs.ring_cookie and at the end of fs, but nowhere else.
2780 */
127fe533
AD
2781 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2782 sizeof(compat_rxnfc->fs.m_ext) !=
2783 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2784 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2785 BUILD_BUG_ON(
2786 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2787 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2788 offsetof(struct ethtool_rxnfc, fs.location) -
2789 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2790
2791 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2792 (void __user *)(&rxnfc->fs.m_ext + 1) -
2793 (void __user *)rxnfc) ||
3a7da39d
BH
2794 copy_in_user(&rxnfc->fs.ring_cookie,
2795 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2796 (void __user *)(&rxnfc->fs.location + 1) -
2797 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2798 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2799 sizeof(rxnfc->rule_cnt)))
2800 return -EFAULT;
2801 }
2802
2803 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2804 if (ret)
2805 return ret;
2806
2807 if (convert_out) {
2808 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2809 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2810 (const void __user *)rxnfc) ||
3a7da39d
BH
2811 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2812 &rxnfc->fs.ring_cookie,
954b1244
SH
2813 (const void __user *)(&rxnfc->fs.location + 1) -
2814 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2815 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2816 sizeof(rxnfc->rule_cnt)))
2817 return -EFAULT;
2818
2819 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2820 /* As an optimisation, we only copy the actual
2821 * number of rules that the underlying
2822 * function returned. Since Mallory might
2823 * change the rule count in user memory, we
2824 * check that it is less than the rule count
2825 * originally given (as the user buffer size),
2826 * which has been range-checked.
2827 */
2828 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2829 return -EFAULT;
2830 if (actual_rule_cnt < rule_cnt)
2831 rule_cnt = actual_rule_cnt;
2832 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2833 &rxnfc->rule_locs[0],
2834 rule_cnt * sizeof(u32)))
2835 return -EFAULT;
2836 }
2837 }
2838
2839 return 0;
7a229387
AB
2840}
2841
7a50a240
AB
2842static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2843{
2844 void __user *uptr;
2845 compat_uptr_t uptr32;
2846 struct ifreq __user *uifr;
2847
c6d409cf 2848 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2849 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2850 return -EFAULT;
2851
2852 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2853 return -EFAULT;
2854
2855 uptr = compat_ptr(uptr32);
2856
2857 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2858 return -EFAULT;
2859
2860 return dev_ioctl(net, SIOCWANDEV, uifr);
2861}
2862
6b96018b
AB
2863static int bond_ioctl(struct net *net, unsigned int cmd,
2864 struct compat_ifreq __user *ifr32)
7a229387
AB
2865{
2866 struct ifreq kifr;
7a229387
AB
2867 mm_segment_t old_fs;
2868 int err;
7a229387
AB
2869
2870 switch (cmd) {
2871 case SIOCBONDENSLAVE:
2872 case SIOCBONDRELEASE:
2873 case SIOCBONDSETHWADDR:
2874 case SIOCBONDCHANGEACTIVE:
6b96018b 2875 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2876 return -EFAULT;
2877
2878 old_fs = get_fs();
c6d409cf 2879 set_fs(KERNEL_DS);
c3f52ae6 2880 err = dev_ioctl(net, cmd,
2881 (struct ifreq __user __force *) &kifr);
c6d409cf 2882 set_fs(old_fs);
7a229387
AB
2883
2884 return err;
7a229387 2885 default:
07d106d0 2886 return -ENOIOCTLCMD;
ccbd6a5a 2887 }
7a229387
AB
2888}
2889
590d4693
BH
2890/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2891static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2892 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2893{
2894 struct ifreq __user *u_ifreq64;
7a229387
AB
2895 char tmp_buf[IFNAMSIZ];
2896 void __user *data64;
2897 u32 data32;
2898
2899 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2900 IFNAMSIZ))
2901 return -EFAULT;
417c3522 2902 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2903 return -EFAULT;
2904 data64 = compat_ptr(data32);
2905
2906 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2907
7a229387
AB
2908 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2909 IFNAMSIZ))
2910 return -EFAULT;
417c3522 2911 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2912 return -EFAULT;
2913
6b96018b 2914 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2915}
2916
6b96018b
AB
2917static int dev_ifsioc(struct net *net, struct socket *sock,
2918 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2919{
a2116ed2 2920 struct ifreq __user *uifr;
7a229387
AB
2921 int err;
2922
a2116ed2
AB
2923 uifr = compat_alloc_user_space(sizeof(*uifr));
2924 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2925 return -EFAULT;
2926
2927 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2928
7a229387
AB
2929 if (!err) {
2930 switch (cmd) {
2931 case SIOCGIFFLAGS:
2932 case SIOCGIFMETRIC:
2933 case SIOCGIFMTU:
2934 case SIOCGIFMEM:
2935 case SIOCGIFHWADDR:
2936 case SIOCGIFINDEX:
2937 case SIOCGIFADDR:
2938 case SIOCGIFBRDADDR:
2939 case SIOCGIFDSTADDR:
2940 case SIOCGIFNETMASK:
fab2532b 2941 case SIOCGIFPFLAGS:
7a229387 2942 case SIOCGIFTXQLEN:
fab2532b
AB
2943 case SIOCGMIIPHY:
2944 case SIOCGMIIREG:
a2116ed2 2945 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2946 err = -EFAULT;
2947 break;
2948 }
2949 }
2950 return err;
2951}
2952
a2116ed2
AB
2953static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2954 struct compat_ifreq __user *uifr32)
2955{
2956 struct ifreq ifr;
2957 struct compat_ifmap __user *uifmap32;
2958 mm_segment_t old_fs;
2959 int err;
2960
2961 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2962 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2963 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2964 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2965 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2966 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2967 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2968 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2969 if (err)
2970 return -EFAULT;
2971
2972 old_fs = get_fs();
c6d409cf 2973 set_fs(KERNEL_DS);
c3f52ae6 2974 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2975 set_fs(old_fs);
a2116ed2
AB
2976
2977 if (cmd == SIOCGIFMAP && !err) {
2978 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2979 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2980 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2981 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2982 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2983 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2984 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2985 if (err)
2986 err = -EFAULT;
2987 }
2988 return err;
2989}
2990
7a229387 2991struct rtentry32 {
c6d409cf 2992 u32 rt_pad1;
7a229387
AB
2993 struct sockaddr rt_dst; /* target address */
2994 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2995 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2996 unsigned short rt_flags;
2997 short rt_pad2;
2998 u32 rt_pad3;
2999 unsigned char rt_tos;
3000 unsigned char rt_class;
3001 short rt_pad4;
3002 short rt_metric; /* +1 for binary compatibility! */
7a229387 3003 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3004 u32 rt_mtu; /* per route MTU/Window */
3005 u32 rt_window; /* Window clamping */
7a229387
AB
3006 unsigned short rt_irtt; /* Initial RTT */
3007};
3008
3009struct in6_rtmsg32 {
3010 struct in6_addr rtmsg_dst;
3011 struct in6_addr rtmsg_src;
3012 struct in6_addr rtmsg_gateway;
3013 u32 rtmsg_type;
3014 u16 rtmsg_dst_len;
3015 u16 rtmsg_src_len;
3016 u32 rtmsg_metric;
3017 u32 rtmsg_info;
3018 u32 rtmsg_flags;
3019 s32 rtmsg_ifindex;
3020};
3021
6b96018b
AB
3022static int routing_ioctl(struct net *net, struct socket *sock,
3023 unsigned int cmd, void __user *argp)
7a229387
AB
3024{
3025 int ret;
3026 void *r = NULL;
3027 struct in6_rtmsg r6;
3028 struct rtentry r4;
3029 char devname[16];
3030 u32 rtdev;
3031 mm_segment_t old_fs = get_fs();
3032
6b96018b
AB
3033 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3034 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3035 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3036 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3037 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3038 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3039 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3040 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3041 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3042 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3043 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3044
3045 r = (void *) &r6;
3046 } else { /* ipv4 */
6b96018b 3047 struct rtentry32 __user *ur4 = argp;
c6d409cf 3048 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3049 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3050 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3051 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3052 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3053 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3054 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3055 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3056 if (rtdev) {
c6d409cf 3057 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3058 r4.rt_dev = (char __user __force *)devname;
3059 devname[15] = 0;
7a229387
AB
3060 } else
3061 r4.rt_dev = NULL;
3062
3063 r = (void *) &r4;
3064 }
3065
3066 if (ret) {
3067 ret = -EFAULT;
3068 goto out;
3069 }
3070
c6d409cf 3071 set_fs(KERNEL_DS);
6b96018b 3072 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3073 set_fs(old_fs);
7a229387
AB
3074
3075out:
7a229387
AB
3076 return ret;
3077}
3078
3079/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3080 * for some operations; this forces use of the newer bridge-utils that
25985edc 3081 * use compatible ioctls
7a229387 3082 */
6b96018b 3083static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3084{
6b96018b 3085 compat_ulong_t tmp;
7a229387 3086
6b96018b 3087 if (get_user(tmp, argp))
7a229387
AB
3088 return -EFAULT;
3089 if (tmp == BRCTL_GET_VERSION)
3090 return BRCTL_VERSION + 1;
3091 return -EINVAL;
3092}
3093
6b96018b
AB
3094static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3095 unsigned int cmd, unsigned long arg)
3096{
3097 void __user *argp = compat_ptr(arg);
3098 struct sock *sk = sock->sk;
3099 struct net *net = sock_net(sk);
7a229387 3100
6b96018b 3101 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3102 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3103
3104 switch (cmd) {
3105 case SIOCSIFBR:
3106 case SIOCGIFBR:
3107 return old_bridge_ioctl(argp);
3108 case SIOCGIFNAME:
3109 return dev_ifname32(net, argp);
3110 case SIOCGIFCONF:
3111 return dev_ifconf(net, argp);
3112 case SIOCETHTOOL:
3113 return ethtool_ioctl(net, argp);
7a50a240
AB
3114 case SIOCWANDEV:
3115 return compat_siocwandev(net, argp);
a2116ed2
AB
3116 case SIOCGIFMAP:
3117 case SIOCSIFMAP:
3118 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3119 case SIOCBONDENSLAVE:
3120 case SIOCBONDRELEASE:
3121 case SIOCBONDSETHWADDR:
6b96018b
AB
3122 case SIOCBONDCHANGEACTIVE:
3123 return bond_ioctl(net, cmd, argp);
3124 case SIOCADDRT:
3125 case SIOCDELRT:
3126 return routing_ioctl(net, sock, cmd, argp);
3127 case SIOCGSTAMP:
3128 return do_siocgstamp(net, sock, cmd, argp);
3129 case SIOCGSTAMPNS:
3130 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3131 case SIOCBONDSLAVEINFOQUERY:
3132 case SIOCBONDINFOQUERY:
a2116ed2 3133 case SIOCSHWTSTAMP:
fd468c74 3134 case SIOCGHWTSTAMP:
590d4693 3135 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3136
3137 case FIOSETOWN:
3138 case SIOCSPGRP:
3139 case FIOGETOWN:
3140 case SIOCGPGRP:
3141 case SIOCBRADDBR:
3142 case SIOCBRDELBR:
3143 case SIOCGIFVLAN:
3144 case SIOCSIFVLAN:
3145 case SIOCADDDLCI:
3146 case SIOCDELDLCI:
c62cce2c 3147 case SIOCGSKNS:
6b96018b
AB
3148 return sock_ioctl(file, cmd, arg);
3149
3150 case SIOCGIFFLAGS:
3151 case SIOCSIFFLAGS:
3152 case SIOCGIFMETRIC:
3153 case SIOCSIFMETRIC:
3154 case SIOCGIFMTU:
3155 case SIOCSIFMTU:
3156 case SIOCGIFMEM:
3157 case SIOCSIFMEM:
3158 case SIOCGIFHWADDR:
3159 case SIOCSIFHWADDR:
3160 case SIOCADDMULTI:
3161 case SIOCDELMULTI:
3162 case SIOCGIFINDEX:
6b96018b
AB
3163 case SIOCGIFADDR:
3164 case SIOCSIFADDR:
3165 case SIOCSIFHWBROADCAST:
6b96018b 3166 case SIOCDIFADDR:
6b96018b
AB
3167 case SIOCGIFBRDADDR:
3168 case SIOCSIFBRDADDR:
3169 case SIOCGIFDSTADDR:
3170 case SIOCSIFDSTADDR:
3171 case SIOCGIFNETMASK:
3172 case SIOCSIFNETMASK:
3173 case SIOCSIFPFLAGS:
3174 case SIOCGIFPFLAGS:
3175 case SIOCGIFTXQLEN:
3176 case SIOCSIFTXQLEN:
3177 case SIOCBRADDIF:
3178 case SIOCBRDELIF:
9177efd3
AB
3179 case SIOCSIFNAME:
3180 case SIOCGMIIPHY:
3181 case SIOCGMIIREG:
3182 case SIOCSMIIREG:
6b96018b 3183 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3184
6b96018b
AB
3185 case SIOCSARP:
3186 case SIOCGARP:
3187 case SIOCDARP:
6b96018b 3188 case SIOCATMARK:
9177efd3
AB
3189 return sock_do_ioctl(net, sock, cmd, arg);
3190 }
3191
6b96018b
AB
3192 return -ENOIOCTLCMD;
3193}
7a229387 3194
95c96174 3195static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3196 unsigned long arg)
89bbfc95
SP
3197{
3198 struct socket *sock = file->private_data;
3199 int ret = -ENOIOCTLCMD;
87de87d5
DM
3200 struct sock *sk;
3201 struct net *net;
3202
3203 sk = sock->sk;
3204 net = sock_net(sk);
89bbfc95
SP
3205
3206 if (sock->ops->compat_ioctl)
3207 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3208
87de87d5
DM
3209 if (ret == -ENOIOCTLCMD &&
3210 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3211 ret = compat_wext_handle_ioctl(net, cmd, arg);
3212
6b96018b
AB
3213 if (ret == -ENOIOCTLCMD)
3214 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3215
89bbfc95
SP
3216 return ret;
3217}
3218#endif
3219
ac5a488e
SS
3220int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3221{
3222 return sock->ops->bind(sock, addr, addrlen);
3223}
c6d409cf 3224EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3225
3226int kernel_listen(struct socket *sock, int backlog)
3227{
3228 return sock->ops->listen(sock, backlog);
3229}
c6d409cf 3230EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3231
3232int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3233{
3234 struct sock *sk = sock->sk;
3235 int err;
3236
3237 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3238 newsock);
3239 if (err < 0)
3240 goto done;
3241
cdfbabfb 3242 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3243 if (err < 0) {
3244 sock_release(*newsock);
fa8705b0 3245 *newsock = NULL;
ac5a488e
SS
3246 goto done;
3247 }
3248
3249 (*newsock)->ops = sock->ops;
1b08534e 3250 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3251
3252done:
3253 return err;
3254}
c6d409cf 3255EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3256
3257int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3258 int flags)
ac5a488e
SS
3259{
3260 return sock->ops->connect(sock, addr, addrlen, flags);
3261}
c6d409cf 3262EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3263
3264int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3265 int *addrlen)
3266{
3267 return sock->ops->getname(sock, addr, addrlen, 0);
3268}
c6d409cf 3269EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3270
3271int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3272 int *addrlen)
3273{
3274 return sock->ops->getname(sock, addr, addrlen, 1);
3275}
c6d409cf 3276EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3277
3278int kernel_getsockopt(struct socket *sock, int level, int optname,
3279 char *optval, int *optlen)
3280{
3281 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3282 char __user *uoptval;
3283 int __user *uoptlen;
ac5a488e
SS
3284 int err;
3285
fb8621bb
NK
3286 uoptval = (char __user __force *) optval;
3287 uoptlen = (int __user __force *) optlen;
3288
ac5a488e
SS
3289 set_fs(KERNEL_DS);
3290 if (level == SOL_SOCKET)
fb8621bb 3291 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3292 else
fb8621bb
NK
3293 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3294 uoptlen);
ac5a488e
SS
3295 set_fs(oldfs);
3296 return err;
3297}
c6d409cf 3298EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3299
3300int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3301 char *optval, unsigned int optlen)
ac5a488e
SS
3302{
3303 mm_segment_t oldfs = get_fs();
fb8621bb 3304 char __user *uoptval;
ac5a488e
SS
3305 int err;
3306
fb8621bb
NK
3307 uoptval = (char __user __force *) optval;
3308
ac5a488e
SS
3309 set_fs(KERNEL_DS);
3310 if (level == SOL_SOCKET)
fb8621bb 3311 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3312 else
fb8621bb 3313 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3314 optlen);
3315 set_fs(oldfs);
3316 return err;
3317}
c6d409cf 3318EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3319
3320int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3321 size_t size, int flags)
3322{
3323 if (sock->ops->sendpage)
3324 return sock->ops->sendpage(sock, page, offset, size, flags);
3325
3326 return sock_no_sendpage(sock, page, offset, size, flags);
3327}
c6d409cf 3328EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3329
3330int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3331{
3332 mm_segment_t oldfs = get_fs();
3333 int err;
3334
3335 set_fs(KERNEL_DS);
3336 err = sock->ops->ioctl(sock, cmd, arg);
3337 set_fs(oldfs);
3338
3339 return err;
3340}
c6d409cf 3341EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3342
91cf45f0
TM
3343int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3344{
3345 return sock->ops->shutdown(sock, how);
3346}
91cf45f0 3347EXPORT_SYMBOL(kernel_sock_shutdown);