]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
net: change init_inodecache() return void
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1e911632 290static void init_inodecache(void)
1da177e4
LT
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1e911632 299 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
300}
301
b87221de 302static const struct super_operations sockfs_ops = {
c6d409cf
ED
303 .alloc_inode = sock_alloc_inode,
304 .destroy_inode = sock_destroy_inode,
305 .statfs = simple_statfs,
1da177e4
LT
306};
307
c23fbb6b
ED
308/*
309 * sockfs_dname() is called from d_path().
310 */
311static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
312{
313 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 314 d_inode(dentry)->i_ino);
c23fbb6b
ED
315}
316
3ba13d17 317static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 318 .d_dname = sockfs_dname,
1da177e4
LT
319};
320
bba0bd31
AG
321static int sockfs_xattr_get(const struct xattr_handler *handler,
322 struct dentry *dentry, struct inode *inode,
323 const char *suffix, void *value, size_t size)
324{
325 if (value) {
326 if (dentry->d_name.len + 1 > size)
327 return -ERANGE;
328 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
329 }
330 return dentry->d_name.len + 1;
331}
332
333#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
334#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
335#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
336
337static const struct xattr_handler sockfs_xattr_handler = {
338 .name = XATTR_NAME_SOCKPROTONAME,
339 .get = sockfs_xattr_get,
340};
341
4a590153
AG
342static int sockfs_security_xattr_set(const struct xattr_handler *handler,
343 struct dentry *dentry, struct inode *inode,
344 const char *suffix, const void *value,
345 size_t size, int flags)
346{
347 /* Handled by LSM. */
348 return -EAGAIN;
349}
350
351static const struct xattr_handler sockfs_security_xattr_handler = {
352 .prefix = XATTR_SECURITY_PREFIX,
353 .set = sockfs_security_xattr_set,
354};
355
bba0bd31
AG
356static const struct xattr_handler *sockfs_xattr_handlers[] = {
357 &sockfs_xattr_handler,
4a590153 358 &sockfs_security_xattr_handler,
bba0bd31
AG
359 NULL
360};
361
c74a1cbb
AV
362static struct dentry *sockfs_mount(struct file_system_type *fs_type,
363 int flags, const char *dev_name, void *data)
364{
bba0bd31
AG
365 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
366 sockfs_xattr_handlers,
367 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
368}
369
370static struct vfsmount *sock_mnt __read_mostly;
371
372static struct file_system_type sock_fs_type = {
373 .name = "sockfs",
374 .mount = sockfs_mount,
375 .kill_sb = kill_anon_super,
376};
377
1da177e4
LT
378/*
379 * Obtains the first available file descriptor and sets it up for use.
380 *
39d8c1b6
DM
381 * These functions create file structures and maps them to fd space
382 * of the current process. On success it returns file descriptor
1da177e4
LT
383 * and file struct implicitly stored in sock->file.
384 * Note that another thread may close file descriptor before we return
385 * from this function. We use the fact that now we do not refer
386 * to socket after mapping. If one day we will need it, this
387 * function will increment ref. count on file by 1.
388 *
389 * In any case returned fd MAY BE not valid!
390 * This race condition is unavoidable
391 * with shared fd spaces, we cannot solve it inside kernel,
392 * but we take care of internal coherence yet.
393 */
394
aab174f0 395struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 396{
7cbe66b6 397 struct qstr name = { .name = "" };
2c48b9c4 398 struct path path;
7cbe66b6 399 struct file *file;
1da177e4 400
600e1779
MY
401 if (dname) {
402 name.name = dname;
403 name.len = strlen(name.name);
404 } else if (sock->sk) {
405 name.name = sock->sk->sk_prot_creator->name;
406 name.len = strlen(name.name);
407 }
4b936885 408 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
409 if (unlikely(!path.dentry))
410 return ERR_PTR(-ENOMEM);
2c48b9c4 411 path.mnt = mntget(sock_mnt);
39d8c1b6 412
2c48b9c4 413 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 414
2c48b9c4 415 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 416 &socket_file_ops);
b5ffe634 417 if (IS_ERR(file)) {
cc3808f8 418 /* drop dentry, keep inode */
c5ef6035 419 ihold(d_inode(path.dentry));
2c48b9c4 420 path_put(&path);
39b65252 421 return file;
cc3808f8
AV
422 }
423
424 sock->file = file;
77d27200 425 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 426 file->private_data = sock;
28407630 427 return file;
39d8c1b6 428}
56b31d1c 429EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 430
56b31d1c 431static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
432{
433 struct file *newfile;
28407630
AV
434 int fd = get_unused_fd_flags(flags);
435 if (unlikely(fd < 0))
436 return fd;
39d8c1b6 437
aab174f0 438 newfile = sock_alloc_file(sock, flags, NULL);
28407630 439 if (likely(!IS_ERR(newfile))) {
39d8c1b6 440 fd_install(fd, newfile);
28407630
AV
441 return fd;
442 }
7cbe66b6 443
28407630
AV
444 put_unused_fd(fd);
445 return PTR_ERR(newfile);
1da177e4
LT
446}
447
406a3c63 448struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 449{
6cb153ca
BL
450 if (file->f_op == &socket_file_ops)
451 return file->private_data; /* set in sock_map_fd */
452
23bb80d2
ED
453 *err = -ENOTSOCK;
454 return NULL;
6cb153ca 455}
406a3c63 456EXPORT_SYMBOL(sock_from_file);
6cb153ca 457
1da177e4 458/**
c6d409cf 459 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
460 * @fd: file handle
461 * @err: pointer to an error code return
462 *
463 * The file handle passed in is locked and the socket it is bound
464 * too is returned. If an error occurs the err pointer is overwritten
465 * with a negative errno code and NULL is returned. The function checks
466 * for both invalid handles and passing a handle which is not a socket.
467 *
468 * On a success the socket object pointer is returned.
469 */
470
471struct socket *sockfd_lookup(int fd, int *err)
472{
473 struct file *file;
1da177e4
LT
474 struct socket *sock;
475
89bddce5
SH
476 file = fget(fd);
477 if (!file) {
1da177e4
LT
478 *err = -EBADF;
479 return NULL;
480 }
89bddce5 481
6cb153ca
BL
482 sock = sock_from_file(file, err);
483 if (!sock)
1da177e4 484 fput(file);
6cb153ca
BL
485 return sock;
486}
c6d409cf 487EXPORT_SYMBOL(sockfd_lookup);
1da177e4 488
6cb153ca
BL
489static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
490{
00e188ef 491 struct fd f = fdget(fd);
6cb153ca
BL
492 struct socket *sock;
493
3672558c 494 *err = -EBADF;
00e188ef
AV
495 if (f.file) {
496 sock = sock_from_file(f.file, err);
497 if (likely(sock)) {
498 *fput_needed = f.flags;
6cb153ca 499 return sock;
00e188ef
AV
500 }
501 fdput(f);
1da177e4 502 }
6cb153ca 503 return NULL;
1da177e4
LT
504}
505
600e1779
MY
506static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
507 size_t size)
508{
509 ssize_t len;
510 ssize_t used = 0;
511
c5ef6035 512 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
513 if (len < 0)
514 return len;
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 buffer += len;
520 }
521
522 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
523 used += len;
524 if (buffer) {
525 if (size < used)
526 return -ERANGE;
527 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
528 buffer += len;
529 }
530
531 return used;
532}
533
86741ec2
LC
534int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
535{
536 int err = simple_setattr(dentry, iattr);
537
e1a3a60a 538 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
539 struct socket *sock = SOCKET_I(d_inode(dentry));
540
541 sock->sk->sk_uid = iattr->ia_uid;
542 }
543
544 return err;
545}
546
600e1779 547static const struct inode_operations sockfs_inode_ops = {
600e1779 548 .listxattr = sockfs_listxattr,
86741ec2 549 .setattr = sockfs_setattr,
600e1779
MY
550};
551
1da177e4
LT
552/**
553 * sock_alloc - allocate a socket
89bddce5 554 *
1da177e4
LT
555 * Allocate a new inode and socket object. The two are bound together
556 * and initialised. The socket is then returned. If we are out of inodes
557 * NULL is returned.
558 */
559
f4a00aac 560struct socket *sock_alloc(void)
1da177e4 561{
89bddce5
SH
562 struct inode *inode;
563 struct socket *sock;
1da177e4 564
a209dfc7 565 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
566 if (!inode)
567 return NULL;
568
569 sock = SOCKET_I(inode);
570
29a020d3 571 kmemcheck_annotate_bitfield(sock, type);
85fe4025 572 inode->i_ino = get_next_ino();
89bddce5 573 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
574 inode->i_uid = current_fsuid();
575 inode->i_gid = current_fsgid();
600e1779 576 inode->i_op = &sockfs_inode_ops;
1da177e4 577
19e8d69c 578 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
579 return sock;
580}
f4a00aac 581EXPORT_SYMBOL(sock_alloc);
1da177e4 582
1da177e4
LT
583/**
584 * sock_release - close a socket
585 * @sock: socket to close
586 *
587 * The socket is released from the protocol stack if it has a release
588 * callback, and the inode is then released if the socket is bound to
89bddce5 589 * an inode not a file.
1da177e4 590 */
89bddce5 591
1da177e4
LT
592void sock_release(struct socket *sock)
593{
594 if (sock->ops) {
595 struct module *owner = sock->ops->owner;
596
597 sock->ops->release(sock);
598 sock->ops = NULL;
599 module_put(owner);
600 }
601
eaefd110 602 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 603 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 604
19e8d69c 605 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
606 if (!sock->file) {
607 iput(SOCK_INODE(sock));
608 return;
609 }
89bddce5 610 sock->file = NULL;
1da177e4 611}
c6d409cf 612EXPORT_SYMBOL(sock_release);
1da177e4 613
c14ac945 614void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 615{
140c55d4
ED
616 u8 flags = *tx_flags;
617
c14ac945 618 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
619 flags |= SKBTX_HW_TSTAMP;
620
c14ac945 621 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
622 flags |= SKBTX_SW_TSTAMP;
623
c14ac945 624 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
625 flags |= SKBTX_SCHED_TSTAMP;
626
140c55d4 627 *tx_flags = flags;
20d49473 628}
67cc0d40 629EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 630
d8725c86 631static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 632{
01e97e65 633 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
634 BUG_ON(ret == -EIOCBQUEUED);
635 return ret;
1da177e4
LT
636}
637
/*
 * Send @msg on @sock after consulting the security hook.  A non-zero
 * result from security_socket_sendmsg() is returned as is; otherwise the
 * result of the protocol's sendmsg is returned.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
646
647int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
648 struct kvec *vec, size_t num, size_t size)
649{
6aa24814 650 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 651 return sock_sendmsg(sock, msg);
1da177e4 652}
c6d409cf 653EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 654
92f37fd2
ED
655/*
656 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
657 */
658void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
659 struct sk_buff *skb)
660{
20d49473 661 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 662 struct scm_timestamping tss;
20d49473
PO
663 int empty = 1;
664 struct skb_shared_hwtstamps *shhwtstamps =
665 skb_hwtstamps(skb);
666
667 /* Race occurred between timestamp enabling and packet
668 receiving. Fill in the current time for now. */
2456e855 669 if (need_software_tstamp && skb->tstamp == 0)
20d49473
PO
670 __net_timestamp(skb);
671
672 if (need_software_tstamp) {
673 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
674 struct timeval tv;
675 skb_get_timestamp(skb, &tv);
676 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
677 sizeof(tv), &tv);
678 } else {
f24b9be5
WB
679 struct timespec ts;
680 skb_get_timestampns(skb, &ts);
20d49473 681 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 682 sizeof(ts), &ts);
20d49473
PO
683 }
684 }
685
f24b9be5 686 memset(&tss, 0, sizeof(tss));
c199105d 687 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 688 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 689 empty = 0;
4d276eb6 690 if (shhwtstamps &&
b9f40e21 691 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 692 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 693 empty = 0;
1c885808 694 if (!empty) {
20d49473 695 put_cmsg(msg, SOL_SOCKET,
f24b9be5 696 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808
FY
697
698 if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
699 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
700 skb->len, skb->data);
701 }
92f37fd2 702}
7c81fd8b
ACM
703EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
704
6e3e939f
JB
705void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
706 struct sk_buff *skb)
707{
708 int ack;
709
710 if (!sock_flag(sk, SOCK_WIFI_STATUS))
711 return;
712 if (!skb->wifi_acked_valid)
713 return;
714
715 ack = skb->wifi_acked;
716
717 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
718}
719EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
720
11165f14 721static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
722 struct sk_buff *skb)
3b885787 723{
744d5a3e 724 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 725 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 726 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
727}
728
/*
 * Attach both kinds of receive metadata to @msg: first the timestamp
 * control messages, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 736
1b784140 737static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 738 int flags)
1da177e4 739{
2da62906 740 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
741}
742
/*
 * Receive into @msg from @sock after consulting the security hook.  A
 * non-zero result from security_socket_recvmsg() is returned as is;
 * otherwise the result of the protocol's recvmsg is returned.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 750
c1249c0a
ML
751/**
752 * kernel_recvmsg - Receive a message from a socket (kernel space)
753 * @sock: The socket to receive the message from
754 * @msg: Received message
755 * @vec: Input s/g array for message data
756 * @num: Size of input s/g array
757 * @size: Number of bytes to read
758 * @flags: Message flags (MSG_DONTWAIT, etc...)
759 *
760 * On return the msg structure contains the scatter/gather array passed in the
761 * vec argument. The array is modified so that it consists of the unfilled
762 * portion of the original array.
763 *
764 * The returned value is the total number of bytes received, or an error.
765 */
89bddce5
SH
766int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
767 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
768{
769 mm_segment_t oldfs = get_fs();
770 int result;
771
6aa24814 772 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 773 set_fs(KERNEL_DS);
2da62906 774 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
775 set_fs(oldfs);
776 return result;
777}
c6d409cf 778EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 779
ce1d4d3e
CH
780static ssize_t sock_sendpage(struct file *file, struct page *page,
781 int offset, size_t size, loff_t *ppos, int more)
1da177e4 782{
1da177e4
LT
783 struct socket *sock;
784 int flags;
785
ce1d4d3e
CH
786 sock = file->private_data;
787
35f9c09f
ED
788 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
789 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
790 flags |= more;
ce1d4d3e 791
e6949583 792 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 793}
1da177e4 794
9c55e01c 795static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 796 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
797 unsigned int flags)
798{
799 struct socket *sock = file->private_data;
800
997b37da
RDC
801 if (unlikely(!sock->ops->splice_read))
802 return -EINVAL;
803
9c55e01c
JA
804 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
805}
806
8ae5e030 807static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 808{
6d652330
AV
809 struct file *file = iocb->ki_filp;
810 struct socket *sock = file->private_data;
0345f931 811 struct msghdr msg = {.msg_iter = *to,
812 .msg_iocb = iocb};
8ae5e030 813 ssize_t res;
ce1d4d3e 814
8ae5e030
AV
815 if (file->f_flags & O_NONBLOCK)
816 msg.msg_flags = MSG_DONTWAIT;
817
818 if (iocb->ki_pos != 0)
1da177e4 819 return -ESPIPE;
027445c3 820
66ee59af 821 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
822 return 0;
823
2da62906 824 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
825 *to = msg.msg_iter;
826 return res;
1da177e4
LT
827}
828
8ae5e030 829static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 830{
6d652330
AV
831 struct file *file = iocb->ki_filp;
832 struct socket *sock = file->private_data;
0345f931 833 struct msghdr msg = {.msg_iter = *from,
834 .msg_iocb = iocb};
8ae5e030 835 ssize_t res;
1da177e4 836
8ae5e030 837 if (iocb->ki_pos != 0)
ce1d4d3e 838 return -ESPIPE;
027445c3 839
8ae5e030
AV
840 if (file->f_flags & O_NONBLOCK)
841 msg.msg_flags = MSG_DONTWAIT;
842
6d652330
AV
843 if (sock->type == SOCK_SEQPACKET)
844 msg.msg_flags |= MSG_EOR;
845
d8725c86 846 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
847 *from = msg.msg_iter;
848 return res;
1da177e4
LT
849}
850
1da177e4
LT
851/*
852 * Atomic setting of ioctl hooks to avoid race
853 * with module unload.
854 */
855
4a3e2f71 856static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 857static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 858
881d966b 859void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 860{
4a3e2f71 861 mutex_lock(&br_ioctl_mutex);
1da177e4 862 br_ioctl_hook = hook;
4a3e2f71 863 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
864}
865EXPORT_SYMBOL(brioctl_set);
866
4a3e2f71 867static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 868static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 869
881d966b 870void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 871{
4a3e2f71 872 mutex_lock(&vlan_ioctl_mutex);
1da177e4 873 vlan_ioctl_hook = hook;
4a3e2f71 874 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
875}
876EXPORT_SYMBOL(vlan_ioctl_set);
877
4a3e2f71 878static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 879static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 880
89bddce5 881void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 882{
4a3e2f71 883 mutex_lock(&dlci_ioctl_mutex);
1da177e4 884 dlci_ioctl_hook = hook;
4a3e2f71 885 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
886}
887EXPORT_SYMBOL(dlci_ioctl_set);
888
6b96018b
AB
889static long sock_do_ioctl(struct net *net, struct socket *sock,
890 unsigned int cmd, unsigned long arg)
891{
892 int err;
893 void __user *argp = (void __user *)arg;
894
895 err = sock->ops->ioctl(sock, cmd, arg);
896
897 /*
898 * If this ioctl is unknown try to hand it down
899 * to the NIC driver.
900 */
901 if (err == -ENOIOCTLCMD)
902 err = dev_ioctl(net, cmd, argp);
903
904 return err;
905}
906
1da177e4
LT
907/*
908 * With an ioctl, arg may well be a user mode pointer, but we don't know
909 * what to do with it - that's up to the protocol still.
910 */
911
c62cce2c
AV
912static struct ns_common *get_net_ns(struct ns_common *ns)
913{
914 return &get_net(container_of(ns, struct net, ns))->ns;
915}
916
1da177e4
LT
917static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
918{
919 struct socket *sock;
881d966b 920 struct sock *sk;
1da177e4
LT
921 void __user *argp = (void __user *)arg;
922 int pid, err;
881d966b 923 struct net *net;
1da177e4 924
b69aee04 925 sock = file->private_data;
881d966b 926 sk = sock->sk;
3b1e0a65 927 net = sock_net(sk);
1da177e4 928 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 929 err = dev_ioctl(net, cmd, argp);
1da177e4 930 } else
3d23e349 931#ifdef CONFIG_WEXT_CORE
1da177e4 932 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 933 err = dev_ioctl(net, cmd, argp);
1da177e4 934 } else
3d23e349 935#endif
89bddce5 936 switch (cmd) {
1da177e4
LT
937 case FIOSETOWN:
938 case SIOCSPGRP:
939 err = -EFAULT;
940 if (get_user(pid, (int __user *)argp))
941 break;
e0b93edd
JL
942 f_setown(sock->file, pid, 1);
943 err = 0;
1da177e4
LT
944 break;
945 case FIOGETOWN:
946 case SIOCGPGRP:
609d7fa9 947 err = put_user(f_getown(sock->file),
89bddce5 948 (int __user *)argp);
1da177e4
LT
949 break;
950 case SIOCGIFBR:
951 case SIOCSIFBR:
952 case SIOCBRADDBR:
953 case SIOCBRDELBR:
954 err = -ENOPKG;
955 if (!br_ioctl_hook)
956 request_module("bridge");
957
4a3e2f71 958 mutex_lock(&br_ioctl_mutex);
89bddce5 959 if (br_ioctl_hook)
881d966b 960 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 961 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
962 break;
963 case SIOCGIFVLAN:
964 case SIOCSIFVLAN:
965 err = -ENOPKG;
966 if (!vlan_ioctl_hook)
967 request_module("8021q");
968
4a3e2f71 969 mutex_lock(&vlan_ioctl_mutex);
1da177e4 970 if (vlan_ioctl_hook)
881d966b 971 err = vlan_ioctl_hook(net, argp);
4a3e2f71 972 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 973 break;
1da177e4
LT
974 case SIOCADDDLCI:
975 case SIOCDELDLCI:
976 err = -ENOPKG;
977 if (!dlci_ioctl_hook)
978 request_module("dlci");
979
7512cbf6
PE
980 mutex_lock(&dlci_ioctl_mutex);
981 if (dlci_ioctl_hook)
1da177e4 982 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 983 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 984 break;
c62cce2c
AV
985 case SIOCGSKNS:
986 err = -EPERM;
987 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
988 break;
989
990 err = open_related_ns(&net->ns, get_net_ns);
991 break;
1da177e4 992 default:
6b96018b 993 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 994 break;
89bddce5 995 }
1da177e4
LT
996 return err;
997}
998
999int sock_create_lite(int family, int type, int protocol, struct socket **res)
1000{
1001 int err;
1002 struct socket *sock = NULL;
89bddce5 1003
1da177e4
LT
1004 err = security_socket_create(family, type, protocol, 1);
1005 if (err)
1006 goto out;
1007
1008 sock = sock_alloc();
1009 if (!sock) {
1010 err = -ENOMEM;
1011 goto out;
1012 }
1013
1da177e4 1014 sock->type = type;
7420ed23
VY
1015 err = security_socket_post_create(sock, family, type, protocol, 1);
1016 if (err)
1017 goto out_release;
1018
1da177e4
LT
1019out:
1020 *res = sock;
1021 return err;
7420ed23
VY
1022out_release:
1023 sock_release(sock);
1024 sock = NULL;
1025 goto out;
1da177e4 1026}
c6d409cf 1027EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1028
1029/* No kernel lock held - perfect */
89bddce5 1030static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1031{
cbf55001 1032 unsigned int busy_flag = 0;
1da177e4
LT
1033 struct socket *sock;
1034
1035 /*
89bddce5 1036 * We can't return errors to poll, so it's either yes or no.
1da177e4 1037 */
b69aee04 1038 sock = file->private_data;
2d48d67f 1039
cbf55001 1040 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1041 /* this socket can poll_ll so tell the system call */
cbf55001 1042 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1043
1044 /* once, only if requested by syscall */
cbf55001
ET
1045 if (wait && (wait->_key & POLL_BUSY_LOOP))
1046 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1047 }
1048
cbf55001 1049 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1050}
1051
89bddce5 1052static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1053{
b69aee04 1054 struct socket *sock = file->private_data;
1da177e4
LT
1055
1056 return sock->ops->mmap(file, sock, vma);
1057}
1058
/*
 * ->release for socket files: release the socket embedded in the inode.
 * Always reports success.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1064
1065/*
1066 * Update the socket async list
1067 *
1068 * Fasync_list locking strategy.
1069 *
1070 * 1. fasync_list is modified only under process context socket lock
1071 * i.e. under semaphore.
1072 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1073 * or under socket lock
1da177e4
LT
1074 */
1075
1076static int sock_fasync(int fd, struct file *filp, int on)
1077{
989a2979
ED
1078 struct socket *sock = filp->private_data;
1079 struct sock *sk = sock->sk;
eaefd110 1080 struct socket_wq *wq;
1da177e4 1081
989a2979 1082 if (sk == NULL)
1da177e4 1083 return -EINVAL;
1da177e4
LT
1084
1085 lock_sock(sk);
1e1d04e6 1086 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1087 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1088
eaefd110 1089 if (!wq->fasync_list)
989a2979
ED
1090 sock_reset_flag(sk, SOCK_FASYNC);
1091 else
bcdce719 1092 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1093
989a2979 1094 release_sock(sk);
1da177e4
LT
1095 return 0;
1096}
1097
ceb5d58b 1098/* This function may be called only under rcu_lock */
1da177e4 1099
ceb5d58b 1100int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1101{
ceb5d58b 1102 if (!wq || !wq->fasync_list)
1da177e4 1103 return -1;
ceb5d58b 1104
89bddce5 1105 switch (how) {
8d8ad9d7 1106 case SOCK_WAKE_WAITD:
ceb5d58b 1107 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1108 break;
1109 goto call_kill;
8d8ad9d7 1110 case SOCK_WAKE_SPACE:
ceb5d58b 1111 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1112 break;
1113 /* fall through */
8d8ad9d7 1114 case SOCK_WAKE_IO:
89bddce5 1115call_kill:
43815482 1116 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1117 break;
8d8ad9d7 1118 case SOCK_WAKE_URG:
43815482 1119 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1120 }
ceb5d58b 1121
1da177e4
LT
1122 return 0;
1123}
c6d409cf 1124EXPORT_SYMBOL(sock_wake_async);
1da177e4 1125
721db93a 1126int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1127 struct socket **res, int kern)
1da177e4
LT
1128{
1129 int err;
1130 struct socket *sock;
55737fda 1131 const struct net_proto_family *pf;
1da177e4
LT
1132
1133 /*
89bddce5 1134 * Check protocol is in range
1da177e4
LT
1135 */
1136 if (family < 0 || family >= NPROTO)
1137 return -EAFNOSUPPORT;
1138 if (type < 0 || type >= SOCK_MAX)
1139 return -EINVAL;
1140
1141 /* Compatibility.
1142
1143 This uglymoron is moved from INET layer to here to avoid
1144 deadlock in module load.
1145 */
1146 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1147 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1148 current->comm);
1da177e4
LT
1149 family = PF_PACKET;
1150 }
1151
1152 err = security_socket_create(family, type, protocol, kern);
1153 if (err)
1154 return err;
89bddce5 1155
55737fda
SH
1156 /*
1157 * Allocate the socket and allow the family to set things up. if
1158 * the protocol is 0, the family is instructed to select an appropriate
1159 * default.
1160 */
1161 sock = sock_alloc();
1162 if (!sock) {
e87cc472 1163 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1164 return -ENFILE; /* Not exactly a match, but its the
1165 closest posix thing */
1166 }
1167
1168 sock->type = type;
1169
95a5afca 1170#ifdef CONFIG_MODULES
89bddce5
SH
1171 /* Attempt to load a protocol module if the find failed.
1172 *
1173 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1174 * requested real, full-featured networking support upon configuration.
1175 * Otherwise module support will break!
1176 */
190683a9 1177 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1178 request_module("net-pf-%d", family);
1da177e4
LT
1179#endif
1180
55737fda
SH
1181 rcu_read_lock();
1182 pf = rcu_dereference(net_families[family]);
1183 err = -EAFNOSUPPORT;
1184 if (!pf)
1185 goto out_release;
1da177e4
LT
1186
1187 /*
1188 * We will call the ->create function, that possibly is in a loadable
1189 * module, so we have to bump that loadable module refcnt first.
1190 */
55737fda 1191 if (!try_module_get(pf->owner))
1da177e4
LT
1192 goto out_release;
1193
55737fda
SH
1194 /* Now protected by module ref count */
1195 rcu_read_unlock();
1196
3f378b68 1197 err = pf->create(net, sock, protocol, kern);
55737fda 1198 if (err < 0)
1da177e4 1199 goto out_module_put;
a79af59e 1200
1da177e4
LT
1201 /*
1202 * Now to bump the refcnt of the [loadable] module that owns this
1203 * socket at sock_release time we decrement its refcnt.
1204 */
55737fda
SH
1205 if (!try_module_get(sock->ops->owner))
1206 goto out_module_busy;
1207
1da177e4
LT
1208 /*
1209 * Now that we're done with the ->create function, the [loadable]
1210 * module can have its refcnt decremented
1211 */
55737fda 1212 module_put(pf->owner);
7420ed23
VY
1213 err = security_socket_post_create(sock, family, type, protocol, kern);
1214 if (err)
3b185525 1215 goto out_sock_release;
55737fda 1216 *res = sock;
1da177e4 1217
55737fda
SH
1218 return 0;
1219
1220out_module_busy:
1221 err = -EAFNOSUPPORT;
1da177e4 1222out_module_put:
55737fda
SH
1223 sock->ops = NULL;
1224 module_put(pf->owner);
1225out_sock_release:
1da177e4 1226 sock_release(sock);
55737fda
SH
1227 return err;
1228
1229out_release:
1230 rcu_read_unlock();
1231 goto out_sock_release;
1da177e4 1232}
721db93a 1233EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1234
1235int sock_create(int family, int type, int protocol, struct socket **res)
1236{
1b8d7ae4 1237 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1238}
c6d409cf 1239EXPORT_SYMBOL(sock_create);
1da177e4 1240
/*
 * Create a socket in the given network namespace, with the 'kern'
 * argument of __sock_create() set to 1.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
c6d409cf 1245EXPORT_SYMBOL(sock_create_kern);
1da177e4 1246
3e0fa65f 1247SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1248{
1249 int retval;
1250 struct socket *sock;
a677a039
UD
1251 int flags;
1252
e38b36f3
UD
1253 /* Check the SOCK_* constants for consistency. */
1254 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1255 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1256 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1257 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1258
a677a039 1259 flags = type & ~SOCK_TYPE_MASK;
77d27200 1260 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1261 return -EINVAL;
1262 type &= SOCK_TYPE_MASK;
1da177e4 1263
aaca0bdc
UD
1264 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1265 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1266
1da177e4
LT
1267 retval = sock_create(family, type, protocol, &sock);
1268 if (retval < 0)
1269 goto out;
1270
77d27200 1271 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1272 if (retval < 0)
1273 goto out_release;
1274
1275out:
1276 /* It may be already another descriptor 8) Not kernel problem. */
1277 return retval;
1278
1279out_release:
1280 sock_release(sock);
1281 return retval;
1282}
1283
1284/*
1285 * Create a pair of connected sockets.
1286 */
1287
3e0fa65f
HC
1288SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1289 int __user *, usockvec)
1da177e4
LT
1290{
1291 struct socket *sock1, *sock2;
1292 int fd1, fd2, err;
db349509 1293 struct file *newfile1, *newfile2;
a677a039
UD
1294 int flags;
1295
1296 flags = type & ~SOCK_TYPE_MASK;
77d27200 1297 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1298 return -EINVAL;
1299 type &= SOCK_TYPE_MASK;
1da177e4 1300
aaca0bdc
UD
1301 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1302 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1303
1da177e4
LT
1304 /*
1305 * Obtain the first socket and check if the underlying protocol
1306 * supports the socketpair call.
1307 */
1308
1309 err = sock_create(family, type, protocol, &sock1);
1310 if (err < 0)
1311 goto out;
1312
1313 err = sock_create(family, type, protocol, &sock2);
1314 if (err < 0)
1315 goto out_release_1;
1316
1317 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1318 if (err < 0)
1da177e4
LT
1319 goto out_release_both;
1320
28407630 1321 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1322 if (unlikely(fd1 < 0)) {
1323 err = fd1;
db349509 1324 goto out_release_both;
bf3c23d1 1325 }
d73aa286 1326
28407630 1327 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1328 if (unlikely(fd2 < 0)) {
1329 err = fd2;
d73aa286 1330 goto out_put_unused_1;
28407630
AV
1331 }
1332
aab174f0 1333 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1334 if (IS_ERR(newfile1)) {
28407630 1335 err = PTR_ERR(newfile1);
d73aa286 1336 goto out_put_unused_both;
28407630
AV
1337 }
1338
aab174f0 1339 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1340 if (IS_ERR(newfile2)) {
1341 err = PTR_ERR(newfile2);
d73aa286 1342 goto out_fput_1;
db349509
AV
1343 }
1344
d73aa286
YD
1345 err = put_user(fd1, &usockvec[0]);
1346 if (err)
1347 goto out_fput_both;
1348
1349 err = put_user(fd2, &usockvec[1]);
1350 if (err)
1351 goto out_fput_both;
1352
157cf649 1353 audit_fd_pair(fd1, fd2);
d73aa286 1354
db349509
AV
1355 fd_install(fd1, newfile1);
1356 fd_install(fd2, newfile2);
1da177e4
LT
1357 /* fd1 and fd2 may be already another descriptors.
1358 * Not kernel problem.
1359 */
1360
d73aa286 1361 return 0;
1da177e4 1362
d73aa286
YD
1363out_fput_both:
1364 fput(newfile2);
1365 fput(newfile1);
1366 put_unused_fd(fd2);
1367 put_unused_fd(fd1);
1368 goto out;
1369
1370out_fput_1:
1371 fput(newfile1);
1372 put_unused_fd(fd2);
1373 put_unused_fd(fd1);
1374 sock_release(sock2);
1375 goto out;
1da177e4 1376
d73aa286
YD
1377out_put_unused_both:
1378 put_unused_fd(fd2);
1379out_put_unused_1:
1380 put_unused_fd(fd1);
1da177e4 1381out_release_both:
89bddce5 1382 sock_release(sock2);
1da177e4 1383out_release_1:
89bddce5 1384 sock_release(sock1);
1da177e4
LT
1385out:
1386 return err;
1387}
1388
1da177e4
LT
1389/*
1390 * Bind a name to a socket. Nothing much to do here since it's
1391 * the protocol's responsibility to handle the local address.
1392 *
1393 * We move the socket address to kernel space before we call
1394 * the protocol layer (having also checked the address is ok).
1395 */
1396
20f37034 1397SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1398{
1399 struct socket *sock;
230b1839 1400 struct sockaddr_storage address;
6cb153ca 1401 int err, fput_needed;
1da177e4 1402
89bddce5 1403 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1404 if (sock) {
43db362d 1405 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1406 if (err >= 0) {
1407 err = security_socket_bind(sock,
230b1839 1408 (struct sockaddr *)&address,
89bddce5 1409 addrlen);
6cb153ca
BL
1410 if (!err)
1411 err = sock->ops->bind(sock,
89bddce5 1412 (struct sockaddr *)
230b1839 1413 &address, addrlen);
1da177e4 1414 }
6cb153ca 1415 fput_light(sock->file, fput_needed);
89bddce5 1416 }
1da177e4
LT
1417 return err;
1418}
1419
1da177e4
LT
1420/*
1421 * Perform a listen. Basically, we allow the protocol to do anything
1422 * necessary for a listen, and if that works, we mark the socket as
1423 * ready for listening.
1424 */
1425
3e0fa65f 1426SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1427{
1428 struct socket *sock;
6cb153ca 1429 int err, fput_needed;
b8e1f9b5 1430 int somaxconn;
89bddce5
SH
1431
1432 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1433 if (sock) {
8efa6e93 1434 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1435 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1436 backlog = somaxconn;
1da177e4
LT
1437
1438 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1439 if (!err)
1440 err = sock->ops->listen(sock, backlog);
1da177e4 1441
6cb153ca 1442 fput_light(sock->file, fput_needed);
1da177e4
LT
1443 }
1444 return err;
1445}
1446
1da177e4
LT
1447/*
1448 * For accept, we attempt to create a new socket, set up the link
1449 * with the client, wake up the client, then return the new
1450 * connected fd. We collect the address of the connector in kernel
1451 * space and move it to user at the very end. This is unclean because
1452 * we open the socket then return an error.
1453 *
1454 * 1003.1g adds the ability to recvmsg() to query connection pending
1455 * status to recvmsg. We need to add that support in a way that's
1456 * clean when we restructure accept also.
1457 */
1458
20f37034
HC
1459SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1460 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1461{
1462 struct socket *sock, *newsock;
39d8c1b6 1463 struct file *newfile;
6cb153ca 1464 int err, len, newfd, fput_needed;
230b1839 1465 struct sockaddr_storage address;
1da177e4 1466
77d27200 1467 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1468 return -EINVAL;
1469
1470 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1471 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1472
6cb153ca 1473 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1474 if (!sock)
1475 goto out;
1476
1477 err = -ENFILE;
c6d409cf
ED
1478 newsock = sock_alloc();
1479 if (!newsock)
1da177e4
LT
1480 goto out_put;
1481
1482 newsock->type = sock->type;
1483 newsock->ops = sock->ops;
1484
1da177e4
LT
1485 /*
1486 * We don't need try_module_get here, as the listening socket (sock)
1487 * has the protocol module (sock->ops->owner) held.
1488 */
1489 __module_get(newsock->ops->owner);
1490
28407630 1491 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1492 if (unlikely(newfd < 0)) {
1493 err = newfd;
9a1875e6
DM
1494 sock_release(newsock);
1495 goto out_put;
39d8c1b6 1496 }
aab174f0 1497 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1498 if (IS_ERR(newfile)) {
28407630
AV
1499 err = PTR_ERR(newfile);
1500 put_unused_fd(newfd);
1501 sock_release(newsock);
1502 goto out_put;
1503 }
39d8c1b6 1504
a79af59e
FF
1505 err = security_socket_accept(sock, newsock);
1506 if (err)
39d8c1b6 1507 goto out_fd;
a79af59e 1508
1da177e4
LT
1509 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1510 if (err < 0)
39d8c1b6 1511 goto out_fd;
1da177e4
LT
1512
1513 if (upeer_sockaddr) {
230b1839 1514 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1515 &len, 2) < 0) {
1da177e4 1516 err = -ECONNABORTED;
39d8c1b6 1517 goto out_fd;
1da177e4 1518 }
43db362d 1519 err = move_addr_to_user(&address,
230b1839 1520 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1521 if (err < 0)
39d8c1b6 1522 goto out_fd;
1da177e4
LT
1523 }
1524
1525 /* File flags are not inherited via accept() unlike another OSes. */
1526
39d8c1b6
DM
1527 fd_install(newfd, newfile);
1528 err = newfd;
1da177e4 1529
1da177e4 1530out_put:
6cb153ca 1531 fput_light(sock->file, fput_needed);
1da177e4
LT
1532out:
1533 return err;
39d8c1b6 1534out_fd:
9606a216 1535 fput(newfile);
39d8c1b6 1536 put_unused_fd(newfd);
1da177e4
LT
1537 goto out_put;
1538}
1539
20f37034
HC
1540SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1541 int __user *, upeer_addrlen)
aaca0bdc 1542{
de11defe 1543 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1544}
1545
1da177e4
LT
1546/*
1547 * Attempt to connect to a socket with the server address. The address
1548 * is in user space so we verify it is OK and move it to kernel space.
1549 *
1550 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1551 * break bindings
1552 *
1553 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1554 * other SEQPACKET protocols that take time to connect() as it doesn't
1555 * include the -EINPROGRESS status for such sockets.
1556 */
1557
20f37034
HC
1558SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1559 int, addrlen)
1da177e4
LT
1560{
1561 struct socket *sock;
230b1839 1562 struct sockaddr_storage address;
6cb153ca 1563 int err, fput_needed;
1da177e4 1564
6cb153ca 1565 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1566 if (!sock)
1567 goto out;
43db362d 1568 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1569 if (err < 0)
1570 goto out_put;
1571
89bddce5 1572 err =
230b1839 1573 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1574 if (err)
1575 goto out_put;
1576
230b1839 1577 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1578 sock->file->f_flags);
1579out_put:
6cb153ca 1580 fput_light(sock->file, fput_needed);
1da177e4
LT
1581out:
1582 return err;
1583}
1584
1585/*
1586 * Get the local address ('name') of a socket object. Move the obtained
1587 * name to user space.
1588 */
1589
20f37034
HC
1590SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1591 int __user *, usockaddr_len)
1da177e4
LT
1592{
1593 struct socket *sock;
230b1839 1594 struct sockaddr_storage address;
6cb153ca 1595 int len, err, fput_needed;
89bddce5 1596
6cb153ca 1597 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1598 if (!sock)
1599 goto out;
1600
1601 err = security_socket_getsockname(sock);
1602 if (err)
1603 goto out_put;
1604
230b1839 1605 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1606 if (err)
1607 goto out_put;
43db362d 1608 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1609
1610out_put:
6cb153ca 1611 fput_light(sock->file, fput_needed);
1da177e4
LT
1612out:
1613 return err;
1614}
1615
1616/*
1617 * Get the remote address ('name') of a socket object. Move the obtained
1618 * name to user space.
1619 */
1620
20f37034
HC
1621SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1622 int __user *, usockaddr_len)
1da177e4
LT
1623{
1624 struct socket *sock;
230b1839 1625 struct sockaddr_storage address;
6cb153ca 1626 int len, err, fput_needed;
1da177e4 1627
89bddce5
SH
1628 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1629 if (sock != NULL) {
1da177e4
LT
1630 err = security_socket_getpeername(sock);
1631 if (err) {
6cb153ca 1632 fput_light(sock->file, fput_needed);
1da177e4
LT
1633 return err;
1634 }
1635
89bddce5 1636 err =
230b1839 1637 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1638 1);
1da177e4 1639 if (!err)
43db362d 1640 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1641 usockaddr_len);
6cb153ca 1642 fput_light(sock->file, fput_needed);
1da177e4
LT
1643 }
1644 return err;
1645}
1646
1647/*
1648 * Send a datagram to a given address. We move the address into kernel
1649 * space and check the user space data area is readable before invoking
1650 * the protocol.
1651 */
1652
3e0fa65f 1653SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1654 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1655 int, addr_len)
1da177e4
LT
1656{
1657 struct socket *sock;
230b1839 1658 struct sockaddr_storage address;
1da177e4
LT
1659 int err;
1660 struct msghdr msg;
1661 struct iovec iov;
6cb153ca 1662 int fput_needed;
6cb153ca 1663
602bd0e9
AV
1664 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1665 if (unlikely(err))
1666 return err;
de0fa95c
PE
1667 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1668 if (!sock)
4387ff75 1669 goto out;
6cb153ca 1670
89bddce5 1671 msg.msg_name = NULL;
89bddce5
SH
1672 msg.msg_control = NULL;
1673 msg.msg_controllen = 0;
1674 msg.msg_namelen = 0;
6cb153ca 1675 if (addr) {
43db362d 1676 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1677 if (err < 0)
1678 goto out_put;
230b1839 1679 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1680 msg.msg_namelen = addr_len;
1da177e4
LT
1681 }
1682 if (sock->file->f_flags & O_NONBLOCK)
1683 flags |= MSG_DONTWAIT;
1684 msg.msg_flags = flags;
d8725c86 1685 err = sock_sendmsg(sock, &msg);
1da177e4 1686
89bddce5 1687out_put:
de0fa95c 1688 fput_light(sock->file, fput_needed);
4387ff75 1689out:
1da177e4
LT
1690 return err;
1691}
1692
1693/*
89bddce5 1694 * Send a datagram down a socket.
1da177e4
LT
1695 */
1696
3e0fa65f 1697SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1698 unsigned int, flags)
1da177e4
LT
1699{
1700 return sys_sendto(fd, buff, len, flags, NULL, 0);
1701}
1702
1703/*
89bddce5 1704 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1705 * sender. We verify the buffers are writable and if needed move the
1706 * sender address from kernel to user space.
1707 */
1708
3e0fa65f 1709SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1710 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1711 int __user *, addr_len)
1da177e4
LT
1712{
1713 struct socket *sock;
1714 struct iovec iov;
1715 struct msghdr msg;
230b1839 1716 struct sockaddr_storage address;
89bddce5 1717 int err, err2;
6cb153ca
BL
1718 int fput_needed;
1719
602bd0e9
AV
1720 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1721 if (unlikely(err))
1722 return err;
de0fa95c 1723 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1724 if (!sock)
de0fa95c 1725 goto out;
1da177e4 1726
89bddce5
SH
1727 msg.msg_control = NULL;
1728 msg.msg_controllen = 0;
f3d33426
HFS
1729 /* Save some cycles and don't copy the address if not needed */
1730 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1731 /* We assume all kernel code knows the size of sockaddr_storage */
1732 msg.msg_namelen = 0;
130ed5d1 1733 msg.msg_iocb = NULL;
1da177e4
LT
1734 if (sock->file->f_flags & O_NONBLOCK)
1735 flags |= MSG_DONTWAIT;
2da62906 1736 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1737
89bddce5 1738 if (err >= 0 && addr != NULL) {
43db362d 1739 err2 = move_addr_to_user(&address,
230b1839 1740 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1741 if (err2 < 0)
1742 err = err2;
1da177e4 1743 }
de0fa95c
PE
1744
1745 fput_light(sock->file, fput_needed);
4387ff75 1746out:
1da177e4
LT
1747 return err;
1748}
1749
1750/*
89bddce5 1751 * Receive a datagram from a socket.
1da177e4
LT
1752 */
1753
b7c0ddf5
JG
1754SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1755 unsigned int, flags)
1da177e4
LT
1756{
1757 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1758}
1759
1760/*
1761 * Set a socket option. Because we don't know the option lengths we have
1762 * to pass the user mode parameter for the protocols to sort out.
1763 */
1764
20f37034
HC
1765SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1766 char __user *, optval, int, optlen)
1da177e4 1767{
6cb153ca 1768 int err, fput_needed;
1da177e4
LT
1769 struct socket *sock;
1770
1771 if (optlen < 0)
1772 return -EINVAL;
89bddce5
SH
1773
1774 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1775 if (sock != NULL) {
1776 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1777 if (err)
1778 goto out_put;
1da177e4
LT
1779
1780 if (level == SOL_SOCKET)
89bddce5
SH
1781 err =
1782 sock_setsockopt(sock, level, optname, optval,
1783 optlen);
1da177e4 1784 else
89bddce5
SH
1785 err =
1786 sock->ops->setsockopt(sock, level, optname, optval,
1787 optlen);
6cb153ca
BL
1788out_put:
1789 fput_light(sock->file, fput_needed);
1da177e4
LT
1790 }
1791 return err;
1792}
1793
1794/*
1795 * Get a socket option. Because we don't know the option lengths we have
1796 * to pass a user mode parameter for the protocols to sort out.
1797 */
1798
20f37034
HC
1799SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1800 char __user *, optval, int __user *, optlen)
1da177e4 1801{
6cb153ca 1802 int err, fput_needed;
1da177e4
LT
1803 struct socket *sock;
1804
89bddce5
SH
1805 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1806 if (sock != NULL) {
6cb153ca
BL
1807 err = security_socket_getsockopt(sock, level, optname);
1808 if (err)
1809 goto out_put;
1da177e4
LT
1810
1811 if (level == SOL_SOCKET)
89bddce5
SH
1812 err =
1813 sock_getsockopt(sock, level, optname, optval,
1814 optlen);
1da177e4 1815 else
89bddce5
SH
1816 err =
1817 sock->ops->getsockopt(sock, level, optname, optval,
1818 optlen);
6cb153ca
BL
1819out_put:
1820 fput_light(sock->file, fput_needed);
1da177e4
LT
1821 }
1822 return err;
1823}
1824
1da177e4
LT
1825/*
1826 * Shutdown a socket.
1827 */
1828
754fe8d2 1829SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1830{
6cb153ca 1831 int err, fput_needed;
1da177e4
LT
1832 struct socket *sock;
1833
89bddce5
SH
1834 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1835 if (sock != NULL) {
1da177e4 1836 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1837 if (!err)
1838 err = sock->ops->shutdown(sock, how);
1839 fput_light(sock->file, fput_needed);
1da177e4
LT
1840 }
1841 return err;
1842}
1843
/*
 * Helpers yielding the address of a msghdr field from either the native
 * or the compat layout; the fields involved have the same type
 * (int / unsigned) in both on our platforms.
 *
 * COMPAT_MSG(msg, member) expands to a choice between msg->member and
 * msg_compat->member (the name is built by pasting "_compat" onto the
 * msg argument), selected by MSG_CMSG_COMPAT in a variable named 'flags'.
 * All three names must be in scope at the expansion site.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1850
c71d8ebe
TH
1851struct used_address {
1852 struct sockaddr_storage name;
1853 unsigned int name_len;
1854};
1855
da184284
AV
1856static int copy_msghdr_from_user(struct msghdr *kmsg,
1857 struct user_msghdr __user *umsg,
1858 struct sockaddr __user **save_addr,
1859 struct iovec **iov)
1661bf36 1860{
08adb7da
AV
1861 struct sockaddr __user *uaddr;
1862 struct iovec __user *uiov;
c0371da6 1863 size_t nr_segs;
08adb7da
AV
1864 ssize_t err;
1865
1866 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1867 __get_user(uaddr, &umsg->msg_name) ||
1868 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1869 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1870 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1871 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1872 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1873 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1874 return -EFAULT;
dbb490b9 1875
08adb7da 1876 if (!uaddr)
6a2a2b3a
AS
1877 kmsg->msg_namelen = 0;
1878
dbb490b9
ML
1879 if (kmsg->msg_namelen < 0)
1880 return -EINVAL;
1881
1661bf36 1882 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1883 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1884
1885 if (save_addr)
1886 *save_addr = uaddr;
1887
1888 if (uaddr && kmsg->msg_namelen) {
1889 if (!save_addr) {
1890 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1891 kmsg->msg_name);
1892 if (err < 0)
1893 return err;
1894 }
1895 } else {
1896 kmsg->msg_name = NULL;
1897 kmsg->msg_namelen = 0;
1898 }
1899
c0371da6 1900 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1901 return -EMSGSIZE;
1902
0345f931 1903 kmsg->msg_iocb = NULL;
1904
da184284
AV
1905 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1906 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1907}
1908
666547ff 1909static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1910 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1911 struct used_address *used_address,
1912 unsigned int allowed_msghdr_flags)
1da177e4 1913{
89bddce5
SH
1914 struct compat_msghdr __user *msg_compat =
1915 (struct compat_msghdr __user *)msg;
230b1839 1916 struct sockaddr_storage address;
1da177e4 1917 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1918 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1919 __aligned(sizeof(__kernel_size_t));
89bddce5 1920 /* 20 is size of ipv6_pktinfo */
1da177e4 1921 unsigned char *ctl_buf = ctl;
d8725c86 1922 int ctl_len;
08adb7da 1923 ssize_t err;
89bddce5 1924
08adb7da 1925 msg_sys->msg_name = &address;
1da177e4 1926
08449320 1927 if (MSG_CMSG_COMPAT & flags)
08adb7da 1928 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1929 else
08adb7da 1930 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1931 if (err < 0)
da184284 1932 return err;
1da177e4
LT
1933
1934 err = -ENOBUFS;
1935
228e548e 1936 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1937 goto out_freeiov;
28a94d8f 1938 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1939 ctl_len = msg_sys->msg_controllen;
1da177e4 1940 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1941 err =
228e548e 1942 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1943 sizeof(ctl));
1da177e4
LT
1944 if (err)
1945 goto out_freeiov;
228e548e
AB
1946 ctl_buf = msg_sys->msg_control;
1947 ctl_len = msg_sys->msg_controllen;
1da177e4 1948 } else if (ctl_len) {
ac4340fc
DM
1949 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
1950 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 1951 if (ctl_len > sizeof(ctl)) {
1da177e4 1952 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1953 if (ctl_buf == NULL)
1da177e4
LT
1954 goto out_freeiov;
1955 }
1956 err = -EFAULT;
1957 /*
228e548e 1958 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1959 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1960 * checking falls down on this.
1961 */
fb8621bb 1962 if (copy_from_user(ctl_buf,
228e548e 1963 (void __user __force *)msg_sys->msg_control,
89bddce5 1964 ctl_len))
1da177e4 1965 goto out_freectl;
228e548e 1966 msg_sys->msg_control = ctl_buf;
1da177e4 1967 }
228e548e 1968 msg_sys->msg_flags = flags;
1da177e4
LT
1969
1970 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1971 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1972 /*
1973 * If this is sendmmsg() and current destination address is same as
1974 * previously succeeded address, omit asking LSM's decision.
1975 * used_address->name_len is initialized to UINT_MAX so that the first
1976 * destination address never matches.
1977 */
bc909d9d
MD
1978 if (used_address && msg_sys->msg_name &&
1979 used_address->name_len == msg_sys->msg_namelen &&
1980 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1981 used_address->name_len)) {
d8725c86 1982 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1983 goto out_freectl;
1984 }
d8725c86 1985 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1986 /*
1987 * If this is sendmmsg() and sending to current destination address was
1988 * successful, remember it.
1989 */
1990 if (used_address && err >= 0) {
1991 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1992 if (msg_sys->msg_name)
1993 memcpy(&used_address->name, msg_sys->msg_name,
1994 used_address->name_len);
c71d8ebe 1995 }
1da177e4
LT
1996
1997out_freectl:
89bddce5 1998 if (ctl_buf != ctl)
1da177e4
LT
1999 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2000out_freeiov:
da184284 2001 kfree(iov);
228e548e
AB
2002 return err;
2003}
2004
2005/*
2006 * BSD sendmsg interface
2007 */
2008
666547ff 2009long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2010{
2011 int fput_needed, err;
2012 struct msghdr msg_sys;
1be374a0
AL
2013 struct socket *sock;
2014
1be374a0 2015 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2016 if (!sock)
2017 goto out;
2018
28a94d8f 2019 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2020
6cb153ca 2021 fput_light(sock->file, fput_needed);
89bddce5 2022out:
1da177e4
LT
2023 return err;
2024}
2025
666547ff 2026SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2027{
2028 if (flags & MSG_CMSG_COMPAT)
2029 return -EINVAL;
2030 return __sys_sendmsg(fd, msg, flags);
2031}
2032
228e548e
AB
2033/*
2034 * Linux sendmmsg interface
2035 */
2036
2037int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2038 unsigned int flags)
2039{
2040 int fput_needed, err, datagrams;
2041 struct socket *sock;
2042 struct mmsghdr __user *entry;
2043 struct compat_mmsghdr __user *compat_entry;
2044 struct msghdr msg_sys;
c71d8ebe 2045 struct used_address used_address;
f092276d 2046 unsigned int oflags = flags;
228e548e 2047
98382f41
AB
2048 if (vlen > UIO_MAXIOV)
2049 vlen = UIO_MAXIOV;
228e548e
AB
2050
2051 datagrams = 0;
2052
2053 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2054 if (!sock)
2055 return err;
2056
c71d8ebe 2057 used_address.name_len = UINT_MAX;
228e548e
AB
2058 entry = mmsg;
2059 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2060 err = 0;
f092276d 2061 flags |= MSG_BATCH;
228e548e
AB
2062
2063 while (datagrams < vlen) {
f092276d
TH
2064 if (datagrams == vlen - 1)
2065 flags = oflags;
2066
228e548e 2067 if (MSG_CMSG_COMPAT & flags) {
666547ff 2068 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2069 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2070 if (err < 0)
2071 break;
2072 err = __put_user(err, &compat_entry->msg_len);
2073 ++compat_entry;
2074 } else {
a7526eb5 2075 err = ___sys_sendmsg(sock,
666547ff 2076 (struct user_msghdr __user *)entry,
28a94d8f 2077 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2078 if (err < 0)
2079 break;
2080 err = put_user(err, &entry->msg_len);
2081 ++entry;
2082 }
2083
2084 if (err)
2085 break;
2086 ++datagrams;
3023898b
SHY
2087 if (msg_data_left(&msg_sys))
2088 break;
a78cb84c 2089 cond_resched();
228e548e
AB
2090 }
2091
228e548e
AB
2092 fput_light(sock->file, fput_needed);
2093
728ffb86
AB
2094 /* We only return an error if no datagrams were able to be sent */
2095 if (datagrams != 0)
228e548e
AB
2096 return datagrams;
2097
228e548e
AB
2098 return err;
2099}
2100
2101SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2102 unsigned int, vlen, unsigned int, flags)
2103{
1be374a0
AL
2104 if (flags & MSG_CMSG_COMPAT)
2105 return -EINVAL;
228e548e
AB
2106 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2107}
2108
666547ff 2109static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2110 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2111{
89bddce5
SH
2112 struct compat_msghdr __user *msg_compat =
2113 (struct compat_msghdr __user *)msg;
1da177e4 2114 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2115 struct iovec *iov = iovstack;
1da177e4 2116 unsigned long cmsg_ptr;
2da62906 2117 int len;
08adb7da 2118 ssize_t err;
1da177e4
LT
2119
2120 /* kernel mode address */
230b1839 2121 struct sockaddr_storage addr;
1da177e4
LT
2122
2123 /* user mode address pointers */
2124 struct sockaddr __user *uaddr;
08adb7da 2125 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2126
08adb7da 2127 msg_sys->msg_name = &addr;
1da177e4 2128
f3d33426 2129 if (MSG_CMSG_COMPAT & flags)
08adb7da 2130 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2131 else
08adb7da 2132 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2133 if (err < 0)
da184284 2134 return err;
1da177e4 2135
a2e27255
ACM
2136 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2137 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2138
f3d33426
HFS
2139 /* We assume all kernel code knows the size of sockaddr_storage */
2140 msg_sys->msg_namelen = 0;
2141
1da177e4
LT
2142 if (sock->file->f_flags & O_NONBLOCK)
2143 flags |= MSG_DONTWAIT;
2da62906 2144 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2145 if (err < 0)
2146 goto out_freeiov;
2147 len = err;
2148
2149 if (uaddr != NULL) {
43db362d 2150 err = move_addr_to_user(&addr,
a2e27255 2151 msg_sys->msg_namelen, uaddr,
89bddce5 2152 uaddr_len);
1da177e4
LT
2153 if (err < 0)
2154 goto out_freeiov;
2155 }
a2e27255 2156 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2157 COMPAT_FLAGS(msg));
1da177e4
LT
2158 if (err)
2159 goto out_freeiov;
2160 if (MSG_CMSG_COMPAT & flags)
a2e27255 2161 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2162 &msg_compat->msg_controllen);
2163 else
a2e27255 2164 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2165 &msg->msg_controllen);
2166 if (err)
2167 goto out_freeiov;
2168 err = len;
2169
2170out_freeiov:
da184284 2171 kfree(iov);
a2e27255
ACM
2172 return err;
2173}
2174
2175/*
2176 * BSD recvmsg interface
2177 */
2178
666547ff 2179long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2180{
2181 int fput_needed, err;
2182 struct msghdr msg_sys;
1be374a0
AL
2183 struct socket *sock;
2184
1be374a0 2185 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2186 if (!sock)
2187 goto out;
2188
a7526eb5 2189 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2190
6cb153ca 2191 fput_light(sock->file, fput_needed);
1da177e4
LT
2192out:
2193 return err;
2194}
2195
666547ff 2196SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2197 unsigned int, flags)
2198{
2199 if (flags & MSG_CMSG_COMPAT)
2200 return -EINVAL;
2201 return __sys_recvmsg(fd, msg, flags);
2202}
2203
a2e27255
ACM
2204/*
2205 * Linux recvmmsg interface
2206 */
2207
2208int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2209 unsigned int flags, struct timespec *timeout)
2210{
2211 int fput_needed, err, datagrams;
2212 struct socket *sock;
2213 struct mmsghdr __user *entry;
d7256d0e 2214 struct compat_mmsghdr __user *compat_entry;
a2e27255 2215 struct msghdr msg_sys;
766b9f92
DD
2216 struct timespec64 end_time;
2217 struct timespec64 timeout64;
a2e27255
ACM
2218
2219 if (timeout &&
2220 poll_select_set_timeout(&end_time, timeout->tv_sec,
2221 timeout->tv_nsec))
2222 return -EINVAL;
2223
2224 datagrams = 0;
2225
2226 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2227 if (!sock)
2228 return err;
2229
2230 err = sock_error(sock->sk);
2231 if (err)
2232 goto out_put;
2233
2234 entry = mmsg;
d7256d0e 2235 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2236
2237 while (datagrams < vlen) {
2238 /*
2239 * No need to ask LSM for more than the first datagram.
2240 */
d7256d0e 2241 if (MSG_CMSG_COMPAT & flags) {
666547ff 2242 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2243 &msg_sys, flags & ~MSG_WAITFORONE,
2244 datagrams);
d7256d0e
JMG
2245 if (err < 0)
2246 break;
2247 err = __put_user(err, &compat_entry->msg_len);
2248 ++compat_entry;
2249 } else {
a7526eb5 2250 err = ___sys_recvmsg(sock,
666547ff 2251 (struct user_msghdr __user *)entry,
a7526eb5
AL
2252 &msg_sys, flags & ~MSG_WAITFORONE,
2253 datagrams);
d7256d0e
JMG
2254 if (err < 0)
2255 break;
2256 err = put_user(err, &entry->msg_len);
2257 ++entry;
2258 }
2259
a2e27255
ACM
2260 if (err)
2261 break;
a2e27255
ACM
2262 ++datagrams;
2263
71c5c159
BB
2264 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2265 if (flags & MSG_WAITFORONE)
2266 flags |= MSG_DONTWAIT;
2267
a2e27255 2268 if (timeout) {
766b9f92
DD
2269 ktime_get_ts64(&timeout64);
2270 *timeout = timespec64_to_timespec(
2271 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2272 if (timeout->tv_sec < 0) {
2273 timeout->tv_sec = timeout->tv_nsec = 0;
2274 break;
2275 }
2276
2277 /* Timeout, return less than vlen datagrams */
2278 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2279 break;
2280 }
2281
2282 /* Out of band data, return right away */
2283 if (msg_sys.msg_flags & MSG_OOB)
2284 break;
a78cb84c 2285 cond_resched();
a2e27255
ACM
2286 }
2287
a2e27255 2288 if (err == 0)
34b88a68
ACM
2289 goto out_put;
2290
2291 if (datagrams == 0) {
2292 datagrams = err;
2293 goto out_put;
2294 }
a2e27255 2295
34b88a68
ACM
2296 /*
2297 * We may return less entries than requested (vlen) if the
2298 * sock is non block and there aren't enough datagrams...
2299 */
2300 if (err != -EAGAIN) {
a2e27255 2301 /*
34b88a68
ACM
2302 * ... or if recvmsg returns an error after we
2303 * received some datagrams, where we record the
2304 * error to return on the next call or if the
2305 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2306 */
34b88a68 2307 sock->sk->sk_err = -err;
a2e27255 2308 }
34b88a68
ACM
2309out_put:
2310 fput_light(sock->file, fput_needed);
a2e27255 2311
34b88a68 2312 return datagrams;
a2e27255
ACM
2313}
2314
2315SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2316 unsigned int, vlen, unsigned int, flags,
2317 struct timespec __user *, timeout)
2318{
2319 int datagrams;
2320 struct timespec timeout_sys;
2321
1be374a0
AL
2322 if (flags & MSG_CMSG_COMPAT)
2323 return -EINVAL;
2324
a2e27255
ACM
2325 if (!timeout)
2326 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2327
2328 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2329 return -EFAULT;
2330
2331 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2332
2333 if (datagrams > 0 &&
2334 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2335 datagrams = -EFAULT;
2336
2337 return datagrams;
2338}
2339
2340#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * Slot 0 is a placeholder: sys_socketcall rejects call numbers below 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2351
2352/*
89bddce5 2353 * System call vectors.
1da177e4
LT
2354 *
2355 * Argument checking cleaned up. Saved 20% in size.
2356 * This function doesn't need to set the kernel lock because
89bddce5 2357 * it is set by the callees.
1da177e4
LT
2358 */
2359
3e0fa65f 2360SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2361{
2950fa9d 2362 unsigned long a[AUDITSC_ARGS];
89bddce5 2363 unsigned long a0, a1;
1da177e4 2364 int err;
47379052 2365 unsigned int len;
1da177e4 2366
228e548e 2367 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2368 return -EINVAL;
2369
47379052
AV
2370 len = nargs[call];
2371 if (len > sizeof(a))
2372 return -EINVAL;
2373
1da177e4 2374 /* copy_from_user should be SMP safe. */
47379052 2375 if (copy_from_user(a, args, len))
1da177e4 2376 return -EFAULT;
3ec3b2fb 2377
2950fa9d
CG
2378 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2379 if (err)
2380 return err;
3ec3b2fb 2381
89bddce5
SH
2382 a0 = a[0];
2383 a1 = a[1];
2384
2385 switch (call) {
2386 case SYS_SOCKET:
2387 err = sys_socket(a0, a1, a[2]);
2388 break;
2389 case SYS_BIND:
2390 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2391 break;
2392 case SYS_CONNECT:
2393 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2394 break;
2395 case SYS_LISTEN:
2396 err = sys_listen(a0, a1);
2397 break;
2398 case SYS_ACCEPT:
de11defe
UD
2399 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2400 (int __user *)a[2], 0);
89bddce5
SH
2401 break;
2402 case SYS_GETSOCKNAME:
2403 err =
2404 sys_getsockname(a0, (struct sockaddr __user *)a1,
2405 (int __user *)a[2]);
2406 break;
2407 case SYS_GETPEERNAME:
2408 err =
2409 sys_getpeername(a0, (struct sockaddr __user *)a1,
2410 (int __user *)a[2]);
2411 break;
2412 case SYS_SOCKETPAIR:
2413 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2414 break;
2415 case SYS_SEND:
2416 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2417 break;
2418 case SYS_SENDTO:
2419 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2420 (struct sockaddr __user *)a[4], a[5]);
2421 break;
2422 case SYS_RECV:
2423 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2424 break;
2425 case SYS_RECVFROM:
2426 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2427 (struct sockaddr __user *)a[4],
2428 (int __user *)a[5]);
2429 break;
2430 case SYS_SHUTDOWN:
2431 err = sys_shutdown(a0, a1);
2432 break;
2433 case SYS_SETSOCKOPT:
2434 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2435 break;
2436 case SYS_GETSOCKOPT:
2437 err =
2438 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2439 (int __user *)a[4]);
2440 break;
2441 case SYS_SENDMSG:
666547ff 2442 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2443 break;
228e548e
AB
2444 case SYS_SENDMMSG:
2445 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2446 break;
89bddce5 2447 case SYS_RECVMSG:
666547ff 2448 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2449 break;
a2e27255
ACM
2450 case SYS_RECVMMSG:
2451 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2452 (struct timespec __user *)a[4]);
2453 break;
de11defe
UD
2454 case SYS_ACCEPT4:
2455 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2456 (int __user *)a[2], a[3]);
aaca0bdc 2457 break;
89bddce5
SH
2458 default:
2459 err = -EINVAL;
2460 break;
1da177e4
LT
2461 }
2462 return err;
2463}
2464
89bddce5 2465#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2466
55737fda
SH
2467/**
2468 * sock_register - add a socket protocol handler
2469 * @ops: description of protocol
2470 *
1da177e4
LT
2471 * This function is called by a protocol handler that wants to
2472 * advertise its address family, and have it linked into the
e793c0f7 2473 * socket interface. The value ops->family corresponds to the
55737fda 2474 * socket system call protocol family.
1da177e4 2475 */
f0fd27d4 2476int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2477{
2478 int err;
2479
2480 if (ops->family >= NPROTO) {
3410f22e 2481 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2482 return -ENOBUFS;
2483 }
55737fda
SH
2484
2485 spin_lock(&net_family_lock);
190683a9
ED
2486 if (rcu_dereference_protected(net_families[ops->family],
2487 lockdep_is_held(&net_family_lock)))
55737fda
SH
2488 err = -EEXIST;
2489 else {
cf778b00 2490 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2491 err = 0;
2492 }
55737fda
SH
2493 spin_unlock(&net_family_lock);
2494
3410f22e 2495 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2496 return err;
2497}
c6d409cf 2498EXPORT_SYMBOL(sock_register);
1da177e4 2499
55737fda
SH
2500/**
2501 * sock_unregister - remove a protocol handler
2502 * @family: protocol family to remove
2503 *
1da177e4
LT
2504 * This function is called by a protocol handler that wants to
2505 * remove its address family, and have it unlinked from the
55737fda
SH
2506 * new socket creation.
2507 *
2508 * If protocol handler is a module, then it can use module reference
2509 * counts to protect against new references. If protocol handler is not
2510 * a module then it needs to provide its own protection in
2511 * the ops->create routine.
1da177e4 2512 */
f0fd27d4 2513void sock_unregister(int family)
1da177e4 2514{
f0fd27d4 2515 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2516
55737fda 2517 spin_lock(&net_family_lock);
a9b3cd7f 2518 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2519 spin_unlock(&net_family_lock);
2520
2521 synchronize_rcu();
2522
3410f22e 2523 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2524}
c6d409cf 2525EXPORT_SYMBOL(sock_unregister);
1da177e4 2526
77d76ea3 2527static int __init sock_init(void)
1da177e4 2528{
b3e19d92 2529 int err;
2ca794e5
EB
2530 /*
2531 * Initialize the network sysctl infrastructure.
2532 */
2533 err = net_sysctl_init();
2534 if (err)
2535 goto out;
b3e19d92 2536
1da177e4 2537 /*
89bddce5 2538 * Initialize skbuff SLAB cache
1da177e4
LT
2539 */
2540 skb_init();
1da177e4
LT
2541
2542 /*
89bddce5 2543 * Initialize the protocols module.
1da177e4
LT
2544 */
2545
2546 init_inodecache();
b3e19d92
NP
2547
2548 err = register_filesystem(&sock_fs_type);
2549 if (err)
2550 goto out_fs;
1da177e4 2551 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2552 if (IS_ERR(sock_mnt)) {
2553 err = PTR_ERR(sock_mnt);
2554 goto out_mount;
2555 }
77d76ea3
AK
2556
2557 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2558 */
2559
2560#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2561 err = netfilter_init();
2562 if (err)
2563 goto out;
1da177e4 2564#endif
cbeb321a 2565
408eccce 2566 ptp_classifier_init();
c1f19b51 2567
b3e19d92
NP
2568out:
2569 return err;
2570
2571out_mount:
2572 unregister_filesystem(&sock_fs_type);
2573out_fs:
2574 goto out;
1da177e4
LT
2575}
2576
77d76ea3
AK
2577core_initcall(sock_init); /* early initcall */
2578
1da177e4
LT
2579#ifdef CONFIG_PROC_FS
2580void socket_seq_show(struct seq_file *seq)
2581{
2582 int cpu;
2583 int counter = 0;
2584
6f912042 2585 for_each_possible_cpu(cpu)
89bddce5 2586 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2587
2588 /* It can be negative, by the way. 8) */
2589 if (counter < 0)
2590 counter = 0;
2591
2592 seq_printf(seq, "sockets: used %d\n", counter);
2593}
89bddce5 2594#endif /* CONFIG_PROC_FS */
1da177e4 2595
89bbfc95 2596#ifdef CONFIG_COMPAT
6b96018b 2597static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2598 unsigned int cmd, void __user *up)
7a229387 2599{
7a229387
AB
2600 mm_segment_t old_fs = get_fs();
2601 struct timeval ktv;
2602 int err;
2603
2604 set_fs(KERNEL_DS);
6b96018b 2605 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2606 set_fs(old_fs);
644595f8 2607 if (!err)
ed6fe9d6 2608 err = compat_put_timeval(&ktv, up);
644595f8 2609
7a229387
AB
2610 return err;
2611}
2612
6b96018b 2613static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2614 unsigned int cmd, void __user *up)
7a229387 2615{
7a229387
AB
2616 mm_segment_t old_fs = get_fs();
2617 struct timespec kts;
2618 int err;
2619
2620 set_fs(KERNEL_DS);
6b96018b 2621 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2622 set_fs(old_fs);
644595f8 2623 if (!err)
ed6fe9d6 2624 err = compat_put_timespec(&kts, up);
644595f8 2625
7a229387
AB
2626 return err;
2627}
2628
6b96018b 2629static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2630{
2631 struct ifreq __user *uifr;
2632 int err;
2633
2634 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2635 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2636 return -EFAULT;
2637
6b96018b 2638 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2639 if (err)
2640 return err;
2641
6b96018b 2642 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2643 return -EFAULT;
2644
2645 return 0;
2646}
2647
6b96018b 2648static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2649{
6b96018b 2650 struct compat_ifconf ifc32;
7a229387
AB
2651 struct ifconf ifc;
2652 struct ifconf __user *uifc;
6b96018b 2653 struct compat_ifreq __user *ifr32;
7a229387
AB
2654 struct ifreq __user *ifr;
2655 unsigned int i, j;
2656 int err;
2657
6b96018b 2658 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2659 return -EFAULT;
2660
43da5f2e 2661 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2662 if (ifc32.ifcbuf == 0) {
2663 ifc32.ifc_len = 0;
2664 ifc.ifc_len = 0;
2665 ifc.ifc_req = NULL;
2666 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2667 } else {
c6d409cf
ED
2668 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2669 sizeof(struct ifreq);
7a229387
AB
2670 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2671 ifc.ifc_len = len;
2672 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2673 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2674 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2675 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2676 return -EFAULT;
2677 ifr++;
2678 ifr32++;
2679 }
2680 }
2681 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2682 return -EFAULT;
2683
6b96018b 2684 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2685 if (err)
2686 return err;
2687
2688 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2689 return -EFAULT;
2690
2691 ifr = ifc.ifc_req;
2692 ifr32 = compat_ptr(ifc32.ifcbuf);
2693 for (i = 0, j = 0;
c6d409cf
ED
2694 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2695 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2696 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2697 return -EFAULT;
2698 ifr32++;
2699 ifr++;
2700 }
2701
2702 if (ifc32.ifcbuf == 0) {
2703 /* Translate from 64-bit structure multiple to
2704 * a 32-bit one.
2705 */
2706 i = ifc.ifc_len;
6b96018b 2707 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2708 ifc32.ifc_len = i;
2709 } else {
2710 ifc32.ifc_len = i;
2711 }
6b96018b 2712 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2713 return -EFAULT;
2714
2715 return 0;
2716}
2717
6b96018b 2718static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2719{
3a7da39d
BH
2720 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2721 bool convert_in = false, convert_out = false;
2722 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2723 struct ethtool_rxnfc __user *rxnfc;
7a229387 2724 struct ifreq __user *ifr;
3a7da39d
BH
2725 u32 rule_cnt = 0, actual_rule_cnt;
2726 u32 ethcmd;
7a229387 2727 u32 data;
3a7da39d 2728 int ret;
7a229387 2729
3a7da39d
BH
2730 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2731 return -EFAULT;
7a229387 2732
3a7da39d
BH
2733 compat_rxnfc = compat_ptr(data);
2734
2735 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2736 return -EFAULT;
2737
3a7da39d
BH
2738 /* Most ethtool structures are defined without padding.
2739 * Unfortunately struct ethtool_rxnfc is an exception.
2740 */
2741 switch (ethcmd) {
2742 default:
2743 break;
2744 case ETHTOOL_GRXCLSRLALL:
2745 /* Buffer size is variable */
2746 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2747 return -EFAULT;
2748 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2749 return -ENOMEM;
2750 buf_size += rule_cnt * sizeof(u32);
2751 /* fall through */
2752 case ETHTOOL_GRXRINGS:
2753 case ETHTOOL_GRXCLSRLCNT:
2754 case ETHTOOL_GRXCLSRULE:
55664f32 2755 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2756 convert_out = true;
2757 /* fall through */
2758 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2759 buf_size += sizeof(struct ethtool_rxnfc);
2760 convert_in = true;
2761 break;
2762 }
2763
2764 ifr = compat_alloc_user_space(buf_size);
954b1244 2765 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2766
2767 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2768 return -EFAULT;
2769
3a7da39d
BH
2770 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2771 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2772 return -EFAULT;
2773
3a7da39d 2774 if (convert_in) {
127fe533 2775 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2776 * fs.ring_cookie and at the end of fs, but nowhere else.
2777 */
127fe533
AD
2778 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2779 sizeof(compat_rxnfc->fs.m_ext) !=
2780 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2781 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2782 BUILD_BUG_ON(
2783 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2784 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2785 offsetof(struct ethtool_rxnfc, fs.location) -
2786 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2787
2788 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2789 (void __user *)(&rxnfc->fs.m_ext + 1) -
2790 (void __user *)rxnfc) ||
3a7da39d
BH
2791 copy_in_user(&rxnfc->fs.ring_cookie,
2792 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2793 (void __user *)(&rxnfc->fs.location + 1) -
2794 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2795 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2796 sizeof(rxnfc->rule_cnt)))
2797 return -EFAULT;
2798 }
2799
2800 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2801 if (ret)
2802 return ret;
2803
2804 if (convert_out) {
2805 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2806 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2807 (const void __user *)rxnfc) ||
3a7da39d
BH
2808 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2809 &rxnfc->fs.ring_cookie,
954b1244
SH
2810 (const void __user *)(&rxnfc->fs.location + 1) -
2811 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2812 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2813 sizeof(rxnfc->rule_cnt)))
2814 return -EFAULT;
2815
2816 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2817 /* As an optimisation, we only copy the actual
2818 * number of rules that the underlying
2819 * function returned. Since Mallory might
2820 * change the rule count in user memory, we
2821 * check that it is less than the rule count
2822 * originally given (as the user buffer size),
2823 * which has been range-checked.
2824 */
2825 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2826 return -EFAULT;
2827 if (actual_rule_cnt < rule_cnt)
2828 rule_cnt = actual_rule_cnt;
2829 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2830 &rxnfc->rule_locs[0],
2831 rule_cnt * sizeof(u32)))
2832 return -EFAULT;
2833 }
2834 }
2835
2836 return 0;
7a229387
AB
2837}
2838
7a50a240
AB
2839static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2840{
2841 void __user *uptr;
2842 compat_uptr_t uptr32;
2843 struct ifreq __user *uifr;
2844
c6d409cf 2845 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2846 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2847 return -EFAULT;
2848
2849 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2850 return -EFAULT;
2851
2852 uptr = compat_ptr(uptr32);
2853
2854 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2855 return -EFAULT;
2856
2857 return dev_ioctl(net, SIOCWANDEV, uifr);
2858}
2859
6b96018b
AB
2860static int bond_ioctl(struct net *net, unsigned int cmd,
2861 struct compat_ifreq __user *ifr32)
7a229387
AB
2862{
2863 struct ifreq kifr;
7a229387
AB
2864 mm_segment_t old_fs;
2865 int err;
7a229387
AB
2866
2867 switch (cmd) {
2868 case SIOCBONDENSLAVE:
2869 case SIOCBONDRELEASE:
2870 case SIOCBONDSETHWADDR:
2871 case SIOCBONDCHANGEACTIVE:
6b96018b 2872 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2873 return -EFAULT;
2874
2875 old_fs = get_fs();
c6d409cf 2876 set_fs(KERNEL_DS);
c3f52ae6 2877 err = dev_ioctl(net, cmd,
2878 (struct ifreq __user __force *) &kifr);
c6d409cf 2879 set_fs(old_fs);
7a229387
AB
2880
2881 return err;
7a229387 2882 default:
07d106d0 2883 return -ENOIOCTLCMD;
ccbd6a5a 2884 }
7a229387
AB
2885}
2886
590d4693
BH
2887/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2888static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2889 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2890{
2891 struct ifreq __user *u_ifreq64;
7a229387
AB
2892 char tmp_buf[IFNAMSIZ];
2893 void __user *data64;
2894 u32 data32;
2895
2896 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2897 IFNAMSIZ))
2898 return -EFAULT;
417c3522 2899 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2900 return -EFAULT;
2901 data64 = compat_ptr(data32);
2902
2903 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2904
7a229387
AB
2905 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2906 IFNAMSIZ))
2907 return -EFAULT;
417c3522 2908 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2909 return -EFAULT;
2910
6b96018b 2911 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2912}
2913
6b96018b
AB
2914static int dev_ifsioc(struct net *net, struct socket *sock,
2915 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2916{
a2116ed2 2917 struct ifreq __user *uifr;
7a229387
AB
2918 int err;
2919
a2116ed2
AB
2920 uifr = compat_alloc_user_space(sizeof(*uifr));
2921 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2922 return -EFAULT;
2923
2924 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2925
7a229387
AB
2926 if (!err) {
2927 switch (cmd) {
2928 case SIOCGIFFLAGS:
2929 case SIOCGIFMETRIC:
2930 case SIOCGIFMTU:
2931 case SIOCGIFMEM:
2932 case SIOCGIFHWADDR:
2933 case SIOCGIFINDEX:
2934 case SIOCGIFADDR:
2935 case SIOCGIFBRDADDR:
2936 case SIOCGIFDSTADDR:
2937 case SIOCGIFNETMASK:
fab2532b 2938 case SIOCGIFPFLAGS:
7a229387 2939 case SIOCGIFTXQLEN:
fab2532b
AB
2940 case SIOCGMIIPHY:
2941 case SIOCGMIIREG:
a2116ed2 2942 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2943 err = -EFAULT;
2944 break;
2945 }
2946 }
2947 return err;
2948}
2949
a2116ed2
AB
2950static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2951 struct compat_ifreq __user *uifr32)
2952{
2953 struct ifreq ifr;
2954 struct compat_ifmap __user *uifmap32;
2955 mm_segment_t old_fs;
2956 int err;
2957
2958 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2959 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2960 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2961 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2962 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2963 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2964 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2965 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2966 if (err)
2967 return -EFAULT;
2968
2969 old_fs = get_fs();
c6d409cf 2970 set_fs(KERNEL_DS);
c3f52ae6 2971 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2972 set_fs(old_fs);
a2116ed2
AB
2973
2974 if (cmd == SIOCGIFMAP && !err) {
2975 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2976 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2977 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2978 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2979 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2980 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2981 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2982 if (err)
2983 err = -EFAULT;
2984 }
2985 return err;
2986}
2987
7a229387 2988struct rtentry32 {
c6d409cf 2989 u32 rt_pad1;
7a229387
AB
2990 struct sockaddr rt_dst; /* target address */
2991 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2992 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2993 unsigned short rt_flags;
2994 short rt_pad2;
2995 u32 rt_pad3;
2996 unsigned char rt_tos;
2997 unsigned char rt_class;
2998 short rt_pad4;
2999 short rt_metric; /* +1 for binary compatibility! */
7a229387 3000 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3001 u32 rt_mtu; /* per route MTU/Window */
3002 u32 rt_window; /* Window clamping */
7a229387
AB
3003 unsigned short rt_irtt; /* Initial RTT */
3004};
3005
3006struct in6_rtmsg32 {
3007 struct in6_addr rtmsg_dst;
3008 struct in6_addr rtmsg_src;
3009 struct in6_addr rtmsg_gateway;
3010 u32 rtmsg_type;
3011 u16 rtmsg_dst_len;
3012 u16 rtmsg_src_len;
3013 u32 rtmsg_metric;
3014 u32 rtmsg_info;
3015 u32 rtmsg_flags;
3016 s32 rtmsg_ifindex;
3017};
3018
6b96018b
AB
3019static int routing_ioctl(struct net *net, struct socket *sock,
3020 unsigned int cmd, void __user *argp)
7a229387
AB
3021{
3022 int ret;
3023 void *r = NULL;
3024 struct in6_rtmsg r6;
3025 struct rtentry r4;
3026 char devname[16];
3027 u32 rtdev;
3028 mm_segment_t old_fs = get_fs();
3029
6b96018b
AB
3030 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3031 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3032 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3033 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3034 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3035 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3036 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3037 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3038 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3039 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3040 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3041
3042 r = (void *) &r6;
3043 } else { /* ipv4 */
6b96018b 3044 struct rtentry32 __user *ur4 = argp;
c6d409cf 3045 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3046 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3047 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3048 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3049 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3050 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3051 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3052 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3053 if (rtdev) {
c6d409cf 3054 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3055 r4.rt_dev = (char __user __force *)devname;
3056 devname[15] = 0;
7a229387
AB
3057 } else
3058 r4.rt_dev = NULL;
3059
3060 r = (void *) &r4;
3061 }
3062
3063 if (ret) {
3064 ret = -EFAULT;
3065 goto out;
3066 }
3067
c6d409cf 3068 set_fs(KERNEL_DS);
6b96018b 3069 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3070 set_fs(old_fs);
7a229387
AB
3071
3072out:
7a229387
AB
3073 return ret;
3074}
3075
3076/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3077 * for some operations; this forces use of the newer bridge-utils that
25985edc 3078 * use compatible ioctls
7a229387 3079 */
6b96018b 3080static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3081{
6b96018b 3082 compat_ulong_t tmp;
7a229387 3083
6b96018b 3084 if (get_user(tmp, argp))
7a229387
AB
3085 return -EFAULT;
3086 if (tmp == BRCTL_GET_VERSION)
3087 return BRCTL_VERSION + 1;
3088 return -EINVAL;
3089}
3090
6b96018b
AB
3091static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3092 unsigned int cmd, unsigned long arg)
3093{
3094 void __user *argp = compat_ptr(arg);
3095 struct sock *sk = sock->sk;
3096 struct net *net = sock_net(sk);
7a229387 3097
6b96018b 3098 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3099 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3100
3101 switch (cmd) {
3102 case SIOCSIFBR:
3103 case SIOCGIFBR:
3104 return old_bridge_ioctl(argp);
3105 case SIOCGIFNAME:
3106 return dev_ifname32(net, argp);
3107 case SIOCGIFCONF:
3108 return dev_ifconf(net, argp);
3109 case SIOCETHTOOL:
3110 return ethtool_ioctl(net, argp);
7a50a240
AB
3111 case SIOCWANDEV:
3112 return compat_siocwandev(net, argp);
a2116ed2
AB
3113 case SIOCGIFMAP:
3114 case SIOCSIFMAP:
3115 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3116 case SIOCBONDENSLAVE:
3117 case SIOCBONDRELEASE:
3118 case SIOCBONDSETHWADDR:
6b96018b
AB
3119 case SIOCBONDCHANGEACTIVE:
3120 return bond_ioctl(net, cmd, argp);
3121 case SIOCADDRT:
3122 case SIOCDELRT:
3123 return routing_ioctl(net, sock, cmd, argp);
3124 case SIOCGSTAMP:
3125 return do_siocgstamp(net, sock, cmd, argp);
3126 case SIOCGSTAMPNS:
3127 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3128 case SIOCBONDSLAVEINFOQUERY:
3129 case SIOCBONDINFOQUERY:
a2116ed2 3130 case SIOCSHWTSTAMP:
fd468c74 3131 case SIOCGHWTSTAMP:
590d4693 3132 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3133
3134 case FIOSETOWN:
3135 case SIOCSPGRP:
3136 case FIOGETOWN:
3137 case SIOCGPGRP:
3138 case SIOCBRADDBR:
3139 case SIOCBRDELBR:
3140 case SIOCGIFVLAN:
3141 case SIOCSIFVLAN:
3142 case SIOCADDDLCI:
3143 case SIOCDELDLCI:
c62cce2c 3144 case SIOCGSKNS:
6b96018b
AB
3145 return sock_ioctl(file, cmd, arg);
3146
3147 case SIOCGIFFLAGS:
3148 case SIOCSIFFLAGS:
3149 case SIOCGIFMETRIC:
3150 case SIOCSIFMETRIC:
3151 case SIOCGIFMTU:
3152 case SIOCSIFMTU:
3153 case SIOCGIFMEM:
3154 case SIOCSIFMEM:
3155 case SIOCGIFHWADDR:
3156 case SIOCSIFHWADDR:
3157 case SIOCADDMULTI:
3158 case SIOCDELMULTI:
3159 case SIOCGIFINDEX:
6b96018b
AB
3160 case SIOCGIFADDR:
3161 case SIOCSIFADDR:
3162 case SIOCSIFHWBROADCAST:
6b96018b 3163 case SIOCDIFADDR:
6b96018b
AB
3164 case SIOCGIFBRDADDR:
3165 case SIOCSIFBRDADDR:
3166 case SIOCGIFDSTADDR:
3167 case SIOCSIFDSTADDR:
3168 case SIOCGIFNETMASK:
3169 case SIOCSIFNETMASK:
3170 case SIOCSIFPFLAGS:
3171 case SIOCGIFPFLAGS:
3172 case SIOCGIFTXQLEN:
3173 case SIOCSIFTXQLEN:
3174 case SIOCBRADDIF:
3175 case SIOCBRDELIF:
9177efd3
AB
3176 case SIOCSIFNAME:
3177 case SIOCGMIIPHY:
3178 case SIOCGMIIREG:
3179 case SIOCSMIIREG:
6b96018b 3180 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3181
6b96018b
AB
3182 case SIOCSARP:
3183 case SIOCGARP:
3184 case SIOCDARP:
6b96018b 3185 case SIOCATMARK:
9177efd3
AB
3186 return sock_do_ioctl(net, sock, cmd, arg);
3187 }
3188
6b96018b
AB
3189 return -ENOIOCTLCMD;
3190}
7a229387 3191
95c96174 3192static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3193 unsigned long arg)
89bbfc95
SP
3194{
3195 struct socket *sock = file->private_data;
3196 int ret = -ENOIOCTLCMD;
87de87d5
DM
3197 struct sock *sk;
3198 struct net *net;
3199
3200 sk = sock->sk;
3201 net = sock_net(sk);
89bbfc95
SP
3202
3203 if (sock->ops->compat_ioctl)
3204 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3205
87de87d5
DM
3206 if (ret == -ENOIOCTLCMD &&
3207 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3208 ret = compat_wext_handle_ioctl(net, cmd, arg);
3209
6b96018b
AB
3210 if (ret == -ENOIOCTLCMD)
3211 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3212
89bbfc95
SP
3213 return ret;
3214}
3215#endif
3216
ac5a488e
SS
3217int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3218{
3219 return sock->ops->bind(sock, addr, addrlen);
3220}
c6d409cf 3221EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3222
3223int kernel_listen(struct socket *sock, int backlog)
3224{
3225 return sock->ops->listen(sock, backlog);
3226}
c6d409cf 3227EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3228
3229int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3230{
3231 struct sock *sk = sock->sk;
3232 int err;
3233
3234 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3235 newsock);
3236 if (err < 0)
3237 goto done;
3238
3239 err = sock->ops->accept(sock, *newsock, flags);
3240 if (err < 0) {
3241 sock_release(*newsock);
fa8705b0 3242 *newsock = NULL;
ac5a488e
SS
3243 goto done;
3244 }
3245
3246 (*newsock)->ops = sock->ops;
1b08534e 3247 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3248
3249done:
3250 return err;
3251}
c6d409cf 3252EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3253
3254int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3255 int flags)
ac5a488e
SS
3256{
3257 return sock->ops->connect(sock, addr, addrlen, flags);
3258}
c6d409cf 3259EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3260
3261int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3262 int *addrlen)
3263{
3264 return sock->ops->getname(sock, addr, addrlen, 0);
3265}
c6d409cf 3266EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3267
3268int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3269 int *addrlen)
3270{
3271 return sock->ops->getname(sock, addr, addrlen, 1);
3272}
c6d409cf 3273EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3274
3275int kernel_getsockopt(struct socket *sock, int level, int optname,
3276 char *optval, int *optlen)
3277{
3278 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3279 char __user *uoptval;
3280 int __user *uoptlen;
ac5a488e
SS
3281 int err;
3282
fb8621bb
NK
3283 uoptval = (char __user __force *) optval;
3284 uoptlen = (int __user __force *) optlen;
3285
ac5a488e
SS
3286 set_fs(KERNEL_DS);
3287 if (level == SOL_SOCKET)
fb8621bb 3288 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3289 else
fb8621bb
NK
3290 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3291 uoptlen);
ac5a488e
SS
3292 set_fs(oldfs);
3293 return err;
3294}
c6d409cf 3295EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3296
3297int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3298 char *optval, unsigned int optlen)
ac5a488e
SS
3299{
3300 mm_segment_t oldfs = get_fs();
fb8621bb 3301 char __user *uoptval;
ac5a488e
SS
3302 int err;
3303
fb8621bb
NK
3304 uoptval = (char __user __force *) optval;
3305
ac5a488e
SS
3306 set_fs(KERNEL_DS);
3307 if (level == SOL_SOCKET)
fb8621bb 3308 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3309 else
fb8621bb 3310 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3311 optlen);
3312 set_fs(oldfs);
3313 return err;
3314}
c6d409cf 3315EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3316
3317int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3318 size_t size, int flags)
3319{
3320 if (sock->ops->sendpage)
3321 return sock->ops->sendpage(sock, page, offset, size, flags);
3322
3323 return sock_no_sendpage(sock, page, offset, size, flags);
3324}
c6d409cf 3325EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3326
3327int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3328{
3329 mm_segment_t oldfs = get_fs();
3330 int err;
3331
3332 set_fs(KERNEL_DS);
3333 err = sock->ops->ioctl(sock, cmd, arg);
3334 set_fs(oldfs);
3335
3336 return err;
3337}
c6d409cf 3338EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3339
91cf45f0
TM
3340int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3341{
3342 return sock->ops->shutdown(sock, how);
3343}
91cf45f0 3344EXPORT_SYMBOL(kernel_sock_shutdown);