]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
packet: remove deprecated syststamp timestamp
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
06021292 109
e0d1095a 110#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
111unsigned int sysctl_net_busy_read __read_mostly;
112unsigned int sysctl_net_busy_poll __read_mostly;
06021292 113#endif
6b96018b 114
1da177e4 115static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
116static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
117 unsigned long nr_segs, loff_t pos);
118static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
119 unsigned long nr_segs, loff_t pos);
89bddce5 120static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
121
122static int sock_close(struct inode *inode, struct file *file);
123static unsigned int sock_poll(struct file *file,
124 struct poll_table_struct *wait);
89bddce5 125static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
126#ifdef CONFIG_COMPAT
127static long compat_sock_ioctl(struct file *file,
89bddce5 128 unsigned int cmd, unsigned long arg);
89bbfc95 129#endif
1da177e4 130static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
131static ssize_t sock_sendpage(struct file *file, struct page *page,
132 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 133static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 134 struct pipe_inode_info *pipe, size_t len,
9c55e01c 135 unsigned int flags);
1da177e4 136
1da177e4
LT
137/*
138 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
139 * in the operation structures but are done directly via the socketcall() multiplexor.
140 */
141
da7071d7 142static const struct file_operations socket_file_ops = {
1da177e4
LT
143 .owner = THIS_MODULE,
144 .llseek = no_llseek,
145 .aio_read = sock_aio_read,
146 .aio_write = sock_aio_write,
147 .poll = sock_poll,
148 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
149#ifdef CONFIG_COMPAT
150 .compat_ioctl = compat_sock_ioctl,
151#endif
1da177e4
LT
152 .mmap = sock_mmap,
153 .open = sock_no_open, /* special open code to disallow open via /proc */
154 .release = sock_close,
155 .fasync = sock_fasync,
5274f052
JA
156 .sendpage = sock_sendpage,
157 .splice_write = generic_splice_sendpage,
9c55e01c 158 .splice_read = sock_splice_read,
1da177e4
LT
159};
160
161/*
162 * The protocol list. Each protocol is registered in here.
163 */
164
1da177e4 165static DEFINE_SPINLOCK(net_family_lock);
190683a9 166static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 167
1da177e4
LT
168/*
169 * Statistics counters of the socket lists
170 */
171
c6d409cf 172static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
173
174/*
89bddce5
SH
175 * Support routines.
176 * Move socket addresses back and forth across the kernel/user
177 * divide and look after the messy bits.
1da177e4
LT
178 */
179
1da177e4
LT
180/**
181 * move_addr_to_kernel - copy a socket address into kernel space
182 * @uaddr: Address in user space
183 * @kaddr: Address in kernel space
184 * @ulen: Length in user space
185 *
186 * The address is copied into kernel space. If the provided address is
187 * too long an error code of -EINVAL is returned. If the copy gives
188 * invalid addresses -EFAULT is returned. On a success 0 is returned.
189 */
190
43db362d 191int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 192{
230b1839 193 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 194 return -EINVAL;
89bddce5 195 if (ulen == 0)
1da177e4 196 return 0;
89bddce5 197 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 198 return -EFAULT;
3ec3b2fb 199 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
200}
201
202/**
203 * move_addr_to_user - copy an address to user space
204 * @kaddr: kernel space address
205 * @klen: length of address in kernel
206 * @uaddr: user space address
207 * @ulen: pointer to user length field
208 *
209 * The value pointed to by ulen on entry is the buffer length available.
210 * This is overwritten with the buffer space used. -EINVAL is returned
211 * if an overlong buffer is specified or a negative buffer size. -EFAULT
212 * is returned if either the buffer or the length field are not
213 * accessible.
214 * After copying the data up to the limit the user specifies, the true
215 * length of the data is written over the length limit the user
216 * specified. Zero is returned for a success.
217 */
89bddce5 218
43db362d 219static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 220 void __user *uaddr, int __user *ulen)
1da177e4
LT
221{
222 int err;
223 int len;
224
68c6beb3 225 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
226 err = get_user(len, ulen);
227 if (err)
1da177e4 228 return err;
89bddce5
SH
229 if (len > klen)
230 len = klen;
68c6beb3 231 if (len < 0)
1da177e4 232 return -EINVAL;
89bddce5 233 if (len) {
d6fe3945
SG
234 if (audit_sockaddr(klen, kaddr))
235 return -ENOMEM;
89bddce5 236 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
237 return -EFAULT;
238 }
239 /*
89bddce5
SH
240 * "fromlen shall refer to the value before truncation.."
241 * 1003.1g
1da177e4
LT
242 */
243 return __put_user(klen, ulen);
244}
245
e18b890b 246static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
247
248static struct inode *sock_alloc_inode(struct super_block *sb)
249{
250 struct socket_alloc *ei;
eaefd110 251 struct socket_wq *wq;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
eaefd110
ED
256 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
257 if (!wq) {
43815482
ED
258 kmem_cache_free(sock_inode_cachep, ei);
259 return NULL;
260 }
eaefd110
ED
261 init_waitqueue_head(&wq->wait);
262 wq->fasync_list = NULL;
263 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 264
1da177e4
LT
265 ei->socket.state = SS_UNCONNECTED;
266 ei->socket.flags = 0;
267 ei->socket.ops = NULL;
268 ei->socket.sk = NULL;
269 ei->socket.file = NULL;
1da177e4
LT
270
271 return &ei->vfs_inode;
272}
273
274static void sock_destroy_inode(struct inode *inode)
275{
43815482 276 struct socket_alloc *ei;
eaefd110 277 struct socket_wq *wq;
43815482
ED
278
279 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 280 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 281 kfree_rcu(wq, rcu);
43815482 282 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
283}
284
51cc5068 285static void init_once(void *foo)
1da177e4 286{
89bddce5 287 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 288
a35afb83 289 inode_init_once(&ei->vfs_inode);
1da177e4 290}
89bddce5 291
1da177e4
LT
292static int init_inodecache(void)
293{
294 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
295 sizeof(struct socket_alloc),
296 0,
297 (SLAB_HWCACHE_ALIGN |
298 SLAB_RECLAIM_ACCOUNT |
299 SLAB_MEM_SPREAD),
20c2df83 300 init_once);
1da177e4
LT
301 if (sock_inode_cachep == NULL)
302 return -ENOMEM;
303 return 0;
304}
305
b87221de 306static const struct super_operations sockfs_ops = {
c6d409cf
ED
307 .alloc_inode = sock_alloc_inode,
308 .destroy_inode = sock_destroy_inode,
309 .statfs = simple_statfs,
1da177e4
LT
310};
311
c23fbb6b
ED
312/*
313 * sockfs_dname() is called from d_path().
314 */
315static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
316{
317 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
318 dentry->d_inode->i_ino);
319}
320
3ba13d17 321static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 322 .d_dname = sockfs_dname,
1da177e4
LT
323};
324
c74a1cbb
AV
325static struct dentry *sockfs_mount(struct file_system_type *fs_type,
326 int flags, const char *dev_name, void *data)
327{
328 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
329 &sockfs_dentry_operations, SOCKFS_MAGIC);
330}
331
332static struct vfsmount *sock_mnt __read_mostly;
333
334static struct file_system_type sock_fs_type = {
335 .name = "sockfs",
336 .mount = sockfs_mount,
337 .kill_sb = kill_anon_super,
338};
339
1da177e4
LT
340/*
341 * Obtains the first available file descriptor and sets it up for use.
342 *
39d8c1b6
DM
343 * These functions create file structures and maps them to fd space
344 * of the current process. On success it returns file descriptor
1da177e4
LT
345 * and file struct implicitly stored in sock->file.
346 * Note that another thread may close file descriptor before we return
347 * from this function. We use the fact that now we do not refer
348 * to socket after mapping. If one day we will need it, this
349 * function will increment ref. count on file by 1.
350 *
351 * In any case returned fd MAY BE not valid!
352 * This race condition is unavoidable
353 * with shared fd spaces, we cannot solve it inside kernel,
354 * but we take care of internal coherence yet.
355 */
356
aab174f0 357struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 358{
7cbe66b6 359 struct qstr name = { .name = "" };
2c48b9c4 360 struct path path;
7cbe66b6 361 struct file *file;
1da177e4 362
600e1779
MY
363 if (dname) {
364 name.name = dname;
365 name.len = strlen(name.name);
366 } else if (sock->sk) {
367 name.name = sock->sk->sk_prot_creator->name;
368 name.len = strlen(name.name);
369 }
4b936885 370 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
371 if (unlikely(!path.dentry))
372 return ERR_PTR(-ENOMEM);
2c48b9c4 373 path.mnt = mntget(sock_mnt);
39d8c1b6 374
2c48b9c4 375 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 376 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 377
2c48b9c4 378 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 379 &socket_file_ops);
39b65252 380 if (unlikely(IS_ERR(file))) {
cc3808f8 381 /* drop dentry, keep inode */
7de9c6ee 382 ihold(path.dentry->d_inode);
2c48b9c4 383 path_put(&path);
39b65252 384 return file;
cc3808f8
AV
385 }
386
387 sock->file = file;
77d27200 388 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 389 file->private_data = sock;
28407630 390 return file;
39d8c1b6 391}
56b31d1c 392EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 393
56b31d1c 394static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
395{
396 struct file *newfile;
28407630
AV
397 int fd = get_unused_fd_flags(flags);
398 if (unlikely(fd < 0))
399 return fd;
39d8c1b6 400
aab174f0 401 newfile = sock_alloc_file(sock, flags, NULL);
28407630 402 if (likely(!IS_ERR(newfile))) {
39d8c1b6 403 fd_install(fd, newfile);
28407630
AV
404 return fd;
405 }
7cbe66b6 406
28407630
AV
407 put_unused_fd(fd);
408 return PTR_ERR(newfile);
1da177e4
LT
409}
410
406a3c63 411struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 412{
6cb153ca
BL
413 if (file->f_op == &socket_file_ops)
414 return file->private_data; /* set in sock_map_fd */
415
23bb80d2
ED
416 *err = -ENOTSOCK;
417 return NULL;
6cb153ca 418}
406a3c63 419EXPORT_SYMBOL(sock_from_file);
6cb153ca 420
1da177e4 421/**
c6d409cf 422 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
423 * @fd: file handle
424 * @err: pointer to an error code return
425 *
426 * The file handle passed in is locked and the socket it is bound
427 * too is returned. If an error occurs the err pointer is overwritten
428 * with a negative errno code and NULL is returned. The function checks
429 * for both invalid handles and passing a handle which is not a socket.
430 *
431 * On a success the socket object pointer is returned.
432 */
433
434struct socket *sockfd_lookup(int fd, int *err)
435{
436 struct file *file;
1da177e4
LT
437 struct socket *sock;
438
89bddce5
SH
439 file = fget(fd);
440 if (!file) {
1da177e4
LT
441 *err = -EBADF;
442 return NULL;
443 }
89bddce5 444
6cb153ca
BL
445 sock = sock_from_file(file, err);
446 if (!sock)
1da177e4 447 fput(file);
6cb153ca
BL
448 return sock;
449}
c6d409cf 450EXPORT_SYMBOL(sockfd_lookup);
1da177e4 451
6cb153ca
BL
452static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
453{
00e188ef 454 struct fd f = fdget(fd);
6cb153ca
BL
455 struct socket *sock;
456
3672558c 457 *err = -EBADF;
00e188ef
AV
458 if (f.file) {
459 sock = sock_from_file(f.file, err);
460 if (likely(sock)) {
461 *fput_needed = f.flags;
6cb153ca 462 return sock;
00e188ef
AV
463 }
464 fdput(f);
1da177e4 465 }
6cb153ca 466 return NULL;
1da177e4
LT
467}
468
600e1779
MY
469#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
470#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
471#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
472static ssize_t sockfs_getxattr(struct dentry *dentry,
473 const char *name, void *value, size_t size)
474{
475 const char *proto_name;
476 size_t proto_size;
477 int error;
478
479 error = -ENODATA;
480 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
481 proto_name = dentry->d_name.name;
482 proto_size = strlen(proto_name);
483
484 if (value) {
485 error = -ERANGE;
486 if (proto_size + 1 > size)
487 goto out;
488
489 strncpy(value, proto_name, proto_size + 1);
490 }
491 error = proto_size + 1;
492 }
493
494out:
495 return error;
496}
497
498static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
499 size_t size)
500{
501 ssize_t len;
502 ssize_t used = 0;
503
504 len = security_inode_listsecurity(dentry->d_inode, buffer, size);
505 if (len < 0)
506 return len;
507 used += len;
508 if (buffer) {
509 if (size < used)
510 return -ERANGE;
511 buffer += len;
512 }
513
514 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
515 used += len;
516 if (buffer) {
517 if (size < used)
518 return -ERANGE;
519 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
520 buffer += len;
521 }
522
523 return used;
524}
525
526static const struct inode_operations sockfs_inode_ops = {
527 .getxattr = sockfs_getxattr,
528 .listxattr = sockfs_listxattr,
529};
530
1da177e4
LT
531/**
532 * sock_alloc - allocate a socket
89bddce5 533 *
1da177e4
LT
534 * Allocate a new inode and socket object. The two are bound together
535 * and initialised. The socket is then returned. If we are out of inodes
536 * NULL is returned.
537 */
538
539static struct socket *sock_alloc(void)
540{
89bddce5
SH
541 struct inode *inode;
542 struct socket *sock;
1da177e4 543
a209dfc7 544 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
545 if (!inode)
546 return NULL;
547
548 sock = SOCKET_I(inode);
549
29a020d3 550 kmemcheck_annotate_bitfield(sock, type);
85fe4025 551 inode->i_ino = get_next_ino();
89bddce5 552 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
553 inode->i_uid = current_fsuid();
554 inode->i_gid = current_fsgid();
600e1779 555 inode->i_op = &sockfs_inode_ops;
1da177e4 556
19e8d69c 557 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
558 return sock;
559}
560
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
571
4b6f5d20 572const struct file_operations bad_sock_fops = {
1da177e4
LT
573 .owner = THIS_MODULE,
574 .open = sock_no_open,
6038f373 575 .llseek = noop_llseek,
1da177e4
LT
576};
577
578/**
579 * sock_release - close a socket
580 * @sock: socket to close
581 *
582 * The socket is released from the protocol stack if it has a release
583 * callback, and the inode is then released if the socket is bound to
89bddce5 584 * an inode not a file.
1da177e4 585 */
89bddce5 586
1da177e4
LT
587void sock_release(struct socket *sock)
588{
589 if (sock->ops) {
590 struct module *owner = sock->ops->owner;
591
592 sock->ops->release(sock);
593 sock->ops = NULL;
594 module_put(owner);
595 }
596
eaefd110 597 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 598 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 599
b09e786b
MP
600 if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
601 return;
602
19e8d69c 603 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
604 if (!sock->file) {
605 iput(SOCK_INODE(sock));
606 return;
607 }
89bddce5 608 sock->file = NULL;
1da177e4 609}
c6d409cf 610EXPORT_SYMBOL(sock_release);
1da177e4 611
bf84a010 612void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
20d49473 613{
2244d07b 614 *tx_flags = 0;
20d49473 615 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
2244d07b 616 *tx_flags |= SKBTX_HW_TSTAMP;
20d49473 617 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
2244d07b 618 *tx_flags |= SKBTX_SW_TSTAMP;
6e3e939f
JB
619 if (sock_flag(sk, SOCK_WIFI_STATUS))
620 *tx_flags |= SKBTX_WIFI_STATUS;
20d49473
PO
621}
622EXPORT_SYMBOL(sock_tx_timestamp);
623
228e548e
AB
624static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
625 struct msghdr *msg, size_t size)
1da177e4
LT
626{
627 struct sock_iocb *si = kiocb_to_siocb(iocb);
1da177e4
LT
628
629 si->sock = sock;
630 si->scm = NULL;
631 si->msg = msg;
632 si->size = size;
633
1da177e4
LT
634 return sock->ops->sendmsg(iocb, sock, msg, size);
635}
636
228e548e
AB
637static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
638 struct msghdr *msg, size_t size)
639{
640 int err = security_socket_sendmsg(sock, msg, size);
641
642 return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
643}
644
1da177e4
LT
645int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
646{
647 struct kiocb iocb;
648 struct sock_iocb siocb;
649 int ret;
650
651 init_sync_kiocb(&iocb, NULL);
652 iocb.private = &siocb;
653 ret = __sock_sendmsg(&iocb, sock, msg, size);
654 if (-EIOCBQUEUED == ret)
655 ret = wait_on_sync_kiocb(&iocb);
656 return ret;
657}
c6d409cf 658EXPORT_SYMBOL(sock_sendmsg);
1da177e4 659
894dc24c 660static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
228e548e
AB
661{
662 struct kiocb iocb;
663 struct sock_iocb siocb;
664 int ret;
665
666 init_sync_kiocb(&iocb, NULL);
667 iocb.private = &siocb;
668 ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
669 if (-EIOCBQUEUED == ret)
670 ret = wait_on_sync_kiocb(&iocb);
671 return ret;
672}
673
1da177e4
LT
674int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
675 struct kvec *vec, size_t num, size_t size)
676{
677 mm_segment_t oldfs = get_fs();
678 int result;
679
680 set_fs(KERNEL_DS);
681 /*
682 * the following is safe, since for compiler definitions of kvec and
683 * iovec are identical, yielding the same in-core layout and alignment
684 */
89bddce5 685 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
686 msg->msg_iovlen = num;
687 result = sock_sendmsg(sock, msg, size);
688 set_fs(oldfs);
689 return result;
690}
c6d409cf 691EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 692
92f37fd2
ED
693/*
694 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
695 */
696void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
697 struct sk_buff *skb)
698{
20d49473
PO
699 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
700 struct timespec ts[3];
701 int empty = 1;
702 struct skb_shared_hwtstamps *shhwtstamps =
703 skb_hwtstamps(skb);
704
705 /* Race occurred between timestamp enabling and packet
706 receiving. Fill in the current time for now. */
707 if (need_software_tstamp && skb->tstamp.tv64 == 0)
708 __net_timestamp(skb);
709
710 if (need_software_tstamp) {
711 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
712 struct timeval tv;
713 skb_get_timestamp(skb, &tv);
714 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
715 sizeof(tv), &tv);
716 } else {
842509b8 717 skb_get_timestampns(skb, &ts[0]);
20d49473 718 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 719 sizeof(ts[0]), &ts[0]);
20d49473
PO
720 }
721 }
722
723
724 memset(ts, 0, sizeof(ts));
6e94d1ef
DB
725 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
726 ktime_to_timespec_cond(skb->tstamp, ts + 0))
20d49473 727 empty = 0;
20d49473
PO
728 if (shhwtstamps) {
729 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
6e94d1ef 730 ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
20d49473
PO
731 empty = 0;
732 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
6e94d1ef 733 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
20d49473 734 empty = 0;
92f37fd2 735 }
20d49473
PO
736 if (!empty)
737 put_cmsg(msg, SOL_SOCKET,
738 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 739}
7c81fd8b
ACM
740EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
741
6e3e939f
JB
742void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
743 struct sk_buff *skb)
744{
745 int ack;
746
747 if (!sock_flag(sk, SOCK_WIFI_STATUS))
748 return;
749 if (!skb->wifi_acked_valid)
750 return;
751
752 ack = skb->wifi_acked;
753
754 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
755}
756EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
757
11165f14 758static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
759 struct sk_buff *skb)
3b885787
NH
760{
761 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
762 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
763 sizeof(__u32), &skb->dropcount);
764}
765
/* Emit timestamp control messages first, then the drop count message. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
767dd033 772EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 773
a2e27255
ACM
774static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
775 struct msghdr *msg, size_t size, int flags)
1da177e4 776{
1da177e4
LT
777 struct sock_iocb *si = kiocb_to_siocb(iocb);
778
779 si->sock = sock;
780 si->scm = NULL;
781 si->msg = msg;
782 si->size = size;
783 si->flags = flags;
784
1da177e4
LT
785 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
786}
787
a2e27255
ACM
788static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
789 struct msghdr *msg, size_t size, int flags)
790{
791 int err = security_socket_recvmsg(sock, msg, size, flags);
792
793 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
794}
795
89bddce5 796int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
797 size_t size, int flags)
798{
799 struct kiocb iocb;
800 struct sock_iocb siocb;
801 int ret;
802
89bddce5 803 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
804 iocb.private = &siocb;
805 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
806 if (-EIOCBQUEUED == ret)
807 ret = wait_on_sync_kiocb(&iocb);
808 return ret;
809}
c6d409cf 810EXPORT_SYMBOL(sock_recvmsg);
1da177e4 811
a2e27255
ACM
812static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
813 size_t size, int flags)
814{
815 struct kiocb iocb;
816 struct sock_iocb siocb;
817 int ret;
818
819 init_sync_kiocb(&iocb, NULL);
820 iocb.private = &siocb;
821 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
822 if (-EIOCBQUEUED == ret)
823 ret = wait_on_sync_kiocb(&iocb);
824 return ret;
825}
826
c1249c0a
ML
827/**
828 * kernel_recvmsg - Receive a message from a socket (kernel space)
829 * @sock: The socket to receive the message from
830 * @msg: Received message
831 * @vec: Input s/g array for message data
832 * @num: Size of input s/g array
833 * @size: Number of bytes to read
834 * @flags: Message flags (MSG_DONTWAIT, etc...)
835 *
836 * On return the msg structure contains the scatter/gather array passed in the
837 * vec argument. The array is modified so that it consists of the unfilled
838 * portion of the original array.
839 *
840 * The returned value is the total number of bytes received, or an error.
841 */
89bddce5
SH
842int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
843 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
844{
845 mm_segment_t oldfs = get_fs();
846 int result;
847
848 set_fs(KERNEL_DS);
849 /*
850 * the following is safe, since for compiler definitions of kvec and
851 * iovec are identical, yielding the same in-core layout and alignment
852 */
89bddce5 853 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
854 result = sock_recvmsg(sock, msg, size, flags);
855 set_fs(oldfs);
856 return result;
857}
c6d409cf 858EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 859
ce1d4d3e
CH
860static ssize_t sock_sendpage(struct file *file, struct page *page,
861 int offset, size_t size, loff_t *ppos, int more)
1da177e4 862{
1da177e4
LT
863 struct socket *sock;
864 int flags;
865
ce1d4d3e
CH
866 sock = file->private_data;
867
35f9c09f
ED
868 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
869 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
870 flags |= more;
ce1d4d3e 871
e6949583 872 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 873}
1da177e4 874
9c55e01c 875static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 876 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
877 unsigned int flags)
878{
879 struct socket *sock = file->private_data;
880
997b37da
RDC
881 if (unlikely(!sock->ops->splice_read))
882 return -EINVAL;
883
9c55e01c
JA
884 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
885}
886
ce1d4d3e 887static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 888 struct sock_iocb *siocb)
ce1d4d3e 889{
d29c445b
KO
890 if (!is_sync_kiocb(iocb))
891 BUG();
1da177e4 892
ce1d4d3e 893 siocb->kiocb = iocb;
ce1d4d3e
CH
894 iocb->private = siocb;
895 return siocb;
1da177e4
LT
896}
897
ce1d4d3e 898static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
899 struct file *file, const struct iovec *iov,
900 unsigned long nr_segs)
ce1d4d3e
CH
901{
902 struct socket *sock = file->private_data;
903 size_t size = 0;
904 int i;
1da177e4 905
89bddce5
SH
906 for (i = 0; i < nr_segs; i++)
907 size += iov[i].iov_len;
1da177e4 908
ce1d4d3e
CH
909 msg->msg_name = NULL;
910 msg->msg_namelen = 0;
911 msg->msg_control = NULL;
912 msg->msg_controllen = 0;
89bddce5 913 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
914 msg->msg_iovlen = nr_segs;
915 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
916
917 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
918}
919
027445c3
BP
920static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
921 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
922{
923 struct sock_iocb siocb, *x;
924
1da177e4
LT
925 if (pos != 0)
926 return -ESPIPE;
027445c3 927
73a7075e 928 if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
1da177e4
LT
929 return 0;
930
027445c3
BP
931
932 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
933 if (!x)
934 return -ENOMEM;
027445c3 935 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
936}
937
ce1d4d3e 938static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
939 struct file *file, const struct iovec *iov,
940 unsigned long nr_segs)
1da177e4 941{
ce1d4d3e
CH
942 struct socket *sock = file->private_data;
943 size_t size = 0;
944 int i;
1da177e4 945
89bddce5
SH
946 for (i = 0; i < nr_segs; i++)
947 size += iov[i].iov_len;
1da177e4 948
ce1d4d3e
CH
949 msg->msg_name = NULL;
950 msg->msg_namelen = 0;
951 msg->msg_control = NULL;
952 msg->msg_controllen = 0;
89bddce5 953 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
954 msg->msg_iovlen = nr_segs;
955 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
956 if (sock->type == SOCK_SEQPACKET)
957 msg->msg_flags |= MSG_EOR;
1da177e4 958
ce1d4d3e 959 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
960}
961
027445c3
BP
962static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
963 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
964{
965 struct sock_iocb siocb, *x;
1da177e4 966
ce1d4d3e
CH
967 if (pos != 0)
968 return -ESPIPE;
027445c3 969
027445c3 970 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
971 if (!x)
972 return -ENOMEM;
1da177e4 973
027445c3 974 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
975}
976
1da177e4
LT
977/*
978 * Atomic setting of ioctl hooks to avoid race
979 * with module unload.
980 */
981
4a3e2f71 982static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 983static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 984
881d966b 985void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 986{
4a3e2f71 987 mutex_lock(&br_ioctl_mutex);
1da177e4 988 br_ioctl_hook = hook;
4a3e2f71 989 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
990}
991EXPORT_SYMBOL(brioctl_set);
992
4a3e2f71 993static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 994static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 995
881d966b 996void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 997{
4a3e2f71 998 mutex_lock(&vlan_ioctl_mutex);
1da177e4 999 vlan_ioctl_hook = hook;
4a3e2f71 1000 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1001}
1002EXPORT_SYMBOL(vlan_ioctl_set);
1003
4a3e2f71 1004static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 1005static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1006
89bddce5 1007void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1008{
4a3e2f71 1009 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1010 dlci_ioctl_hook = hook;
4a3e2f71 1011 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1012}
1013EXPORT_SYMBOL(dlci_ioctl_set);
1014
6b96018b
AB
1015static long sock_do_ioctl(struct net *net, struct socket *sock,
1016 unsigned int cmd, unsigned long arg)
1017{
1018 int err;
1019 void __user *argp = (void __user *)arg;
1020
1021 err = sock->ops->ioctl(sock, cmd, arg);
1022
1023 /*
1024 * If this ioctl is unknown try to hand it down
1025 * to the NIC driver.
1026 */
1027 if (err == -ENOIOCTLCMD)
1028 err = dev_ioctl(net, cmd, argp);
1029
1030 return err;
1031}
1032
1da177e4
LT
1033/*
1034 * With an ioctl, arg may well be a user mode pointer, but we don't know
1035 * what to do with it - that's up to the protocol still.
1036 */
1037
1038static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1039{
1040 struct socket *sock;
881d966b 1041 struct sock *sk;
1da177e4
LT
1042 void __user *argp = (void __user *)arg;
1043 int pid, err;
881d966b 1044 struct net *net;
1da177e4 1045
b69aee04 1046 sock = file->private_data;
881d966b 1047 sk = sock->sk;
3b1e0a65 1048 net = sock_net(sk);
1da177e4 1049 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 1050 err = dev_ioctl(net, cmd, argp);
1da177e4 1051 } else
3d23e349 1052#ifdef CONFIG_WEXT_CORE
1da177e4 1053 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 1054 err = dev_ioctl(net, cmd, argp);
1da177e4 1055 } else
3d23e349 1056#endif
89bddce5 1057 switch (cmd) {
1da177e4
LT
1058 case FIOSETOWN:
1059 case SIOCSPGRP:
1060 err = -EFAULT;
1061 if (get_user(pid, (int __user *)argp))
1062 break;
1063 err = f_setown(sock->file, pid, 1);
1064 break;
1065 case FIOGETOWN:
1066 case SIOCGPGRP:
609d7fa9 1067 err = put_user(f_getown(sock->file),
89bddce5 1068 (int __user *)argp);
1da177e4
LT
1069 break;
1070 case SIOCGIFBR:
1071 case SIOCSIFBR:
1072 case SIOCBRADDBR:
1073 case SIOCBRDELBR:
1074 err = -ENOPKG;
1075 if (!br_ioctl_hook)
1076 request_module("bridge");
1077
4a3e2f71 1078 mutex_lock(&br_ioctl_mutex);
89bddce5 1079 if (br_ioctl_hook)
881d966b 1080 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1081 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1082 break;
1083 case SIOCGIFVLAN:
1084 case SIOCSIFVLAN:
1085 err = -ENOPKG;
1086 if (!vlan_ioctl_hook)
1087 request_module("8021q");
1088
4a3e2f71 1089 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1090 if (vlan_ioctl_hook)
881d966b 1091 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1092 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1093 break;
1da177e4
LT
1094 case SIOCADDDLCI:
1095 case SIOCDELDLCI:
1096 err = -ENOPKG;
1097 if (!dlci_ioctl_hook)
1098 request_module("dlci");
1099
7512cbf6
PE
1100 mutex_lock(&dlci_ioctl_mutex);
1101 if (dlci_ioctl_hook)
1da177e4 1102 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1103 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1104 break;
1105 default:
6b96018b 1106 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1107 break;
89bddce5 1108 }
1da177e4
LT
1109 return err;
1110}
1111
1112int sock_create_lite(int family, int type, int protocol, struct socket **res)
1113{
1114 int err;
1115 struct socket *sock = NULL;
89bddce5 1116
1da177e4
LT
1117 err = security_socket_create(family, type, protocol, 1);
1118 if (err)
1119 goto out;
1120
1121 sock = sock_alloc();
1122 if (!sock) {
1123 err = -ENOMEM;
1124 goto out;
1125 }
1126
1da177e4 1127 sock->type = type;
7420ed23
VY
1128 err = security_socket_post_create(sock, family, type, protocol, 1);
1129 if (err)
1130 goto out_release;
1131
1da177e4
LT
1132out:
1133 *res = sock;
1134 return err;
7420ed23
VY
1135out_release:
1136 sock_release(sock);
1137 sock = NULL;
1138 goto out;
1da177e4 1139}
c6d409cf 1140EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1141
1142/* No kernel lock held - perfect */
89bddce5 1143static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1144{
cbf55001 1145 unsigned int busy_flag = 0;
1da177e4
LT
1146 struct socket *sock;
1147
1148 /*
89bddce5 1149 * We can't return errors to poll, so it's either yes or no.
1da177e4 1150 */
b69aee04 1151 sock = file->private_data;
2d48d67f 1152
cbf55001 1153 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1154 /* this socket can poll_ll so tell the system call */
cbf55001 1155 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1156
1157 /* once, only if requested by syscall */
cbf55001
ET
1158 if (wait && (wait->_key & POLL_BUSY_LOOP))
1159 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1160 }
1161
cbf55001 1162 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1163}
1164
89bddce5 1165static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1166{
b69aee04 1167 struct socket *sock = file->private_data;
1da177e4
LT
1168
1169 return sock->ops->mmap(file, sock, vma);
1170}
1171
/*
 * release entry point for socket files: release the socket embedded in
 * @inode.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1177
1178/*
1179 * Update the socket async list
1180 *
1181 * Fasync_list locking strategy.
1182 *
1183 * 1. fasync_list is modified only under process context socket lock
1184 * i.e. under semaphore.
1185 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1186 * or under socket lock
1da177e4
LT
1187 */
1188
1189static int sock_fasync(int fd, struct file *filp, int on)
1190{
989a2979
ED
1191 struct socket *sock = filp->private_data;
1192 struct sock *sk = sock->sk;
eaefd110 1193 struct socket_wq *wq;
1da177e4 1194
989a2979 1195 if (sk == NULL)
1da177e4 1196 return -EINVAL;
1da177e4
LT
1197
1198 lock_sock(sk);
eaefd110
ED
1199 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1200 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1201
eaefd110 1202 if (!wq->fasync_list)
989a2979
ED
1203 sock_reset_flag(sk, SOCK_FASYNC);
1204 else
bcdce719 1205 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1206
989a2979 1207 release_sock(sk);
1da177e4
LT
1208 return 0;
1209}
1210
43815482 1211/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1212
1213int sock_wake_async(struct socket *sock, int how, int band)
1214{
43815482
ED
1215 struct socket_wq *wq;
1216
1217 if (!sock)
1218 return -1;
1219 rcu_read_lock();
1220 wq = rcu_dereference(sock->wq);
1221 if (!wq || !wq->fasync_list) {
1222 rcu_read_unlock();
1da177e4 1223 return -1;
43815482 1224 }
89bddce5 1225 switch (how) {
8d8ad9d7 1226 case SOCK_WAKE_WAITD:
1da177e4
LT
1227 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1228 break;
1229 goto call_kill;
8d8ad9d7 1230 case SOCK_WAKE_SPACE:
1da177e4
LT
1231 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1232 break;
1233 /* fall through */
8d8ad9d7 1234 case SOCK_WAKE_IO:
89bddce5 1235call_kill:
43815482 1236 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1237 break;
8d8ad9d7 1238 case SOCK_WAKE_URG:
43815482 1239 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1240 }
43815482 1241 rcu_read_unlock();
1da177e4
LT
1242 return 0;
1243}
c6d409cf 1244EXPORT_SYMBOL(sock_wake_async);
1da177e4 1245
721db93a 1246int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1247 struct socket **res, int kern)
1da177e4
LT
1248{
1249 int err;
1250 struct socket *sock;
55737fda 1251 const struct net_proto_family *pf;
1da177e4
LT
1252
1253 /*
89bddce5 1254 * Check protocol is in range
1da177e4
LT
1255 */
1256 if (family < 0 || family >= NPROTO)
1257 return -EAFNOSUPPORT;
1258 if (type < 0 || type >= SOCK_MAX)
1259 return -EINVAL;
1260
1261 /* Compatibility.
1262
1263 This uglymoron is moved from INET layer to here to avoid
1264 deadlock in module load.
1265 */
1266 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1267 static int warned;
1da177e4
LT
1268 if (!warned) {
1269 warned = 1;
3410f22e
YY
1270 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1271 current->comm);
1da177e4
LT
1272 }
1273 family = PF_PACKET;
1274 }
1275
1276 err = security_socket_create(family, type, protocol, kern);
1277 if (err)
1278 return err;
89bddce5 1279
55737fda
SH
1280 /*
1281 * Allocate the socket and allow the family to set things up. if
1282 * the protocol is 0, the family is instructed to select an appropriate
1283 * default.
1284 */
1285 sock = sock_alloc();
1286 if (!sock) {
e87cc472 1287 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1288 return -ENFILE; /* Not exactly a match, but its the
1289 closest posix thing */
1290 }
1291
1292 sock->type = type;
1293
95a5afca 1294#ifdef CONFIG_MODULES
89bddce5
SH
1295 /* Attempt to load a protocol module if the find failed.
1296 *
1297 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1298 * requested real, full-featured networking support upon configuration.
1299 * Otherwise module support will break!
1300 */
190683a9 1301 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1302 request_module("net-pf-%d", family);
1da177e4
LT
1303#endif
1304
55737fda
SH
1305 rcu_read_lock();
1306 pf = rcu_dereference(net_families[family]);
1307 err = -EAFNOSUPPORT;
1308 if (!pf)
1309 goto out_release;
1da177e4
LT
1310
1311 /*
1312 * We will call the ->create function, that possibly is in a loadable
1313 * module, so we have to bump that loadable module refcnt first.
1314 */
55737fda 1315 if (!try_module_get(pf->owner))
1da177e4
LT
1316 goto out_release;
1317
55737fda
SH
1318 /* Now protected by module ref count */
1319 rcu_read_unlock();
1320
3f378b68 1321 err = pf->create(net, sock, protocol, kern);
55737fda 1322 if (err < 0)
1da177e4 1323 goto out_module_put;
a79af59e 1324
1da177e4
LT
1325 /*
1326 * Now to bump the refcnt of the [loadable] module that owns this
1327 * socket at sock_release time we decrement its refcnt.
1328 */
55737fda
SH
1329 if (!try_module_get(sock->ops->owner))
1330 goto out_module_busy;
1331
1da177e4
LT
1332 /*
1333 * Now that we're done with the ->create function, the [loadable]
1334 * module can have its refcnt decremented
1335 */
55737fda 1336 module_put(pf->owner);
7420ed23
VY
1337 err = security_socket_post_create(sock, family, type, protocol, kern);
1338 if (err)
3b185525 1339 goto out_sock_release;
55737fda 1340 *res = sock;
1da177e4 1341
55737fda
SH
1342 return 0;
1343
1344out_module_busy:
1345 err = -EAFNOSUPPORT;
1da177e4 1346out_module_put:
55737fda
SH
1347 sock->ops = NULL;
1348 module_put(pf->owner);
1349out_sock_release:
1da177e4 1350 sock_release(sock);
55737fda
SH
1351 return err;
1352
1353out_release:
1354 rcu_read_unlock();
1355 goto out_sock_release;
1da177e4 1356}
721db93a 1357EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1358
1359int sock_create(int family, int type, int protocol, struct socket **res)
1360{
1b8d7ae4 1361 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1362}
c6d409cf 1363EXPORT_SYMBOL(sock_create);
1da177e4
LT
1364
1365int sock_create_kern(int family, int type, int protocol, struct socket **res)
1366{
1b8d7ae4 1367 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1368}
c6d409cf 1369EXPORT_SYMBOL(sock_create_kern);
1da177e4 1370
3e0fa65f 1371SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1372{
1373 int retval;
1374 struct socket *sock;
a677a039
UD
1375 int flags;
1376
e38b36f3
UD
1377 /* Check the SOCK_* constants for consistency. */
1378 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1379 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1380 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1381 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1382
a677a039 1383 flags = type & ~SOCK_TYPE_MASK;
77d27200 1384 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1385 return -EINVAL;
1386 type &= SOCK_TYPE_MASK;
1da177e4 1387
aaca0bdc
UD
1388 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1389 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1390
1da177e4
LT
1391 retval = sock_create(family, type, protocol, &sock);
1392 if (retval < 0)
1393 goto out;
1394
77d27200 1395 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1396 if (retval < 0)
1397 goto out_release;
1398
1399out:
1400 /* It may be already another descriptor 8) Not kernel problem. */
1401 return retval;
1402
1403out_release:
1404 sock_release(sock);
1405 return retval;
1406}
1407
1408/*
1409 * Create a pair of connected sockets.
1410 */
1411
3e0fa65f
HC
1412SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1413 int __user *, usockvec)
1da177e4
LT
1414{
1415 struct socket *sock1, *sock2;
1416 int fd1, fd2, err;
db349509 1417 struct file *newfile1, *newfile2;
a677a039
UD
1418 int flags;
1419
1420 flags = type & ~SOCK_TYPE_MASK;
77d27200 1421 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1422 return -EINVAL;
1423 type &= SOCK_TYPE_MASK;
1da177e4 1424
aaca0bdc
UD
1425 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1426 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1427
1da177e4
LT
1428 /*
1429 * Obtain the first socket and check if the underlying protocol
1430 * supports the socketpair call.
1431 */
1432
1433 err = sock_create(family, type, protocol, &sock1);
1434 if (err < 0)
1435 goto out;
1436
1437 err = sock_create(family, type, protocol, &sock2);
1438 if (err < 0)
1439 goto out_release_1;
1440
1441 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1442 if (err < 0)
1da177e4
LT
1443 goto out_release_both;
1444
28407630 1445 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1446 if (unlikely(fd1 < 0)) {
1447 err = fd1;
db349509 1448 goto out_release_both;
bf3c23d1 1449 }
d73aa286 1450
28407630 1451 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1452 if (unlikely(fd2 < 0)) {
1453 err = fd2;
d73aa286 1454 goto out_put_unused_1;
28407630
AV
1455 }
1456
aab174f0 1457 newfile1 = sock_alloc_file(sock1, flags, NULL);
28407630
AV
1458 if (unlikely(IS_ERR(newfile1))) {
1459 err = PTR_ERR(newfile1);
d73aa286 1460 goto out_put_unused_both;
28407630
AV
1461 }
1462
aab174f0 1463 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1464 if (IS_ERR(newfile2)) {
1465 err = PTR_ERR(newfile2);
d73aa286 1466 goto out_fput_1;
db349509
AV
1467 }
1468
d73aa286
YD
1469 err = put_user(fd1, &usockvec[0]);
1470 if (err)
1471 goto out_fput_both;
1472
1473 err = put_user(fd2, &usockvec[1]);
1474 if (err)
1475 goto out_fput_both;
1476
157cf649 1477 audit_fd_pair(fd1, fd2);
d73aa286 1478
db349509
AV
1479 fd_install(fd1, newfile1);
1480 fd_install(fd2, newfile2);
1da177e4
LT
1481 /* fd1 and fd2 may be already another descriptors.
1482 * Not kernel problem.
1483 */
1484
d73aa286 1485 return 0;
1da177e4 1486
d73aa286
YD
1487out_fput_both:
1488 fput(newfile2);
1489 fput(newfile1);
1490 put_unused_fd(fd2);
1491 put_unused_fd(fd1);
1492 goto out;
1493
1494out_fput_1:
1495 fput(newfile1);
1496 put_unused_fd(fd2);
1497 put_unused_fd(fd1);
1498 sock_release(sock2);
1499 goto out;
1da177e4 1500
d73aa286
YD
1501out_put_unused_both:
1502 put_unused_fd(fd2);
1503out_put_unused_1:
1504 put_unused_fd(fd1);
1da177e4 1505out_release_both:
89bddce5 1506 sock_release(sock2);
1da177e4 1507out_release_1:
89bddce5 1508 sock_release(sock1);
1da177e4
LT
1509out:
1510 return err;
1511}
1512
1da177e4
LT
1513/*
1514 * Bind a name to a socket. Nothing much to do here since it's
1515 * the protocol's responsibility to handle the local address.
1516 *
1517 * We move the socket address to kernel space before we call
1518 * the protocol layer (having also checked the address is ok).
1519 */
1520
20f37034 1521SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1522{
1523 struct socket *sock;
230b1839 1524 struct sockaddr_storage address;
6cb153ca 1525 int err, fput_needed;
1da177e4 1526
89bddce5 1527 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1528 if (sock) {
43db362d 1529 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1530 if (err >= 0) {
1531 err = security_socket_bind(sock,
230b1839 1532 (struct sockaddr *)&address,
89bddce5 1533 addrlen);
6cb153ca
BL
1534 if (!err)
1535 err = sock->ops->bind(sock,
89bddce5 1536 (struct sockaddr *)
230b1839 1537 &address, addrlen);
1da177e4 1538 }
6cb153ca 1539 fput_light(sock->file, fput_needed);
89bddce5 1540 }
1da177e4
LT
1541 return err;
1542}
1543
1da177e4
LT
1544/*
1545 * Perform a listen. Basically, we allow the protocol to do anything
1546 * necessary for a listen, and if that works, we mark the socket as
1547 * ready for listening.
1548 */
1549
3e0fa65f 1550SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1551{
1552 struct socket *sock;
6cb153ca 1553 int err, fput_needed;
b8e1f9b5 1554 int somaxconn;
89bddce5
SH
1555
1556 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1557 if (sock) {
8efa6e93 1558 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1559 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1560 backlog = somaxconn;
1da177e4
LT
1561
1562 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1563 if (!err)
1564 err = sock->ops->listen(sock, backlog);
1da177e4 1565
6cb153ca 1566 fput_light(sock->file, fput_needed);
1da177e4
LT
1567 }
1568 return err;
1569}
1570
1da177e4
LT
1571/*
1572 * For accept, we attempt to create a new socket, set up the link
1573 * with the client, wake up the client, then return the new
1574 * connected fd. We collect the address of the connector in kernel
1575 * space and move it to user at the very end. This is unclean because
1576 * we open the socket then return an error.
1577 *
1578 * 1003.1g adds the ability to recvmsg() to query connection pending
1579 * status to recvmsg. We need to add that support in a way thats
1580 * clean when we restucture accept also.
1581 */
1582
20f37034
HC
1583SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1584 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1585{
1586 struct socket *sock, *newsock;
39d8c1b6 1587 struct file *newfile;
6cb153ca 1588 int err, len, newfd, fput_needed;
230b1839 1589 struct sockaddr_storage address;
1da177e4 1590
77d27200 1591 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1592 return -EINVAL;
1593
1594 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1595 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1596
6cb153ca 1597 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1598 if (!sock)
1599 goto out;
1600
1601 err = -ENFILE;
c6d409cf
ED
1602 newsock = sock_alloc();
1603 if (!newsock)
1da177e4
LT
1604 goto out_put;
1605
1606 newsock->type = sock->type;
1607 newsock->ops = sock->ops;
1608
1da177e4
LT
1609 /*
1610 * We don't need try_module_get here, as the listening socket (sock)
1611 * has the protocol module (sock->ops->owner) held.
1612 */
1613 __module_get(newsock->ops->owner);
1614
28407630 1615 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1616 if (unlikely(newfd < 0)) {
1617 err = newfd;
9a1875e6
DM
1618 sock_release(newsock);
1619 goto out_put;
39d8c1b6 1620 }
aab174f0 1621 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
28407630
AV
1622 if (unlikely(IS_ERR(newfile))) {
1623 err = PTR_ERR(newfile);
1624 put_unused_fd(newfd);
1625 sock_release(newsock);
1626 goto out_put;
1627 }
39d8c1b6 1628
a79af59e
FF
1629 err = security_socket_accept(sock, newsock);
1630 if (err)
39d8c1b6 1631 goto out_fd;
a79af59e 1632
1da177e4
LT
1633 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1634 if (err < 0)
39d8c1b6 1635 goto out_fd;
1da177e4
LT
1636
1637 if (upeer_sockaddr) {
230b1839 1638 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1639 &len, 2) < 0) {
1da177e4 1640 err = -ECONNABORTED;
39d8c1b6 1641 goto out_fd;
1da177e4 1642 }
43db362d 1643 err = move_addr_to_user(&address,
230b1839 1644 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1645 if (err < 0)
39d8c1b6 1646 goto out_fd;
1da177e4
LT
1647 }
1648
1649 /* File flags are not inherited via accept() unlike another OSes. */
1650
39d8c1b6
DM
1651 fd_install(newfd, newfile);
1652 err = newfd;
1da177e4 1653
1da177e4 1654out_put:
6cb153ca 1655 fput_light(sock->file, fput_needed);
1da177e4
LT
1656out:
1657 return err;
39d8c1b6 1658out_fd:
9606a216 1659 fput(newfile);
39d8c1b6 1660 put_unused_fd(newfd);
1da177e4
LT
1661 goto out_put;
1662}
1663
20f37034
HC
1664SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1665 int __user *, upeer_addrlen)
aaca0bdc 1666{
de11defe 1667 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1668}
1669
1da177e4
LT
1670/*
1671 * Attempt to connect to a socket with the server address. The address
1672 * is in user space so we verify it is OK and move it to kernel space.
1673 *
1674 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1675 * break bindings
1676 *
1677 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1678 * other SEQPACKET protocols that take time to connect() as it doesn't
1679 * include the -EINPROGRESS status for such sockets.
1680 */
1681
20f37034
HC
1682SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1683 int, addrlen)
1da177e4
LT
1684{
1685 struct socket *sock;
230b1839 1686 struct sockaddr_storage address;
6cb153ca 1687 int err, fput_needed;
1da177e4 1688
6cb153ca 1689 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1690 if (!sock)
1691 goto out;
43db362d 1692 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1693 if (err < 0)
1694 goto out_put;
1695
89bddce5 1696 err =
230b1839 1697 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1698 if (err)
1699 goto out_put;
1700
230b1839 1701 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1702 sock->file->f_flags);
1703out_put:
6cb153ca 1704 fput_light(sock->file, fput_needed);
1da177e4
LT
1705out:
1706 return err;
1707}
1708
1709/*
1710 * Get the local address ('name') of a socket object. Move the obtained
1711 * name to user space.
1712 */
1713
20f37034
HC
1714SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1715 int __user *, usockaddr_len)
1da177e4
LT
1716{
1717 struct socket *sock;
230b1839 1718 struct sockaddr_storage address;
6cb153ca 1719 int len, err, fput_needed;
89bddce5 1720
6cb153ca 1721 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1722 if (!sock)
1723 goto out;
1724
1725 err = security_socket_getsockname(sock);
1726 if (err)
1727 goto out_put;
1728
230b1839 1729 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1730 if (err)
1731 goto out_put;
43db362d 1732 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1733
1734out_put:
6cb153ca 1735 fput_light(sock->file, fput_needed);
1da177e4
LT
1736out:
1737 return err;
1738}
1739
1740/*
1741 * Get the remote address ('name') of a socket object. Move the obtained
1742 * name to user space.
1743 */
1744
20f37034
HC
1745SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1746 int __user *, usockaddr_len)
1da177e4
LT
1747{
1748 struct socket *sock;
230b1839 1749 struct sockaddr_storage address;
6cb153ca 1750 int len, err, fput_needed;
1da177e4 1751
89bddce5
SH
1752 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1753 if (sock != NULL) {
1da177e4
LT
1754 err = security_socket_getpeername(sock);
1755 if (err) {
6cb153ca 1756 fput_light(sock->file, fput_needed);
1da177e4
LT
1757 return err;
1758 }
1759
89bddce5 1760 err =
230b1839 1761 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1762 1);
1da177e4 1763 if (!err)
43db362d 1764 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1765 usockaddr_len);
6cb153ca 1766 fput_light(sock->file, fput_needed);
1da177e4
LT
1767 }
1768 return err;
1769}
1770
1771/*
1772 * Send a datagram to a given address. We move the address into kernel
1773 * space and check the user space data area is readable before invoking
1774 * the protocol.
1775 */
1776
3e0fa65f 1777SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1778 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1779 int, addr_len)
1da177e4
LT
1780{
1781 struct socket *sock;
230b1839 1782 struct sockaddr_storage address;
1da177e4
LT
1783 int err;
1784 struct msghdr msg;
1785 struct iovec iov;
6cb153ca 1786 int fput_needed;
6cb153ca 1787
253eacc0
LT
1788 if (len > INT_MAX)
1789 len = INT_MAX;
de0fa95c
PE
1790 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1791 if (!sock)
4387ff75 1792 goto out;
6cb153ca 1793
89bddce5
SH
1794 iov.iov_base = buff;
1795 iov.iov_len = len;
1796 msg.msg_name = NULL;
1797 msg.msg_iov = &iov;
1798 msg.msg_iovlen = 1;
1799 msg.msg_control = NULL;
1800 msg.msg_controllen = 0;
1801 msg.msg_namelen = 0;
6cb153ca 1802 if (addr) {
43db362d 1803 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1804 if (err < 0)
1805 goto out_put;
230b1839 1806 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1807 msg.msg_namelen = addr_len;
1da177e4
LT
1808 }
1809 if (sock->file->f_flags & O_NONBLOCK)
1810 flags |= MSG_DONTWAIT;
1811 msg.msg_flags = flags;
1812 err = sock_sendmsg(sock, &msg, len);
1813
89bddce5 1814out_put:
de0fa95c 1815 fput_light(sock->file, fput_needed);
4387ff75 1816out:
1da177e4
LT
1817 return err;
1818}
1819
1820/*
89bddce5 1821 * Send a datagram down a socket.
1da177e4
LT
1822 */
1823
3e0fa65f 1824SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1825 unsigned int, flags)
1da177e4
LT
1826{
1827 return sys_sendto(fd, buff, len, flags, NULL, 0);
1828}
1829
1830/*
89bddce5 1831 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1832 * sender. We verify the buffers are writable and if needed move the
1833 * sender address from kernel to user space.
1834 */
1835
3e0fa65f 1836SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1837 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1838 int __user *, addr_len)
1da177e4
LT
1839{
1840 struct socket *sock;
1841 struct iovec iov;
1842 struct msghdr msg;
230b1839 1843 struct sockaddr_storage address;
89bddce5 1844 int err, err2;
6cb153ca
BL
1845 int fput_needed;
1846
253eacc0
LT
1847 if (size > INT_MAX)
1848 size = INT_MAX;
de0fa95c 1849 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1850 if (!sock)
de0fa95c 1851 goto out;
1da177e4 1852
89bddce5
SH
1853 msg.msg_control = NULL;
1854 msg.msg_controllen = 0;
1855 msg.msg_iovlen = 1;
1856 msg.msg_iov = &iov;
1857 iov.iov_len = size;
1858 iov.iov_base = ubuf;
f3d33426
HFS
1859 /* Save some cycles and don't copy the address if not needed */
1860 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1861 /* We assume all kernel code knows the size of sockaddr_storage */
1862 msg.msg_namelen = 0;
1da177e4
LT
1863 if (sock->file->f_flags & O_NONBLOCK)
1864 flags |= MSG_DONTWAIT;
89bddce5 1865 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1866
89bddce5 1867 if (err >= 0 && addr != NULL) {
43db362d 1868 err2 = move_addr_to_user(&address,
230b1839 1869 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1870 if (err2 < 0)
1871 err = err2;
1da177e4 1872 }
de0fa95c
PE
1873
1874 fput_light(sock->file, fput_needed);
4387ff75 1875out:
1da177e4
LT
1876 return err;
1877}
1878
1879/*
89bddce5 1880 * Receive a datagram from a socket.
1da177e4
LT
1881 */
1882
b7c0ddf5
JG
1883SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1884 unsigned int, flags)
1da177e4
LT
1885{
1886 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1887}
1888
1889/*
1890 * Set a socket option. Because we don't know the option lengths we have
1891 * to pass the user mode parameter for the protocols to sort out.
1892 */
1893
20f37034
HC
1894SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1895 char __user *, optval, int, optlen)
1da177e4 1896{
6cb153ca 1897 int err, fput_needed;
1da177e4
LT
1898 struct socket *sock;
1899
1900 if (optlen < 0)
1901 return -EINVAL;
89bddce5
SH
1902
1903 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1904 if (sock != NULL) {
1905 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1906 if (err)
1907 goto out_put;
1da177e4
LT
1908
1909 if (level == SOL_SOCKET)
89bddce5
SH
1910 err =
1911 sock_setsockopt(sock, level, optname, optval,
1912 optlen);
1da177e4 1913 else
89bddce5
SH
1914 err =
1915 sock->ops->setsockopt(sock, level, optname, optval,
1916 optlen);
6cb153ca
BL
1917out_put:
1918 fput_light(sock->file, fput_needed);
1da177e4
LT
1919 }
1920 return err;
1921}
1922
1923/*
1924 * Get a socket option. Because we don't know the option lengths we have
1925 * to pass a user mode parameter for the protocols to sort out.
1926 */
1927
20f37034
HC
1928SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1929 char __user *, optval, int __user *, optlen)
1da177e4 1930{
6cb153ca 1931 int err, fput_needed;
1da177e4
LT
1932 struct socket *sock;
1933
89bddce5
SH
1934 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1935 if (sock != NULL) {
6cb153ca
BL
1936 err = security_socket_getsockopt(sock, level, optname);
1937 if (err)
1938 goto out_put;
1da177e4
LT
1939
1940 if (level == SOL_SOCKET)
89bddce5
SH
1941 err =
1942 sock_getsockopt(sock, level, optname, optval,
1943 optlen);
1da177e4 1944 else
89bddce5
SH
1945 err =
1946 sock->ops->getsockopt(sock, level, optname, optval,
1947 optlen);
6cb153ca
BL
1948out_put:
1949 fput_light(sock->file, fput_needed);
1da177e4
LT
1950 }
1951 return err;
1952}
1953
1da177e4
LT
1954/*
1955 * Shutdown a socket.
1956 */
1957
754fe8d2 1958SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1959{
6cb153ca 1960 int err, fput_needed;
1da177e4
LT
1961 struct socket *sock;
1962
89bddce5
SH
1963 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1964 if (sock != NULL) {
1da177e4 1965 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1966 if (!err)
1967 err = sock->ops->shutdown(sock, how);
1968 fput_light(sock->file, fput_needed);
1da177e4
LT
1969 }
1970 return err;
1971}
1972
/*
 * Helper macros yielding the address of a msghdr field that has the same
 * type (int / unsigned) in the 32-bit and 64-bit layouts on our platforms.
 *
 * They expect a variable named "flags" and, next to the pointer "msg", a
 * pointer named "msg_compat" in the expanding scope.  When MSG_CMSG_COMPAT
 * is set in flags the compat structure's member is chosen, otherwise the
 * native one.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1979
c71d8ebe
TH
/*
 * A socket address together with its length, passed to ___sys_sendmsg()
 * by pointer.
 */
struct used_address {
	struct sockaddr_storage name;	/* address bytes */
	unsigned int name_len;		/* number of valid bytes in name */
};
1984
1661bf36
DC
1985static int copy_msghdr_from_user(struct msghdr *kmsg,
1986 struct msghdr __user *umsg)
1987{
1988 if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
1989 return -EFAULT;
dbb490b9
ML
1990
1991 if (kmsg->msg_namelen < 0)
1992 return -EINVAL;
1993
1661bf36 1994 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1995 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
1661bf36
DC
1996 return 0;
1997}
1998
/*
 * Common sendmsg worker shared by __sys_sendmsg() and __sys_sendmmsg().
 *
 * @sock:         socket to send on
 * @msg:          user-space message header (a compat_msghdr when
 *                MSG_CMSG_COMPAT is set in @flags)
 * @msg_sys:      kernel copy of the header, filled in here
 * @flags:        send flags; stored into msg_sys->msg_flags
 * @used_address: optional cache of the last successfully used destination
 *                address (NULL when called from __sys_sendmsg())
 *
 * Returns the value of sock_sendmsg()/sock_sendmsg_nosec() or a negative
 * errno from one of the setup steps.
 */
a7526eb5 1999static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2000 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 2001 struct used_address *used_address)
1da177e4 2002{
89bddce5
SH
2003 struct compat_msghdr __user *msg_compat =
2004 (struct compat_msghdr __user *)msg;
230b1839 2005 struct sockaddr_storage address;
1da177e4 2006 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2007 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
2008 __attribute__ ((aligned(sizeof(__kernel_size_t))));
2009 /* 20 is size of ipv6_pktinfo */
1da177e4 2010 unsigned char *ctl_buf = ctl;
a74e9106 2011 int err, ctl_len, total_len;
89bddce5 2012
1da177e4
LT
2013 err = -EFAULT;
2014 if (MSG_CMSG_COMPAT & flags) {
228e548e 2015 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2016 return -EFAULT;
1661bf36
DC
2017 } else {
2018 err = copy_msghdr_from_user(msg_sys, msg);
2019 if (err)
2020 return err;
2021 }
1da177e4 2022
/*
 * Up to UIO_FASTIOV segments use the on-stack array; more than that (but
 * at most UIO_MAXIOV) are kmalloc'ed and released at out_freeiov.
 */
228e548e 2023 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2024 err = -EMSGSIZE;
2025 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2026 goto out;
2027 err = -ENOMEM;
2028 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2029 GFP_KERNEL);
1da177e4 2030 if (!iov)
228e548e 2031 goto out;
1da177e4
LT
2032 }
2033
2034 /* This will also move the address data into kernel space */
2035 if (MSG_CMSG_COMPAT & flags) {
43db362d 2036 err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ);
1da177e4 2037 } else
43db362d 2038 err = verify_iovec(msg_sys, iov, &address, VERIFY_READ);
89bddce5 2039 if (err < 0)
1da177e4
LT
2040 goto out_freeiov;
2041 total_len = err;
2042
/* -ENOBUFS is what the control-length and control-allocation failures below return. */
2043 err = -ENOBUFS;
2044
228e548e 2045 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2046 goto out_freeiov;
228e548e 2047 ctl_len = msg_sys->msg_controllen;
1da177e4 2048 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2049 err =
228e548e 2050 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2051 sizeof(ctl));
1da177e4
LT
2052 if (err)
2053 goto out_freeiov;
228e548e
AB
2054 ctl_buf = msg_sys->msg_control;
2055 ctl_len = msg_sys->msg_controllen;
1da177e4 2056 } else if (ctl_len) {
89bddce5 2057 if (ctl_len > sizeof(ctl)) {
1da177e4 2058 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2059 if (ctl_buf == NULL)
1da177e4
LT
2060 goto out_freeiov;
2061 }
2062 err = -EFAULT;
2063 /*
228e548e 2064 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2065 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2066 * checking falls down on this.
2067 */
fb8621bb 2068 if (copy_from_user(ctl_buf,
228e548e 2069 (void __user __force *)msg_sys->msg_control,
89bddce5 2070 ctl_len))
1da177e4 2071 goto out_freectl;
228e548e 2072 msg_sys->msg_control = ctl_buf;
1da177e4 2073 }
228e548e 2074 msg_sys->msg_flags = flags;
1da177e4
LT
2075
/* A socket file opened O_NONBLOCK always sends with MSG_DONTWAIT. */
2076 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2077 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2078 /*
2079 * If this is sendmmsg() and current destination address is same as
2080 * previously succeeded address, omit asking LSM's decision.
2081 * used_address->name_len is initialized to UINT_MAX so that the first
2082 * destination address never matches.
2083 */
bc909d9d
MD
2084 if (used_address && msg_sys->msg_name &&
2085 used_address->name_len == msg_sys->msg_namelen &&
2086 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe
TH
2087 used_address->name_len)) {
2088 err = sock_sendmsg_nosec(sock, msg_sys, total_len);
2089 goto out_freectl;
2090 }
2091 err = sock_sendmsg(sock, msg_sys, total_len);
2092 /*
2093 * If this is sendmmsg() and sending to current destination address was
2094 * successful, remember it.
2095 */
2096 if (used_address && err >= 0) {
2097 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2098 if (msg_sys->msg_name)
2099 memcpy(&used_address->name, msg_sys->msg_name,
2100 used_address->name_len);
c71d8ebe 2101 }
1da177e4
LT
2102
/* Cleanup: free the control buffer and iovec only if they are not the on-stack ones. */
2103out_freectl:
89bddce5 2104 if (ctl_buf != ctl)
1da177e4
LT
2105 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2106out_freeiov:
2107 if (iov != iovstack)
a74e9106 2108 kfree(iov);
228e548e
AB
2109out:
2110 return err;
2111}
2112
2113/*
2114 * BSD sendmsg interface
2115 */
2116
a7526eb5 2117long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
228e548e
AB
2118{
2119 int fput_needed, err;
2120 struct msghdr msg_sys;
1be374a0
AL
2121 struct socket *sock;
2122
1be374a0 2123 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2124 if (!sock)
2125 goto out;
2126
a7526eb5 2127 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 2128
6cb153ca 2129 fput_light(sock->file, fput_needed);
89bddce5 2130out:
1da177e4
LT
2131 return err;
2132}
2133
a7526eb5
AL
2134SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags)
2135{
2136 if (flags & MSG_CMSG_COMPAT)
2137 return -EINVAL;
2138 return __sys_sendmsg(fd, msg, flags);
2139}
2140
228e548e
AB
2141/*
2142 * Linux sendmmsg interface
2143 */
2144
2145int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2146 unsigned int flags)
2147{
2148 int fput_needed, err, datagrams;
2149 struct socket *sock;
2150 struct mmsghdr __user *entry;
2151 struct compat_mmsghdr __user *compat_entry;
2152 struct msghdr msg_sys;
c71d8ebe 2153 struct used_address used_address;
228e548e 2154
98382f41
AB
2155 if (vlen > UIO_MAXIOV)
2156 vlen = UIO_MAXIOV;
228e548e
AB
2157
2158 datagrams = 0;
2159
2160 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2161 if (!sock)
2162 return err;
2163
c71d8ebe 2164 used_address.name_len = UINT_MAX;
228e548e
AB
2165 entry = mmsg;
2166 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2167 err = 0;
228e548e
AB
2168
2169 while (datagrams < vlen) {
228e548e 2170 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2171 err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
2172 &msg_sys, flags, &used_address);
228e548e
AB
2173 if (err < 0)
2174 break;
2175 err = __put_user(err, &compat_entry->msg_len);
2176 ++compat_entry;
2177 } else {
a7526eb5
AL
2178 err = ___sys_sendmsg(sock,
2179 (struct msghdr __user *)entry,
2180 &msg_sys, flags, &used_address);
228e548e
AB
2181 if (err < 0)
2182 break;
2183 err = put_user(err, &entry->msg_len);
2184 ++entry;
2185 }
2186
2187 if (err)
2188 break;
2189 ++datagrams;
2190 }
2191
228e548e
AB
2192 fput_light(sock->file, fput_needed);
2193
728ffb86
AB
2194 /* We only return an error if no datagrams were able to be sent */
2195 if (datagrams != 0)
228e548e
AB
2196 return datagrams;
2197
228e548e
AB
2198 return err;
2199}
2200
2201SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2202 unsigned int, vlen, unsigned int, flags)
2203{
1be374a0
AL
2204 if (flags & MSG_CMSG_COMPAT)
2205 return -EINVAL;
228e548e
AB
2206 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2207}
2208
/*
 * Common recvmsg worker shared by __sys_recvmsg() and __sys_recvmmsg().
 *
 * @sock:    socket to receive from
 * @msg:     user-space message header (a compat_msghdr when
 *           MSG_CMSG_COMPAT is set in @flags)
 * @msg_sys: kernel copy of the header, filled in here
 * @flags:   receive flags
 * @nosec:   non-zero selects sock_recvmsg_nosec() instead of sock_recvmsg()
 *
 * On success returns the value returned by the receive call and writes the
 * source address, msg_flags and the consumed control length back to the
 * user header; otherwise returns a negative errno.
 */
a7526eb5 2209static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
95c96174 2210 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2211{
89bddce5
SH
2212 struct compat_msghdr __user *msg_compat =
2213 (struct compat_msghdr __user *)msg;
1da177e4 2214 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2215 struct iovec *iov = iovstack;
1da177e4 2216 unsigned long cmsg_ptr;
a74e9106 2217 int err, total_len, len;
1da177e4
LT
2218
2219 /* kernel mode address */
230b1839 2220 struct sockaddr_storage addr;
1da177e4
LT
2221
2222 /* user mode address pointers */
2223 struct sockaddr __user *uaddr;
2224 int __user *uaddr_len;
89bddce5 2225
1da177e4 2226 if (MSG_CMSG_COMPAT & flags) {
a2e27255 2227 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 2228 return -EFAULT;
1661bf36
DC
2229 } else {
2230 err = copy_msghdr_from_user(msg_sys, msg);
2231 if (err)
2232 return err;
2233 }
1da177e4 2234
/*
 * Up to UIO_FASTIOV segments use the on-stack array; more than that (but
 * at most UIO_MAXIOV) are kmalloc'ed and released at out_freeiov.
 */
a2e27255 2235 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
a74e9106
ED
2236 err = -EMSGSIZE;
2237 if (msg_sys->msg_iovlen > UIO_MAXIOV)
2238 goto out;
2239 err = -ENOMEM;
2240 iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec),
2241 GFP_KERNEL);
1da177e4 2242 if (!iov)
a2e27255 2243 goto out;
1da177e4
LT
2244 }
2245
f3d33426
HFS
2246 /* Save the user-mode address (verify_iovec will change the
2247 * kernel msghdr to use the kernel address space)
1da177e4 2248 */
a2e27255 2249 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4 2250 uaddr_len = COMPAT_NAMELEN(msg);
f3d33426 2251 if (MSG_CMSG_COMPAT & flags)
43db362d 2252 err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
f3d33426 2253 else
43db362d 2254 err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
1da177e4
LT
2255 if (err < 0)
2256 goto out_freeiov;
89bddce5 2257 total_len = err;
1da177e4 2258
/*
 * Remember where the control buffer started; the difference from
 * msg_control after the receive is written back as msg_controllen.
 */
a2e27255
ACM
2259 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2260 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2261
f3d33426
HFS
2262 /* We assume all kernel code knows the size of sockaddr_storage */
2263 msg_sys->msg_namelen = 0;
2264
1da177e4
LT
2265 if (sock->file->f_flags & O_NONBLOCK)
2266 flags |= MSG_DONTWAIT;
a2e27255
ACM
2267 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2268 total_len, flags);
1da177e4
LT
2269 if (err < 0)
2270 goto out_freeiov;
2271 len = err;
2272
/* Copy the source address out only if the caller supplied a msg_name buffer. */
2273 if (uaddr != NULL) {
43db362d 2274 err = move_addr_to_user(&addr,
a2e27255 2275 msg_sys->msg_namelen, uaddr,
89bddce5 2276 uaddr_len);
1da177e4
LT
2277 if (err < 0)
2278 goto out_freeiov;
2279 }
a2e27255 2280 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2281 COMPAT_FLAGS(msg));
1da177e4
LT
2282 if (err)
2283 goto out_freeiov;
2284 if (MSG_CMSG_COMPAT & flags)
a2e27255 2285 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2286 &msg_compat->msg_controllen);
2287 else
a2e27255 2288 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2289 &msg->msg_controllen);
2290 if (err)
2291 goto out_freeiov;
2292 err = len;
2293
2294out_freeiov:
2295 if (iov != iovstack)
a74e9106 2296 kfree(iov);
a2e27255
ACM
2297out:
2298 return err;
2299}
2300
2301/*
2302 * BSD recvmsg interface
2303 */
2304
a7526eb5 2305long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags)
a2e27255
ACM
2306{
2307 int fput_needed, err;
2308 struct msghdr msg_sys;
1be374a0
AL
2309 struct socket *sock;
2310
1be374a0 2311 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2312 if (!sock)
2313 goto out;
2314
a7526eb5 2315 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2316
6cb153ca 2317 fput_light(sock->file, fput_needed);
1da177e4
LT
2318out:
2319 return err;
2320}
2321
a7526eb5
AL
2322SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2323 unsigned int, flags)
2324{
2325 if (flags & MSG_CMSG_COMPAT)
2326 return -EINVAL;
2327 return __sys_recvmsg(fd, msg, flags);
2328}
2329
a2e27255
ACM
2330/*
2331 * Linux recvmmsg interface
2332 */
2333
2334int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2335 unsigned int flags, struct timespec *timeout)
2336{
2337 int fput_needed, err, datagrams;
2338 struct socket *sock;
2339 struct mmsghdr __user *entry;
d7256d0e 2340 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2341 struct msghdr msg_sys;
2342 struct timespec end_time;
2343
2344 if (timeout &&
2345 poll_select_set_timeout(&end_time, timeout->tv_sec,
2346 timeout->tv_nsec))
2347 return -EINVAL;
2348
2349 datagrams = 0;
2350
2351 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2352 if (!sock)
2353 return err;
2354
2355 err = sock_error(sock->sk);
2356 if (err)
2357 goto out_put;
2358
2359 entry = mmsg;
d7256d0e 2360 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2361
2362 while (datagrams < vlen) {
2363 /*
2364 * No need to ask LSM for more than the first datagram.
2365 */
d7256d0e 2366 if (MSG_CMSG_COMPAT & flags) {
a7526eb5
AL
2367 err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2368 &msg_sys, flags & ~MSG_WAITFORONE,
2369 datagrams);
d7256d0e
JMG
2370 if (err < 0)
2371 break;
2372 err = __put_user(err, &compat_entry->msg_len);
2373 ++compat_entry;
2374 } else {
a7526eb5
AL
2375 err = ___sys_recvmsg(sock,
2376 (struct msghdr __user *)entry,
2377 &msg_sys, flags & ~MSG_WAITFORONE,
2378 datagrams);
d7256d0e
JMG
2379 if (err < 0)
2380 break;
2381 err = put_user(err, &entry->msg_len);
2382 ++entry;
2383 }
2384
a2e27255
ACM
2385 if (err)
2386 break;
a2e27255
ACM
2387 ++datagrams;
2388
71c5c159
BB
2389 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2390 if (flags & MSG_WAITFORONE)
2391 flags |= MSG_DONTWAIT;
2392
a2e27255
ACM
2393 if (timeout) {
2394 ktime_get_ts(timeout);
2395 *timeout = timespec_sub(end_time, *timeout);
2396 if (timeout->tv_sec < 0) {
2397 timeout->tv_sec = timeout->tv_nsec = 0;
2398 break;
2399 }
2400
2401 /* Timeout, return less than vlen datagrams */
2402 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2403 break;
2404 }
2405
2406 /* Out of band data, return right away */
2407 if (msg_sys.msg_flags & MSG_OOB)
2408 break;
2409 }
2410
2411out_put:
2412 fput_light(sock->file, fput_needed);
1da177e4 2413
a2e27255
ACM
2414 if (err == 0)
2415 return datagrams;
2416
2417 if (datagrams != 0) {
2418 /*
2419 * We may return less entries than requested (vlen) if the
2420 * sock is non block and there aren't enough datagrams...
2421 */
2422 if (err != -EAGAIN) {
2423 /*
2424 * ... or if recvmsg returns an error after we
2425 * received some datagrams, where we record the
2426 * error to return on the next call or if the
2427 * app asks about it using getsockopt(SO_ERROR).
2428 */
2429 sock->sk->sk_err = -err;
2430 }
2431
2432 return datagrams;
2433 }
2434
2435 return err;
2436}
2437
2438SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2439 unsigned int, vlen, unsigned int, flags,
2440 struct timespec __user *, timeout)
2441{
2442 int datagrams;
2443 struct timespec timeout_sys;
2444
1be374a0
AL
2445 if (flags & MSG_CMSG_COMPAT)
2446 return -EINVAL;
2447
a2e27255
ACM
2448 if (!timeout)
2449 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2450
2451 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2452 return -EFAULT;
2453
2454 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2455
2456 if (datagrams > 0 &&
2457 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2458 datagrams = -EFAULT;
2459
2460 return datagrams;
2461}
2462
2463#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call number.
 * Each entry is the number of unsigned long arguments times their size.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
	AL(4)	/* 20 */
};

#undef AL
2474
2475/*
89bddce5 2476 * System call vectors.
1da177e4
LT
2477 *
2478 * Argument checking cleaned up. Saved 20% in size.
2479 * This function doesn't need to set the kernel lock because
89bddce5 2480 * it is set by the callees.
1da177e4
LT
2481 */
2482
/*
 * socketcall(2): multiplexer that dispatches @call to the matching sys_*
 * socket function.  @args points to a user-space array of unsigned long
 * arguments; nargs[call] bytes of it are copied in, reported to
 * audit_socketcall(), and then passed on positionally.
 *
 * Returns the result of the selected call, -EINVAL for an out-of-range
 * @call or oversized argument list, -EFAULT if @args cannot be read, or
 * the error returned by audit_socketcall().
 */
3e0fa65f 2483SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2484{
2950fa9d 2485 unsigned long a[AUDITSC_ARGS];
89bddce5 2486 unsigned long a0, a1;
1da177e4 2487 int err;
47379052 2488 unsigned int len;
1da177e4 2489
228e548e 2490 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2491 return -EINVAL;
2492
/* Refuse an argument list that would not fit in the local array a[]. */
47379052
AV
2493 len = nargs[call];
2494 if (len > sizeof(a))
2495 return -EINVAL;
2496
1da177e4 2497 /* copy_from_user should be SMP safe. */
47379052 2498 if (copy_from_user(a, args, len))
1da177e4 2499 return -EFAULT;
3ec3b2fb 2500
2950fa9d
CG
2501 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2502 if (err)
2503 return err;
3ec3b2fb 2504
89bddce5
SH
2505 a0 = a[0];
2506 a1 = a[1];
2507
2508 switch (call) {
2509 case SYS_SOCKET:
2510 err = sys_socket(a0, a1, a[2]);
2511 break;
2512 case SYS_BIND:
2513 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2514 break;
2515 case SYS_CONNECT:
2516 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2517 break;
2518 case SYS_LISTEN:
2519 err = sys_listen(a0, a1);
2520 break;
2521 case SYS_ACCEPT:
de11defe
UD
2522 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2523 (int __user *)a[2], 0);
89bddce5
SH
2524 break;
2525 case SYS_GETSOCKNAME:
2526 err =
2527 sys_getsockname(a0, (struct sockaddr __user *)a1,
2528 (int __user *)a[2]);
2529 break;
2530 case SYS_GETPEERNAME:
2531 err =
2532 sys_getpeername(a0, (struct sockaddr __user *)a1,
2533 (int __user *)a[2]);
2534 break;
2535 case SYS_SOCKETPAIR:
2536 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2537 break;
2538 case SYS_SEND:
2539 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2540 break;
2541 case SYS_SENDTO:
2542 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2543 (struct sockaddr __user *)a[4], a[5]);
2544 break;
2545 case SYS_RECV:
2546 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2547 break;
2548 case SYS_RECVFROM:
2549 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2550 (struct sockaddr __user *)a[4],
2551 (int __user *)a[5]);
2552 break;
2553 case SYS_SHUTDOWN:
2554 err = sys_shutdown(a0, a1);
2555 break;
2556 case SYS_SETSOCKOPT:
2557 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2558 break;
2559 case SYS_GETSOCKOPT:
2560 err =
2561 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2562 (int __user *)a[4]);
2563 break;
2564 case SYS_SENDMSG:
2565 err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
2566 break;
228e548e
AB
2567 case SYS_SENDMMSG:
2568 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2569 break;
89bddce5
SH
2570 case SYS_RECVMSG:
2571 err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
2572 break;
a2e27255
ACM
2573 case SYS_RECVMMSG:
2574 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2575 (struct timespec __user *)a[4]);
2576 break;
de11defe
UD
2577 case SYS_ACCEPT4:
2578 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2579 (int __user *)a[2], a[3]);
aaca0bdc 2580 break;
89bddce5
SH
2581 default:
2582 err = -EINVAL;
2583 break;
1da177e4
LT
2584 }
2585 return err;
2586}
2587
89bddce5 2588#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2589
55737fda
SH
2590/**
2591 * sock_register - add a socket protocol handler
2592 * @ops: description of protocol
2593 *
1da177e4
LT
2594 * This function is called by a protocol handler that wants to
2595 * advertise its address family, and have it linked into the
55737fda
SH
2596 * socket interface. The value ops->family corresponds to the
2597 * socket system call protocol family.
1da177e4 2598 */
f0fd27d4 2599int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2600{
2601 int err;
2602
2603 if (ops->family >= NPROTO) {
3410f22e 2604 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2605 return -ENOBUFS;
2606 }
55737fda
SH
2607
2608 spin_lock(&net_family_lock);
190683a9
ED
2609 if (rcu_dereference_protected(net_families[ops->family],
2610 lockdep_is_held(&net_family_lock)))
55737fda
SH
2611 err = -EEXIST;
2612 else {
cf778b00 2613 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2614 err = 0;
2615 }
55737fda
SH
2616 spin_unlock(&net_family_lock);
2617
3410f22e 2618 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2619 return err;
2620}
c6d409cf 2621EXPORT_SYMBOL(sock_register);
1da177e4 2622
55737fda
SH
2623/**
2624 * sock_unregister - remove a protocol handler
2625 * @family: protocol family to remove
2626 *
1da177e4
LT
2627 * This function is called by a protocol handler that wants to
2628 * remove its address family, and have it unlinked from the
55737fda
SH
2629 * new socket creation.
2630 *
2631 * If protocol handler is a module, then it can use module reference
2632 * counts to protect against new references. If protocol handler is not
2633 * a module then it needs to provide its own protection in
2634 * the ops->create routine.
1da177e4 2635 */
f0fd27d4 2636void sock_unregister(int family)
1da177e4 2637{
f0fd27d4 2638 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2639
55737fda 2640 spin_lock(&net_family_lock);
a9b3cd7f 2641 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2642 spin_unlock(&net_family_lock);
2643
2644 synchronize_rcu();
2645
3410f22e 2646 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2647}
c6d409cf 2648EXPORT_SYMBOL(sock_unregister);
1da177e4 2649
77d76ea3 2650static int __init sock_init(void)
1da177e4 2651{
b3e19d92 2652 int err;
2ca794e5
EB
2653 /*
2654 * Initialize the network sysctl infrastructure.
2655 */
2656 err = net_sysctl_init();
2657 if (err)
2658 goto out;
b3e19d92 2659
1da177e4 2660 /*
89bddce5 2661 * Initialize skbuff SLAB cache
1da177e4
LT
2662 */
2663 skb_init();
1da177e4
LT
2664
2665 /*
89bddce5 2666 * Initialize the protocols module.
1da177e4
LT
2667 */
2668
2669 init_inodecache();
b3e19d92
NP
2670
2671 err = register_filesystem(&sock_fs_type);
2672 if (err)
2673 goto out_fs;
1da177e4 2674 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2675 if (IS_ERR(sock_mnt)) {
2676 err = PTR_ERR(sock_mnt);
2677 goto out_mount;
2678 }
77d76ea3
AK
2679
2680 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2681 */
2682
2683#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2684 err = netfilter_init();
2685 if (err)
2686 goto out;
1da177e4 2687#endif
cbeb321a 2688
408eccce 2689 ptp_classifier_init();
c1f19b51 2690
b3e19d92
NP
2691out:
2692 return err;
2693
2694out_mount:
2695 unregister_filesystem(&sock_fs_type);
2696out_fs:
2697 goto out;
1da177e4
LT
2698}
2699
77d76ea3
AK
2700core_initcall(sock_init); /* early initcall */
2701
1da177e4
LT
2702#ifdef CONFIG_PROC_FS
2703void socket_seq_show(struct seq_file *seq)
2704{
2705 int cpu;
2706 int counter = 0;
2707
6f912042 2708 for_each_possible_cpu(cpu)
89bddce5 2709 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2710
2711 /* It can be negative, by the way. 8) */
2712 if (counter < 0)
2713 counter = 0;
2714
2715 seq_printf(seq, "sockets: used %d\n", counter);
2716}
89bddce5 2717#endif /* CONFIG_PROC_FS */
1da177e4 2718
89bbfc95 2719#ifdef CONFIG_COMPAT
6b96018b 2720static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2721 unsigned int cmd, void __user *up)
7a229387 2722{
7a229387
AB
2723 mm_segment_t old_fs = get_fs();
2724 struct timeval ktv;
2725 int err;
2726
2727 set_fs(KERNEL_DS);
6b96018b 2728 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2729 set_fs(old_fs);
644595f8 2730 if (!err)
ed6fe9d6 2731 err = compat_put_timeval(&ktv, up);
644595f8 2732
7a229387
AB
2733 return err;
2734}
2735
6b96018b 2736static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2737 unsigned int cmd, void __user *up)
7a229387 2738{
7a229387
AB
2739 mm_segment_t old_fs = get_fs();
2740 struct timespec kts;
2741 int err;
2742
2743 set_fs(KERNEL_DS);
6b96018b 2744 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2745 set_fs(old_fs);
644595f8 2746 if (!err)
ed6fe9d6 2747 err = compat_put_timespec(&kts, up);
644595f8 2748
7a229387
AB
2749 return err;
2750}
2751
6b96018b 2752static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2753{
2754 struct ifreq __user *uifr;
2755 int err;
2756
2757 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2758 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2759 return -EFAULT;
2760
6b96018b 2761 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2762 if (err)
2763 return err;
2764
6b96018b 2765 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2766 return -EFAULT;
2767
2768 return 0;
2769}
2770
/*
 * Compat SIOCGIFCONF.
 *
 * Reads the caller's struct compat_ifconf, builds a native struct ifconf
 * (plus, when the caller supplied a buffer, a native-sized ifreq array
 * seeded from the caller's entries) in compat_alloc_user_space() memory,
 * calls dev_ioctl(SIOCGIFCONF) on it, then copies the resulting entries
 * back in compat_ifreq-sized pieces and stores the resulting length in
 * the caller's ifc_len.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, or the error from
 * dev_ioctl().
 */
6b96018b 2771static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2772{
6b96018b 2773 struct compat_ifconf ifc32;
7a229387
AB
2774 struct ifconf ifc;
2775 struct ifconf __user *uifc;
6b96018b 2776 struct compat_ifreq __user *ifr32;
7a229387
AB
2777 struct ifreq __user *ifr;
2778 unsigned int i, j;
2779 int err;
2780
6b96018b 2781 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2782 return -EFAULT;
2783
43da5f2e 2784 memset(&ifc, 0, sizeof(ifc));
/* No caller buffer: pass a NULL request with zero length to dev_ioctl(). */
7a229387
AB
2785 if (ifc32.ifcbuf == 0) {
2786 ifc32.ifc_len = 0;
2787 ifc.ifc_len = 0;
2788 ifc.ifc_req = NULL;
2789 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2790 } else {
/* Native buffer size: one struct ifreq per whole compat entry, plus one extra. */
c6d409cf
ED
2791 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2792 sizeof(struct ifreq);
7a229387
AB
2793 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2794 ifc.ifc_len = len;
2795 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2796 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2797 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2798 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2799 return -EFAULT;
2800 ifr++;
2801 ifr32++;
2802 }
2803 }
2804 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2805 return -EFAULT;
2806
6b96018b 2807 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2808 if (err)
2809 return err;
2810
2811 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2812 return -EFAULT;
2813
/*
 * Copy results back: i counts bytes written to the compat buffer, j counts
 * bytes consumed from the native buffer; stop when either is exhausted.
 */
2814 ifr = ifc.ifc_req;
2815 ifr32 = compat_ptr(ifc32.ifcbuf);
2816 for (i = 0, j = 0;
c6d409cf
ED
2817 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2818 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2819 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2820 return -EFAULT;
2821 ifr32++;
2822 ifr++;
2823 }
2824
2825 if (ifc32.ifcbuf == 0) {
2826 /* Translate from 64-bit structure multiple to
2827 * a 32-bit one.
2828 */
2829 i = ifc.ifc_len;
6b96018b 2830 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2831 ifc32.ifc_len = i;
2832 } else {
2833 ifc32.ifc_len = i;
2834 }
6b96018b 2835 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2836 return -EFAULT;
2837
2838 return 0;
2839}
2840
/*
 * Compat SIOCETHTOOL.
 *
 * Builds a native struct ifreq in compat_alloc_user_space() memory and
 * calls dev_ioctl(SIOCETHTOOL) on it.  For the rxnfc commands listed in
 * the switch below, the struct compat_ethtool_rxnfc argument is converted
 * to a native struct ethtool_rxnfc before the call (convert_in) and/or
 * back afterwards (convert_out); every other command passes the caller's
 * data pointer through unchanged.
 *
 * Returns 0 on success, -EFAULT on a failed user access, -ENOMEM for an
 * oversized rule count, or the error from dev_ioctl().
 */
6b96018b 2841static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2842{
3a7da39d
BH
2843 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2844 bool convert_in = false, convert_out = false;
2845 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2846 struct ethtool_rxnfc __user *rxnfc;
7a229387 2847 struct ifreq __user *ifr;
3a7da39d
BH
2848 u32 rule_cnt = 0, actual_rule_cnt;
2849 u32 ethcmd;
7a229387 2850 u32 data;
3a7da39d 2851 int ret;
7a229387 2852
3a7da39d
BH
2853 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2854 return -EFAULT;
7a229387 2855
3a7da39d
BH
2856 compat_rxnfc = compat_ptr(data);
2857
2858 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2859 return -EFAULT;
2860
3a7da39d
BH
2861 /* Most ethtool structures are defined without padding.
2862 * Unfortunately struct ethtool_rxnfc is an exception.
2863 */
2864 switch (ethcmd) {
2865 default:
2866 break;
2867 case ETHTOOL_GRXCLSRLALL:
2868 /* Buffer size is variable */
2869 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2870 return -EFAULT;
2871 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2872 return -ENOMEM;
2873 buf_size += rule_cnt * sizeof(u32);
2874 /* fall through */
2875 case ETHTOOL_GRXRINGS:
2876 case ETHTOOL_GRXCLSRLCNT:
2877 case ETHTOOL_GRXCLSRULE:
55664f32 2878 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2879 convert_out = true;
2880 /* fall through */
2881 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2882 buf_size += sizeof(struct ethtool_rxnfc);
2883 convert_in = true;
2884 break;
2885 }
2886
/* Layout of the scratch area: the ifreq first, the native rxnfc at the next 8-byte boundary. */
2887 ifr = compat_alloc_user_space(buf_size);
954b1244 2888 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2889
2890 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2891 return -EFAULT;
2892
3a7da39d
BH
2893 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2894 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2895 return -EFAULT;
2896
3a7da39d 2897 if (convert_in) {
127fe533 2898 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2899 * fs.ring_cookie and at the end of fs, but nowhere else.
2900 */
127fe533
AD
2901 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2902 sizeof(compat_rxnfc->fs.m_ext) !=
2903 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2904 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2905 BUILD_BUG_ON(
2906 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2907 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2908 offsetof(struct ethtool_rxnfc, fs.location) -
2909 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2910
/* Three copies: start through fs.m_ext, fs.ring_cookie through fs.location, then rule_cnt. */
2911 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2912 (void __user *)(&rxnfc->fs.m_ext + 1) -
2913 (void __user *)rxnfc) ||
3a7da39d
BH
2914 copy_in_user(&rxnfc->fs.ring_cookie,
2915 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2916 (void __user *)(&rxnfc->fs.location + 1) -
2917 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2918 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2919 sizeof(rxnfc->rule_cnt)))
2920 return -EFAULT;
2921 }
2922
2923 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2924 if (ret)
2925 return ret;
2926
2927 if (convert_out) {
/* Same three regions as on the way in, copied in the opposite direction. */
2928 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2929 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2930 (const void __user *)rxnfc) ||
3a7da39d
BH
2931 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2932 &rxnfc->fs.ring_cookie,
954b1244
SH
2933 (const void __user *)(&rxnfc->fs.location + 1) -
2934 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2935 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2936 sizeof(rxnfc->rule_cnt)))
2937 return -EFAULT;
2938
2939 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2940 /* As an optimisation, we only copy the actual
2941 * number of rules that the underlying
2942 * function returned. Since Mallory might
2943 * change the rule count in user memory, we
2944 * check that it is less than the rule count
2945 * originally given (as the user buffer size),
2946 * which has been range-checked.
2947 */
2948 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2949 return -EFAULT;
2950 if (actual_rule_cnt < rule_cnt)
2951 rule_cnt = actual_rule_cnt;
2952 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2953 &rxnfc->rule_locs[0],
2954 rule_cnt * sizeof(u32)))
2955 return -EFAULT;
2956 }
2957 }
2958
2959 return 0;
7a229387
AB
2960}
2961
7a50a240
AB
2962static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2963{
2964 void __user *uptr;
2965 compat_uptr_t uptr32;
2966 struct ifreq __user *uifr;
2967
c6d409cf 2968 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2969 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2970 return -EFAULT;
2971
2972 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2973 return -EFAULT;
2974
2975 uptr = compat_ptr(uptr32);
2976
2977 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2978 return -EFAULT;
2979
2980 return dev_ioctl(net, SIOCWANDEV, uifr);
2981}
2982
6b96018b
AB
2983static int bond_ioctl(struct net *net, unsigned int cmd,
2984 struct compat_ifreq __user *ifr32)
7a229387
AB
2985{
2986 struct ifreq kifr;
7a229387
AB
2987 mm_segment_t old_fs;
2988 int err;
7a229387
AB
2989
2990 switch (cmd) {
2991 case SIOCBONDENSLAVE:
2992 case SIOCBONDRELEASE:
2993 case SIOCBONDSETHWADDR:
2994 case SIOCBONDCHANGEACTIVE:
6b96018b 2995 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2996 return -EFAULT;
2997
2998 old_fs = get_fs();
c6d409cf 2999 set_fs(KERNEL_DS);
c3f52ae6 3000 err = dev_ioctl(net, cmd,
3001 (struct ifreq __user __force *) &kifr);
c6d409cf 3002 set_fs(old_fs);
7a229387
AB
3003
3004 return err;
7a229387 3005 default:
07d106d0 3006 return -ENOIOCTLCMD;
ccbd6a5a 3007 }
7a229387
AB
3008}
3009
590d4693
BH
3010/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3011static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3012 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
3013{
3014 struct ifreq __user *u_ifreq64;
7a229387
AB
3015 char tmp_buf[IFNAMSIZ];
3016 void __user *data64;
3017 u32 data32;
3018
3019 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
3020 IFNAMSIZ))
3021 return -EFAULT;
417c3522 3022 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
3023 return -EFAULT;
3024 data64 = compat_ptr(data32);
3025
3026 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
3027
7a229387
AB
3028 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
3029 IFNAMSIZ))
3030 return -EFAULT;
417c3522 3031 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
3032 return -EFAULT;
3033
6b96018b 3034 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
3035}
3036
6b96018b
AB
3037static int dev_ifsioc(struct net *net, struct socket *sock,
3038 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 3039{
a2116ed2 3040 struct ifreq __user *uifr;
7a229387
AB
3041 int err;
3042
a2116ed2
AB
3043 uifr = compat_alloc_user_space(sizeof(*uifr));
3044 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3045 return -EFAULT;
3046
3047 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3048
7a229387
AB
3049 if (!err) {
3050 switch (cmd) {
3051 case SIOCGIFFLAGS:
3052 case SIOCGIFMETRIC:
3053 case SIOCGIFMTU:
3054 case SIOCGIFMEM:
3055 case SIOCGIFHWADDR:
3056 case SIOCGIFINDEX:
3057 case SIOCGIFADDR:
3058 case SIOCGIFBRDADDR:
3059 case SIOCGIFDSTADDR:
3060 case SIOCGIFNETMASK:
fab2532b 3061 case SIOCGIFPFLAGS:
7a229387 3062 case SIOCGIFTXQLEN:
fab2532b
AB
3063 case SIOCGMIIPHY:
3064 case SIOCGMIIREG:
a2116ed2 3065 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
3066 err = -EFAULT;
3067 break;
3068 }
3069 }
3070 return err;
3071}
3072
a2116ed2
AB
3073static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3074 struct compat_ifreq __user *uifr32)
3075{
3076 struct ifreq ifr;
3077 struct compat_ifmap __user *uifmap32;
3078 mm_segment_t old_fs;
3079 int err;
3080
3081 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3082 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3083 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3084 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3085 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3086 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3087 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3088 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3089 if (err)
3090 return -EFAULT;
3091
3092 old_fs = get_fs();
c6d409cf 3093 set_fs(KERNEL_DS);
c3f52ae6 3094 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 3095 set_fs(old_fs);
a2116ed2
AB
3096
3097 if (cmd == SIOCGIFMAP && !err) {
3098 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3099 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3100 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3101 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3102 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3103 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3104 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3105 if (err)
3106 err = -EFAULT;
3107 }
3108 return err;
3109}
3110
7a229387 3111struct rtentry32 {
c6d409cf 3112 u32 rt_pad1;
7a229387
AB
3113 struct sockaddr rt_dst; /* target address */
3114 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3115 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3116 unsigned short rt_flags;
3117 short rt_pad2;
3118 u32 rt_pad3;
3119 unsigned char rt_tos;
3120 unsigned char rt_class;
3121 short rt_pad4;
3122 short rt_metric; /* +1 for binary compatibility! */
7a229387 3123 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3124 u32 rt_mtu; /* per route MTU/Window */
3125 u32 rt_window; /* Window clamping */
7a229387
AB
3126 unsigned short rt_irtt; /* Initial RTT */
3127};
3128
3129struct in6_rtmsg32 {
3130 struct in6_addr rtmsg_dst;
3131 struct in6_addr rtmsg_src;
3132 struct in6_addr rtmsg_gateway;
3133 u32 rtmsg_type;
3134 u16 rtmsg_dst_len;
3135 u16 rtmsg_src_len;
3136 u32 rtmsg_metric;
3137 u32 rtmsg_info;
3138 u32 rtmsg_flags;
3139 s32 rtmsg_ifindex;
3140};
3141
6b96018b
AB
3142static int routing_ioctl(struct net *net, struct socket *sock,
3143 unsigned int cmd, void __user *argp)
7a229387
AB
3144{
3145 int ret;
3146 void *r = NULL;
3147 struct in6_rtmsg r6;
3148 struct rtentry r4;
3149 char devname[16];
3150 u32 rtdev;
3151 mm_segment_t old_fs = get_fs();
3152
6b96018b
AB
3153 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3154 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3155 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3156 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3157 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3158 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3159 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3160 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3161 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3162 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3163 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3164
3165 r = (void *) &r6;
3166 } else { /* ipv4 */
6b96018b 3167 struct rtentry32 __user *ur4 = argp;
c6d409cf 3168 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3169 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3170 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3171 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3172 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3173 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3174 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3175 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3176 if (rtdev) {
c6d409cf 3177 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3178 r4.rt_dev = (char __user __force *)devname;
3179 devname[15] = 0;
7a229387
AB
3180 } else
3181 r4.rt_dev = NULL;
3182
3183 r = (void *) &r4;
3184 }
3185
3186 if (ret) {
3187 ret = -EFAULT;
3188 goto out;
3189 }
3190
c6d409cf 3191 set_fs(KERNEL_DS);
6b96018b 3192 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3193 set_fs(old_fs);
7a229387
AB
3194
3195out:
7a229387
AB
3196 return ret;
3197}
3198
3199/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3200 * for some operations; this forces use of the newer bridge-utils that
25985edc 3201 * use compatible ioctls
7a229387 3202 */
6b96018b 3203static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3204{
6b96018b 3205 compat_ulong_t tmp;
7a229387 3206
6b96018b 3207 if (get_user(tmp, argp))
7a229387
AB
3208 return -EFAULT;
3209 if (tmp == BRCTL_GET_VERSION)
3210 return BRCTL_VERSION + 1;
3211 return -EINVAL;
3212}
3213
6b96018b
AB
3214static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3215 unsigned int cmd, unsigned long arg)
3216{
3217 void __user *argp = compat_ptr(arg);
3218 struct sock *sk = sock->sk;
3219 struct net *net = sock_net(sk);
7a229387 3220
6b96018b 3221 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3222 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3223
3224 switch (cmd) {
3225 case SIOCSIFBR:
3226 case SIOCGIFBR:
3227 return old_bridge_ioctl(argp);
3228 case SIOCGIFNAME:
3229 return dev_ifname32(net, argp);
3230 case SIOCGIFCONF:
3231 return dev_ifconf(net, argp);
3232 case SIOCETHTOOL:
3233 return ethtool_ioctl(net, argp);
7a50a240
AB
3234 case SIOCWANDEV:
3235 return compat_siocwandev(net, argp);
a2116ed2
AB
3236 case SIOCGIFMAP:
3237 case SIOCSIFMAP:
3238 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3239 case SIOCBONDENSLAVE:
3240 case SIOCBONDRELEASE:
3241 case SIOCBONDSETHWADDR:
6b96018b
AB
3242 case SIOCBONDCHANGEACTIVE:
3243 return bond_ioctl(net, cmd, argp);
3244 case SIOCADDRT:
3245 case SIOCDELRT:
3246 return routing_ioctl(net, sock, cmd, argp);
3247 case SIOCGSTAMP:
3248 return do_siocgstamp(net, sock, cmd, argp);
3249 case SIOCGSTAMPNS:
3250 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3251 case SIOCBONDSLAVEINFOQUERY:
3252 case SIOCBONDINFOQUERY:
a2116ed2 3253 case SIOCSHWTSTAMP:
fd468c74 3254 case SIOCGHWTSTAMP:
590d4693 3255 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3256
3257 case FIOSETOWN:
3258 case SIOCSPGRP:
3259 case FIOGETOWN:
3260 case SIOCGPGRP:
3261 case SIOCBRADDBR:
3262 case SIOCBRDELBR:
3263 case SIOCGIFVLAN:
3264 case SIOCSIFVLAN:
3265 case SIOCADDDLCI:
3266 case SIOCDELDLCI:
3267 return sock_ioctl(file, cmd, arg);
3268
3269 case SIOCGIFFLAGS:
3270 case SIOCSIFFLAGS:
3271 case SIOCGIFMETRIC:
3272 case SIOCSIFMETRIC:
3273 case SIOCGIFMTU:
3274 case SIOCSIFMTU:
3275 case SIOCGIFMEM:
3276 case SIOCSIFMEM:
3277 case SIOCGIFHWADDR:
3278 case SIOCSIFHWADDR:
3279 case SIOCADDMULTI:
3280 case SIOCDELMULTI:
3281 case SIOCGIFINDEX:
6b96018b
AB
3282 case SIOCGIFADDR:
3283 case SIOCSIFADDR:
3284 case SIOCSIFHWBROADCAST:
6b96018b 3285 case SIOCDIFADDR:
6b96018b
AB
3286 case SIOCGIFBRDADDR:
3287 case SIOCSIFBRDADDR:
3288 case SIOCGIFDSTADDR:
3289 case SIOCSIFDSTADDR:
3290 case SIOCGIFNETMASK:
3291 case SIOCSIFNETMASK:
3292 case SIOCSIFPFLAGS:
3293 case SIOCGIFPFLAGS:
3294 case SIOCGIFTXQLEN:
3295 case SIOCSIFTXQLEN:
3296 case SIOCBRADDIF:
3297 case SIOCBRDELIF:
9177efd3
AB
3298 case SIOCSIFNAME:
3299 case SIOCGMIIPHY:
3300 case SIOCGMIIREG:
3301 case SIOCSMIIREG:
6b96018b 3302 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3303
6b96018b
AB
3304 case SIOCSARP:
3305 case SIOCGARP:
3306 case SIOCDARP:
6b96018b 3307 case SIOCATMARK:
9177efd3
AB
3308 return sock_do_ioctl(net, sock, cmd, arg);
3309 }
3310
6b96018b
AB
3311 return -ENOIOCTLCMD;
3312}
7a229387 3313
95c96174 3314static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3315 unsigned long arg)
89bbfc95
SP
3316{
3317 struct socket *sock = file->private_data;
3318 int ret = -ENOIOCTLCMD;
87de87d5
DM
3319 struct sock *sk;
3320 struct net *net;
3321
3322 sk = sock->sk;
3323 net = sock_net(sk);
89bbfc95
SP
3324
3325 if (sock->ops->compat_ioctl)
3326 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3327
87de87d5
DM
3328 if (ret == -ENOIOCTLCMD &&
3329 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3330 ret = compat_wext_handle_ioctl(net, cmd, arg);
3331
6b96018b
AB
3332 if (ret == -ENOIOCTLCMD)
3333 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3334
89bbfc95
SP
3335 return ret;
3336}
3337#endif
3338
ac5a488e
SS
3339int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3340{
3341 return sock->ops->bind(sock, addr, addrlen);
3342}
c6d409cf 3343EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3344
3345int kernel_listen(struct socket *sock, int backlog)
3346{
3347 return sock->ops->listen(sock, backlog);
3348}
c6d409cf 3349EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3350
3351int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3352{
3353 struct sock *sk = sock->sk;
3354 int err;
3355
3356 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3357 newsock);
3358 if (err < 0)
3359 goto done;
3360
3361 err = sock->ops->accept(sock, *newsock, flags);
3362 if (err < 0) {
3363 sock_release(*newsock);
fa8705b0 3364 *newsock = NULL;
ac5a488e
SS
3365 goto done;
3366 }
3367
3368 (*newsock)->ops = sock->ops;
1b08534e 3369 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3370
3371done:
3372 return err;
3373}
c6d409cf 3374EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3375
3376int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3377 int flags)
ac5a488e
SS
3378{
3379 return sock->ops->connect(sock, addr, addrlen, flags);
3380}
c6d409cf 3381EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3382
3383int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3384 int *addrlen)
3385{
3386 return sock->ops->getname(sock, addr, addrlen, 0);
3387}
c6d409cf 3388EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3389
3390int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3391 int *addrlen)
3392{
3393 return sock->ops->getname(sock, addr, addrlen, 1);
3394}
c6d409cf 3395EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3396
3397int kernel_getsockopt(struct socket *sock, int level, int optname,
3398 char *optval, int *optlen)
3399{
3400 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3401 char __user *uoptval;
3402 int __user *uoptlen;
ac5a488e
SS
3403 int err;
3404
fb8621bb
NK
3405 uoptval = (char __user __force *) optval;
3406 uoptlen = (int __user __force *) optlen;
3407
ac5a488e
SS
3408 set_fs(KERNEL_DS);
3409 if (level == SOL_SOCKET)
fb8621bb 3410 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3411 else
fb8621bb
NK
3412 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3413 uoptlen);
ac5a488e
SS
3414 set_fs(oldfs);
3415 return err;
3416}
c6d409cf 3417EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3418
3419int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3420 char *optval, unsigned int optlen)
ac5a488e
SS
3421{
3422 mm_segment_t oldfs = get_fs();
fb8621bb 3423 char __user *uoptval;
ac5a488e
SS
3424 int err;
3425
fb8621bb
NK
3426 uoptval = (char __user __force *) optval;
3427
ac5a488e
SS
3428 set_fs(KERNEL_DS);
3429 if (level == SOL_SOCKET)
fb8621bb 3430 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3431 else
fb8621bb 3432 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3433 optlen);
3434 set_fs(oldfs);
3435 return err;
3436}
c6d409cf 3437EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3438
3439int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3440 size_t size, int flags)
3441{
3442 if (sock->ops->sendpage)
3443 return sock->ops->sendpage(sock, page, offset, size, flags);
3444
3445 return sock_no_sendpage(sock, page, offset, size, flags);
3446}
c6d409cf 3447EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3448
3449int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3450{
3451 mm_segment_t oldfs = get_fs();
3452 int err;
3453
3454 set_fs(KERNEL_DS);
3455 err = sock->ops->ioctl(sock, cmd, arg);
3456 set_fs(oldfs);
3457
3458 return err;
3459}
c6d409cf 3460EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3461
91cf45f0
TM
3462int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3463{
3464 return sock->ops->shutdown(sock, how);
3465}
91cf45f0 3466EXPORT_SYMBOL(kernel_sock_shutdown);