]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/socket.c
ipv6: always add flag an address that failed DAD with DADFAILED
[mirror_ubuntu-zesty-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
297 SLAB_MEM_SPREAD),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
c74a1cbb
AV
323static struct dentry *sockfs_mount(struct file_system_type *fs_type,
324 int flags, const char *dev_name, void *data)
325{
326 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
327 &sockfs_dentry_operations, SOCKFS_MAGIC);
328}
329
330static struct vfsmount *sock_mnt __read_mostly;
331
332static struct file_system_type sock_fs_type = {
333 .name = "sockfs",
334 .mount = sockfs_mount,
335 .kill_sb = kill_anon_super,
336};
337
1da177e4
LT
338/*
339 * Obtains the first available file descriptor and sets it up for use.
340 *
39d8c1b6
DM
341 * These functions create file structures and maps them to fd space
342 * of the current process. On success it returns file descriptor
1da177e4
LT
343 * and file struct implicitly stored in sock->file.
344 * Note that another thread may close file descriptor before we return
345 * from this function. We use the fact that now we do not refer
346 * to socket after mapping. If one day we will need it, this
347 * function will increment ref. count on file by 1.
348 *
349 * In any case returned fd MAY BE not valid!
350 * This race condition is unavoidable
351 * with shared fd spaces, we cannot solve it inside kernel,
352 * but we take care of internal coherence yet.
353 */
354
aab174f0 355struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 356{
7cbe66b6 357 struct qstr name = { .name = "" };
2c48b9c4 358 struct path path;
7cbe66b6 359 struct file *file;
1da177e4 360
600e1779
MY
361 if (dname) {
362 name.name = dname;
363 name.len = strlen(name.name);
364 } else if (sock->sk) {
365 name.name = sock->sk->sk_prot_creator->name;
366 name.len = strlen(name.name);
367 }
4b936885 368 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
369 if (unlikely(!path.dentry))
370 return ERR_PTR(-ENOMEM);
2c48b9c4 371 path.mnt = mntget(sock_mnt);
39d8c1b6 372
2c48b9c4 373 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 374
2c48b9c4 375 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 376 &socket_file_ops);
b5ffe634 377 if (IS_ERR(file)) {
cc3808f8 378 /* drop dentry, keep inode */
c5ef6035 379 ihold(d_inode(path.dentry));
2c48b9c4 380 path_put(&path);
39b65252 381 return file;
cc3808f8
AV
382 }
383
384 sock->file = file;
77d27200 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 386 file->private_data = sock;
28407630 387 return file;
39d8c1b6 388}
56b31d1c 389EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 390
56b31d1c 391static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
392{
393 struct file *newfile;
28407630
AV
394 int fd = get_unused_fd_flags(flags);
395 if (unlikely(fd < 0))
396 return fd;
39d8c1b6 397
aab174f0 398 newfile = sock_alloc_file(sock, flags, NULL);
28407630 399 if (likely(!IS_ERR(newfile))) {
39d8c1b6 400 fd_install(fd, newfile);
28407630
AV
401 return fd;
402 }
7cbe66b6 403
28407630
AV
404 put_unused_fd(fd);
405 return PTR_ERR(newfile);
1da177e4
LT
406}
407
406a3c63 408struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca 415}
406a3c63 416EXPORT_SYMBOL(sock_from_file);
6cb153ca 417
1da177e4 418/**
c6d409cf 419 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
c6d409cf 447EXPORT_SYMBOL(sockfd_lookup);
1da177e4 448
6cb153ca
BL
449static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
450{
00e188ef 451 struct fd f = fdget(fd);
6cb153ca
BL
452 struct socket *sock;
453
3672558c 454 *err = -EBADF;
00e188ef
AV
455 if (f.file) {
456 sock = sock_from_file(f.file, err);
457 if (likely(sock)) {
458 *fput_needed = f.flags;
6cb153ca 459 return sock;
00e188ef
AV
460 }
461 fdput(f);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
600e1779
MY
466#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
467#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
468#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
469static ssize_t sockfs_getxattr(struct dentry *dentry,
470 const char *name, void *value, size_t size)
471{
472 const char *proto_name;
473 size_t proto_size;
474 int error;
475
476 error = -ENODATA;
477 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
478 proto_name = dentry->d_name.name;
479 proto_size = strlen(proto_name);
480
481 if (value) {
482 error = -ERANGE;
483 if (proto_size + 1 > size)
484 goto out;
485
486 strncpy(value, proto_name, proto_size + 1);
487 }
488 error = proto_size + 1;
489 }
490
491out:
492 return error;
493}
494
495static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
496 size_t size)
497{
498 ssize_t len;
499 ssize_t used = 0;
500
c5ef6035 501 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
502 if (len < 0)
503 return len;
504 used += len;
505 if (buffer) {
506 if (size < used)
507 return -ERANGE;
508 buffer += len;
509 }
510
511 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
512 used += len;
513 if (buffer) {
514 if (size < used)
515 return -ERANGE;
516 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
517 buffer += len;
518 }
519
520 return used;
521}
522
523static const struct inode_operations sockfs_inode_ops = {
524 .getxattr = sockfs_getxattr,
525 .listxattr = sockfs_listxattr,
526};
527
1da177e4
LT
528/**
529 * sock_alloc - allocate a socket
89bddce5 530 *
1da177e4
LT
531 * Allocate a new inode and socket object. The two are bound together
532 * and initialised. The socket is then returned. If we are out of inodes
533 * NULL is returned.
534 */
535
536static struct socket *sock_alloc(void)
537{
89bddce5
SH
538 struct inode *inode;
539 struct socket *sock;
1da177e4 540
a209dfc7 541 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
542 if (!inode)
543 return NULL;
544
545 sock = SOCKET_I(inode);
546
29a020d3 547 kmemcheck_annotate_bitfield(sock, type);
85fe4025 548 inode->i_ino = get_next_ino();
89bddce5 549 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
550 inode->i_uid = current_fsuid();
551 inode->i_gid = current_fsgid();
600e1779 552 inode->i_op = &sockfs_inode_ops;
1da177e4 553
19e8d69c 554 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
555 return sock;
556}
557
1da177e4
LT
558/**
559 * sock_release - close a socket
560 * @sock: socket to close
561 *
562 * The socket is released from the protocol stack if it has a release
563 * callback, and the inode is then released if the socket is bound to
89bddce5 564 * an inode not a file.
1da177e4 565 */
89bddce5 566
1da177e4
LT
567void sock_release(struct socket *sock)
568{
569 if (sock->ops) {
570 struct module *owner = sock->ops->owner;
571
572 sock->ops->release(sock);
573 sock->ops = NULL;
574 module_put(owner);
575 }
576
eaefd110 577 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 578 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 579
19e8d69c 580 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
581 if (!sock->file) {
582 iput(SOCK_INODE(sock));
583 return;
584 }
89bddce5 585 sock->file = NULL;
1da177e4 586}
c6d409cf 587EXPORT_SYMBOL(sock_release);
1da177e4 588
67cc0d40 589void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 590{
140c55d4
ED
591 u8 flags = *tx_flags;
592
b9f40e21 593 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
594 flags |= SKBTX_HW_TSTAMP;
595
b9f40e21 596 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
597 flags |= SKBTX_SW_TSTAMP;
598
e7fd2885 599 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
600 flags |= SKBTX_SCHED_TSTAMP;
601
e1c8a607 602 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 603 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 604
140c55d4 605 *tx_flags = flags;
20d49473 606}
67cc0d40 607EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 608
d8725c86 609static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 610{
01e97e65 611 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
612 BUG_ON(ret == -EIOCBQUEUED);
613 return ret;
1da177e4
LT
614}
615
/*
 * Send @msg on @sock after asking the security hook for permission.
 *
 * Returns the security hook's error if it is non-zero, otherwise the result
 * of sock_sendmsg_nosec().
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 623EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
624
625int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
626 struct kvec *vec, size_t num, size_t size)
627{
6aa24814 628 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 629 return sock_sendmsg(sock, msg);
1da177e4 630}
c6d409cf 631EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 632
92f37fd2
ED
633/*
634 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
635 */
636void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
637 struct sk_buff *skb)
638{
20d49473 639 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 640 struct scm_timestamping tss;
20d49473
PO
641 int empty = 1;
642 struct skb_shared_hwtstamps *shhwtstamps =
643 skb_hwtstamps(skb);
644
645 /* Race occurred between timestamp enabling and packet
646 receiving. Fill in the current time for now. */
647 if (need_software_tstamp && skb->tstamp.tv64 == 0)
648 __net_timestamp(skb);
649
650 if (need_software_tstamp) {
651 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
652 struct timeval tv;
653 skb_get_timestamp(skb, &tv);
654 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
655 sizeof(tv), &tv);
656 } else {
f24b9be5
WB
657 struct timespec ts;
658 skb_get_timestampns(skb, &ts);
20d49473 659 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 660 sizeof(ts), &ts);
20d49473
PO
661 }
662 }
663
f24b9be5 664 memset(&tss, 0, sizeof(tss));
c199105d 665 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 666 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 667 empty = 0;
4d276eb6 668 if (shhwtstamps &&
b9f40e21 669 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 670 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 671 empty = 0;
20d49473
PO
672 if (!empty)
673 put_cmsg(msg, SOL_SOCKET,
f24b9be5 674 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 675}
7c81fd8b
ACM
676EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
677
6e3e939f
JB
678void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
679 struct sk_buff *skb)
680{
681 int ack;
682
683 if (!sock_flag(sk, SOCK_WIFI_STATUS))
684 return;
685 if (!skb->wifi_acked_valid)
686 return;
687
688 ack = skb->wifi_acked;
689
690 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
691}
692EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
693
11165f14 694static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
695 struct sk_buff *skb)
3b885787 696{
744d5a3e 697 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 698 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 699 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
700}
701
/*
 * Add both kinds of receive metadata for @skb to @msg: first the timestamp
 * control messages, then the drop-count control message.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
767dd033 708EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 709
1b784140
YX
710static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
711 size_t size, int flags)
1da177e4 712{
1b784140 713 return sock->ops->recvmsg(sock, msg, size, flags);
1da177e4
LT
714}
715
1b784140
YX
716int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
717 int flags)
a2e27255
ACM
718{
719 int err = security_socket_recvmsg(sock, msg, size, flags);
720
1b784140 721 return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
1da177e4 722}
c6d409cf 723EXPORT_SYMBOL(sock_recvmsg);
1da177e4 724
c1249c0a
ML
725/**
726 * kernel_recvmsg - Receive a message from a socket (kernel space)
727 * @sock: The socket to receive the message from
728 * @msg: Received message
729 * @vec: Input s/g array for message data
730 * @num: Size of input s/g array
731 * @size: Number of bytes to read
732 * @flags: Message flags (MSG_DONTWAIT, etc...)
733 *
734 * On return the msg structure contains the scatter/gather array passed in the
735 * vec argument. The array is modified so that it consists of the unfilled
736 * portion of the original array.
737 *
738 * The returned value is the total number of bytes received, or an error.
739 */
89bddce5
SH
740int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
741 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
742{
743 mm_segment_t oldfs = get_fs();
744 int result;
745
6aa24814 746 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 747 set_fs(KERNEL_DS);
1da177e4
LT
748 result = sock_recvmsg(sock, msg, size, flags);
749 set_fs(oldfs);
750 return result;
751}
c6d409cf 752EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 753
ce1d4d3e
CH
754static ssize_t sock_sendpage(struct file *file, struct page *page,
755 int offset, size_t size, loff_t *ppos, int more)
1da177e4 756{
1da177e4
LT
757 struct socket *sock;
758 int flags;
759
ce1d4d3e
CH
760 sock = file->private_data;
761
35f9c09f
ED
762 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
763 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
764 flags |= more;
ce1d4d3e 765
e6949583 766 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 767}
1da177e4 768
9c55e01c 769static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 770 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
771 unsigned int flags)
772{
773 struct socket *sock = file->private_data;
774
997b37da
RDC
775 if (unlikely(!sock->ops->splice_read))
776 return -EINVAL;
777
9c55e01c
JA
778 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
779}
780
8ae5e030 781static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 782{
6d652330
AV
783 struct file *file = iocb->ki_filp;
784 struct socket *sock = file->private_data;
0345f931 785 struct msghdr msg = {.msg_iter = *to,
786 .msg_iocb = iocb};
8ae5e030 787 ssize_t res;
ce1d4d3e 788
8ae5e030
AV
789 if (file->f_flags & O_NONBLOCK)
790 msg.msg_flags = MSG_DONTWAIT;
791
792 if (iocb->ki_pos != 0)
1da177e4 793 return -ESPIPE;
027445c3 794
66ee59af 795 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
796 return 0;
797
237dae88 798 res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
8ae5e030
AV
799 *to = msg.msg_iter;
800 return res;
1da177e4
LT
801}
802
8ae5e030 803static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 804{
6d652330
AV
805 struct file *file = iocb->ki_filp;
806 struct socket *sock = file->private_data;
0345f931 807 struct msghdr msg = {.msg_iter = *from,
808 .msg_iocb = iocb};
8ae5e030 809 ssize_t res;
1da177e4 810
8ae5e030 811 if (iocb->ki_pos != 0)
ce1d4d3e 812 return -ESPIPE;
027445c3 813
8ae5e030
AV
814 if (file->f_flags & O_NONBLOCK)
815 msg.msg_flags = MSG_DONTWAIT;
816
6d652330
AV
817 if (sock->type == SOCK_SEQPACKET)
818 msg.msg_flags |= MSG_EOR;
819
d8725c86 820 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
821 *from = msg.msg_iter;
822 return res;
1da177e4
LT
823}
824
1da177e4
LT
825/*
826 * Atomic setting of ioctl hooks to avoid race
827 * with module unload.
828 */
829
4a3e2f71 830static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 831static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 832
881d966b 833void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 834{
4a3e2f71 835 mutex_lock(&br_ioctl_mutex);
1da177e4 836 br_ioctl_hook = hook;
4a3e2f71 837 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
838}
839EXPORT_SYMBOL(brioctl_set);
840
4a3e2f71 841static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 842static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 843
881d966b 844void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 845{
4a3e2f71 846 mutex_lock(&vlan_ioctl_mutex);
1da177e4 847 vlan_ioctl_hook = hook;
4a3e2f71 848 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
849}
850EXPORT_SYMBOL(vlan_ioctl_set);
851
4a3e2f71 852static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 853static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 854
89bddce5 855void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 856{
4a3e2f71 857 mutex_lock(&dlci_ioctl_mutex);
1da177e4 858 dlci_ioctl_hook = hook;
4a3e2f71 859 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
860}
861EXPORT_SYMBOL(dlci_ioctl_set);
862
6b96018b
AB
863static long sock_do_ioctl(struct net *net, struct socket *sock,
864 unsigned int cmd, unsigned long arg)
865{
866 int err;
867 void __user *argp = (void __user *)arg;
868
869 err = sock->ops->ioctl(sock, cmd, arg);
870
871 /*
872 * If this ioctl is unknown try to hand it down
873 * to the NIC driver.
874 */
875 if (err == -ENOIOCTLCMD)
876 err = dev_ioctl(net, cmd, argp);
877
878 return err;
879}
880
1da177e4
LT
881/*
882 * With an ioctl, arg may well be a user mode pointer, but we don't know
883 * what to do with it - that's up to the protocol still.
884 */
885
886static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
887{
888 struct socket *sock;
881d966b 889 struct sock *sk;
1da177e4
LT
890 void __user *argp = (void __user *)arg;
891 int pid, err;
881d966b 892 struct net *net;
1da177e4 893
b69aee04 894 sock = file->private_data;
881d966b 895 sk = sock->sk;
3b1e0a65 896 net = sock_net(sk);
1da177e4 897 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 898 err = dev_ioctl(net, cmd, argp);
1da177e4 899 } else
3d23e349 900#ifdef CONFIG_WEXT_CORE
1da177e4 901 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 902 err = dev_ioctl(net, cmd, argp);
1da177e4 903 } else
3d23e349 904#endif
89bddce5 905 switch (cmd) {
1da177e4
LT
906 case FIOSETOWN:
907 case SIOCSPGRP:
908 err = -EFAULT;
909 if (get_user(pid, (int __user *)argp))
910 break;
e0b93edd
JL
911 f_setown(sock->file, pid, 1);
912 err = 0;
1da177e4
LT
913 break;
914 case FIOGETOWN:
915 case SIOCGPGRP:
609d7fa9 916 err = put_user(f_getown(sock->file),
89bddce5 917 (int __user *)argp);
1da177e4
LT
918 break;
919 case SIOCGIFBR:
920 case SIOCSIFBR:
921 case SIOCBRADDBR:
922 case SIOCBRDELBR:
923 err = -ENOPKG;
924 if (!br_ioctl_hook)
925 request_module("bridge");
926
4a3e2f71 927 mutex_lock(&br_ioctl_mutex);
89bddce5 928 if (br_ioctl_hook)
881d966b 929 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 930 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
931 break;
932 case SIOCGIFVLAN:
933 case SIOCSIFVLAN:
934 err = -ENOPKG;
935 if (!vlan_ioctl_hook)
936 request_module("8021q");
937
4a3e2f71 938 mutex_lock(&vlan_ioctl_mutex);
1da177e4 939 if (vlan_ioctl_hook)
881d966b 940 err = vlan_ioctl_hook(net, argp);
4a3e2f71 941 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 942 break;
1da177e4
LT
943 case SIOCADDDLCI:
944 case SIOCDELDLCI:
945 err = -ENOPKG;
946 if (!dlci_ioctl_hook)
947 request_module("dlci");
948
7512cbf6
PE
949 mutex_lock(&dlci_ioctl_mutex);
950 if (dlci_ioctl_hook)
1da177e4 951 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 952 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
953 break;
954 default:
6b96018b 955 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 956 break;
89bddce5 957 }
1da177e4
LT
958 return err;
959}
960
/*
 * Allocate a bare socket of the requested type without calling any
 * protocol family ->create() hook.
 *
 * security_socket_create() is consulted first and
 * security_socket_post_create() after the allocation; both are called
 * with 1 as their last argument.
 *
 * *res is always written: the new socket on success, NULL on every
 * failure path. Returns 0 on success, -ENOMEM when sock_alloc() fails,
 * or the error reported by one of the security hooks.
 */
961int sock_create_lite(int family, int type, int protocol, struct socket **res)
962{
963 int err;
964 struct socket *sock = NULL;
89bddce5 965
1da177e4
LT
966 err = security_socket_create(family, type, protocol, 1);
967 if (err)
968 goto out;
969
970 sock = sock_alloc();
971 if (!sock) {
972 err = -ENOMEM;
973 goto out;
974 }
975
1da177e4 976 sock->type = type;
7420ed23
VY
977 err = security_socket_post_create(sock, family, type, protocol, 1);
978 if (err)
979 goto out_release;
980
1da177e4
LT
981out:
982 *res = sock;
983 return err;
7420ed23
VY
984out_release:
/* Drop the socket and report NULL to the caller through the out label. */
985 sock_release(sock);
986 sock = NULL;
987 goto out;
1da177e4 988}
c6d409cf 989EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
990
991/* No kernel lock held - perfect */
/*
 * poll handler for socket files.
 *
 * The socket is taken from file->private_data. When sk_can_busy_loop()
 * accepts the socket, POLL_BUSY_LOOP is ORed into the returned mask, and
 * sk_busy_loop() is called once if @wait is non-NULL and its _key has
 * POLL_BUSY_LOOP set. The rest of the mask comes from the protocol
 * ->poll() method.
 */
89bddce5 992static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 993{
cbf55001 994 unsigned int busy_flag = 0;
1da177e4
LT
995 struct socket *sock;
996
997 /*
89bddce5 998 * We can't return errors to poll, so it's either yes or no.
1da177e4 999 */
b69aee04 1000 sock = file->private_data;
2d48d67f 1001
cbf55001 1002 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1003 /* this socket can poll_ll so tell the system call */
cbf55001 1004 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1005
1006 /* once, only if requested by syscall */
cbf55001
ET
1007 if (wait && (wait->_key & POLL_BUSY_LOOP))
1008 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1009 }
1010
cbf55001 1011 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1012}
1013
/*
 * mmap handler for socket files: forwards the request to the protocol
 * ->mmap() method of the socket stored in file->private_data and returns
 * its result unchanged.
 */
89bddce5 1014static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1015{
b69aee04 1016 struct socket *sock = file->private_data;
1da177e4
LT
1017
1018 return sock->ops->mmap(file, sock, vma);
1019}
1020
/*
 * Release handler for socket files: releases the socket obtained from
 * @inode through SOCKET_I(). @filp is unused. Always returns 0.
 */
20380731 1021static int sock_close(struct inode *inode, struct file *filp)
1da177e4 1022{
1da177e4
LT
1023 sock_release(SOCKET_I(inode));
1024 return 0;
1025}
1026
1027/*
1028 * Update the socket async list
1029 *
1030 * Fasync_list locking strategy.
1031 *
1032 * 1. fasync_list is modified only under process context socket lock
1033 * i.e. under semaphore.
1034 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1035 * or under socket lock
1da177e4
LT
1036 */
1037
/*
 * fasync handler for socket files.
 *
 * Passes @fd, @filp and @on to fasync_helper() for the fasync_list of the
 * socket wait queue while holding lock_sock(), then sets SOCK_FASYNC when
 * the list is non-empty and clears it when the list is empty.
 *
 * Returns -EINVAL when the socket has no sk, otherwise 0. The return
 * value of fasync_helper() is not checked.
 */
1038static int sock_fasync(int fd, struct file *filp, int on)
1039{
989a2979
ED
1040 struct socket *sock = filp->private_data;
1041 struct sock *sk = sock->sk;
eaefd110 1042 struct socket_wq *wq;
1da177e4 1043
989a2979 1044 if (sk == NULL)
1da177e4 1045 return -EINVAL;
1da177e4
LT
1046
1047 lock_sock(sk);
eaefd110
ED
1048 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1049 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1050
eaefd110 1051 if (!wq->fasync_list)
989a2979
ED
1052 sock_reset_flag(sk, SOCK_FASYNC);
1053 else
bcdce719 1054 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1055
989a2979 1056 release_sock(sk);
1da177e4
LT
1057 return 0;
1058}
1059
ceb5d58b 1060/* This function may be called only under rcu_lock */
1da177e4 1061
/*
 * Signal the fasync listeners registered on @wq.
 *
 * @how selects the action:
 *   SOCK_WAKE_WAITD - SIGIO, unless SOCKWQ_ASYNC_WAITDATA is set in
 *                     wq->flags, in which case nothing is sent
 *   SOCK_WAKE_SPACE - SIGIO, only if SOCKWQ_ASYNC_NOSPACE was set; the
 *                     bit is cleared by the test
 *   SOCK_WAKE_IO    - SIGIO unconditionally
 *   SOCK_WAKE_URG   - SIGURG
 * @band is passed through to kill_fasync().
 *
 * Returns -1 when @wq is NULL or has no fasync listeners, otherwise 0
 * (including when no signal was sent).
 */
ceb5d58b 1062int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1063{
ceb5d58b 1064 if (!wq || !wq->fasync_list)
1da177e4 1065 return -1;
ceb5d58b 1066
89bddce5 1067 switch (how) {
8d8ad9d7 1068 case SOCK_WAKE_WAITD:
ceb5d58b 1069 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1070 break;
1071 goto call_kill;
8d8ad9d7 1072 case SOCK_WAKE_SPACE:
ceb5d58b 1073 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1074 break;
1075 /* fall through */
8d8ad9d7 1076 case SOCK_WAKE_IO:
89bddce5 1077call_kill:
43815482 1078 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1079 break;
8d8ad9d7 1080 case SOCK_WAKE_URG:
43815482 1081 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1082 }
ceb5d58b 1083
1da177e4
LT
1084 return 0;
1085}
c6d409cf 1086EXPORT_SYMBOL(sock_wake_async);
1da177e4 1087
/*
 * Create a socket and hand it to the protocol family for setup.
 *
 * @net:      network namespace passed to the family ->create()
 * @family:   protocol family, must be in [0, NPROTO)
 * @type:     socket type, must be in [0, SOCK_MAX)
 * @protocol: protocol number passed to ->create()
 * @res:      receives the new socket; written only on success
 * @kern:     passed to both security hooks and to ->create()
 *
 * Returns 0 on success. Errors produced here:
 *   -EAFNOSUPPORT  family out of range, no registered family after the
 *                  optional module load, or a module reference could
 *                  not be taken
 *   -EINVAL        type out of range
 *   -ENFILE        sock_alloc() failed
 * Errors from the security hooks and from ->create() are returned as is.
 *
 * (PF_INET, SOCK_PACKET) is rewritten to PF_PACKET with a one-time
 * pr_info() message.
 */
721db93a 1088int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1089 struct socket **res, int kern)
1da177e4
LT
1090{
1091 int err;
1092 struct socket *sock;
55737fda 1093 const struct net_proto_family *pf;
1da177e4
LT
1094
1095 /*
89bddce5 1096 * Check protocol is in range
1da177e4
LT
1097 */
1098 if (family < 0 || family >= NPROTO)
1099 return -EAFNOSUPPORT;
1100 if (type < 0 || type >= SOCK_MAX)
1101 return -EINVAL;
1102
1103 /* Compatibility.
1104
1105 This uglymoron is moved from INET layer to here to avoid
1106 deadlock in module load.
1107 */
1108 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1109 static int warned;
1da177e4
LT
1110 if (!warned) {
1111 warned = 1;
3410f22e
YY
1112 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1113 current->comm);
1da177e4
LT
1114 }
1115 family = PF_PACKET;
1116 }
1117
1118 err = security_socket_create(family, type, protocol, kern);
1119 if (err)
1120 return err;
89bddce5 1121
55737fda
SH
1122 /*
1123 * Allocate the socket and allow the family to set things up. if
1124 * the protocol is 0, the family is instructed to select an appropriate
1125 * default.
1126 */
1127 sock = sock_alloc();
1128 if (!sock) {
e87cc472 1129 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1130 return -ENFILE; /* Not exactly a match, but its the
1131 closest posix thing */
1132 }
1133
1134 sock->type = type;
1135
95a5afca 1136#ifdef CONFIG_MODULES
89bddce5
SH
1137 /* Attempt to load a protocol module if the find failed.
1138 *
1139 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1140 * requested real, full-featured networking support upon configuration.
1141 * Otherwise module support will break!
1142 */
190683a9 1143 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1144 request_module("net-pf-%d", family);
1da177e4
LT
1145#endif
1146
55737fda
SH
1147 rcu_read_lock();
1148 pf = rcu_dereference(net_families[family]);
1149 err = -EAFNOSUPPORT;
1150 if (!pf)
1151 goto out_release;
1da177e4
LT
1152
1153 /*
1154 * We will call the ->create function, that possibly is in a loadable
1155 * module, so we have to bump that loadable module refcnt first.
1156 */
55737fda 1157 if (!try_module_get(pf->owner))
1da177e4
LT
1158 goto out_release;
1159
55737fda
SH
1160 /* Now protected by module ref count */
1161 rcu_read_unlock();
1162
3f378b68 1163 err = pf->create(net, sock, protocol, kern);
55737fda 1164 if (err < 0)
1da177e4 1165 goto out_module_put;
a79af59e 1166
1da177e4
LT
1167 /*
1168 * Now to bump the refcnt of the [loadable] module that owns this
1169 * socket at sock_release time we decrement its refcnt.
1170 */
55737fda
SH
1171 if (!try_module_get(sock->ops->owner))
1172 goto out_module_busy;
1173
1da177e4
LT
1174 /*
1175 * Now that we're done with the ->create function, the [loadable]
1176 * module can have its refcnt decremented
1177 */
55737fda 1178 module_put(pf->owner);
7420ed23
VY
1179 err = security_socket_post_create(sock, family, type, protocol, kern);
1180 if (err)
3b185525 1181 goto out_sock_release;
55737fda 1182 *res = sock;
1da177e4 1183
55737fda
SH
1184 return 0;
1185
/*
 * Error unwinding. out_module_busy and out_module_put clear sock->ops
 * before the socket is released and drop the pf->owner reference taken
 * above; out_release is used while rcu_read_lock() is still held.
 */
1186out_module_busy:
1187 err = -EAFNOSUPPORT;
1da177e4 1188out_module_put:
55737fda
SH
1189 sock->ops = NULL;
1190 module_put(pf->owner);
1191out_sock_release:
1da177e4 1192 sock_release(sock);
55737fda
SH
1193 return err;
1194
1195out_release:
1196 rcu_read_unlock();
1197 goto out_sock_release;
1da177e4 1198}
721db93a 1199EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1200
/*
 * Create a socket in the network namespace of the calling task
 * (current->nsproxy->net_ns) by calling __sock_create() with kern == 0.
 * Returns the __sock_create() result.
 */
1201int sock_create(int family, int type, int protocol, struct socket **res)
1202{
1b8d7ae4 1203 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1204}
c6d409cf 1205EXPORT_SYMBOL(sock_create);
1da177e4 1206
/*
 * Create a socket in the caller-supplied namespace @net by calling
 * __sock_create() with kern == 1. Returns the __sock_create() result.
 */
eeb1bd5c 1207int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
1da177e4 1208{
eeb1bd5c 1209 return __sock_create(net, family, type, protocol, res, 1);
1da177e4 1210}
c6d409cf 1211EXPORT_SYMBOL(sock_create_kern);
1da177e4 1212
/*
 * socket system call.
 *
 * Bits of @type outside SOCK_TYPE_MASK are treated as flags; any flag
 * other than SOCK_CLOEXEC or SOCK_NONBLOCK yields -EINVAL. SOCK_NONBLOCK
 * is translated to O_NONBLOCK when the two constants differ.
 *
 * The socket is created with sock_create() and passed to sock_map_fd().
 * Returns the sock_map_fd() result on success or a negative error; when
 * sock_map_fd() fails the socket is released here.
 */
3e0fa65f 1213SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1214{
1215 int retval;
1216 struct socket *sock;
a677a039
UD
1217 int flags;
1218
e38b36f3
UD
1219 /* Check the SOCK_* constants for consistency. */
1220 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1221 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1222 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1223 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1224
a677a039 1225 flags = type & ~SOCK_TYPE_MASK;
77d27200 1226 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1227 return -EINVAL;
1228 type &= SOCK_TYPE_MASK;
1da177e4 1229
aaca0bdc
UD
1230 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1231 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1232
1da177e4
LT
1233 retval = sock_create(family, type, protocol, &sock);
1234 if (retval < 0)
1235 goto out;
1236
77d27200 1237 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1238 if (retval < 0)
1239 goto out_release;
1240
1241out:
1242 /* It may be already another descriptor 8) Not kernel problem. */
1243 return retval;
1244
1245out_release:
1246 sock_release(sock);
1247 return retval;
1248}
1249
1250/*
1251 * Create a pair of connected sockets.
1252 */
1253
3e0fa65f
HC
/*
 * socketpair system call.
 *
 * Flags carried in @type are limited to SOCK_CLOEXEC and SOCK_NONBLOCK
 * (-EINVAL otherwise), with SOCK_NONBLOCK translated to O_NONBLOCK when
 * the constants differ.
 *
 * Order of operations: create both sockets, let the protocol connect
 * them via sock1->ops->socketpair(), reserve two descriptors, wrap each
 * socket in a file, copy both descriptor numbers to @usockvec, and only
 * then install the files into the descriptor table.
 *
 * Returns 0 on success or a negative error.
 */
1254SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1255 int __user *, usockvec)
1da177e4
LT
1256{
1257 struct socket *sock1, *sock2;
1258 int fd1, fd2, err;
db349509 1259 struct file *newfile1, *newfile2;
a677a039
UD
1260 int flags;
1261
1262 flags = type & ~SOCK_TYPE_MASK;
77d27200 1263 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1264 return -EINVAL;
1265 type &= SOCK_TYPE_MASK;
1da177e4 1266
aaca0bdc
UD
1267 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1268 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1269
1da177e4
LT
1270 /*
1271 * Obtain the first socket and check if the underlying protocol
1272 * supports the socketpair call.
1273 */
1274
1275 err = sock_create(family, type, protocol, &sock1);
1276 if (err < 0)
1277 goto out;
1278
1279 err = sock_create(family, type, protocol, &sock2);
1280 if (err < 0)
1281 goto out_release_1;
1282
1283 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1284 if (err < 0)
1da177e4
LT
1285 goto out_release_both;
1286
28407630 1287 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1288 if (unlikely(fd1 < 0)) {
1289 err = fd1;
db349509 1290 goto out_release_both;
bf3c23d1 1291 }
d73aa286 1292
28407630 1293 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1294 if (unlikely(fd2 < 0)) {
1295 err = fd2;
d73aa286 1296 goto out_put_unused_1;
28407630
AV
1297 }
1298
aab174f0 1299 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1300 if (IS_ERR(newfile1)) {
28407630 1301 err = PTR_ERR(newfile1);
d73aa286 1302 goto out_put_unused_both;
28407630
AV
1303 }
1304
aab174f0 1305 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1306 if (IS_ERR(newfile2)) {
1307 err = PTR_ERR(newfile2);
d73aa286 1308 goto out_fput_1;
db349509
AV
1309 }
1310
d73aa286
YD
1311 err = put_user(fd1, &usockvec[0]);
1312 if (err)
1313 goto out_fput_both;
1314
1315 err = put_user(fd2, &usockvec[1]);
1316 if (err)
1317 goto out_fput_both;
1318
157cf649 1319 audit_fd_pair(fd1, fd2);
d73aa286 1320
db349509
AV
1321 fd_install(fd1, newfile1);
1322 fd_install(fd2, newfile2);
1da177e4
LT
1323 /* fd1 and fd2 may be already another descriptors.
1324 * Not kernel problem.
1325 */
1326
d73aa286 1327 return 0;
1da177e4 1328
/*
 * Error unwinding. Once a socket has a file, the paths below call fput()
 * on the file and do not call sock_release() on that socket.
 * NOTE(review): this presumably relies on the final fput() releasing the
 * socket through the file release handler - confirm.
 */
d73aa286
YD
1329out_fput_both:
1330 fput(newfile2);
1331 fput(newfile1);
1332 put_unused_fd(fd2);
1333 put_unused_fd(fd1);
1334 goto out;
1335
1336out_fput_1:
1337 fput(newfile1);
1338 put_unused_fd(fd2);
1339 put_unused_fd(fd1);
1340 sock_release(sock2);
1341 goto out;
1da177e4 1342
d73aa286
YD
1343out_put_unused_both:
1344 put_unused_fd(fd2);
1345out_put_unused_1:
1346 put_unused_fd(fd1);
1da177e4 1347out_release_both:
89bddce5 1348 sock_release(sock2);
1da177e4 1349out_release_1:
89bddce5 1350 sock_release(sock1);
1da177e4
LT
1351out:
1352 return err;
1353}
1354
1da177e4
LT
1355/*
1356 * Bind a name to a socket. Nothing much to do here since it's
1357 * the protocol's responsibility to handle the local address.
1358 *
1359 * We move the socket address to kernel space before we call
1360 * the protocol layer (having also checked the address is ok).
1361 */
1362
/*
 * bind system call.
 *
 * Looks up the socket for @fd, copies @addrlen bytes of @umyaddr into an
 * on-stack sockaddr_storage with move_addr_to_kernel(), then calls
 * security_socket_bind() and, if that allows it, the protocol ->bind().
 *
 * Returns the first error encountered or the ->bind() result. When the
 * lookup fails, err is whatever sockfd_lookup_light() stored in it.
 */
20f37034 1363SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1364{
1365 struct socket *sock;
230b1839 1366 struct sockaddr_storage address;
6cb153ca 1367 int err, fput_needed;
1da177e4 1368
89bddce5 1369 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1370 if (sock) {
43db362d 1371 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1372 if (err >= 0) {
1373 err = security_socket_bind(sock,
230b1839 1374 (struct sockaddr *)&address,
89bddce5 1375 addrlen);
6cb153ca
BL
1376 if (!err)
1377 err = sock->ops->bind(sock,
89bddce5 1378 (struct sockaddr *)
230b1839 1379 &address, addrlen);
1da177e4 1380 }
6cb153ca 1381 fput_light(sock->file, fput_needed);
89bddce5 1382 }
1da177e4
LT
1383 return err;
1384}
1385
1da177e4
LT
1386/*
1387 * Perform a listen. Basically, we allow the protocol to do anything
1388 * necessary for a listen, and if that works, we mark the socket as
1389 * ready for listening.
1390 */
1391
/*
 * listen system call.
 *
 * Clamps @backlog to the sysctl_somaxconn value of the network namespace
 * of the socket, then calls security_socket_listen() and, if that allows
 * it, the protocol ->listen().
 *
 * Returns the first error encountered or the ->listen() result.
 */
3e0fa65f 1392SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1393{
1394 struct socket *sock;
6cb153ca 1395 int err, fput_needed;
b8e1f9b5 1396 int somaxconn;
89bddce5
SH
1397
1398 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1399 if (sock) {
8efa6e93 1400 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
/*
 * The comparison is unsigned, so a negative backlog compares as a
 * very large value and is clamped as well (assuming somaxconn is
 * non-negative).
 */
95c96174 1401 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1402 backlog = somaxconn;
1da177e4
LT
1403
1404 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1405 if (!err)
1406 err = sock->ops->listen(sock, backlog);
1da177e4 1407
6cb153ca 1408 fput_light(sock->file, fput_needed);
1da177e4
LT
1409 }
1410 return err;
1411}
1412
1da177e4
LT
1413/*
1414 * For accept, we attempt to create a new socket, set up the link
1415 * with the client, wake up the client, then return the new
1416 * connected fd. We collect the address of the connector in kernel
1417 * space and move it to user at the very end. This is unclean because
1418 * we open the socket then return an error.
1419 *
1420 * 1003.1g adds the ability to recvmsg() to query connection pending
1421 * status to recvmsg. We need to add that support in a way thats
1422 * clean when we restucture accept also.
1423 */
1424
20f37034
HC
/*
 * accept4 system call.
 *
 * @flags may contain only SOCK_CLOEXEC and SOCK_NONBLOCK (-EINVAL
 * otherwise); SOCK_NONBLOCK is translated to O_NONBLOCK when the
 * constants differ.
 *
 * A new socket is allocated with the type and ops of the listening
 * socket, given a reserved descriptor and a file, checked by
 * security_socket_accept() and passed to the protocol ->accept(). When
 * @upeer_sockaddr is non-NULL the peer name is fetched with ->getname()
 * (a failure there is reported as -ECONNABORTED) and copied out with
 * move_addr_to_user(). The descriptor is installed last.
 *
 * Returns the new descriptor on success or a negative error; -ENFILE
 * when sock_alloc() fails.
 */
1425SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1426 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1427{
1428 struct socket *sock, *newsock;
39d8c1b6 1429 struct file *newfile;
6cb153ca 1430 int err, len, newfd, fput_needed;
230b1839 1431 struct sockaddr_storage address;
1da177e4 1432
77d27200 1433 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1434 return -EINVAL;
1435
1436 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1437 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1438
6cb153ca 1439 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1440 if (!sock)
1441 goto out;
1442
1443 err = -ENFILE;
c6d409cf
ED
1444 newsock = sock_alloc();
1445 if (!newsock)
1da177e4
LT
1446 goto out_put;
1447
1448 newsock->type = sock->type;
1449 newsock->ops = sock->ops;
1450
1da177e4
LT
1451 /*
1452 * We don't need try_module_get here, as the listening socket (sock)
1453 * has the protocol module (sock->ops->owner) held.
1454 */
1455 __module_get(newsock->ops->owner);
1456
28407630 1457 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1458 if (unlikely(newfd < 0)) {
1459 err = newfd;
9a1875e6
DM
1460 sock_release(newsock);
1461 goto out_put;
39d8c1b6 1462 }
aab174f0 1463 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1464 if (IS_ERR(newfile)) {
28407630
AV
1465 err = PTR_ERR(newfile);
1466 put_unused_fd(newfd);
1467 sock_release(newsock);
1468 goto out_put;
1469 }
39d8c1b6 1470
a79af59e
FF
1471 err = security_socket_accept(sock, newsock);
1472 if (err)
39d8c1b6 1473 goto out_fd;
a79af59e 1474
1da177e4
LT
1475 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1476 if (err < 0)
39d8c1b6 1477 goto out_fd;
1da177e4
LT
1478
1479 if (upeer_sockaddr) {
230b1839 1480 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1481 &len, 2) < 0) {
1da177e4 1482 err = -ECONNABORTED;
39d8c1b6 1483 goto out_fd;
1da177e4 1484 }
43db362d 1485 err = move_addr_to_user(&address,
230b1839 1486 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1487 if (err < 0)
39d8c1b6 1488 goto out_fd;
1da177e4
LT
1489 }
1490
1491 /* File flags are not inherited via accept() unlike another OSes. */
1492
39d8c1b6
DM
1493 fd_install(newfd, newfile);
1494 err = newfd;
1da177e4 1495
1da177e4 1496out_put:
6cb153ca 1497 fput_light(sock->file, fput_needed);
1da177e4
LT
1498out:
1499 return err;
39d8c1b6 1500out_fd:
/*
 * The new socket already has a file here, so the file is dropped with
 * fput() and sock_release() is not called on newsock on this path.
 */
9606a216 1501 fput(newfile);
39d8c1b6 1502 put_unused_fd(newfd);
1da177e4
LT
1503 goto out_put;
1504}
1505
20f37034
HC
/*
 * accept system call: identical to accept4 with flags == 0.
 */
1506SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1507 int __user *, upeer_addrlen)
aaca0bdc 1508{
de11defe 1509 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1510}
1511
1da177e4
LT
1512/*
1513 * Attempt to connect to a socket with the server address. The address
1514 * is in user space so we verify it is OK and move it to kernel space.
1515 *
1516 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1517 * break bindings
1518 *
1519 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1520 * other SEQPACKET protocols that take time to connect() as it doesn't
1521 * include the -EINPROGRESS status for such sockets.
1522 */
1523
20f37034
HC
/*
 * connect system call.
 *
 * Looks up the socket for @fd, copies @addrlen bytes of @uservaddr into
 * an on-stack sockaddr_storage, calls security_socket_connect() and then
 * the protocol ->connect() with the f_flags of the socket file.
 *
 * Returns the first error encountered or the ->connect() result.
 */
1524SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1525 int, addrlen)
1da177e4
LT
1526{
1527 struct socket *sock;
230b1839 1528 struct sockaddr_storage address;
6cb153ca 1529 int err, fput_needed;
1da177e4 1530
6cb153ca 1531 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1532 if (!sock)
1533 goto out;
43db362d 1534 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1535 if (err < 0)
1536 goto out_put;
1537
89bddce5 1538 err =
230b1839 1539 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1540 if (err)
1541 goto out_put;
1542
230b1839 1543 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1544 sock->file->f_flags);
1545out_put:
6cb153ca 1546 fput_light(sock->file, fput_needed);
1da177e4
LT
1547out:
1548 return err;
1549}
1550
1551/*
1552 * Get the local address ('name') of a socket object. Move the obtained
1553 * name to user space.
1554 */
1555
20f37034
HC
/*
 * getsockname system call.
 *
 * After security_socket_getsockname() allows it, the protocol ->getname()
 * is called with 0 as its last argument to fill an on-stack
 * sockaddr_storage, which is then copied to @usockaddr/@usockaddr_len by
 * move_addr_to_user().
 *
 * Returns the first error encountered or the move_addr_to_user() result.
 */
1556SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1557 int __user *, usockaddr_len)
1da177e4
LT
1558{
1559 struct socket *sock;
230b1839 1560 struct sockaddr_storage address;
6cb153ca 1561 int len, err, fput_needed;
89bddce5 1562
6cb153ca 1563 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1564 if (!sock)
1565 goto out;
1566
1567 err = security_socket_getsockname(sock);
1568 if (err)
1569 goto out_put;
1570
230b1839 1571 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1572 if (err)
1573 goto out_put;
43db362d 1574 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1575
1576out_put:
6cb153ca 1577 fput_light(sock->file, fput_needed);
1da177e4
LT
1578out:
1579 return err;
1580}
1581
1582/*
1583 * Get the remote address ('name') of a socket object. Move the obtained
1584 * name to user space.
1585 */
1586
20f37034
HC
/*
 * getpeername system call.
 *
 * After security_socket_getpeername() allows it, the protocol ->getname()
 * is called with 1 as its last argument to fill an on-stack
 * sockaddr_storage, which is then copied to @usockaddr/@usockaddr_len by
 * move_addr_to_user().
 *
 * Returns the first error encountered or the move_addr_to_user() result.
 */
1587SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1588 int __user *, usockaddr_len)
1da177e4
LT
1589{
1590 struct socket *sock;
230b1839 1591 struct sockaddr_storage address;
6cb153ca 1592 int len, err, fput_needed;
1da177e4 1593
89bddce5
SH
1594 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1595 if (sock != NULL) {
1da177e4
LT
1596 err = security_socket_getpeername(sock);
1597 if (err) {
6cb153ca 1598 fput_light(sock->file, fput_needed);
1da177e4
LT
1599 return err;
1600 }
1601
89bddce5 1602 err =
230b1839 1603 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1604 1);
1da177e4 1605 if (!err)
43db362d 1606 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1607 usockaddr_len);
6cb153ca 1608 fput_light(sock->file, fput_needed);
1da177e4
LT
1609 }
1610 return err;
1611}
1612
1613/*
1614 * Send a datagram to a given address. We move the address into kernel
1615 * space and check the user space data area is readable before invoking
1616 * the protocol.
1617 */
1618
/*
 * sendto system call.
 *
 * import_single_range() sets up msg.msg_iter over the user buffer
 * @buff/@len before the socket is looked up, so a bad buffer fails
 * without touching the descriptor. When @addr is non-NULL it is copied
 * into an on-stack sockaddr_storage and used as the message name with
 * length @addr_len. MSG_DONTWAIT is added to @flags when the socket file
 * has O_NONBLOCK set.
 *
 * Returns the sock_sendmsg() result or a negative error.
 */
3e0fa65f 1619SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1620 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1621 int, addr_len)
1da177e4
LT
1622{
1623 struct socket *sock;
230b1839 1624 struct sockaddr_storage address;
1da177e4
LT
1625 int err;
1626 struct msghdr msg;
1627 struct iovec iov;
6cb153ca 1628 int fput_needed;
6cb153ca 1629
602bd0e9
AV
1630 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1631 if (unlikely(err))
1632 return err;
de0fa95c
PE
1633 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1634 if (!sock)
4387ff75 1635 goto out;
6cb153ca 1636
89bddce5 1637 msg.msg_name = NULL;
89bddce5
SH
1638 msg.msg_control = NULL;
1639 msg.msg_controllen = 0;
1640 msg.msg_namelen = 0;
6cb153ca 1641 if (addr) {
43db362d 1642 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1643 if (err < 0)
1644 goto out_put;
230b1839 1645 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1646 msg.msg_namelen = addr_len;
1da177e4
LT
1647 }
1648 if (sock->file->f_flags & O_NONBLOCK)
1649 flags |= MSG_DONTWAIT;
1650 msg.msg_flags = flags;
d8725c86 1651 err = sock_sendmsg(sock, &msg);
1da177e4 1652
89bddce5 1653out_put:
de0fa95c 1654 fput_light(sock->file, fput_needed);
4387ff75 1655out:
1da177e4
LT
1656 return err;
1657}
1658
1659/*
89bddce5 1660 * Send a datagram down a socket.
1da177e4
LT
1661 */
1662
/*
 * send system call: sendto with no destination address (NULL, length 0).
 */
3e0fa65f 1663SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1664 unsigned int, flags)
1da177e4
LT
1665{
1666 return sys_sendto(fd, buff, len, flags, NULL, 0);
1667}
1668
1669/*
89bddce5 1670 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1671 * sender. We verify the buffers are writable and if needed move the
1672 * sender address from kernel to user space.
1673 */
1674
/*
 * recvfrom system call.
 *
 * import_single_range() sets up msg.msg_iter over the user buffer
 * @ubuf/@size before the socket is looked up. The message name points at
 * an on-stack sockaddr_storage only when @addr is non-NULL. MSG_DONTWAIT
 * is added to @flags when the socket file has O_NONBLOCK set.
 *
 * On a successful receive with @addr set, the sender address is copied
 * out with move_addr_to_user(); if that copy fails its error replaces the
 * receive result.
 *
 * Returns the sock_recvmsg() result or a negative error.
 */
3e0fa65f 1675SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1676 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1677 int __user *, addr_len)
1da177e4
LT
1678{
1679 struct socket *sock;
1680 struct iovec iov;
1681 struct msghdr msg;
230b1839 1682 struct sockaddr_storage address;
89bddce5 1683 int err, err2;
6cb153ca
BL
1684 int fput_needed;
1685
602bd0e9
AV
1686 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1687 if (unlikely(err))
1688 return err;
de0fa95c 1689 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1690 if (!sock)
de0fa95c 1691 goto out;
1da177e4 1692
89bddce5
SH
1693 msg.msg_control = NULL;
1694 msg.msg_controllen = 0;
f3d33426
HFS
1695 /* Save some cycles and don't copy the address if not needed */
1696 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1697 /* We assume all kernel code knows the size of sockaddr_storage */
1698 msg.msg_namelen = 0;
130ed5d1 1699 msg.msg_iocb = NULL;
1da177e4
LT
1700 if (sock->file->f_flags & O_NONBLOCK)
1701 flags |= MSG_DONTWAIT;
602bd0e9 1702 err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
1da177e4 1703
89bddce5 1704 if (err >= 0 && addr != NULL) {
43db362d 1705 err2 = move_addr_to_user(&address,
230b1839 1706 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1707 if (err2 < 0)
1708 err = err2;
1da177e4 1709 }
de0fa95c
PE
1710
1711 fput_light(sock->file, fput_needed);
4387ff75 1712out:
1da177e4
LT
1713 return err;
1714}
1715
1716/*
89bddce5 1717 * Receive a datagram from a socket.
1da177e4
LT
1718 */
1719
b7c0ddf5
JG
/*
 * recv system call: recvfrom with no address output (NULL, NULL).
 */
1720SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1721 unsigned int, flags)
1da177e4
LT
1722{
1723 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1724}
1725
1726/*
1727 * Set a socket option. Because we don't know the option lengths we have
1728 * to pass the user mode parameter for the protocols to sort out.
1729 */
1730
20f37034
HC
/*
 * setsockopt system call.
 *
 * A negative @optlen is rejected with -EINVAL before the descriptor is
 * looked up. After security_socket_setsockopt() allows it, SOL_SOCKET
 * options go to sock_setsockopt() and every other level goes to the
 * protocol ->setsockopt(). @optval is passed through as a user pointer.
 *
 * Returns the first error encountered or the handler result.
 */
1731SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1732 char __user *, optval, int, optlen)
1da177e4 1733{
6cb153ca 1734 int err, fput_needed;
1da177e4
LT
1735 struct socket *sock;
1736
1737 if (optlen < 0)
1738 return -EINVAL;
89bddce5
SH
1739
1740 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1741 if (sock != NULL) {
1742 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1743 if (err)
1744 goto out_put;
1da177e4
LT
1745
1746 if (level == SOL_SOCKET)
89bddce5
SH
1747 err =
1748 sock_setsockopt(sock, level, optname, optval,
1749 optlen);
1da177e4 1750 else
89bddce5
SH
1751 err =
1752 sock->ops->setsockopt(sock, level, optname, optval,
1753 optlen);
6cb153ca
BL
1754out_put:
1755 fput_light(sock->file, fput_needed);
1da177e4
LT
1756 }
1757 return err;
1758}
1759
1760/*
1761 * Get a socket option. Because we don't know the option lengths we have
1762 * to pass a user mode parameter for the protocols to sort out.
1763 */
1764
20f37034
HC
/*
 * getsockopt system call.
 *
 * After security_socket_getsockopt() allows it, SOL_SOCKET options go to
 * sock_getsockopt() and every other level goes to the protocol
 * ->getsockopt(). @optval and @optlen are passed through as user
 * pointers.
 *
 * Returns the first error encountered or the handler result.
 */
1765SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1766 char __user *, optval, int __user *, optlen)
1da177e4 1767{
6cb153ca 1768 int err, fput_needed;
1da177e4
LT
1769 struct socket *sock;
1770
89bddce5
SH
1771 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1772 if (sock != NULL) {
6cb153ca
BL
1773 err = security_socket_getsockopt(sock, level, optname);
1774 if (err)
1775 goto out_put;
1da177e4
LT
1776
1777 if (level == SOL_SOCKET)
89bddce5
SH
1778 err =
1779 sock_getsockopt(sock, level, optname, optval,
1780 optlen);
1da177e4 1781 else
89bddce5
SH
1782 err =
1783 sock->ops->getsockopt(sock, level, optname, optval,
1784 optlen);
6cb153ca
BL
1785out_put:
1786 fput_light(sock->file, fput_needed);
1da177e4
LT
1787 }
1788 return err;
1789}
1790
1da177e4
LT
1791/*
1792 * Shutdown a socket.
1793 */
1794
/*
 * shutdown system call: after security_socket_shutdown() allows it, calls
 * the protocol ->shutdown() with @how. Returns the first error
 * encountered or the ->shutdown() result.
 */
754fe8d2 1795SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1796{
6cb153ca 1797 int err, fput_needed;
1da177e4
LT
1798 struct socket *sock;
1799
89bddce5
SH
1800 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1801 if (sock != NULL) {
1da177e4 1802 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1803 if (!err)
1804 err = sock->ops->shutdown(sock, how);
1805 fput_light(sock->file, fput_needed);
1da177e4
LT
1806 }
1807 return err;
1808}
1809
89bddce5 1810/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1811 * fields which are the same type (int / unsigned) on our platforms.
1812 */
/*
 * COMPAT_MSG(msg, member) yields the address of @member in either
 * msg##_compat or msg, chosen by MSG_CMSG_COMPAT in `flags`. The macros
 * are unhygienic: the expansion site must have both a variable named
 * `flags` and a variable named <msg>_compat in scope.
 */
1813#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1814#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1815#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1816
c71d8ebe
TH
/*
 * Destination address remembered across the messages of one sendmmsg
 * call, so ___sys_sendmsg() can compare the next destination against the
 * last one that was sent to successfully.
 */
1817struct used_address {
/* address bytes of the last successful destination */
1818 struct sockaddr_storage name;
/* number of valid bytes in name; callers start it at UINT_MAX */
1819 unsigned int name_len;
1820};
1821
da184284
AV
/*
 * Copy a user msghdr into @kmsg and import its iovec array.
 *
 * @kmsg:      kernel msghdr to fill. When @save_addr is NULL and the user
 *             supplied an address, the address is copied into the buffer
 *             kmsg->msg_name already points to, so the caller must have
 *             set that pointer beforehand.
 * @umsg:      user-space header to read
 * @save_addr: if non-NULL, receives the user msg_name pointer and the
 *             address is not copied in; it also selects READ instead of
 *             WRITE as the import_iovec() direction.
 * @iov:       in/out iovec pointer handed to import_iovec()
 *
 * msg_namelen is forced to 0 when the user name pointer is NULL, rejected
 * with -EINVAL when negative, and silently truncated to
 * sizeof(struct sockaddr_storage) when larger.
 *
 * Returns the import_iovec() result, or -EFAULT when the header cannot be
 * read, -EMSGSIZE when msg_iovlen exceeds UIO_MAXIOV, or the error from
 * move_addr_to_kernel().
 */
1822static int copy_msghdr_from_user(struct msghdr *kmsg,
1823 struct user_msghdr __user *umsg,
1824 struct sockaddr __user **save_addr,
1825 struct iovec **iov)
1661bf36 1826{
08adb7da
AV
1827 struct sockaddr __user *uaddr;
1828 struct iovec __user *uiov;
c0371da6 1829 size_t nr_segs;
08adb7da
AV
1830 ssize_t err;
1831
1832 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1833 __get_user(uaddr, &umsg->msg_name) ||
1834 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1835 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1836 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1837 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1838 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1839 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1840 return -EFAULT;
dbb490b9 1841
08adb7da 1842 if (!uaddr)
6a2a2b3a
AS
1843 kmsg->msg_namelen = 0;
1844
dbb490b9
ML
1845 if (kmsg->msg_namelen < 0)
1846 return -EINVAL;
1847
1661bf36 1848 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1849 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1850
1851 if (save_addr)
1852 *save_addr = uaddr;
1853
1854 if (uaddr && kmsg->msg_namelen) {
1855 if (!save_addr) {
1856 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1857 kmsg->msg_name);
1858 if (err < 0)
1859 return err;
1860 }
1861 } else {
1862 kmsg->msg_name = NULL;
1863 kmsg->msg_namelen = 0;
1864 }
1865
c0371da6 1866 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1867 return -EMSGSIZE;
1868
0345f931 1869 kmsg->msg_iocb = NULL;
1870
da184284
AV
1871 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1872 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1873}
1874
/*
 * Send one message described by a user msghdr; shared by the sendmsg and
 * sendmmsg paths.
 *
 * @sock:         socket to send on
 * @msg:          user header, read in native or compat layout depending
 *                on MSG_CMSG_COMPAT in @flags
 * @msg_sys:      kernel msghdr filled in here; its msg_name is pointed at
 *                an on-stack sockaddr_storage
 * @flags:        send flags; MSG_DONTWAIT is added when the socket file
 *                has O_NONBLOCK set
 * @used_address: optional cache of the last successful destination; a
 *                match sends with sock_sendmsg_nosec() instead of
 *                sock_sendmsg(), and a successful send updates the cache
 *
 * Native control data is copied from user space into the on-stack ctl
 * buffer, or into a sock_kmalloc() buffer when it does not fit; compat
 * control data is converted by cmsghdr_from_user_compat_to_kern().
 *
 * Returns the send result or a negative error: -ENOBUFS when
 * msg_controllen exceeds INT_MAX or the control buffer cannot be
 * allocated, -EFAULT when the control data cannot be copied in.
 */
666547ff 1875static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1876 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1877 struct used_address *used_address)
1da177e4 1878{
89bddce5
SH
1879 struct compat_msghdr __user *msg_compat =
1880 (struct compat_msghdr __user *)msg;
230b1839 1881 struct sockaddr_storage address;
1da177e4 1882 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1883 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1884 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1885 /* 20 is size of ipv6_pktinfo */
1da177e4 1886 unsigned char *ctl_buf = ctl;
d8725c86 1887 int ctl_len;
08adb7da 1888 ssize_t err;
89bddce5 1889
08adb7da 1890 msg_sys->msg_name = &address;
1da177e4 1891
08449320 1892 if (MSG_CMSG_COMPAT & flags)
08adb7da 1893 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1894 else
08adb7da 1895 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1896 if (err < 0)
da184284 1897 return err;
1da177e4
LT
1898
1899 err = -ENOBUFS;
1900
228e548e 1901 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1902 goto out_freeiov;
228e548e 1903 ctl_len = msg_sys->msg_controllen;
1da177e4 1904 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1905 err =
228e548e 1906 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1907 sizeof(ctl));
1da177e4
LT
1908 if (err)
1909 goto out_freeiov;
228e548e
AB
1910 ctl_buf = msg_sys->msg_control;
1911 ctl_len = msg_sys->msg_controllen;
1da177e4 1912 } else if (ctl_len) {
89bddce5 1913 if (ctl_len > sizeof(ctl)) {
1da177e4 1914 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1915 if (ctl_buf == NULL)
1da177e4
LT
1916 goto out_freeiov;
1917 }
1918 err = -EFAULT;
1919 /*
228e548e 1920 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1921 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1922 * checking falls down on this.
1923 */
fb8621bb 1924 if (copy_from_user(ctl_buf,
228e548e 1925 (void __user __force *)msg_sys->msg_control,
89bddce5 1926 ctl_len))
1da177e4 1927 goto out_freectl;
228e548e 1928 msg_sys->msg_control = ctl_buf;
1da177e4 1929 }
228e548e 1930 msg_sys->msg_flags = flags;
1da177e4
LT
1931
1932 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1933 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1934 /*
1935 * If this is sendmmsg() and current destination address is same as
1936 * previously succeeded address, omit asking LSM's decision.
1937 * used_address->name_len is initialized to UINT_MAX so that the first
1938 * destination address never matches.
1939 */
bc909d9d
MD
1940 if (used_address && msg_sys->msg_name &&
1941 used_address->name_len == msg_sys->msg_namelen &&
1942 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1943 used_address->name_len)) {
d8725c86 1944 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1945 goto out_freectl;
1946 }
d8725c86 1947 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1948 /*
1949 * If this is sendmmsg() and sending to current destination address was
1950 * successful, remember it.
1951 */
1952 if (used_address && err >= 0) {
1953 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1954 if (msg_sys->msg_name)
1955 memcpy(&used_address->name, msg_sys->msg_name,
1956 used_address->name_len);
c71d8ebe 1957 }
1da177e4
LT
1958
1959out_freectl:
/* Only a control buffer other than the on-stack ctl array is freed. */
89bddce5 1960 if (ctl_buf != ctl)
1da177e4
LT
1961 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1962out_freeiov:
da184284 1963 kfree(iov);
228e548e
AB
1964 return err;
1965}
1966
1967/*
1968 * BSD sendmsg interface
1969 */
1970
666547ff 1971long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1972{
1973 int fput_needed, err;
1974 struct msghdr msg_sys;
1be374a0
AL
1975 struct socket *sock;
1976
1be374a0 1977 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1978 if (!sock)
1979 goto out;
1980
a7526eb5 1981 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 1982
6cb153ca 1983 fput_light(sock->file, fput_needed);
89bddce5 1984out:
1da177e4
LT
1985 return err;
1986}
1987
666547ff 1988SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1989{
1990 if (flags & MSG_CMSG_COMPAT)
1991 return -EINVAL;
1992 return __sys_sendmsg(fd, msg, flags);
1993}
1994
228e548e
AB
1995/*
1996 * Linux sendmmsg interface
1997 */
1998
1999int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2000 unsigned int flags)
2001{
2002 int fput_needed, err, datagrams;
2003 struct socket *sock;
2004 struct mmsghdr __user *entry;
2005 struct compat_mmsghdr __user *compat_entry;
2006 struct msghdr msg_sys;
c71d8ebe 2007 struct used_address used_address;
228e548e 2008
98382f41
AB
2009 if (vlen > UIO_MAXIOV)
2010 vlen = UIO_MAXIOV;
228e548e
AB
2011
2012 datagrams = 0;
2013
2014 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2015 if (!sock)
2016 return err;
2017
c71d8ebe 2018 used_address.name_len = UINT_MAX;
228e548e
AB
2019 entry = mmsg;
2020 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2021 err = 0;
228e548e
AB
2022
2023 while (datagrams < vlen) {
228e548e 2024 if (MSG_CMSG_COMPAT & flags) {
666547ff 2025 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5 2026 &msg_sys, flags, &used_address);
228e548e
AB
2027 if (err < 0)
2028 break;
2029 err = __put_user(err, &compat_entry->msg_len);
2030 ++compat_entry;
2031 } else {
a7526eb5 2032 err = ___sys_sendmsg(sock,
666547ff 2033 (struct user_msghdr __user *)entry,
a7526eb5 2034 &msg_sys, flags, &used_address);
228e548e
AB
2035 if (err < 0)
2036 break;
2037 err = put_user(err, &entry->msg_len);
2038 ++entry;
2039 }
2040
2041 if (err)
2042 break;
2043 ++datagrams;
2044 }
2045
228e548e
AB
2046 fput_light(sock->file, fput_needed);
2047
728ffb86
AB
2048 /* We only return an error if no datagrams were able to be sent */
2049 if (datagrams != 0)
228e548e
AB
2050 return datagrams;
2051
228e548e
AB
2052 return err;
2053}
2054
2055SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2056 unsigned int, vlen, unsigned int, flags)
2057{
1be374a0
AL
2058 if (flags & MSG_CMSG_COMPAT)
2059 return -EINVAL;
228e548e
AB
2060 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2061}
2062
666547ff 2063static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2064 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2065{
89bddce5
SH
2066 struct compat_msghdr __user *msg_compat =
2067 (struct compat_msghdr __user *)msg;
1da177e4 2068 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2069 struct iovec *iov = iovstack;
1da177e4 2070 unsigned long cmsg_ptr;
08adb7da
AV
2071 int total_len, len;
2072 ssize_t err;
1da177e4
LT
2073
2074 /* kernel mode address */
230b1839 2075 struct sockaddr_storage addr;
1da177e4
LT
2076
2077 /* user mode address pointers */
2078 struct sockaddr __user *uaddr;
08adb7da 2079 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2080
08adb7da 2081 msg_sys->msg_name = &addr;
1da177e4 2082
f3d33426 2083 if (MSG_CMSG_COMPAT & flags)
08adb7da 2084 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2085 else
08adb7da 2086 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2087 if (err < 0)
da184284
AV
2088 return err;
2089 total_len = iov_iter_count(&msg_sys->msg_iter);
1da177e4 2090
a2e27255
ACM
2091 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2092 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2093
f3d33426
HFS
2094 /* We assume all kernel code knows the size of sockaddr_storage */
2095 msg_sys->msg_namelen = 0;
2096
1da177e4
LT
2097 if (sock->file->f_flags & O_NONBLOCK)
2098 flags |= MSG_DONTWAIT;
a2e27255
ACM
2099 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2100 total_len, flags);
1da177e4
LT
2101 if (err < 0)
2102 goto out_freeiov;
2103 len = err;
2104
2105 if (uaddr != NULL) {
43db362d 2106 err = move_addr_to_user(&addr,
a2e27255 2107 msg_sys->msg_namelen, uaddr,
89bddce5 2108 uaddr_len);
1da177e4
LT
2109 if (err < 0)
2110 goto out_freeiov;
2111 }
a2e27255 2112 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2113 COMPAT_FLAGS(msg));
1da177e4
LT
2114 if (err)
2115 goto out_freeiov;
2116 if (MSG_CMSG_COMPAT & flags)
a2e27255 2117 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2118 &msg_compat->msg_controllen);
2119 else
a2e27255 2120 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2121 &msg->msg_controllen);
2122 if (err)
2123 goto out_freeiov;
2124 err = len;
2125
2126out_freeiov:
da184284 2127 kfree(iov);
a2e27255
ACM
2128 return err;
2129}
2130
2131/*
2132 * BSD recvmsg interface
2133 */
2134
666547ff 2135long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2136{
2137 int fput_needed, err;
2138 struct msghdr msg_sys;
1be374a0
AL
2139 struct socket *sock;
2140
1be374a0 2141 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2142 if (!sock)
2143 goto out;
2144
a7526eb5 2145 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2146
6cb153ca 2147 fput_light(sock->file, fput_needed);
1da177e4
LT
2148out:
2149 return err;
2150}
2151
666547ff 2152SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2153 unsigned int, flags)
2154{
2155 if (flags & MSG_CMSG_COMPAT)
2156 return -EINVAL;
2157 return __sys_recvmsg(fd, msg, flags);
2158}
2159
a2e27255
ACM
2160/*
2161 * Linux recvmmsg interface
2162 */
2163
2164int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2165 unsigned int flags, struct timespec *timeout)
2166{
2167 int fput_needed, err, datagrams;
2168 struct socket *sock;
2169 struct mmsghdr __user *entry;
d7256d0e 2170 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2171 struct msghdr msg_sys;
2172 struct timespec end_time;
2173
2174 if (timeout &&
2175 poll_select_set_timeout(&end_time, timeout->tv_sec,
2176 timeout->tv_nsec))
2177 return -EINVAL;
2178
2179 datagrams = 0;
2180
2181 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2182 if (!sock)
2183 return err;
2184
2185 err = sock_error(sock->sk);
2186 if (err)
2187 goto out_put;
2188
2189 entry = mmsg;
d7256d0e 2190 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2191
2192 while (datagrams < vlen) {
2193 /*
2194 * No need to ask LSM for more than the first datagram.
2195 */
d7256d0e 2196 if (MSG_CMSG_COMPAT & flags) {
666547ff 2197 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2198 &msg_sys, flags & ~MSG_WAITFORONE,
2199 datagrams);
d7256d0e
JMG
2200 if (err < 0)
2201 break;
2202 err = __put_user(err, &compat_entry->msg_len);
2203 ++compat_entry;
2204 } else {
a7526eb5 2205 err = ___sys_recvmsg(sock,
666547ff 2206 (struct user_msghdr __user *)entry,
a7526eb5
AL
2207 &msg_sys, flags & ~MSG_WAITFORONE,
2208 datagrams);
d7256d0e
JMG
2209 if (err < 0)
2210 break;
2211 err = put_user(err, &entry->msg_len);
2212 ++entry;
2213 }
2214
a2e27255
ACM
2215 if (err)
2216 break;
a2e27255
ACM
2217 ++datagrams;
2218
71c5c159
BB
2219 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2220 if (flags & MSG_WAITFORONE)
2221 flags |= MSG_DONTWAIT;
2222
a2e27255
ACM
2223 if (timeout) {
2224 ktime_get_ts(timeout);
2225 *timeout = timespec_sub(end_time, *timeout);
2226 if (timeout->tv_sec < 0) {
2227 timeout->tv_sec = timeout->tv_nsec = 0;
2228 break;
2229 }
2230
2231 /* Timeout, return less than vlen datagrams */
2232 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2233 break;
2234 }
2235
2236 /* Out of band data, return right away */
2237 if (msg_sys.msg_flags & MSG_OOB)
2238 break;
2239 }
2240
2241out_put:
2242 fput_light(sock->file, fput_needed);
1da177e4 2243
a2e27255
ACM
2244 if (err == 0)
2245 return datagrams;
2246
2247 if (datagrams != 0) {
2248 /*
2249 * We may return less entries than requested (vlen) if the
2250 * sock is non block and there aren't enough datagrams...
2251 */
2252 if (err != -EAGAIN) {
2253 /*
2254 * ... or if recvmsg returns an error after we
2255 * received some datagrams, where we record the
2256 * error to return on the next call or if the
2257 * app asks about it using getsockopt(SO_ERROR).
2258 */
2259 sock->sk->sk_err = -err;
2260 }
2261
2262 return datagrams;
2263 }
2264
2265 return err;
2266}
2267
2268SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2269 unsigned int, vlen, unsigned int, flags,
2270 struct timespec __user *, timeout)
2271{
2272 int datagrams;
2273 struct timespec timeout_sys;
2274
1be374a0
AL
2275 if (flags & MSG_CMSG_COMPAT)
2276 return -EINVAL;
2277
a2e27255
ACM
2278 if (!timeout)
2279 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2280
2281 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2282 return -EFAULT;
2283
2284 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2285
2286 if (datagrams > 0 &&
2287 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2288 datagrams = -EFAULT;
2289
2290 return datagrams;
2291}
2292
2293#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes.
 * The array is indexed by the socketcall 'call' number; slot 0 is unused.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	[0]  = AL(0),
	[1]  = AL(3),
	[2]  = AL(3),
	[3]  = AL(3),
	[4]  = AL(2),
	[5]  = AL(3),
	[6]  = AL(3),
	[7]  = AL(3),
	[8]  = AL(4),
	[9]  = AL(4),
	[10] = AL(4),
	[11] = AL(6),
	[12] = AL(6),
	[13] = AL(2),
	[14] = AL(5),
	[15] = AL(5),
	[16] = AL(3),
	[17] = AL(3),
	[18] = AL(4),
	[19] = AL(5),
	[20] = AL(4),
};

#undef AL
2304
2305/*
89bddce5 2306 * System call vectors.
1da177e4
LT
2307 *
2308 * Argument checking cleaned up. Saved 20% in size.
2309 * This function doesn't need to set the kernel lock because
89bddce5 2310 * it is set by the callees.
1da177e4
LT
2311 */
2312
3e0fa65f 2313SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2314{
2950fa9d 2315 unsigned long a[AUDITSC_ARGS];
89bddce5 2316 unsigned long a0, a1;
1da177e4 2317 int err;
47379052 2318 unsigned int len;
1da177e4 2319
228e548e 2320 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2321 return -EINVAL;
2322
47379052
AV
2323 len = nargs[call];
2324 if (len > sizeof(a))
2325 return -EINVAL;
2326
1da177e4 2327 /* copy_from_user should be SMP safe. */
47379052 2328 if (copy_from_user(a, args, len))
1da177e4 2329 return -EFAULT;
3ec3b2fb 2330
2950fa9d
CG
2331 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2332 if (err)
2333 return err;
3ec3b2fb 2334
89bddce5
SH
2335 a0 = a[0];
2336 a1 = a[1];
2337
2338 switch (call) {
2339 case SYS_SOCKET:
2340 err = sys_socket(a0, a1, a[2]);
2341 break;
2342 case SYS_BIND:
2343 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2344 break;
2345 case SYS_CONNECT:
2346 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2347 break;
2348 case SYS_LISTEN:
2349 err = sys_listen(a0, a1);
2350 break;
2351 case SYS_ACCEPT:
de11defe
UD
2352 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2353 (int __user *)a[2], 0);
89bddce5
SH
2354 break;
2355 case SYS_GETSOCKNAME:
2356 err =
2357 sys_getsockname(a0, (struct sockaddr __user *)a1,
2358 (int __user *)a[2]);
2359 break;
2360 case SYS_GETPEERNAME:
2361 err =
2362 sys_getpeername(a0, (struct sockaddr __user *)a1,
2363 (int __user *)a[2]);
2364 break;
2365 case SYS_SOCKETPAIR:
2366 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2367 break;
2368 case SYS_SEND:
2369 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2370 break;
2371 case SYS_SENDTO:
2372 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2373 (struct sockaddr __user *)a[4], a[5]);
2374 break;
2375 case SYS_RECV:
2376 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2377 break;
2378 case SYS_RECVFROM:
2379 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2380 (struct sockaddr __user *)a[4],
2381 (int __user *)a[5]);
2382 break;
2383 case SYS_SHUTDOWN:
2384 err = sys_shutdown(a0, a1);
2385 break;
2386 case SYS_SETSOCKOPT:
2387 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2388 break;
2389 case SYS_GETSOCKOPT:
2390 err =
2391 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2392 (int __user *)a[4]);
2393 break;
2394 case SYS_SENDMSG:
666547ff 2395 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2396 break;
228e548e
AB
2397 case SYS_SENDMMSG:
2398 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2399 break;
89bddce5 2400 case SYS_RECVMSG:
666547ff 2401 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2402 break;
a2e27255
ACM
2403 case SYS_RECVMMSG:
2404 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2405 (struct timespec __user *)a[4]);
2406 break;
de11defe
UD
2407 case SYS_ACCEPT4:
2408 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2409 (int __user *)a[2], a[3]);
aaca0bdc 2410 break;
89bddce5
SH
2411 default:
2412 err = -EINVAL;
2413 break;
1da177e4
LT
2414 }
2415 return err;
2416}
2417
89bddce5 2418#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2419
55737fda
SH
2420/**
2421 * sock_register - add a socket protocol handler
2422 * @ops: description of protocol
2423 *
1da177e4
LT
2424 * This function is called by a protocol handler that wants to
2425 * advertise its address family, and have it linked into the
e793c0f7 2426 * socket interface. The value ops->family corresponds to the
55737fda 2427 * socket system call protocol family.
1da177e4 2428 */
f0fd27d4 2429int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2430{
2431 int err;
2432
2433 if (ops->family >= NPROTO) {
3410f22e 2434 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2435 return -ENOBUFS;
2436 }
55737fda
SH
2437
2438 spin_lock(&net_family_lock);
190683a9
ED
2439 if (rcu_dereference_protected(net_families[ops->family],
2440 lockdep_is_held(&net_family_lock)))
55737fda
SH
2441 err = -EEXIST;
2442 else {
cf778b00 2443 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2444 err = 0;
2445 }
55737fda
SH
2446 spin_unlock(&net_family_lock);
2447
3410f22e 2448 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2449 return err;
2450}
c6d409cf 2451EXPORT_SYMBOL(sock_register);
1da177e4 2452
55737fda
SH
2453/**
2454 * sock_unregister - remove a protocol handler
2455 * @family: protocol family to remove
2456 *
1da177e4
LT
2457 * This function is called by a protocol handler that wants to
2458 * remove its address family, and have it unlinked from the
55737fda
SH
2459 * new socket creation.
2460 *
2461 * If protocol handler is a module, then it can use module reference
2462 * counts to protect against new references. If protocol handler is not
2463 * a module then it needs to provide its own protection in
2464 * the ops->create routine.
1da177e4 2465 */
f0fd27d4 2466void sock_unregister(int family)
1da177e4 2467{
f0fd27d4 2468 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2469
55737fda 2470 spin_lock(&net_family_lock);
a9b3cd7f 2471 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2472 spin_unlock(&net_family_lock);
2473
2474 synchronize_rcu();
2475
3410f22e 2476 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2477}
c6d409cf 2478EXPORT_SYMBOL(sock_unregister);
1da177e4 2479
77d76ea3 2480static int __init sock_init(void)
1da177e4 2481{
b3e19d92 2482 int err;
2ca794e5
EB
2483 /*
2484 * Initialize the network sysctl infrastructure.
2485 */
2486 err = net_sysctl_init();
2487 if (err)
2488 goto out;
b3e19d92 2489
1da177e4 2490 /*
89bddce5 2491 * Initialize skbuff SLAB cache
1da177e4
LT
2492 */
2493 skb_init();
1da177e4
LT
2494
2495 /*
89bddce5 2496 * Initialize the protocols module.
1da177e4
LT
2497 */
2498
2499 init_inodecache();
b3e19d92
NP
2500
2501 err = register_filesystem(&sock_fs_type);
2502 if (err)
2503 goto out_fs;
1da177e4 2504 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2505 if (IS_ERR(sock_mnt)) {
2506 err = PTR_ERR(sock_mnt);
2507 goto out_mount;
2508 }
77d76ea3
AK
2509
2510 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2511 */
2512
2513#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2514 err = netfilter_init();
2515 if (err)
2516 goto out;
1da177e4 2517#endif
cbeb321a 2518
408eccce 2519 ptp_classifier_init();
c1f19b51 2520
b3e19d92
NP
2521out:
2522 return err;
2523
2524out_mount:
2525 unregister_filesystem(&sock_fs_type);
2526out_fs:
2527 goto out;
1da177e4
LT
2528}
2529
77d76ea3
AK
2530core_initcall(sock_init); /* early initcall */
2531
1da177e4
LT
2532#ifdef CONFIG_PROC_FS
2533void socket_seq_show(struct seq_file *seq)
2534{
2535 int cpu;
2536 int counter = 0;
2537
6f912042 2538 for_each_possible_cpu(cpu)
89bddce5 2539 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2540
2541 /* It can be negative, by the way. 8) */
2542 if (counter < 0)
2543 counter = 0;
2544
2545 seq_printf(seq, "sockets: used %d\n", counter);
2546}
89bddce5 2547#endif /* CONFIG_PROC_FS */
1da177e4 2548
89bbfc95 2549#ifdef CONFIG_COMPAT
6b96018b 2550static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2551 unsigned int cmd, void __user *up)
7a229387 2552{
7a229387
AB
2553 mm_segment_t old_fs = get_fs();
2554 struct timeval ktv;
2555 int err;
2556
2557 set_fs(KERNEL_DS);
6b96018b 2558 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2559 set_fs(old_fs);
644595f8 2560 if (!err)
ed6fe9d6 2561 err = compat_put_timeval(&ktv, up);
644595f8 2562
7a229387
AB
2563 return err;
2564}
2565
6b96018b 2566static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2567 unsigned int cmd, void __user *up)
7a229387 2568{
7a229387
AB
2569 mm_segment_t old_fs = get_fs();
2570 struct timespec kts;
2571 int err;
2572
2573 set_fs(KERNEL_DS);
6b96018b 2574 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2575 set_fs(old_fs);
644595f8 2576 if (!err)
ed6fe9d6 2577 err = compat_put_timespec(&kts, up);
644595f8 2578
7a229387
AB
2579 return err;
2580}
2581
6b96018b 2582static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2583{
2584 struct ifreq __user *uifr;
2585 int err;
2586
2587 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2588 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2589 return -EFAULT;
2590
6b96018b 2591 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2592 if (err)
2593 return err;
2594
6b96018b 2595 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2596 return -EFAULT;
2597
2598 return 0;
2599}
2600
6b96018b 2601static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2602{
6b96018b 2603 struct compat_ifconf ifc32;
7a229387
AB
2604 struct ifconf ifc;
2605 struct ifconf __user *uifc;
6b96018b 2606 struct compat_ifreq __user *ifr32;
7a229387
AB
2607 struct ifreq __user *ifr;
2608 unsigned int i, j;
2609 int err;
2610
6b96018b 2611 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2612 return -EFAULT;
2613
43da5f2e 2614 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2615 if (ifc32.ifcbuf == 0) {
2616 ifc32.ifc_len = 0;
2617 ifc.ifc_len = 0;
2618 ifc.ifc_req = NULL;
2619 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2620 } else {
c6d409cf
ED
2621 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2622 sizeof(struct ifreq);
7a229387
AB
2623 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2624 ifc.ifc_len = len;
2625 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2626 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2627 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2628 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2629 return -EFAULT;
2630 ifr++;
2631 ifr32++;
2632 }
2633 }
2634 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2635 return -EFAULT;
2636
6b96018b 2637 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2638 if (err)
2639 return err;
2640
2641 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2642 return -EFAULT;
2643
2644 ifr = ifc.ifc_req;
2645 ifr32 = compat_ptr(ifc32.ifcbuf);
2646 for (i = 0, j = 0;
c6d409cf
ED
2647 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2648 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2649 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2650 return -EFAULT;
2651 ifr32++;
2652 ifr++;
2653 }
2654
2655 if (ifc32.ifcbuf == 0) {
2656 /* Translate from 64-bit structure multiple to
2657 * a 32-bit one.
2658 */
2659 i = ifc.ifc_len;
6b96018b 2660 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2661 ifc32.ifc_len = i;
2662 } else {
2663 ifc32.ifc_len = i;
2664 }
6b96018b 2665 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2666 return -EFAULT;
2667
2668 return 0;
2669}
2670
6b96018b 2671static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2672{
3a7da39d
BH
2673 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2674 bool convert_in = false, convert_out = false;
2675 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2676 struct ethtool_rxnfc __user *rxnfc;
7a229387 2677 struct ifreq __user *ifr;
3a7da39d
BH
2678 u32 rule_cnt = 0, actual_rule_cnt;
2679 u32 ethcmd;
7a229387 2680 u32 data;
3a7da39d 2681 int ret;
7a229387 2682
3a7da39d
BH
2683 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2684 return -EFAULT;
7a229387 2685
3a7da39d
BH
2686 compat_rxnfc = compat_ptr(data);
2687
2688 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2689 return -EFAULT;
2690
3a7da39d
BH
2691 /* Most ethtool structures are defined without padding.
2692 * Unfortunately struct ethtool_rxnfc is an exception.
2693 */
2694 switch (ethcmd) {
2695 default:
2696 break;
2697 case ETHTOOL_GRXCLSRLALL:
2698 /* Buffer size is variable */
2699 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2700 return -EFAULT;
2701 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2702 return -ENOMEM;
2703 buf_size += rule_cnt * sizeof(u32);
2704 /* fall through */
2705 case ETHTOOL_GRXRINGS:
2706 case ETHTOOL_GRXCLSRLCNT:
2707 case ETHTOOL_GRXCLSRULE:
55664f32 2708 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2709 convert_out = true;
2710 /* fall through */
2711 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2712 buf_size += sizeof(struct ethtool_rxnfc);
2713 convert_in = true;
2714 break;
2715 }
2716
2717 ifr = compat_alloc_user_space(buf_size);
954b1244 2718 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2719
2720 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2721 return -EFAULT;
2722
3a7da39d
BH
2723 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2724 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2725 return -EFAULT;
2726
3a7da39d 2727 if (convert_in) {
127fe533 2728 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2729 * fs.ring_cookie and at the end of fs, but nowhere else.
2730 */
127fe533
AD
2731 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2732 sizeof(compat_rxnfc->fs.m_ext) !=
2733 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2734 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2735 BUILD_BUG_ON(
2736 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2737 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2738 offsetof(struct ethtool_rxnfc, fs.location) -
2739 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2740
2741 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2742 (void __user *)(&rxnfc->fs.m_ext + 1) -
2743 (void __user *)rxnfc) ||
3a7da39d
BH
2744 copy_in_user(&rxnfc->fs.ring_cookie,
2745 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2746 (void __user *)(&rxnfc->fs.location + 1) -
2747 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2748 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2749 sizeof(rxnfc->rule_cnt)))
2750 return -EFAULT;
2751 }
2752
2753 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2754 if (ret)
2755 return ret;
2756
2757 if (convert_out) {
2758 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2759 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2760 (const void __user *)rxnfc) ||
3a7da39d
BH
2761 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2762 &rxnfc->fs.ring_cookie,
954b1244
SH
2763 (const void __user *)(&rxnfc->fs.location + 1) -
2764 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2765 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2766 sizeof(rxnfc->rule_cnt)))
2767 return -EFAULT;
2768
2769 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2770 /* As an optimisation, we only copy the actual
2771 * number of rules that the underlying
2772 * function returned. Since Mallory might
2773 * change the rule count in user memory, we
2774 * check that it is less than the rule count
2775 * originally given (as the user buffer size),
2776 * which has been range-checked.
2777 */
2778 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2779 return -EFAULT;
2780 if (actual_rule_cnt < rule_cnt)
2781 rule_cnt = actual_rule_cnt;
2782 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2783 &rxnfc->rule_locs[0],
2784 rule_cnt * sizeof(u32)))
2785 return -EFAULT;
2786 }
2787 }
2788
2789 return 0;
7a229387
AB
2790}
2791
7a50a240
AB
2792static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2793{
2794 void __user *uptr;
2795 compat_uptr_t uptr32;
2796 struct ifreq __user *uifr;
2797
c6d409cf 2798 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2799 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2800 return -EFAULT;
2801
2802 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2803 return -EFAULT;
2804
2805 uptr = compat_ptr(uptr32);
2806
2807 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2808 return -EFAULT;
2809
2810 return dev_ioctl(net, SIOCWANDEV, uifr);
2811}
2812
6b96018b
AB
2813static int bond_ioctl(struct net *net, unsigned int cmd,
2814 struct compat_ifreq __user *ifr32)
7a229387
AB
2815{
2816 struct ifreq kifr;
7a229387
AB
2817 mm_segment_t old_fs;
2818 int err;
7a229387
AB
2819
2820 switch (cmd) {
2821 case SIOCBONDENSLAVE:
2822 case SIOCBONDRELEASE:
2823 case SIOCBONDSETHWADDR:
2824 case SIOCBONDCHANGEACTIVE:
6b96018b 2825 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2826 return -EFAULT;
2827
2828 old_fs = get_fs();
c6d409cf 2829 set_fs(KERNEL_DS);
c3f52ae6 2830 err = dev_ioctl(net, cmd,
2831 (struct ifreq __user __force *) &kifr);
c6d409cf 2832 set_fs(old_fs);
7a229387
AB
2833
2834 return err;
7a229387 2835 default:
07d106d0 2836 return -ENOIOCTLCMD;
ccbd6a5a 2837 }
7a229387
AB
2838}
2839
590d4693
BH
2840/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2841static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2842 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2843{
2844 struct ifreq __user *u_ifreq64;
7a229387
AB
2845 char tmp_buf[IFNAMSIZ];
2846 void __user *data64;
2847 u32 data32;
2848
2849 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2850 IFNAMSIZ))
2851 return -EFAULT;
417c3522 2852 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2853 return -EFAULT;
2854 data64 = compat_ptr(data32);
2855
2856 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2857
7a229387
AB
2858 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2859 IFNAMSIZ))
2860 return -EFAULT;
417c3522 2861 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2862 return -EFAULT;
2863
6b96018b 2864 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2865}
2866
6b96018b
AB
2867static int dev_ifsioc(struct net *net, struct socket *sock,
2868 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2869{
a2116ed2 2870 struct ifreq __user *uifr;
7a229387
AB
2871 int err;
2872
a2116ed2
AB
2873 uifr = compat_alloc_user_space(sizeof(*uifr));
2874 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2875 return -EFAULT;
2876
2877 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2878
7a229387
AB
2879 if (!err) {
2880 switch (cmd) {
2881 case SIOCGIFFLAGS:
2882 case SIOCGIFMETRIC:
2883 case SIOCGIFMTU:
2884 case SIOCGIFMEM:
2885 case SIOCGIFHWADDR:
2886 case SIOCGIFINDEX:
2887 case SIOCGIFADDR:
2888 case SIOCGIFBRDADDR:
2889 case SIOCGIFDSTADDR:
2890 case SIOCGIFNETMASK:
fab2532b 2891 case SIOCGIFPFLAGS:
7a229387 2892 case SIOCGIFTXQLEN:
fab2532b
AB
2893 case SIOCGMIIPHY:
2894 case SIOCGMIIREG:
a2116ed2 2895 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2896 err = -EFAULT;
2897 break;
2898 }
2899 }
2900 return err;
2901}
2902
a2116ed2
AB
2903static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2904 struct compat_ifreq __user *uifr32)
2905{
2906 struct ifreq ifr;
2907 struct compat_ifmap __user *uifmap32;
2908 mm_segment_t old_fs;
2909 int err;
2910
2911 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2912 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2913 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2914 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2915 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2916 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2917 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2918 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2919 if (err)
2920 return -EFAULT;
2921
2922 old_fs = get_fs();
c6d409cf 2923 set_fs(KERNEL_DS);
c3f52ae6 2924 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2925 set_fs(old_fs);
a2116ed2
AB
2926
2927 if (cmd == SIOCGIFMAP && !err) {
2928 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2929 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2930 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2931 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2932 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2933 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2934 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2935 if (err)
2936 err = -EFAULT;
2937 }
2938 return err;
2939}
2940
7a229387 2941struct rtentry32 {
c6d409cf 2942 u32 rt_pad1;
7a229387
AB
2943 struct sockaddr rt_dst; /* target address */
2944 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2945 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2946 unsigned short rt_flags;
2947 short rt_pad2;
2948 u32 rt_pad3;
2949 unsigned char rt_tos;
2950 unsigned char rt_class;
2951 short rt_pad4;
2952 short rt_metric; /* +1 for binary compatibility! */
7a229387 2953 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2954 u32 rt_mtu; /* per route MTU/Window */
2955 u32 rt_window; /* Window clamping */
7a229387
AB
2956 unsigned short rt_irtt; /* Initial RTT */
2957};
2958
2959struct in6_rtmsg32 {
2960 struct in6_addr rtmsg_dst;
2961 struct in6_addr rtmsg_src;
2962 struct in6_addr rtmsg_gateway;
2963 u32 rtmsg_type;
2964 u16 rtmsg_dst_len;
2965 u16 rtmsg_src_len;
2966 u32 rtmsg_metric;
2967 u32 rtmsg_info;
2968 u32 rtmsg_flags;
2969 s32 rtmsg_ifindex;
2970};
2971
6b96018b
AB
2972static int routing_ioctl(struct net *net, struct socket *sock,
2973 unsigned int cmd, void __user *argp)
7a229387
AB
2974{
2975 int ret;
2976 void *r = NULL;
2977 struct in6_rtmsg r6;
2978 struct rtentry r4;
2979 char devname[16];
2980 u32 rtdev;
2981 mm_segment_t old_fs = get_fs();
2982
6b96018b
AB
2983 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2984 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2985 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2986 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2987 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2988 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2989 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2990 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2991 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2992 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2993 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
2994
2995 r = (void *) &r6;
2996 } else { /* ipv4 */
6b96018b 2997 struct rtentry32 __user *ur4 = argp;
c6d409cf 2998 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 2999 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3000 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3001 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3002 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3003 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3004 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3005 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3006 if (rtdev) {
c6d409cf 3007 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3008 r4.rt_dev = (char __user __force *)devname;
3009 devname[15] = 0;
7a229387
AB
3010 } else
3011 r4.rt_dev = NULL;
3012
3013 r = (void *) &r4;
3014 }
3015
3016 if (ret) {
3017 ret = -EFAULT;
3018 goto out;
3019 }
3020
c6d409cf 3021 set_fs(KERNEL_DS);
6b96018b 3022 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3023 set_fs(old_fs);
7a229387
AB
3024
3025out:
7a229387
AB
3026 return ret;
3027}
3028
3029/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3030 * for some operations; this forces use of the newer bridge-utils that
25985edc 3031 * use compatible ioctls
7a229387 3032 */
6b96018b 3033static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3034{
6b96018b 3035 compat_ulong_t tmp;
7a229387 3036
6b96018b 3037 if (get_user(tmp, argp))
7a229387
AB
3038 return -EFAULT;
3039 if (tmp == BRCTL_GET_VERSION)
3040 return BRCTL_VERSION + 1;
3041 return -EINVAL;
3042}
3043
6b96018b
AB
3044static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3045 unsigned int cmd, unsigned long arg)
3046{
3047 void __user *argp = compat_ptr(arg);
3048 struct sock *sk = sock->sk;
3049 struct net *net = sock_net(sk);
7a229387 3050
6b96018b 3051 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3052 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3053
3054 switch (cmd) {
3055 case SIOCSIFBR:
3056 case SIOCGIFBR:
3057 return old_bridge_ioctl(argp);
3058 case SIOCGIFNAME:
3059 return dev_ifname32(net, argp);
3060 case SIOCGIFCONF:
3061 return dev_ifconf(net, argp);
3062 case SIOCETHTOOL:
3063 return ethtool_ioctl(net, argp);
7a50a240
AB
3064 case SIOCWANDEV:
3065 return compat_siocwandev(net, argp);
a2116ed2
AB
3066 case SIOCGIFMAP:
3067 case SIOCSIFMAP:
3068 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3069 case SIOCBONDENSLAVE:
3070 case SIOCBONDRELEASE:
3071 case SIOCBONDSETHWADDR:
6b96018b
AB
3072 case SIOCBONDCHANGEACTIVE:
3073 return bond_ioctl(net, cmd, argp);
3074 case SIOCADDRT:
3075 case SIOCDELRT:
3076 return routing_ioctl(net, sock, cmd, argp);
3077 case SIOCGSTAMP:
3078 return do_siocgstamp(net, sock, cmd, argp);
3079 case SIOCGSTAMPNS:
3080 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3081 case SIOCBONDSLAVEINFOQUERY:
3082 case SIOCBONDINFOQUERY:
a2116ed2 3083 case SIOCSHWTSTAMP:
fd468c74 3084 case SIOCGHWTSTAMP:
590d4693 3085 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3086
3087 case FIOSETOWN:
3088 case SIOCSPGRP:
3089 case FIOGETOWN:
3090 case SIOCGPGRP:
3091 case SIOCBRADDBR:
3092 case SIOCBRDELBR:
3093 case SIOCGIFVLAN:
3094 case SIOCSIFVLAN:
3095 case SIOCADDDLCI:
3096 case SIOCDELDLCI:
3097 return sock_ioctl(file, cmd, arg);
3098
3099 case SIOCGIFFLAGS:
3100 case SIOCSIFFLAGS:
3101 case SIOCGIFMETRIC:
3102 case SIOCSIFMETRIC:
3103 case SIOCGIFMTU:
3104 case SIOCSIFMTU:
3105 case SIOCGIFMEM:
3106 case SIOCSIFMEM:
3107 case SIOCGIFHWADDR:
3108 case SIOCSIFHWADDR:
3109 case SIOCADDMULTI:
3110 case SIOCDELMULTI:
3111 case SIOCGIFINDEX:
6b96018b
AB
3112 case SIOCGIFADDR:
3113 case SIOCSIFADDR:
3114 case SIOCSIFHWBROADCAST:
6b96018b 3115 case SIOCDIFADDR:
6b96018b
AB
3116 case SIOCGIFBRDADDR:
3117 case SIOCSIFBRDADDR:
3118 case SIOCGIFDSTADDR:
3119 case SIOCSIFDSTADDR:
3120 case SIOCGIFNETMASK:
3121 case SIOCSIFNETMASK:
3122 case SIOCSIFPFLAGS:
3123 case SIOCGIFPFLAGS:
3124 case SIOCGIFTXQLEN:
3125 case SIOCSIFTXQLEN:
3126 case SIOCBRADDIF:
3127 case SIOCBRDELIF:
9177efd3
AB
3128 case SIOCSIFNAME:
3129 case SIOCGMIIPHY:
3130 case SIOCGMIIREG:
3131 case SIOCSMIIREG:
6b96018b 3132 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3133
6b96018b
AB
3134 case SIOCSARP:
3135 case SIOCGARP:
3136 case SIOCDARP:
6b96018b 3137 case SIOCATMARK:
9177efd3
AB
3138 return sock_do_ioctl(net, sock, cmd, arg);
3139 }
3140
6b96018b
AB
3141 return -ENOIOCTLCMD;
3142}
7a229387 3143
95c96174 3144static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3145 unsigned long arg)
89bbfc95
SP
3146{
3147 struct socket *sock = file->private_data;
3148 int ret = -ENOIOCTLCMD;
87de87d5
DM
3149 struct sock *sk;
3150 struct net *net;
3151
3152 sk = sock->sk;
3153 net = sock_net(sk);
89bbfc95
SP
3154
3155 if (sock->ops->compat_ioctl)
3156 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3157
87de87d5
DM
3158 if (ret == -ENOIOCTLCMD &&
3159 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3160 ret = compat_wext_handle_ioctl(net, cmd, arg);
3161
6b96018b
AB
3162 if (ret == -ENOIOCTLCMD)
3163 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3164
89bbfc95
SP
3165 return ret;
3166}
3167#endif
3168
ac5a488e
SS
3169int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3170{
3171 return sock->ops->bind(sock, addr, addrlen);
3172}
c6d409cf 3173EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3174
3175int kernel_listen(struct socket *sock, int backlog)
3176{
3177 return sock->ops->listen(sock, backlog);
3178}
c6d409cf 3179EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3180
3181int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3182{
3183 struct sock *sk = sock->sk;
3184 int err;
3185
3186 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3187 newsock);
3188 if (err < 0)
3189 goto done;
3190
3191 err = sock->ops->accept(sock, *newsock, flags);
3192 if (err < 0) {
3193 sock_release(*newsock);
fa8705b0 3194 *newsock = NULL;
ac5a488e
SS
3195 goto done;
3196 }
3197
3198 (*newsock)->ops = sock->ops;
1b08534e 3199 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3200
3201done:
3202 return err;
3203}
c6d409cf 3204EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3205
3206int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3207 int flags)
ac5a488e
SS
3208{
3209 return sock->ops->connect(sock, addr, addrlen, flags);
3210}
c6d409cf 3211EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3212
3213int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3214 int *addrlen)
3215{
3216 return sock->ops->getname(sock, addr, addrlen, 0);
3217}
c6d409cf 3218EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3219
3220int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3221 int *addrlen)
3222{
3223 return sock->ops->getname(sock, addr, addrlen, 1);
3224}
c6d409cf 3225EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3226
3227int kernel_getsockopt(struct socket *sock, int level, int optname,
3228 char *optval, int *optlen)
3229{
3230 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3231 char __user *uoptval;
3232 int __user *uoptlen;
ac5a488e
SS
3233 int err;
3234
fb8621bb
NK
3235 uoptval = (char __user __force *) optval;
3236 uoptlen = (int __user __force *) optlen;
3237
ac5a488e
SS
3238 set_fs(KERNEL_DS);
3239 if (level == SOL_SOCKET)
fb8621bb 3240 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3241 else
fb8621bb
NK
3242 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3243 uoptlen);
ac5a488e
SS
3244 set_fs(oldfs);
3245 return err;
3246}
c6d409cf 3247EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3248
3249int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3250 char *optval, unsigned int optlen)
ac5a488e
SS
3251{
3252 mm_segment_t oldfs = get_fs();
fb8621bb 3253 char __user *uoptval;
ac5a488e
SS
3254 int err;
3255
fb8621bb
NK
3256 uoptval = (char __user __force *) optval;
3257
ac5a488e
SS
3258 set_fs(KERNEL_DS);
3259 if (level == SOL_SOCKET)
fb8621bb 3260 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3261 else
fb8621bb 3262 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3263 optlen);
3264 set_fs(oldfs);
3265 return err;
3266}
c6d409cf 3267EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3268
3269int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3270 size_t size, int flags)
3271{
3272 if (sock->ops->sendpage)
3273 return sock->ops->sendpage(sock, page, offset, size, flags);
3274
3275 return sock_no_sendpage(sock, page, offset, size, flags);
3276}
c6d409cf 3277EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3278
3279int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3280{
3281 mm_segment_t oldfs = get_fs();
3282 int err;
3283
3284 set_fs(KERNEL_DS);
3285 err = sock->ops->ioctl(sock, cmd, arg);
3286 set_fs(oldfs);
3287
3288 return err;
3289}
c6d409cf 3290EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3291
91cf45f0
TM
3292int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3293{
3294 return sock->ops->shutdown(sock, how);
3295}
91cf45f0 3296EXPORT_SYMBOL(kernel_sock_shutdown);