]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
net: Allow MSG_EOR in each msghdr of sendmmsg
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Global busy-poll settings, only built with CONFIG_NET_RX_BUSY_POLL.
 * NOTE(review): presumably exported through sysctl, judging by the
 * names; the registration is not in this part of the file.
 */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
c74a1cbb
AV
323static struct dentry *sockfs_mount(struct file_system_type *fs_type,
324 int flags, const char *dev_name, void *data)
325{
326 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
327 &sockfs_dentry_operations, SOCKFS_MAGIC);
328}
329
330static struct vfsmount *sock_mnt __read_mostly;
331
332static struct file_system_type sock_fs_type = {
333 .name = "sockfs",
334 .mount = sockfs_mount,
335 .kill_sb = kill_anon_super,
336};
337
1da177e4
LT
338/*
339 * Obtains the first available file descriptor and sets it up for use.
340 *
39d8c1b6
DM
341 * These functions create file structures and maps them to fd space
342 * of the current process. On success it returns file descriptor
1da177e4
LT
343 * and file struct implicitly stored in sock->file.
344 * Note that another thread may close file descriptor before we return
345 * from this function. We use the fact that now we do not refer
346 * to socket after mapping. If one day we will need it, this
347 * function will increment ref. count on file by 1.
348 *
349 * In any case returned fd MAY BE not valid!
350 * This race condition is unavoidable
351 * with shared fd spaces, we cannot solve it inside kernel,
352 * but we take care of internal coherence yet.
353 */
354
aab174f0 355struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 356{
7cbe66b6 357 struct qstr name = { .name = "" };
2c48b9c4 358 struct path path;
7cbe66b6 359 struct file *file;
1da177e4 360
600e1779
MY
361 if (dname) {
362 name.name = dname;
363 name.len = strlen(name.name);
364 } else if (sock->sk) {
365 name.name = sock->sk->sk_prot_creator->name;
366 name.len = strlen(name.name);
367 }
4b936885 368 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
369 if (unlikely(!path.dentry))
370 return ERR_PTR(-ENOMEM);
2c48b9c4 371 path.mnt = mntget(sock_mnt);
39d8c1b6 372
2c48b9c4 373 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 374
2c48b9c4 375 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 376 &socket_file_ops);
b5ffe634 377 if (IS_ERR(file)) {
cc3808f8 378 /* drop dentry, keep inode */
c5ef6035 379 ihold(d_inode(path.dentry));
2c48b9c4 380 path_put(&path);
39b65252 381 return file;
cc3808f8
AV
382 }
383
384 sock->file = file;
77d27200 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 386 file->private_data = sock;
28407630 387 return file;
39d8c1b6 388}
56b31d1c 389EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 390
56b31d1c 391static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
392{
393 struct file *newfile;
28407630
AV
394 int fd = get_unused_fd_flags(flags);
395 if (unlikely(fd < 0))
396 return fd;
39d8c1b6 397
aab174f0 398 newfile = sock_alloc_file(sock, flags, NULL);
28407630 399 if (likely(!IS_ERR(newfile))) {
39d8c1b6 400 fd_install(fd, newfile);
28407630
AV
401 return fd;
402 }
7cbe66b6 403
28407630
AV
404 put_unused_fd(fd);
405 return PTR_ERR(newfile);
1da177e4
LT
406}
407
406a3c63 408struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca 415}
406a3c63 416EXPORT_SYMBOL(sock_from_file);
6cb153ca 417
1da177e4 418/**
c6d409cf 419 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
c6d409cf 447EXPORT_SYMBOL(sockfd_lookup);
1da177e4 448
6cb153ca
BL
449static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
450{
00e188ef 451 struct fd f = fdget(fd);
6cb153ca
BL
452 struct socket *sock;
453
3672558c 454 *err = -EBADF;
00e188ef
AV
455 if (f.file) {
456 sock = sock_from_file(f.file, err);
457 if (likely(sock)) {
458 *fput_needed = f.flags;
6cb153ca 459 return sock;
00e188ef
AV
460 }
461 fdput(f);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
600e1779
MY
466#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
467#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
468#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
469static ssize_t sockfs_getxattr(struct dentry *dentry,
470 const char *name, void *value, size_t size)
471{
472 const char *proto_name;
473 size_t proto_size;
474 int error;
475
476 error = -ENODATA;
477 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
478 proto_name = dentry->d_name.name;
479 proto_size = strlen(proto_name);
480
481 if (value) {
482 error = -ERANGE;
483 if (proto_size + 1 > size)
484 goto out;
485
486 strncpy(value, proto_name, proto_size + 1);
487 }
488 error = proto_size + 1;
489 }
490
491out:
492 return error;
493}
494
495static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
496 size_t size)
497{
498 ssize_t len;
499 ssize_t used = 0;
500
c5ef6035 501 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
502 if (len < 0)
503 return len;
504 used += len;
505 if (buffer) {
506 if (size < used)
507 return -ERANGE;
508 buffer += len;
509 }
510
511 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
512 used += len;
513 if (buffer) {
514 if (size < used)
515 return -ERANGE;
516 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
517 buffer += len;
518 }
519
520 return used;
521}
522
523static const struct inode_operations sockfs_inode_ops = {
524 .getxattr = sockfs_getxattr,
525 .listxattr = sockfs_listxattr,
526};
527
1da177e4
LT
528/**
529 * sock_alloc - allocate a socket
89bddce5 530 *
1da177e4
LT
531 * Allocate a new inode and socket object. The two are bound together
532 * and initialised. The socket is then returned. If we are out of inodes
533 * NULL is returned.
534 */
535
f4a00aac 536struct socket *sock_alloc(void)
1da177e4 537{
89bddce5
SH
538 struct inode *inode;
539 struct socket *sock;
1da177e4 540
a209dfc7 541 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
542 if (!inode)
543 return NULL;
544
545 sock = SOCKET_I(inode);
546
29a020d3 547 kmemcheck_annotate_bitfield(sock, type);
85fe4025 548 inode->i_ino = get_next_ino();
89bddce5 549 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
550 inode->i_uid = current_fsuid();
551 inode->i_gid = current_fsgid();
600e1779 552 inode->i_op = &sockfs_inode_ops;
1da177e4 553
19e8d69c 554 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
555 return sock;
556}
f4a00aac 557EXPORT_SYMBOL(sock_alloc);
1da177e4 558
1da177e4
LT
559/**
560 * sock_release - close a socket
561 * @sock: socket to close
562 *
563 * The socket is released from the protocol stack if it has a release
564 * callback, and the inode is then released if the socket is bound to
89bddce5 565 * an inode not a file.
1da177e4 566 */
89bddce5 567
1da177e4
LT
568void sock_release(struct socket *sock)
569{
570 if (sock->ops) {
571 struct module *owner = sock->ops->owner;
572
573 sock->ops->release(sock);
574 sock->ops = NULL;
575 module_put(owner);
576 }
577
eaefd110 578 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 579 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 580
19e8d69c 581 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
582 if (!sock->file) {
583 iput(SOCK_INODE(sock));
584 return;
585 }
89bddce5 586 sock->file = NULL;
1da177e4 587}
c6d409cf 588EXPORT_SYMBOL(sock_release);
1da177e4 589
67cc0d40 590void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 591{
140c55d4
ED
592 u8 flags = *tx_flags;
593
b9f40e21 594 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
595 flags |= SKBTX_HW_TSTAMP;
596
b9f40e21 597 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
598 flags |= SKBTX_SW_TSTAMP;
599
e7fd2885 600 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
601 flags |= SKBTX_SCHED_TSTAMP;
602
e1c8a607 603 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 604 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 605
140c55d4 606 *tx_flags = flags;
20d49473 607}
67cc0d40 608EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 609
d8725c86 610static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 611{
01e97e65 612 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
613 BUG_ON(ret == -EIOCBQUEUED);
614 return ret;
1da177e4
LT
615}
616
/*
 * Send @msg on @sock.  The security hook is consulted first; a non-zero
 * result from it is returned as is, otherwise the message is handed to
 * the protocol and its result is returned.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg, msg_data_left(msg));

	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
625
626int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
627 struct kvec *vec, size_t num, size_t size)
628{
6aa24814 629 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 630 return sock_sendmsg(sock, msg);
1da177e4 631}
c6d409cf 632EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 633
92f37fd2
ED
634/*
635 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
636 */
637void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
638 struct sk_buff *skb)
639{
20d49473 640 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 641 struct scm_timestamping tss;
20d49473
PO
642 int empty = 1;
643 struct skb_shared_hwtstamps *shhwtstamps =
644 skb_hwtstamps(skb);
645
646 /* Race occurred between timestamp enabling and packet
647 receiving. Fill in the current time for now. */
648 if (need_software_tstamp && skb->tstamp.tv64 == 0)
649 __net_timestamp(skb);
650
651 if (need_software_tstamp) {
652 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
653 struct timeval tv;
654 skb_get_timestamp(skb, &tv);
655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
656 sizeof(tv), &tv);
657 } else {
f24b9be5
WB
658 struct timespec ts;
659 skb_get_timestampns(skb, &ts);
20d49473 660 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 661 sizeof(ts), &ts);
20d49473
PO
662 }
663 }
664
f24b9be5 665 memset(&tss, 0, sizeof(tss));
c199105d 666 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 667 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 668 empty = 0;
4d276eb6 669 if (shhwtstamps &&
b9f40e21 670 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 671 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 672 empty = 0;
20d49473
PO
673 if (!empty)
674 put_cmsg(msg, SOL_SOCKET,
f24b9be5 675 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 676}
7c81fd8b
ACM
677EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
678
6e3e939f
JB
679void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
680 struct sk_buff *skb)
681{
682 int ack;
683
684 if (!sock_flag(sk, SOCK_WIFI_STATUS))
685 return;
686 if (!skb->wifi_acked_valid)
687 return;
688
689 ack = skb->wifi_acked;
690
691 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
692}
693EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
694
11165f14 695static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
696 struct sk_buff *skb)
3b885787 697{
744d5a3e 698 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 699 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 700 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
701}
702
/*
 * Add both the timestamp and the drop-count control messages for @skb
 * to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);

	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 710
1b784140
YX
711static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
712 size_t size, int flags)
1da177e4 713{
1b784140 714 return sock->ops->recvmsg(sock, msg, size, flags);
1da177e4
LT
715}
716
1b784140
YX
717int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
718 int flags)
a2e27255
ACM
719{
720 int err = security_socket_recvmsg(sock, msg, size, flags);
721
1b784140 722 return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
1da177e4 723}
c6d409cf 724EXPORT_SYMBOL(sock_recvmsg);
1da177e4 725
c1249c0a
ML
726/**
727 * kernel_recvmsg - Receive a message from a socket (kernel space)
728 * @sock: The socket to receive the message from
729 * @msg: Received message
730 * @vec: Input s/g array for message data
731 * @num: Size of input s/g array
732 * @size: Number of bytes to read
733 * @flags: Message flags (MSG_DONTWAIT, etc...)
734 *
735 * On return the msg structure contains the scatter/gather array passed in the
736 * vec argument. The array is modified so that it consists of the unfilled
737 * portion of the original array.
738 *
739 * The returned value is the total number of bytes received, or an error.
740 */
89bddce5
SH
741int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
742 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
743{
744 mm_segment_t oldfs = get_fs();
745 int result;
746
6aa24814 747 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 748 set_fs(KERNEL_DS);
1da177e4
LT
749 result = sock_recvmsg(sock, msg, size, flags);
750 set_fs(oldfs);
751 return result;
752}
c6d409cf 753EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 754
ce1d4d3e
CH
755static ssize_t sock_sendpage(struct file *file, struct page *page,
756 int offset, size_t size, loff_t *ppos, int more)
1da177e4 757{
1da177e4
LT
758 struct socket *sock;
759 int flags;
760
ce1d4d3e
CH
761 sock = file->private_data;
762
35f9c09f
ED
763 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
764 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
765 flags |= more;
ce1d4d3e 766
e6949583 767 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 768}
1da177e4 769
9c55e01c 770static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 771 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
772 unsigned int flags)
773{
774 struct socket *sock = file->private_data;
775
997b37da
RDC
776 if (unlikely(!sock->ops->splice_read))
777 return -EINVAL;
778
9c55e01c
JA
779 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
780}
781
8ae5e030 782static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 783{
6d652330
AV
784 struct file *file = iocb->ki_filp;
785 struct socket *sock = file->private_data;
0345f931 786 struct msghdr msg = {.msg_iter = *to,
787 .msg_iocb = iocb};
8ae5e030 788 ssize_t res;
ce1d4d3e 789
8ae5e030
AV
790 if (file->f_flags & O_NONBLOCK)
791 msg.msg_flags = MSG_DONTWAIT;
792
793 if (iocb->ki_pos != 0)
1da177e4 794 return -ESPIPE;
027445c3 795
66ee59af 796 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
797 return 0;
798
237dae88 799 res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
8ae5e030
AV
800 *to = msg.msg_iter;
801 return res;
1da177e4
LT
802}
803
8ae5e030 804static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 805{
6d652330
AV
806 struct file *file = iocb->ki_filp;
807 struct socket *sock = file->private_data;
0345f931 808 struct msghdr msg = {.msg_iter = *from,
809 .msg_iocb = iocb};
8ae5e030 810 ssize_t res;
1da177e4 811
8ae5e030 812 if (iocb->ki_pos != 0)
ce1d4d3e 813 return -ESPIPE;
027445c3 814
8ae5e030
AV
815 if (file->f_flags & O_NONBLOCK)
816 msg.msg_flags = MSG_DONTWAIT;
817
6d652330
AV
818 if (sock->type == SOCK_SEQPACKET)
819 msg.msg_flags |= MSG_EOR;
820
d8725c86 821 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
822 *from = msg.msg_iter;
823 return res;
1da177e4
LT
824}
825
1da177e4
LT
826/*
827 * Atomic setting of ioctl hooks to avoid race
828 * with module unload.
829 */
830
4a3e2f71 831static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 832static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 833
881d966b 834void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 835{
4a3e2f71 836 mutex_lock(&br_ioctl_mutex);
1da177e4 837 br_ioctl_hook = hook;
4a3e2f71 838 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
839}
840EXPORT_SYMBOL(brioctl_set);
841
4a3e2f71 842static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 843static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 844
881d966b 845void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 846{
4a3e2f71 847 mutex_lock(&vlan_ioctl_mutex);
1da177e4 848 vlan_ioctl_hook = hook;
4a3e2f71 849 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
850}
851EXPORT_SYMBOL(vlan_ioctl_set);
852
4a3e2f71 853static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 854static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 855
89bddce5 856void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 857{
4a3e2f71 858 mutex_lock(&dlci_ioctl_mutex);
1da177e4 859 dlci_ioctl_hook = hook;
4a3e2f71 860 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
861}
862EXPORT_SYMBOL(dlci_ioctl_set);
863
6b96018b
AB
864static long sock_do_ioctl(struct net *net, struct socket *sock,
865 unsigned int cmd, unsigned long arg)
866{
867 int err;
868 void __user *argp = (void __user *)arg;
869
870 err = sock->ops->ioctl(sock, cmd, arg);
871
872 /*
873 * If this ioctl is unknown try to hand it down
874 * to the NIC driver.
875 */
876 if (err == -ENOIOCTLCMD)
877 err = dev_ioctl(net, cmd, argp);
878
879 return err;
880}
881
1da177e4
LT
882/*
883 * With an ioctl, arg may well be a user mode pointer, but we don't know
884 * what to do with it - that's up to the protocol still.
885 */
886
887static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
888{
889 struct socket *sock;
881d966b 890 struct sock *sk;
1da177e4
LT
891 void __user *argp = (void __user *)arg;
892 int pid, err;
881d966b 893 struct net *net;
1da177e4 894
b69aee04 895 sock = file->private_data;
881d966b 896 sk = sock->sk;
3b1e0a65 897 net = sock_net(sk);
1da177e4 898 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 899 err = dev_ioctl(net, cmd, argp);
1da177e4 900 } else
3d23e349 901#ifdef CONFIG_WEXT_CORE
1da177e4 902 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 903 err = dev_ioctl(net, cmd, argp);
1da177e4 904 } else
3d23e349 905#endif
89bddce5 906 switch (cmd) {
1da177e4
LT
907 case FIOSETOWN:
908 case SIOCSPGRP:
909 err = -EFAULT;
910 if (get_user(pid, (int __user *)argp))
911 break;
e0b93edd
JL
912 f_setown(sock->file, pid, 1);
913 err = 0;
1da177e4
LT
914 break;
915 case FIOGETOWN:
916 case SIOCGPGRP:
609d7fa9 917 err = put_user(f_getown(sock->file),
89bddce5 918 (int __user *)argp);
1da177e4
LT
919 break;
920 case SIOCGIFBR:
921 case SIOCSIFBR:
922 case SIOCBRADDBR:
923 case SIOCBRDELBR:
924 err = -ENOPKG;
925 if (!br_ioctl_hook)
926 request_module("bridge");
927
4a3e2f71 928 mutex_lock(&br_ioctl_mutex);
89bddce5 929 if (br_ioctl_hook)
881d966b 930 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 931 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
932 break;
933 case SIOCGIFVLAN:
934 case SIOCSIFVLAN:
935 err = -ENOPKG;
936 if (!vlan_ioctl_hook)
937 request_module("8021q");
938
4a3e2f71 939 mutex_lock(&vlan_ioctl_mutex);
1da177e4 940 if (vlan_ioctl_hook)
881d966b 941 err = vlan_ioctl_hook(net, argp);
4a3e2f71 942 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 943 break;
1da177e4
LT
944 case SIOCADDDLCI:
945 case SIOCDELDLCI:
946 err = -ENOPKG;
947 if (!dlci_ioctl_hook)
948 request_module("dlci");
949
7512cbf6
PE
950 mutex_lock(&dlci_ioctl_mutex);
951 if (dlci_ioctl_hook)
1da177e4 952 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 953 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
954 break;
955 default:
6b96018b 956 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 957 break;
89bddce5 958 }
1da177e4
LT
959 return err;
960}
961
962int sock_create_lite(int family, int type, int protocol, struct socket **res)
963{
964 int err;
965 struct socket *sock = NULL;
89bddce5 966
1da177e4
LT
967 err = security_socket_create(family, type, protocol, 1);
968 if (err)
969 goto out;
970
971 sock = sock_alloc();
972 if (!sock) {
973 err = -ENOMEM;
974 goto out;
975 }
976
1da177e4 977 sock->type = type;
7420ed23
VY
978 err = security_socket_post_create(sock, family, type, protocol, 1);
979 if (err)
980 goto out_release;
981
1da177e4
LT
982out:
983 *res = sock;
984 return err;
7420ed23
VY
985out_release:
986 sock_release(sock);
987 sock = NULL;
988 goto out;
1da177e4 989}
c6d409cf 990EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
991
992/* No kernel lock held - perfect */
89bddce5 993static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 994{
cbf55001 995 unsigned int busy_flag = 0;
1da177e4
LT
996 struct socket *sock;
997
998 /*
89bddce5 999 * We can't return errors to poll, so it's either yes or no.
1da177e4 1000 */
b69aee04 1001 sock = file->private_data;
2d48d67f 1002
cbf55001 1003 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1004 /* this socket can poll_ll so tell the system call */
cbf55001 1005 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1006
1007 /* once, only if requested by syscall */
cbf55001
ET
1008 if (wait && (wait->_key & POLL_BUSY_LOOP))
1009 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1010 }
1011
cbf55001 1012 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1013}
1014
89bddce5 1015static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1016{
b69aee04 1017 struct socket *sock = file->private_data;
1da177e4
LT
1018
1019 return sock->ops->mmap(file, sock, vma);
1020}
1021
/* ->release for socket files: release the socket embedded in the inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1027
1028/*
1029 * Update the socket async list
1030 *
1031 * Fasync_list locking strategy.
1032 *
1033 * 1. fasync_list is modified only under process context socket lock
1034 * i.e. under semaphore.
1035 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1036 * or under socket lock
1da177e4
LT
1037 */
1038
1039static int sock_fasync(int fd, struct file *filp, int on)
1040{
989a2979
ED
1041 struct socket *sock = filp->private_data;
1042 struct sock *sk = sock->sk;
eaefd110 1043 struct socket_wq *wq;
1da177e4 1044
989a2979 1045 if (sk == NULL)
1da177e4 1046 return -EINVAL;
1da177e4
LT
1047
1048 lock_sock(sk);
eaefd110
ED
1049 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1050 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1051
eaefd110 1052 if (!wq->fasync_list)
989a2979
ED
1053 sock_reset_flag(sk, SOCK_FASYNC);
1054 else
bcdce719 1055 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1056
989a2979 1057 release_sock(sk);
1da177e4
LT
1058 return 0;
1059}
1060
ceb5d58b 1061/* This function may be called only under rcu_lock */
1da177e4 1062
ceb5d58b 1063int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1064{
ceb5d58b 1065 if (!wq || !wq->fasync_list)
1da177e4 1066 return -1;
ceb5d58b 1067
89bddce5 1068 switch (how) {
8d8ad9d7 1069 case SOCK_WAKE_WAITD:
ceb5d58b 1070 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1071 break;
1072 goto call_kill;
8d8ad9d7 1073 case SOCK_WAKE_SPACE:
ceb5d58b 1074 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1075 break;
1076 /* fall through */
8d8ad9d7 1077 case SOCK_WAKE_IO:
89bddce5 1078call_kill:
43815482 1079 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1080 break;
8d8ad9d7 1081 case SOCK_WAKE_URG:
43815482 1082 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1083 }
ceb5d58b 1084
1da177e4
LT
1085 return 0;
1086}
c6d409cf 1087EXPORT_SYMBOL(sock_wake_async);
1da177e4 1088
721db93a 1089int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1090 struct socket **res, int kern)
1da177e4
LT
1091{
1092 int err;
1093 struct socket *sock;
55737fda 1094 const struct net_proto_family *pf;
1da177e4
LT
1095
1096 /*
89bddce5 1097 * Check protocol is in range
1da177e4
LT
1098 */
1099 if (family < 0 || family >= NPROTO)
1100 return -EAFNOSUPPORT;
1101 if (type < 0 || type >= SOCK_MAX)
1102 return -EINVAL;
1103
1104 /* Compatibility.
1105
1106 This uglymoron is moved from INET layer to here to avoid
1107 deadlock in module load.
1108 */
1109 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1110 static int warned;
1da177e4
LT
1111 if (!warned) {
1112 warned = 1;
3410f22e
YY
1113 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1114 current->comm);
1da177e4
LT
1115 }
1116 family = PF_PACKET;
1117 }
1118
1119 err = security_socket_create(family, type, protocol, kern);
1120 if (err)
1121 return err;
89bddce5 1122
55737fda
SH
1123 /*
1124 * Allocate the socket and allow the family to set things up. if
1125 * the protocol is 0, the family is instructed to select an appropriate
1126 * default.
1127 */
1128 sock = sock_alloc();
1129 if (!sock) {
e87cc472 1130 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1131 return -ENFILE; /* Not exactly a match, but its the
1132 closest posix thing */
1133 }
1134
1135 sock->type = type;
1136
95a5afca 1137#ifdef CONFIG_MODULES
89bddce5
SH
1138 /* Attempt to load a protocol module if the find failed.
1139 *
1140 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1141 * requested real, full-featured networking support upon configuration.
1142 * Otherwise module support will break!
1143 */
190683a9 1144 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1145 request_module("net-pf-%d", family);
1da177e4
LT
1146#endif
1147
55737fda
SH
1148 rcu_read_lock();
1149 pf = rcu_dereference(net_families[family]);
1150 err = -EAFNOSUPPORT;
1151 if (!pf)
1152 goto out_release;
1da177e4
LT
1153
1154 /*
1155 * We will call the ->create function, that possibly is in a loadable
1156 * module, so we have to bump that loadable module refcnt first.
1157 */
55737fda 1158 if (!try_module_get(pf->owner))
1da177e4
LT
1159 goto out_release;
1160
55737fda
SH
1161 /* Now protected by module ref count */
1162 rcu_read_unlock();
1163
3f378b68 1164 err = pf->create(net, sock, protocol, kern);
55737fda 1165 if (err < 0)
1da177e4 1166 goto out_module_put;
a79af59e 1167
1da177e4
LT
1168 /*
1169 * Now to bump the refcnt of the [loadable] module that owns this
1170 * socket at sock_release time we decrement its refcnt.
1171 */
55737fda
SH
1172 if (!try_module_get(sock->ops->owner))
1173 goto out_module_busy;
1174
1da177e4
LT
1175 /*
1176 * Now that we're done with the ->create function, the [loadable]
1177 * module can have its refcnt decremented
1178 */
55737fda 1179 module_put(pf->owner);
7420ed23
VY
1180 err = security_socket_post_create(sock, family, type, protocol, kern);
1181 if (err)
3b185525 1182 goto out_sock_release;
55737fda 1183 *res = sock;
1da177e4 1184
55737fda
SH
1185 return 0;
1186
1187out_module_busy:
1188 err = -EAFNOSUPPORT;
1da177e4 1189out_module_put:
55737fda
SH
1190 sock->ops = NULL;
1191 module_put(pf->owner);
1192out_sock_release:
1da177e4 1193 sock_release(sock);
55737fda
SH
1194 return err;
1195
1196out_release:
1197 rcu_read_unlock();
1198 goto out_sock_release;
1da177e4 1199}
721db93a 1200EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1201
1202int sock_create(int family, int type, int protocol, struct socket **res)
1203{
1b8d7ae4 1204 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1205}
c6d409cf 1206EXPORT_SYMBOL(sock_create);
1da177e4 1207
/*
 * Create a socket in the given network namespace.  Passes kern=1 to
 * __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1213
3e0fa65f 1214SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1215{
1216 int retval;
1217 struct socket *sock;
a677a039
UD
1218 int flags;
1219
e38b36f3
UD
1220 /* Check the SOCK_* constants for consistency. */
1221 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1222 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1223 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1224 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1225
a677a039 1226 flags = type & ~SOCK_TYPE_MASK;
77d27200 1227 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1228 return -EINVAL;
1229 type &= SOCK_TYPE_MASK;
1da177e4 1230
aaca0bdc
UD
1231 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1232 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1233
1da177e4
LT
1234 retval = sock_create(family, type, protocol, &sock);
1235 if (retval < 0)
1236 goto out;
1237
77d27200 1238 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1239 if (retval < 0)
1240 goto out_release;
1241
1242out:
1243 /* It may be already another descriptor 8) Not kernel problem. */
1244 return retval;
1245
1246out_release:
1247 sock_release(sock);
1248 return retval;
1249}
1250
1251/*
1252 * Create a pair of connected sockets.
1253 */
1254
3e0fa65f
HC
1255SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1256 int __user *, usockvec)
1da177e4
LT
1257{
1258 struct socket *sock1, *sock2;
1259 int fd1, fd2, err;
db349509 1260 struct file *newfile1, *newfile2;
a677a039
UD
1261 int flags;
1262
1263 flags = type & ~SOCK_TYPE_MASK;
77d27200 1264 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1265 return -EINVAL;
1266 type &= SOCK_TYPE_MASK;
1da177e4 1267
aaca0bdc
UD
1268 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1269 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1270
1da177e4
LT
1271 /*
1272 * Obtain the first socket and check if the underlying protocol
1273 * supports the socketpair call.
1274 */
1275
1276 err = sock_create(family, type, protocol, &sock1);
1277 if (err < 0)
1278 goto out;
1279
1280 err = sock_create(family, type, protocol, &sock2);
1281 if (err < 0)
1282 goto out_release_1;
1283
1284 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1285 if (err < 0)
1da177e4
LT
1286 goto out_release_both;
1287
28407630 1288 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1289 if (unlikely(fd1 < 0)) {
1290 err = fd1;
db349509 1291 goto out_release_both;
bf3c23d1 1292 }
d73aa286 1293
28407630 1294 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1295 if (unlikely(fd2 < 0)) {
1296 err = fd2;
d73aa286 1297 goto out_put_unused_1;
28407630
AV
1298 }
1299
aab174f0 1300 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1301 if (IS_ERR(newfile1)) {
28407630 1302 err = PTR_ERR(newfile1);
d73aa286 1303 goto out_put_unused_both;
28407630
AV
1304 }
1305
aab174f0 1306 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1307 if (IS_ERR(newfile2)) {
1308 err = PTR_ERR(newfile2);
d73aa286 1309 goto out_fput_1;
db349509
AV
1310 }
1311
d73aa286
YD
1312 err = put_user(fd1, &usockvec[0]);
1313 if (err)
1314 goto out_fput_both;
1315
1316 err = put_user(fd2, &usockvec[1]);
1317 if (err)
1318 goto out_fput_both;
1319
157cf649 1320 audit_fd_pair(fd1, fd2);
d73aa286 1321
db349509
AV
1322 fd_install(fd1, newfile1);
1323 fd_install(fd2, newfile2);
1da177e4
LT
1324 /* fd1 and fd2 may be already another descriptors.
1325 * Not kernel problem.
1326 */
1327
d73aa286 1328 return 0;
1da177e4 1329
d73aa286
YD
1330out_fput_both:
1331 fput(newfile2);
1332 fput(newfile1);
1333 put_unused_fd(fd2);
1334 put_unused_fd(fd1);
1335 goto out;
1336
1337out_fput_1:
1338 fput(newfile1);
1339 put_unused_fd(fd2);
1340 put_unused_fd(fd1);
1341 sock_release(sock2);
1342 goto out;
1da177e4 1343
d73aa286
YD
1344out_put_unused_both:
1345 put_unused_fd(fd2);
1346out_put_unused_1:
1347 put_unused_fd(fd1);
1da177e4 1348out_release_both:
89bddce5 1349 sock_release(sock2);
1da177e4 1350out_release_1:
89bddce5 1351 sock_release(sock1);
1da177e4
LT
1352out:
1353 return err;
1354}
1355
1da177e4
LT
1356/*
1357 * Bind a name to a socket. Nothing much to do here since it's
1358 * the protocol's responsibility to handle the local address.
1359 *
1360 * We move the socket address to kernel space before we call
1361 * the protocol layer (having also checked the address is ok).
1362 */
1363
20f37034 1364SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1365{
1366 struct socket *sock;
230b1839 1367 struct sockaddr_storage address;
6cb153ca 1368 int err, fput_needed;
1da177e4 1369
89bddce5 1370 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1371 if (sock) {
43db362d 1372 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1373 if (err >= 0) {
1374 err = security_socket_bind(sock,
230b1839 1375 (struct sockaddr *)&address,
89bddce5 1376 addrlen);
6cb153ca
BL
1377 if (!err)
1378 err = sock->ops->bind(sock,
89bddce5 1379 (struct sockaddr *)
230b1839 1380 &address, addrlen);
1da177e4 1381 }
6cb153ca 1382 fput_light(sock->file, fput_needed);
89bddce5 1383 }
1da177e4
LT
1384 return err;
1385}
1386
1da177e4
LT
1387/*
1388 * Perform a listen. Basically, we allow the protocol to do anything
1389 * necessary for a listen, and if that works, we mark the socket as
1390 * ready for listening.
1391 */
1392
3e0fa65f 1393SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1394{
1395 struct socket *sock;
6cb153ca 1396 int err, fput_needed;
b8e1f9b5 1397 int somaxconn;
89bddce5
SH
1398
1399 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1400 if (sock) {
8efa6e93 1401 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1402 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1403 backlog = somaxconn;
1da177e4
LT
1404
1405 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1406 if (!err)
1407 err = sock->ops->listen(sock, backlog);
1da177e4 1408
6cb153ca 1409 fput_light(sock->file, fput_needed);
1da177e4
LT
1410 }
1411 return err;
1412}
1413
1da177e4
LT
1414/*
1415 * For accept, we attempt to create a new socket, set up the link
1416 * with the client, wake up the client, then return the new
1417 * connected fd. We collect the address of the connector in kernel
1418 * space and move it to user at the very end. This is unclean because
1419 * we open the socket then return an error.
1420 *
1421 * 1003.1g adds the ability to recvmsg() to query connection pending
1422 * status to recvmsg. We need to add that support in a way that's
1423 * clean when we restructure accept also.
1424 */
1425
20f37034
HC
1426SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1427 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1428{
1429 struct socket *sock, *newsock;
39d8c1b6 1430 struct file *newfile;
6cb153ca 1431 int err, len, newfd, fput_needed;
230b1839 1432 struct sockaddr_storage address;
1da177e4 1433
77d27200 1434 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1435 return -EINVAL;
1436
1437 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1438 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1439
6cb153ca 1440 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1441 if (!sock)
1442 goto out;
1443
1444 err = -ENFILE;
c6d409cf
ED
1445 newsock = sock_alloc();
1446 if (!newsock)
1da177e4
LT
1447 goto out_put;
1448
1449 newsock->type = sock->type;
1450 newsock->ops = sock->ops;
1451
1da177e4
LT
1452 /*
1453 * We don't need try_module_get here, as the listening socket (sock)
1454 * has the protocol module (sock->ops->owner) held.
1455 */
1456 __module_get(newsock->ops->owner);
1457
28407630 1458 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1459 if (unlikely(newfd < 0)) {
1460 err = newfd;
9a1875e6
DM
1461 sock_release(newsock);
1462 goto out_put;
39d8c1b6 1463 }
aab174f0 1464 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1465 if (IS_ERR(newfile)) {
28407630
AV
1466 err = PTR_ERR(newfile);
1467 put_unused_fd(newfd);
1468 sock_release(newsock);
1469 goto out_put;
1470 }
39d8c1b6 1471
a79af59e
FF
1472 err = security_socket_accept(sock, newsock);
1473 if (err)
39d8c1b6 1474 goto out_fd;
a79af59e 1475
1da177e4
LT
1476 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1477 if (err < 0)
39d8c1b6 1478 goto out_fd;
1da177e4
LT
1479
1480 if (upeer_sockaddr) {
230b1839 1481 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1482 &len, 2) < 0) {
1da177e4 1483 err = -ECONNABORTED;
39d8c1b6 1484 goto out_fd;
1da177e4 1485 }
43db362d 1486 err = move_addr_to_user(&address,
230b1839 1487 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1488 if (err < 0)
39d8c1b6 1489 goto out_fd;
1da177e4
LT
1490 }
1491
1492 /* File flags are not inherited via accept() unlike another OSes. */
1493
39d8c1b6
DM
1494 fd_install(newfd, newfile);
1495 err = newfd;
1da177e4 1496
1da177e4 1497out_put:
6cb153ca 1498 fput_light(sock->file, fput_needed);
1da177e4
LT
1499out:
1500 return err;
39d8c1b6 1501out_fd:
9606a216 1502 fput(newfile);
39d8c1b6 1503 put_unused_fd(newfd);
1da177e4
LT
1504 goto out_put;
1505}
1506
20f37034
HC
1507SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1508 int __user *, upeer_addrlen)
aaca0bdc 1509{
de11defe 1510 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1511}
1512
1da177e4
LT
1513/*
1514 * Attempt to connect to a socket with the server address. The address
1515 * is in user space so we verify it is OK and move it to kernel space.
1516 *
1517 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1518 * break bindings
1519 *
1520 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1521 * other SEQPACKET protocols that take time to connect() as it doesn't
1522 * include the -EINPROGRESS status for such sockets.
1523 */
1524
20f37034
HC
1525SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1526 int, addrlen)
1da177e4
LT
1527{
1528 struct socket *sock;
230b1839 1529 struct sockaddr_storage address;
6cb153ca 1530 int err, fput_needed;
1da177e4 1531
6cb153ca 1532 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1533 if (!sock)
1534 goto out;
43db362d 1535 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1536 if (err < 0)
1537 goto out_put;
1538
89bddce5 1539 err =
230b1839 1540 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1541 if (err)
1542 goto out_put;
1543
230b1839 1544 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1545 sock->file->f_flags);
1546out_put:
6cb153ca 1547 fput_light(sock->file, fput_needed);
1da177e4
LT
1548out:
1549 return err;
1550}
1551
1552/*
1553 * Get the local address ('name') of a socket object. Move the obtained
1554 * name to user space.
1555 */
1556
20f37034
HC
1557SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1558 int __user *, usockaddr_len)
1da177e4
LT
1559{
1560 struct socket *sock;
230b1839 1561 struct sockaddr_storage address;
6cb153ca 1562 int len, err, fput_needed;
89bddce5 1563
6cb153ca 1564 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1565 if (!sock)
1566 goto out;
1567
1568 err = security_socket_getsockname(sock);
1569 if (err)
1570 goto out_put;
1571
230b1839 1572 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1573 if (err)
1574 goto out_put;
43db362d 1575 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1576
1577out_put:
6cb153ca 1578 fput_light(sock->file, fput_needed);
1da177e4
LT
1579out:
1580 return err;
1581}
1582
1583/*
1584 * Get the remote address ('name') of a socket object. Move the obtained
1585 * name to user space.
1586 */
1587
20f37034
HC
1588SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1589 int __user *, usockaddr_len)
1da177e4
LT
1590{
1591 struct socket *sock;
230b1839 1592 struct sockaddr_storage address;
6cb153ca 1593 int len, err, fput_needed;
1da177e4 1594
89bddce5
SH
1595 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1596 if (sock != NULL) {
1da177e4
LT
1597 err = security_socket_getpeername(sock);
1598 if (err) {
6cb153ca 1599 fput_light(sock->file, fput_needed);
1da177e4
LT
1600 return err;
1601 }
1602
89bddce5 1603 err =
230b1839 1604 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1605 1);
1da177e4 1606 if (!err)
43db362d 1607 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1608 usockaddr_len);
6cb153ca 1609 fput_light(sock->file, fput_needed);
1da177e4
LT
1610 }
1611 return err;
1612}
1613
1614/*
1615 * Send a datagram to a given address. We move the address into kernel
1616 * space and check the user space data area is readable before invoking
1617 * the protocol.
1618 */
1619
3e0fa65f 1620SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1621 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1622 int, addr_len)
1da177e4
LT
1623{
1624 struct socket *sock;
230b1839 1625 struct sockaddr_storage address;
1da177e4
LT
1626 int err;
1627 struct msghdr msg;
1628 struct iovec iov;
6cb153ca 1629 int fput_needed;
6cb153ca 1630
602bd0e9
AV
1631 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1632 if (unlikely(err))
1633 return err;
de0fa95c
PE
1634 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1635 if (!sock)
4387ff75 1636 goto out;
6cb153ca 1637
89bddce5 1638 msg.msg_name = NULL;
89bddce5
SH
1639 msg.msg_control = NULL;
1640 msg.msg_controllen = 0;
1641 msg.msg_namelen = 0;
6cb153ca 1642 if (addr) {
43db362d 1643 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1644 if (err < 0)
1645 goto out_put;
230b1839 1646 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1647 msg.msg_namelen = addr_len;
1da177e4
LT
1648 }
1649 if (sock->file->f_flags & O_NONBLOCK)
1650 flags |= MSG_DONTWAIT;
1651 msg.msg_flags = flags;
d8725c86 1652 err = sock_sendmsg(sock, &msg);
1da177e4 1653
89bddce5 1654out_put:
de0fa95c 1655 fput_light(sock->file, fput_needed);
4387ff75 1656out:
1da177e4
LT
1657 return err;
1658}
1659
1660/*
89bddce5 1661 * Send a datagram down a socket.
1da177e4
LT
1662 */
1663
3e0fa65f 1664SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1665 unsigned int, flags)
1da177e4
LT
1666{
1667 return sys_sendto(fd, buff, len, flags, NULL, 0);
1668}
1669
1670/*
89bddce5 1671 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1672 * sender. We verify the buffers are writable and if needed move the
1673 * sender address from kernel to user space.
1674 */
1675
3e0fa65f 1676SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1677 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1678 int __user *, addr_len)
1da177e4
LT
1679{
1680 struct socket *sock;
1681 struct iovec iov;
1682 struct msghdr msg;
230b1839 1683 struct sockaddr_storage address;
89bddce5 1684 int err, err2;
6cb153ca
BL
1685 int fput_needed;
1686
602bd0e9
AV
1687 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1688 if (unlikely(err))
1689 return err;
de0fa95c 1690 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1691 if (!sock)
de0fa95c 1692 goto out;
1da177e4 1693
89bddce5
SH
1694 msg.msg_control = NULL;
1695 msg.msg_controllen = 0;
f3d33426
HFS
1696 /* Save some cycles and don't copy the address if not needed */
1697 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1698 /* We assume all kernel code knows the size of sockaddr_storage */
1699 msg.msg_namelen = 0;
130ed5d1 1700 msg.msg_iocb = NULL;
1da177e4
LT
1701 if (sock->file->f_flags & O_NONBLOCK)
1702 flags |= MSG_DONTWAIT;
602bd0e9 1703 err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
1da177e4 1704
89bddce5 1705 if (err >= 0 && addr != NULL) {
43db362d 1706 err2 = move_addr_to_user(&address,
230b1839 1707 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1708 if (err2 < 0)
1709 err = err2;
1da177e4 1710 }
de0fa95c
PE
1711
1712 fput_light(sock->file, fput_needed);
4387ff75 1713out:
1da177e4
LT
1714 return err;
1715}
1716
1717/*
89bddce5 1718 * Receive a datagram from a socket.
1da177e4
LT
1719 */
1720
b7c0ddf5
JG
1721SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1722 unsigned int, flags)
1da177e4
LT
1723{
1724 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1725}
1726
1727/*
1728 * Set a socket option. Because we don't know the option lengths we have
1729 * to pass the user mode parameter for the protocols to sort out.
1730 */
1731
20f37034
HC
1732SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1733 char __user *, optval, int, optlen)
1da177e4 1734{
6cb153ca 1735 int err, fput_needed;
1da177e4
LT
1736 struct socket *sock;
1737
1738 if (optlen < 0)
1739 return -EINVAL;
89bddce5
SH
1740
1741 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1742 if (sock != NULL) {
1743 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1744 if (err)
1745 goto out_put;
1da177e4
LT
1746
1747 if (level == SOL_SOCKET)
89bddce5
SH
1748 err =
1749 sock_setsockopt(sock, level, optname, optval,
1750 optlen);
1da177e4 1751 else
89bddce5
SH
1752 err =
1753 sock->ops->setsockopt(sock, level, optname, optval,
1754 optlen);
6cb153ca
BL
1755out_put:
1756 fput_light(sock->file, fput_needed);
1da177e4
LT
1757 }
1758 return err;
1759}
1760
1761/*
1762 * Get a socket option. Because we don't know the option lengths we have
1763 * to pass a user mode parameter for the protocols to sort out.
1764 */
1765
20f37034
HC
1766SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1767 char __user *, optval, int __user *, optlen)
1da177e4 1768{
6cb153ca 1769 int err, fput_needed;
1da177e4
LT
1770 struct socket *sock;
1771
89bddce5
SH
1772 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1773 if (sock != NULL) {
6cb153ca
BL
1774 err = security_socket_getsockopt(sock, level, optname);
1775 if (err)
1776 goto out_put;
1da177e4
LT
1777
1778 if (level == SOL_SOCKET)
89bddce5
SH
1779 err =
1780 sock_getsockopt(sock, level, optname, optval,
1781 optlen);
1da177e4 1782 else
89bddce5
SH
1783 err =
1784 sock->ops->getsockopt(sock, level, optname, optval,
1785 optlen);
6cb153ca
BL
1786out_put:
1787 fput_light(sock->file, fput_needed);
1da177e4
LT
1788 }
1789 return err;
1790}
1791
1da177e4
LT
1792/*
1793 * Shutdown a socket.
1794 */
1795
754fe8d2 1796SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1797{
6cb153ca 1798 int err, fput_needed;
1da177e4
LT
1799 struct socket *sock;
1800
89bddce5
SH
1801 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1802 if (sock != NULL) {
1da177e4 1803 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1804 if (!err)
1805 err = sock->ops->shutdown(sock, how);
1806 fput_light(sock->file, fput_needed);
1da177e4
LT
1807 }
1808 return err;
1809}
1810
/*
 * Helpers that yield the address of a msghdr field in either the native
 * or the compat layout; the fields involved have the same type
 * (int / unsigned) on our platforms.
 *
 * These expand to a test of a local variable named `flags` and expect
 * both `msg` and `msg##_compat` to be in scope at the point of use.
 */
#define COMPAT_MSG(msg, member)	\
	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1817
c71d8ebe
TH
1818struct used_address {
1819 struct sockaddr_storage name;
1820 unsigned int name_len;
1821};
1822
da184284
AV
1823static int copy_msghdr_from_user(struct msghdr *kmsg,
1824 struct user_msghdr __user *umsg,
1825 struct sockaddr __user **save_addr,
1826 struct iovec **iov)
1661bf36 1827{
08adb7da
AV
1828 struct sockaddr __user *uaddr;
1829 struct iovec __user *uiov;
c0371da6 1830 size_t nr_segs;
08adb7da
AV
1831 ssize_t err;
1832
1833 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1834 __get_user(uaddr, &umsg->msg_name) ||
1835 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1836 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1837 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1838 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1839 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1840 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1841 return -EFAULT;
dbb490b9 1842
08adb7da 1843 if (!uaddr)
6a2a2b3a
AS
1844 kmsg->msg_namelen = 0;
1845
dbb490b9
ML
1846 if (kmsg->msg_namelen < 0)
1847 return -EINVAL;
1848
1661bf36 1849 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1850 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1851
1852 if (save_addr)
1853 *save_addr = uaddr;
1854
1855 if (uaddr && kmsg->msg_namelen) {
1856 if (!save_addr) {
1857 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1858 kmsg->msg_name);
1859 if (err < 0)
1860 return err;
1861 }
1862 } else {
1863 kmsg->msg_name = NULL;
1864 kmsg->msg_namelen = 0;
1865 }
1866
c0371da6 1867 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1868 return -EMSGSIZE;
1869
0345f931 1870 kmsg->msg_iocb = NULL;
1871
da184284
AV
1872 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1873 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1874}
1875
666547ff 1876static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1877 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1878 struct used_address *used_address,
1879 unsigned int allowed_msghdr_flags)
1da177e4 1880{
89bddce5
SH
1881 struct compat_msghdr __user *msg_compat =
1882 (struct compat_msghdr __user *)msg;
230b1839 1883 struct sockaddr_storage address;
1da177e4 1884 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1885 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1886 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1887 /* 20 is size of ipv6_pktinfo */
1da177e4 1888 unsigned char *ctl_buf = ctl;
d8725c86 1889 int ctl_len;
08adb7da 1890 ssize_t err;
89bddce5 1891
08adb7da 1892 msg_sys->msg_name = &address;
1da177e4 1893
08449320 1894 if (MSG_CMSG_COMPAT & flags)
08adb7da 1895 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1896 else
08adb7da 1897 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1898 if (err < 0)
da184284 1899 return err;
1da177e4
LT
1900
1901 err = -ENOBUFS;
1902
228e548e 1903 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1904 goto out_freeiov;
28a94d8f 1905 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1906 ctl_len = msg_sys->msg_controllen;
1da177e4 1907 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1908 err =
228e548e 1909 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1910 sizeof(ctl));
1da177e4
LT
1911 if (err)
1912 goto out_freeiov;
228e548e
AB
1913 ctl_buf = msg_sys->msg_control;
1914 ctl_len = msg_sys->msg_controllen;
1da177e4 1915 } else if (ctl_len) {
89bddce5 1916 if (ctl_len > sizeof(ctl)) {
1da177e4 1917 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1918 if (ctl_buf == NULL)
1da177e4
LT
1919 goto out_freeiov;
1920 }
1921 err = -EFAULT;
1922 /*
228e548e 1923 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1924 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1925 * checking falls down on this.
1926 */
fb8621bb 1927 if (copy_from_user(ctl_buf,
228e548e 1928 (void __user __force *)msg_sys->msg_control,
89bddce5 1929 ctl_len))
1da177e4 1930 goto out_freectl;
228e548e 1931 msg_sys->msg_control = ctl_buf;
1da177e4 1932 }
228e548e 1933 msg_sys->msg_flags = flags;
1da177e4
LT
1934
1935 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1936 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1937 /*
1938 * If this is sendmmsg() and current destination address is same as
1939 * previously succeeded address, omit asking LSM's decision.
1940 * used_address->name_len is initialized to UINT_MAX so that the first
1941 * destination address never matches.
1942 */
bc909d9d
MD
1943 if (used_address && msg_sys->msg_name &&
1944 used_address->name_len == msg_sys->msg_namelen &&
1945 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1946 used_address->name_len)) {
d8725c86 1947 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1948 goto out_freectl;
1949 }
d8725c86 1950 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1951 /*
1952 * If this is sendmmsg() and sending to current destination address was
1953 * successful, remember it.
1954 */
1955 if (used_address && err >= 0) {
1956 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1957 if (msg_sys->msg_name)
1958 memcpy(&used_address->name, msg_sys->msg_name,
1959 used_address->name_len);
c71d8ebe 1960 }
1da177e4
LT
1961
1962out_freectl:
89bddce5 1963 if (ctl_buf != ctl)
1da177e4
LT
1964 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1965out_freeiov:
da184284 1966 kfree(iov);
228e548e
AB
1967 return err;
1968}
1969
1970/*
1971 * BSD sendmsg interface
1972 */
1973
666547ff 1974long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1975{
1976 int fput_needed, err;
1977 struct msghdr msg_sys;
1be374a0
AL
1978 struct socket *sock;
1979
1be374a0 1980 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1981 if (!sock)
1982 goto out;
1983
28a94d8f 1984 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 1985
6cb153ca 1986 fput_light(sock->file, fput_needed);
89bddce5 1987out:
1da177e4
LT
1988 return err;
1989}
1990
666547ff 1991SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1992{
1993 if (flags & MSG_CMSG_COMPAT)
1994 return -EINVAL;
1995 return __sys_sendmsg(fd, msg, flags);
1996}
1997
228e548e
AB
1998/*
1999 * Linux sendmmsg interface
2000 */
2001
2002int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2003 unsigned int flags)
2004{
2005 int fput_needed, err, datagrams;
2006 struct socket *sock;
2007 struct mmsghdr __user *entry;
2008 struct compat_mmsghdr __user *compat_entry;
2009 struct msghdr msg_sys;
c71d8ebe 2010 struct used_address used_address;
228e548e 2011
98382f41
AB
2012 if (vlen > UIO_MAXIOV)
2013 vlen = UIO_MAXIOV;
228e548e
AB
2014
2015 datagrams = 0;
2016
2017 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2018 if (!sock)
2019 return err;
2020
c71d8ebe 2021 used_address.name_len = UINT_MAX;
228e548e
AB
2022 entry = mmsg;
2023 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2024 err = 0;
228e548e
AB
2025
2026 while (datagrams < vlen) {
228e548e 2027 if (MSG_CMSG_COMPAT & flags) {
666547ff 2028 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2029 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2030 if (err < 0)
2031 break;
2032 err = __put_user(err, &compat_entry->msg_len);
2033 ++compat_entry;
2034 } else {
a7526eb5 2035 err = ___sys_sendmsg(sock,
666547ff 2036 (struct user_msghdr __user *)entry,
28a94d8f 2037 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2038 if (err < 0)
2039 break;
2040 err = put_user(err, &entry->msg_len);
2041 ++entry;
2042 }
2043
2044 if (err)
2045 break;
2046 ++datagrams;
a78cb84c 2047 cond_resched();
228e548e
AB
2048 }
2049
228e548e
AB
2050 fput_light(sock->file, fput_needed);
2051
728ffb86
AB
2052 /* We only return an error if no datagrams were able to be sent */
2053 if (datagrams != 0)
228e548e
AB
2054 return datagrams;
2055
228e548e
AB
2056 return err;
2057}
2058
2059SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2060 unsigned int, vlen, unsigned int, flags)
2061{
1be374a0
AL
2062 if (flags & MSG_CMSG_COMPAT)
2063 return -EINVAL;
228e548e
AB
2064 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2065}
2066
666547ff 2067static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2068 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2069{
89bddce5
SH
2070 struct compat_msghdr __user *msg_compat =
2071 (struct compat_msghdr __user *)msg;
1da177e4 2072 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2073 struct iovec *iov = iovstack;
1da177e4 2074 unsigned long cmsg_ptr;
08adb7da
AV
2075 int total_len, len;
2076 ssize_t err;
1da177e4
LT
2077
2078 /* kernel mode address */
230b1839 2079 struct sockaddr_storage addr;
1da177e4
LT
2080
2081 /* user mode address pointers */
2082 struct sockaddr __user *uaddr;
08adb7da 2083 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2084
08adb7da 2085 msg_sys->msg_name = &addr;
1da177e4 2086
f3d33426 2087 if (MSG_CMSG_COMPAT & flags)
08adb7da 2088 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2089 else
08adb7da 2090 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2091 if (err < 0)
da184284
AV
2092 return err;
2093 total_len = iov_iter_count(&msg_sys->msg_iter);
1da177e4 2094
a2e27255
ACM
2095 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2096 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2097
f3d33426
HFS
2098 /* We assume all kernel code knows the size of sockaddr_storage */
2099 msg_sys->msg_namelen = 0;
2100
1da177e4
LT
2101 if (sock->file->f_flags & O_NONBLOCK)
2102 flags |= MSG_DONTWAIT;
a2e27255
ACM
2103 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2104 total_len, flags);
1da177e4
LT
2105 if (err < 0)
2106 goto out_freeiov;
2107 len = err;
2108
2109 if (uaddr != NULL) {
43db362d 2110 err = move_addr_to_user(&addr,
a2e27255 2111 msg_sys->msg_namelen, uaddr,
89bddce5 2112 uaddr_len);
1da177e4
LT
2113 if (err < 0)
2114 goto out_freeiov;
2115 }
a2e27255 2116 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2117 COMPAT_FLAGS(msg));
1da177e4
LT
2118 if (err)
2119 goto out_freeiov;
2120 if (MSG_CMSG_COMPAT & flags)
a2e27255 2121 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2122 &msg_compat->msg_controllen);
2123 else
a2e27255 2124 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2125 &msg->msg_controllen);
2126 if (err)
2127 goto out_freeiov;
2128 err = len;
2129
2130out_freeiov:
da184284 2131 kfree(iov);
a2e27255
ACM
2132 return err;
2133}
2134
2135/*
2136 * BSD recvmsg interface
2137 */
2138
666547ff 2139long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2140{
2141 int fput_needed, err;
2142 struct msghdr msg_sys;
1be374a0
AL
2143 struct socket *sock;
2144
1be374a0 2145 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2146 if (!sock)
2147 goto out;
2148
a7526eb5 2149 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2150
6cb153ca 2151 fput_light(sock->file, fput_needed);
1da177e4
LT
2152out:
2153 return err;
2154}
2155
666547ff 2156SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2157 unsigned int, flags)
2158{
2159 if (flags & MSG_CMSG_COMPAT)
2160 return -EINVAL;
2161 return __sys_recvmsg(fd, msg, flags);
2162}
2163
a2e27255
ACM
2164/*
2165 * Linux recvmmsg interface
2166 */
2167
2168int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2169 unsigned int flags, struct timespec *timeout)
2170{
2171 int fput_needed, err, datagrams;
2172 struct socket *sock;
2173 struct mmsghdr __user *entry;
d7256d0e 2174 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2175 struct msghdr msg_sys;
2176 struct timespec end_time;
2177
2178 if (timeout &&
2179 poll_select_set_timeout(&end_time, timeout->tv_sec,
2180 timeout->tv_nsec))
2181 return -EINVAL;
2182
2183 datagrams = 0;
2184
2185 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2186 if (!sock)
2187 return err;
2188
2189 err = sock_error(sock->sk);
2190 if (err)
2191 goto out_put;
2192
2193 entry = mmsg;
d7256d0e 2194 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2195
2196 while (datagrams < vlen) {
2197 /*
2198 * No need to ask LSM for more than the first datagram.
2199 */
d7256d0e 2200 if (MSG_CMSG_COMPAT & flags) {
666547ff 2201 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2202 &msg_sys, flags & ~MSG_WAITFORONE,
2203 datagrams);
d7256d0e
JMG
2204 if (err < 0)
2205 break;
2206 err = __put_user(err, &compat_entry->msg_len);
2207 ++compat_entry;
2208 } else {
a7526eb5 2209 err = ___sys_recvmsg(sock,
666547ff 2210 (struct user_msghdr __user *)entry,
a7526eb5
AL
2211 &msg_sys, flags & ~MSG_WAITFORONE,
2212 datagrams);
d7256d0e
JMG
2213 if (err < 0)
2214 break;
2215 err = put_user(err, &entry->msg_len);
2216 ++entry;
2217 }
2218
a2e27255
ACM
2219 if (err)
2220 break;
a2e27255
ACM
2221 ++datagrams;
2222
71c5c159
BB
2223 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2224 if (flags & MSG_WAITFORONE)
2225 flags |= MSG_DONTWAIT;
2226
a2e27255
ACM
2227 if (timeout) {
2228 ktime_get_ts(timeout);
2229 *timeout = timespec_sub(end_time, *timeout);
2230 if (timeout->tv_sec < 0) {
2231 timeout->tv_sec = timeout->tv_nsec = 0;
2232 break;
2233 }
2234
2235 /* Timeout, return less than vlen datagrams */
2236 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2237 break;
2238 }
2239
2240 /* Out of band data, return right away */
2241 if (msg_sys.msg_flags & MSG_OOB)
2242 break;
a78cb84c 2243 cond_resched();
a2e27255
ACM
2244 }
2245
2246out_put:
2247 fput_light(sock->file, fput_needed);
1da177e4 2248
a2e27255
ACM
2249 if (err == 0)
2250 return datagrams;
2251
2252 if (datagrams != 0) {
2253 /*
2254 * We may return less entries than requested (vlen) if the
2255 * sock is non block and there aren't enough datagrams...
2256 */
2257 if (err != -EAGAIN) {
2258 /*
2259 * ... or if recvmsg returns an error after we
2260 * received some datagrams, where we record the
2261 * error to return on the next call or if the
2262 * app asks about it using getsockopt(SO_ERROR).
2263 */
2264 sock->sk->sk_err = -err;
2265 }
2266
2267 return datagrams;
2268 }
2269
2270 return err;
2271}
2272
2273SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2274 unsigned int, vlen, unsigned int, flags,
2275 struct timespec __user *, timeout)
2276{
2277 int datagrams;
2278 struct timespec timeout_sys;
2279
1be374a0
AL
2280 if (flags & MSG_CMSG_COMPAT)
2281 return -EINVAL;
2282
a2e27255
ACM
2283 if (!timeout)
2284 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2285
2286 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2287 return -EFAULT;
2288
2289 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2290
2291 if (datagrams > 0 &&
2292 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2293 datagrams = -EFAULT;
2294
2295 return datagrams;
2296}
2297
2298#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
2299/* Argument list sizes for sys_socketcall */
/* AL(n): size in bytes of an argument block holding n unsigned longs. */
#define AL(x) ((x) * sizeof(unsigned long))
/* Indexed by the socketcall() call number; entry 0 is unused. */
static const unsigned char nargs[21] = {
	AL(0),
	AL(3), AL(3), AL(3), AL(2),
	AL(3), AL(3), AL(3), AL(4),
	AL(4), AL(4), AL(6), AL(6),
	AL(2), AL(5), AL(5), AL(3),
	AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2309
2310/*
89bddce5 2311 * System call vectors.
1da177e4
LT
2312 *
2313 * Argument checking cleaned up. Saved 20% in size.
2314 * This function doesn't need to set the kernel lock because
89bddce5 2315 * it is set by the callees.
1da177e4
LT
2316 */
2317
3e0fa65f 2318SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2319{
2950fa9d 2320 unsigned long a[AUDITSC_ARGS];
89bddce5 2321 unsigned long a0, a1;
1da177e4 2322 int err;
47379052 2323 unsigned int len;
1da177e4 2324
228e548e 2325 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2326 return -EINVAL;
2327
47379052
AV
2328 len = nargs[call];
2329 if (len > sizeof(a))
2330 return -EINVAL;
2331
1da177e4 2332 /* copy_from_user should be SMP safe. */
47379052 2333 if (copy_from_user(a, args, len))
1da177e4 2334 return -EFAULT;
3ec3b2fb 2335
2950fa9d
CG
2336 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2337 if (err)
2338 return err;
3ec3b2fb 2339
89bddce5
SH
2340 a0 = a[0];
2341 a1 = a[1];
2342
2343 switch (call) {
2344 case SYS_SOCKET:
2345 err = sys_socket(a0, a1, a[2]);
2346 break;
2347 case SYS_BIND:
2348 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2349 break;
2350 case SYS_CONNECT:
2351 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2352 break;
2353 case SYS_LISTEN:
2354 err = sys_listen(a0, a1);
2355 break;
2356 case SYS_ACCEPT:
de11defe
UD
2357 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2358 (int __user *)a[2], 0);
89bddce5
SH
2359 break;
2360 case SYS_GETSOCKNAME:
2361 err =
2362 sys_getsockname(a0, (struct sockaddr __user *)a1,
2363 (int __user *)a[2]);
2364 break;
2365 case SYS_GETPEERNAME:
2366 err =
2367 sys_getpeername(a0, (struct sockaddr __user *)a1,
2368 (int __user *)a[2]);
2369 break;
2370 case SYS_SOCKETPAIR:
2371 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2372 break;
2373 case SYS_SEND:
2374 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2375 break;
2376 case SYS_SENDTO:
2377 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2378 (struct sockaddr __user *)a[4], a[5]);
2379 break;
2380 case SYS_RECV:
2381 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2382 break;
2383 case SYS_RECVFROM:
2384 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2385 (struct sockaddr __user *)a[4],
2386 (int __user *)a[5]);
2387 break;
2388 case SYS_SHUTDOWN:
2389 err = sys_shutdown(a0, a1);
2390 break;
2391 case SYS_SETSOCKOPT:
2392 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2393 break;
2394 case SYS_GETSOCKOPT:
2395 err =
2396 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2397 (int __user *)a[4]);
2398 break;
2399 case SYS_SENDMSG:
666547ff 2400 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2401 break;
228e548e
AB
2402 case SYS_SENDMMSG:
2403 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2404 break;
89bddce5 2405 case SYS_RECVMSG:
666547ff 2406 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2407 break;
a2e27255
ACM
2408 case SYS_RECVMMSG:
2409 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2410 (struct timespec __user *)a[4]);
2411 break;
de11defe
UD
2412 case SYS_ACCEPT4:
2413 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2414 (int __user *)a[2], a[3]);
aaca0bdc 2415 break;
89bddce5
SH
2416 default:
2417 err = -EINVAL;
2418 break;
1da177e4
LT
2419 }
2420 return err;
2421}
2422
89bddce5 2423#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2424
55737fda
SH
2425/**
2426 * sock_register - add a socket protocol handler
2427 * @ops: description of protocol
2428 *
1da177e4
LT
2429 * This function is called by a protocol handler that wants to
2430 * advertise its address family, and have it linked into the
e793c0f7 2431 * socket interface. The value ops->family corresponds to the
55737fda 2432 * socket system call protocol family.
1da177e4 2433 */
f0fd27d4 2434int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2435{
2436 int err;
2437
2438 if (ops->family >= NPROTO) {
3410f22e 2439 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2440 return -ENOBUFS;
2441 }
55737fda
SH
2442
2443 spin_lock(&net_family_lock);
190683a9
ED
2444 if (rcu_dereference_protected(net_families[ops->family],
2445 lockdep_is_held(&net_family_lock)))
55737fda
SH
2446 err = -EEXIST;
2447 else {
cf778b00 2448 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2449 err = 0;
2450 }
55737fda
SH
2451 spin_unlock(&net_family_lock);
2452
3410f22e 2453 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2454 return err;
2455}
c6d409cf 2456EXPORT_SYMBOL(sock_register);
1da177e4 2457
55737fda
SH
2458/**
2459 * sock_unregister - remove a protocol handler
2460 * @family: protocol family to remove
2461 *
1da177e4
LT
2462 * This function is called by a protocol handler that wants to
2463 * remove its address family, and have it unlinked from the
55737fda
SH
2464 * new socket creation.
2465 *
2466 * If protocol handler is a module, then it can use module reference
2467 * counts to protect against new references. If protocol handler is not
2468 * a module then it needs to provide its own protection in
2469 * the ops->create routine.
1da177e4 2470 */
f0fd27d4 2471void sock_unregister(int family)
1da177e4 2472{
f0fd27d4 2473 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2474
55737fda 2475 spin_lock(&net_family_lock);
a9b3cd7f 2476 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2477 spin_unlock(&net_family_lock);
2478
2479 synchronize_rcu();
2480
3410f22e 2481 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2482}
c6d409cf 2483EXPORT_SYMBOL(sock_unregister);
1da177e4 2484
77d76ea3 2485static int __init sock_init(void)
1da177e4 2486{
b3e19d92 2487 int err;
2ca794e5
EB
2488 /*
2489 * Initialize the network sysctl infrastructure.
2490 */
2491 err = net_sysctl_init();
2492 if (err)
2493 goto out;
b3e19d92 2494
1da177e4 2495 /*
89bddce5 2496 * Initialize skbuff SLAB cache
1da177e4
LT
2497 */
2498 skb_init();
1da177e4
LT
2499
2500 /*
89bddce5 2501 * Initialize the protocols module.
1da177e4
LT
2502 */
2503
2504 init_inodecache();
b3e19d92
NP
2505
2506 err = register_filesystem(&sock_fs_type);
2507 if (err)
2508 goto out_fs;
1da177e4 2509 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2510 if (IS_ERR(sock_mnt)) {
2511 err = PTR_ERR(sock_mnt);
2512 goto out_mount;
2513 }
77d76ea3
AK
2514
2515 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2516 */
2517
2518#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2519 err = netfilter_init();
2520 if (err)
2521 goto out;
1da177e4 2522#endif
cbeb321a 2523
408eccce 2524 ptp_classifier_init();
c1f19b51 2525
b3e19d92
NP
2526out:
2527 return err;
2528
2529out_mount:
2530 unregister_filesystem(&sock_fs_type);
2531out_fs:
2532 goto out;
1da177e4
LT
2533}
2534
77d76ea3
AK
2535core_initcall(sock_init); /* early initcall */
2536
1da177e4
LT
2537#ifdef CONFIG_PROC_FS
2538void socket_seq_show(struct seq_file *seq)
2539{
2540 int cpu;
2541 int counter = 0;
2542
6f912042 2543 for_each_possible_cpu(cpu)
89bddce5 2544 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2545
2546 /* It can be negative, by the way. 8) */
2547 if (counter < 0)
2548 counter = 0;
2549
2550 seq_printf(seq, "sockets: used %d\n", counter);
2551}
89bddce5 2552#endif /* CONFIG_PROC_FS */
1da177e4 2553
89bbfc95 2554#ifdef CONFIG_COMPAT
6b96018b 2555static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2556 unsigned int cmd, void __user *up)
7a229387 2557{
7a229387
AB
2558 mm_segment_t old_fs = get_fs();
2559 struct timeval ktv;
2560 int err;
2561
2562 set_fs(KERNEL_DS);
6b96018b 2563 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2564 set_fs(old_fs);
644595f8 2565 if (!err)
ed6fe9d6 2566 err = compat_put_timeval(&ktv, up);
644595f8 2567
7a229387
AB
2568 return err;
2569}
2570
6b96018b 2571static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2572 unsigned int cmd, void __user *up)
7a229387 2573{
7a229387
AB
2574 mm_segment_t old_fs = get_fs();
2575 struct timespec kts;
2576 int err;
2577
2578 set_fs(KERNEL_DS);
6b96018b 2579 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2580 set_fs(old_fs);
644595f8 2581 if (!err)
ed6fe9d6 2582 err = compat_put_timespec(&kts, up);
644595f8 2583
7a229387
AB
2584 return err;
2585}
2586
6b96018b 2587static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2588{
2589 struct ifreq __user *uifr;
2590 int err;
2591
2592 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2593 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2594 return -EFAULT;
2595
6b96018b 2596 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2597 if (err)
2598 return err;
2599
6b96018b 2600 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2601 return -EFAULT;
2602
2603 return 0;
2604}
2605
6b96018b 2606static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2607{
6b96018b 2608 struct compat_ifconf ifc32;
7a229387
AB
2609 struct ifconf ifc;
2610 struct ifconf __user *uifc;
6b96018b 2611 struct compat_ifreq __user *ifr32;
7a229387
AB
2612 struct ifreq __user *ifr;
2613 unsigned int i, j;
2614 int err;
2615
6b96018b 2616 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2617 return -EFAULT;
2618
43da5f2e 2619 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2620 if (ifc32.ifcbuf == 0) {
2621 ifc32.ifc_len = 0;
2622 ifc.ifc_len = 0;
2623 ifc.ifc_req = NULL;
2624 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2625 } else {
c6d409cf
ED
2626 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2627 sizeof(struct ifreq);
7a229387
AB
2628 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2629 ifc.ifc_len = len;
2630 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2631 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2632 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2633 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2634 return -EFAULT;
2635 ifr++;
2636 ifr32++;
2637 }
2638 }
2639 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2640 return -EFAULT;
2641
6b96018b 2642 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2643 if (err)
2644 return err;
2645
2646 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2647 return -EFAULT;
2648
2649 ifr = ifc.ifc_req;
2650 ifr32 = compat_ptr(ifc32.ifcbuf);
2651 for (i = 0, j = 0;
c6d409cf
ED
2652 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2653 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2654 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2655 return -EFAULT;
2656 ifr32++;
2657 ifr++;
2658 }
2659
2660 if (ifc32.ifcbuf == 0) {
2661 /* Translate from 64-bit structure multiple to
2662 * a 32-bit one.
2663 */
2664 i = ifc.ifc_len;
6b96018b 2665 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2666 ifc32.ifc_len = i;
2667 } else {
2668 ifc32.ifc_len = i;
2669 }
6b96018b 2670 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2671 return -EFAULT;
2672
2673 return 0;
2674}
2675
6b96018b 2676static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2677{
3a7da39d
BH
2678 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2679 bool convert_in = false, convert_out = false;
2680 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2681 struct ethtool_rxnfc __user *rxnfc;
7a229387 2682 struct ifreq __user *ifr;
3a7da39d
BH
2683 u32 rule_cnt = 0, actual_rule_cnt;
2684 u32 ethcmd;
7a229387 2685 u32 data;
3a7da39d 2686 int ret;
7a229387 2687
3a7da39d
BH
2688 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2689 return -EFAULT;
7a229387 2690
3a7da39d
BH
2691 compat_rxnfc = compat_ptr(data);
2692
2693 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2694 return -EFAULT;
2695
3a7da39d
BH
2696 /* Most ethtool structures are defined without padding.
2697 * Unfortunately struct ethtool_rxnfc is an exception.
2698 */
2699 switch (ethcmd) {
2700 default:
2701 break;
2702 case ETHTOOL_GRXCLSRLALL:
2703 /* Buffer size is variable */
2704 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2705 return -EFAULT;
2706 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2707 return -ENOMEM;
2708 buf_size += rule_cnt * sizeof(u32);
2709 /* fall through */
2710 case ETHTOOL_GRXRINGS:
2711 case ETHTOOL_GRXCLSRLCNT:
2712 case ETHTOOL_GRXCLSRULE:
55664f32 2713 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2714 convert_out = true;
2715 /* fall through */
2716 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2717 buf_size += sizeof(struct ethtool_rxnfc);
2718 convert_in = true;
2719 break;
2720 }
2721
2722 ifr = compat_alloc_user_space(buf_size);
954b1244 2723 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2724
2725 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2726 return -EFAULT;
2727
3a7da39d
BH
2728 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2729 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2730 return -EFAULT;
2731
3a7da39d 2732 if (convert_in) {
127fe533 2733 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2734 * fs.ring_cookie and at the end of fs, but nowhere else.
2735 */
127fe533
AD
2736 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2737 sizeof(compat_rxnfc->fs.m_ext) !=
2738 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2739 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2740 BUILD_BUG_ON(
2741 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2742 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2743 offsetof(struct ethtool_rxnfc, fs.location) -
2744 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2745
2746 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2747 (void __user *)(&rxnfc->fs.m_ext + 1) -
2748 (void __user *)rxnfc) ||
3a7da39d
BH
2749 copy_in_user(&rxnfc->fs.ring_cookie,
2750 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2751 (void __user *)(&rxnfc->fs.location + 1) -
2752 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2753 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2754 sizeof(rxnfc->rule_cnt)))
2755 return -EFAULT;
2756 }
2757
2758 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2759 if (ret)
2760 return ret;
2761
2762 if (convert_out) {
2763 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2764 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2765 (const void __user *)rxnfc) ||
3a7da39d
BH
2766 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2767 &rxnfc->fs.ring_cookie,
954b1244
SH
2768 (const void __user *)(&rxnfc->fs.location + 1) -
2769 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2770 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2771 sizeof(rxnfc->rule_cnt)))
2772 return -EFAULT;
2773
2774 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2775 /* As an optimisation, we only copy the actual
2776 * number of rules that the underlying
2777 * function returned. Since Mallory might
2778 * change the rule count in user memory, we
2779 * check that it is less than the rule count
2780 * originally given (as the user buffer size),
2781 * which has been range-checked.
2782 */
2783 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2784 return -EFAULT;
2785 if (actual_rule_cnt < rule_cnt)
2786 rule_cnt = actual_rule_cnt;
2787 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2788 &rxnfc->rule_locs[0],
2789 rule_cnt * sizeof(u32)))
2790 return -EFAULT;
2791 }
2792 }
2793
2794 return 0;
7a229387
AB
2795}
2796
7a50a240
AB
2797static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2798{
2799 void __user *uptr;
2800 compat_uptr_t uptr32;
2801 struct ifreq __user *uifr;
2802
c6d409cf 2803 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2804 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2805 return -EFAULT;
2806
2807 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2808 return -EFAULT;
2809
2810 uptr = compat_ptr(uptr32);
2811
2812 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2813 return -EFAULT;
2814
2815 return dev_ioctl(net, SIOCWANDEV, uifr);
2816}
2817
6b96018b
AB
2818static int bond_ioctl(struct net *net, unsigned int cmd,
2819 struct compat_ifreq __user *ifr32)
7a229387
AB
2820{
2821 struct ifreq kifr;
7a229387
AB
2822 mm_segment_t old_fs;
2823 int err;
7a229387
AB
2824
2825 switch (cmd) {
2826 case SIOCBONDENSLAVE:
2827 case SIOCBONDRELEASE:
2828 case SIOCBONDSETHWADDR:
2829 case SIOCBONDCHANGEACTIVE:
6b96018b 2830 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2831 return -EFAULT;
2832
2833 old_fs = get_fs();
c6d409cf 2834 set_fs(KERNEL_DS);
c3f52ae6 2835 err = dev_ioctl(net, cmd,
2836 (struct ifreq __user __force *) &kifr);
c6d409cf 2837 set_fs(old_fs);
7a229387
AB
2838
2839 return err;
7a229387 2840 default:
07d106d0 2841 return -ENOIOCTLCMD;
ccbd6a5a 2842 }
7a229387
AB
2843}
2844
590d4693
BH
2845/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2846static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2847 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2848{
2849 struct ifreq __user *u_ifreq64;
7a229387
AB
2850 char tmp_buf[IFNAMSIZ];
2851 void __user *data64;
2852 u32 data32;
2853
2854 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2855 IFNAMSIZ))
2856 return -EFAULT;
417c3522 2857 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2858 return -EFAULT;
2859 data64 = compat_ptr(data32);
2860
2861 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2862
7a229387
AB
2863 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2864 IFNAMSIZ))
2865 return -EFAULT;
417c3522 2866 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2867 return -EFAULT;
2868
6b96018b 2869 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2870}
2871
6b96018b
AB
2872static int dev_ifsioc(struct net *net, struct socket *sock,
2873 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2874{
a2116ed2 2875 struct ifreq __user *uifr;
7a229387
AB
2876 int err;
2877
a2116ed2
AB
2878 uifr = compat_alloc_user_space(sizeof(*uifr));
2879 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2880 return -EFAULT;
2881
2882 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2883
7a229387
AB
2884 if (!err) {
2885 switch (cmd) {
2886 case SIOCGIFFLAGS:
2887 case SIOCGIFMETRIC:
2888 case SIOCGIFMTU:
2889 case SIOCGIFMEM:
2890 case SIOCGIFHWADDR:
2891 case SIOCGIFINDEX:
2892 case SIOCGIFADDR:
2893 case SIOCGIFBRDADDR:
2894 case SIOCGIFDSTADDR:
2895 case SIOCGIFNETMASK:
fab2532b 2896 case SIOCGIFPFLAGS:
7a229387 2897 case SIOCGIFTXQLEN:
fab2532b
AB
2898 case SIOCGMIIPHY:
2899 case SIOCGMIIREG:
a2116ed2 2900 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2901 err = -EFAULT;
2902 break;
2903 }
2904 }
2905 return err;
2906}
2907
a2116ed2
AB
2908static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2909 struct compat_ifreq __user *uifr32)
2910{
2911 struct ifreq ifr;
2912 struct compat_ifmap __user *uifmap32;
2913 mm_segment_t old_fs;
2914 int err;
2915
2916 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2917 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2918 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2919 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2920 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2921 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2922 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2923 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2924 if (err)
2925 return -EFAULT;
2926
2927 old_fs = get_fs();
c6d409cf 2928 set_fs(KERNEL_DS);
c3f52ae6 2929 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2930 set_fs(old_fs);
a2116ed2
AB
2931
2932 if (cmd == SIOCGIFMAP && !err) {
2933 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2934 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2935 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2936 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2937 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2938 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2939 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2940 if (err)
2941 err = -EFAULT;
2942 }
2943 return err;
2944}
2945
7a229387 2946struct rtentry32 {
c6d409cf 2947 u32 rt_pad1;
7a229387
AB
2948 struct sockaddr rt_dst; /* target address */
2949 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2950 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2951 unsigned short rt_flags;
2952 short rt_pad2;
2953 u32 rt_pad3;
2954 unsigned char rt_tos;
2955 unsigned char rt_class;
2956 short rt_pad4;
2957 short rt_metric; /* +1 for binary compatibility! */
7a229387 2958 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2959 u32 rt_mtu; /* per route MTU/Window */
2960 u32 rt_window; /* Window clamping */
7a229387
AB
2961 unsigned short rt_irtt; /* Initial RTT */
2962};
2963
2964struct in6_rtmsg32 {
2965 struct in6_addr rtmsg_dst;
2966 struct in6_addr rtmsg_src;
2967 struct in6_addr rtmsg_gateway;
2968 u32 rtmsg_type;
2969 u16 rtmsg_dst_len;
2970 u16 rtmsg_src_len;
2971 u32 rtmsg_metric;
2972 u32 rtmsg_info;
2973 u32 rtmsg_flags;
2974 s32 rtmsg_ifindex;
2975};
2976
6b96018b
AB
2977static int routing_ioctl(struct net *net, struct socket *sock,
2978 unsigned int cmd, void __user *argp)
7a229387
AB
2979{
2980 int ret;
2981 void *r = NULL;
2982 struct in6_rtmsg r6;
2983 struct rtentry r4;
2984 char devname[16];
2985 u32 rtdev;
2986 mm_segment_t old_fs = get_fs();
2987
6b96018b
AB
2988 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2989 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2990 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2991 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2992 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2993 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2994 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2995 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2996 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2997 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2998 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
2999
3000 r = (void *) &r6;
3001 } else { /* ipv4 */
6b96018b 3002 struct rtentry32 __user *ur4 = argp;
c6d409cf 3003 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3004 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3005 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3006 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3007 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3008 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3009 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3010 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3011 if (rtdev) {
c6d409cf 3012 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3013 r4.rt_dev = (char __user __force *)devname;
3014 devname[15] = 0;
7a229387
AB
3015 } else
3016 r4.rt_dev = NULL;
3017
3018 r = (void *) &r4;
3019 }
3020
3021 if (ret) {
3022 ret = -EFAULT;
3023 goto out;
3024 }
3025
c6d409cf 3026 set_fs(KERNEL_DS);
6b96018b 3027 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3028 set_fs(old_fs);
7a229387
AB
3029
3030out:
7a229387
AB
3031 return ret;
3032}
3033
/* Since old style bridge ioctls end up using SIOCDEVPRIVATE
 * for some operations, this forces use of the newer bridge-utils that
 * use compatible ioctls
 */
6b96018b 3038static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3039{
6b96018b 3040 compat_ulong_t tmp;
7a229387 3041
6b96018b 3042 if (get_user(tmp, argp))
7a229387
AB
3043 return -EFAULT;
3044 if (tmp == BRCTL_GET_VERSION)
3045 return BRCTL_VERSION + 1;
3046 return -EINVAL;
3047}
3048
6b96018b
AB
3049static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3050 unsigned int cmd, unsigned long arg)
3051{
3052 void __user *argp = compat_ptr(arg);
3053 struct sock *sk = sock->sk;
3054 struct net *net = sock_net(sk);
7a229387 3055
6b96018b 3056 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3057 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3058
3059 switch (cmd) {
3060 case SIOCSIFBR:
3061 case SIOCGIFBR:
3062 return old_bridge_ioctl(argp);
3063 case SIOCGIFNAME:
3064 return dev_ifname32(net, argp);
3065 case SIOCGIFCONF:
3066 return dev_ifconf(net, argp);
3067 case SIOCETHTOOL:
3068 return ethtool_ioctl(net, argp);
7a50a240
AB
3069 case SIOCWANDEV:
3070 return compat_siocwandev(net, argp);
a2116ed2
AB
3071 case SIOCGIFMAP:
3072 case SIOCSIFMAP:
3073 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3074 case SIOCBONDENSLAVE:
3075 case SIOCBONDRELEASE:
3076 case SIOCBONDSETHWADDR:
6b96018b
AB
3077 case SIOCBONDCHANGEACTIVE:
3078 return bond_ioctl(net, cmd, argp);
3079 case SIOCADDRT:
3080 case SIOCDELRT:
3081 return routing_ioctl(net, sock, cmd, argp);
3082 case SIOCGSTAMP:
3083 return do_siocgstamp(net, sock, cmd, argp);
3084 case SIOCGSTAMPNS:
3085 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3086 case SIOCBONDSLAVEINFOQUERY:
3087 case SIOCBONDINFOQUERY:
a2116ed2 3088 case SIOCSHWTSTAMP:
fd468c74 3089 case SIOCGHWTSTAMP:
590d4693 3090 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3091
3092 case FIOSETOWN:
3093 case SIOCSPGRP:
3094 case FIOGETOWN:
3095 case SIOCGPGRP:
3096 case SIOCBRADDBR:
3097 case SIOCBRDELBR:
3098 case SIOCGIFVLAN:
3099 case SIOCSIFVLAN:
3100 case SIOCADDDLCI:
3101 case SIOCDELDLCI:
3102 return sock_ioctl(file, cmd, arg);
3103
3104 case SIOCGIFFLAGS:
3105 case SIOCSIFFLAGS:
3106 case SIOCGIFMETRIC:
3107 case SIOCSIFMETRIC:
3108 case SIOCGIFMTU:
3109 case SIOCSIFMTU:
3110 case SIOCGIFMEM:
3111 case SIOCSIFMEM:
3112 case SIOCGIFHWADDR:
3113 case SIOCSIFHWADDR:
3114 case SIOCADDMULTI:
3115 case SIOCDELMULTI:
3116 case SIOCGIFINDEX:
6b96018b
AB
3117 case SIOCGIFADDR:
3118 case SIOCSIFADDR:
3119 case SIOCSIFHWBROADCAST:
6b96018b 3120 case SIOCDIFADDR:
6b96018b
AB
3121 case SIOCGIFBRDADDR:
3122 case SIOCSIFBRDADDR:
3123 case SIOCGIFDSTADDR:
3124 case SIOCSIFDSTADDR:
3125 case SIOCGIFNETMASK:
3126 case SIOCSIFNETMASK:
3127 case SIOCSIFPFLAGS:
3128 case SIOCGIFPFLAGS:
3129 case SIOCGIFTXQLEN:
3130 case SIOCSIFTXQLEN:
3131 case SIOCBRADDIF:
3132 case SIOCBRDELIF:
9177efd3
AB
3133 case SIOCSIFNAME:
3134 case SIOCGMIIPHY:
3135 case SIOCGMIIREG:
3136 case SIOCSMIIREG:
6b96018b 3137 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3138
6b96018b
AB
3139 case SIOCSARP:
3140 case SIOCGARP:
3141 case SIOCDARP:
6b96018b 3142 case SIOCATMARK:
9177efd3
AB
3143 return sock_do_ioctl(net, sock, cmd, arg);
3144 }
3145
6b96018b
AB
3146 return -ENOIOCTLCMD;
3147}
7a229387 3148
95c96174 3149static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3150 unsigned long arg)
89bbfc95
SP
3151{
3152 struct socket *sock = file->private_data;
3153 int ret = -ENOIOCTLCMD;
87de87d5
DM
3154 struct sock *sk;
3155 struct net *net;
3156
3157 sk = sock->sk;
3158 net = sock_net(sk);
89bbfc95
SP
3159
3160 if (sock->ops->compat_ioctl)
3161 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3162
87de87d5
DM
3163 if (ret == -ENOIOCTLCMD &&
3164 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3165 ret = compat_wext_handle_ioctl(net, cmd, arg);
3166
6b96018b
AB
3167 if (ret == -ENOIOCTLCMD)
3168 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3169
89bbfc95
SP
3170 return ret;
3171}
3172#endif
3173
ac5a488e
SS
3174int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3175{
3176 return sock->ops->bind(sock, addr, addrlen);
3177}
c6d409cf 3178EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3179
3180int kernel_listen(struct socket *sock, int backlog)
3181{
3182 return sock->ops->listen(sock, backlog);
3183}
c6d409cf 3184EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3185
3186int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3187{
3188 struct sock *sk = sock->sk;
3189 int err;
3190
3191 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3192 newsock);
3193 if (err < 0)
3194 goto done;
3195
3196 err = sock->ops->accept(sock, *newsock, flags);
3197 if (err < 0) {
3198 sock_release(*newsock);
fa8705b0 3199 *newsock = NULL;
ac5a488e
SS
3200 goto done;
3201 }
3202
3203 (*newsock)->ops = sock->ops;
1b08534e 3204 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3205
3206done:
3207 return err;
3208}
c6d409cf 3209EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3210
3211int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3212 int flags)
ac5a488e
SS
3213{
3214 return sock->ops->connect(sock, addr, addrlen, flags);
3215}
c6d409cf 3216EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3217
3218int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3219 int *addrlen)
3220{
3221 return sock->ops->getname(sock, addr, addrlen, 0);
3222}
c6d409cf 3223EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3224
3225int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3226 int *addrlen)
3227{
3228 return sock->ops->getname(sock, addr, addrlen, 1);
3229}
c6d409cf 3230EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3231
3232int kernel_getsockopt(struct socket *sock, int level, int optname,
3233 char *optval, int *optlen)
3234{
3235 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3236 char __user *uoptval;
3237 int __user *uoptlen;
ac5a488e
SS
3238 int err;
3239
fb8621bb
NK
3240 uoptval = (char __user __force *) optval;
3241 uoptlen = (int __user __force *) optlen;
3242
ac5a488e
SS
3243 set_fs(KERNEL_DS);
3244 if (level == SOL_SOCKET)
fb8621bb 3245 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3246 else
fb8621bb
NK
3247 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3248 uoptlen);
ac5a488e
SS
3249 set_fs(oldfs);
3250 return err;
3251}
c6d409cf 3252EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3253
3254int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3255 char *optval, unsigned int optlen)
ac5a488e
SS
3256{
3257 mm_segment_t oldfs = get_fs();
fb8621bb 3258 char __user *uoptval;
ac5a488e
SS
3259 int err;
3260
fb8621bb
NK
3261 uoptval = (char __user __force *) optval;
3262
ac5a488e
SS
3263 set_fs(KERNEL_DS);
3264 if (level == SOL_SOCKET)
fb8621bb 3265 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3266 else
fb8621bb 3267 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3268 optlen);
3269 set_fs(oldfs);
3270 return err;
3271}
c6d409cf 3272EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3273
3274int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3275 size_t size, int flags)
3276{
3277 if (sock->ops->sendpage)
3278 return sock->ops->sendpage(sock, page, offset, size, flags);
3279
3280 return sock_no_sendpage(sock, page, offset, size, flags);
3281}
c6d409cf 3282EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3283
3284int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3285{
3286 mm_segment_t oldfs = get_fs();
3287 int err;
3288
3289 set_fs(KERNEL_DS);
3290 err = sock->ops->ioctl(sock, cmd, arg);
3291 set_fs(oldfs);
3292
3293 return err;
3294}
c6d409cf 3295EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3296
91cf45f0
TM
3297int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3298{
3299 return sock->ops->shutdown(sock, how);
3300}
91cf45f0 3301EXPORT_SYMBOL(kernel_sock_shutdown);