]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
at803x: fix suspend/resume for SGMII link
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
c74a1cbb
AV
323static struct dentry *sockfs_mount(struct file_system_type *fs_type,
324 int flags, const char *dev_name, void *data)
325{
326 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
327 &sockfs_dentry_operations, SOCKFS_MAGIC);
328}
329
330static struct vfsmount *sock_mnt __read_mostly;
331
332static struct file_system_type sock_fs_type = {
333 .name = "sockfs",
334 .mount = sockfs_mount,
335 .kill_sb = kill_anon_super,
336};
337
1da177e4
LT
338/*
339 * Obtains the first available file descriptor and sets it up for use.
340 *
39d8c1b6
DM
341 * These functions create file structures and maps them to fd space
342 * of the current process. On success it returns file descriptor
1da177e4
LT
343 * and file struct implicitly stored in sock->file.
344 * Note that another thread may close file descriptor before we return
345 * from this function. We use the fact that now we do not refer
346 * to socket after mapping. If one day we will need it, this
347 * function will increment ref. count on file by 1.
348 *
349 * In any case returned fd MAY BE not valid!
350 * This race condition is unavoidable
351 * with shared fd spaces, we cannot solve it inside kernel,
352 * but we take care of internal coherence yet.
353 */
354
aab174f0 355struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 356{
7cbe66b6 357 struct qstr name = { .name = "" };
2c48b9c4 358 struct path path;
7cbe66b6 359 struct file *file;
1da177e4 360
600e1779
MY
361 if (dname) {
362 name.name = dname;
363 name.len = strlen(name.name);
364 } else if (sock->sk) {
365 name.name = sock->sk->sk_prot_creator->name;
366 name.len = strlen(name.name);
367 }
4b936885 368 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
369 if (unlikely(!path.dentry))
370 return ERR_PTR(-ENOMEM);
2c48b9c4 371 path.mnt = mntget(sock_mnt);
39d8c1b6 372
2c48b9c4 373 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 374
2c48b9c4 375 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 376 &socket_file_ops);
b5ffe634 377 if (IS_ERR(file)) {
cc3808f8 378 /* drop dentry, keep inode */
c5ef6035 379 ihold(d_inode(path.dentry));
2c48b9c4 380 path_put(&path);
39b65252 381 return file;
cc3808f8
AV
382 }
383
384 sock->file = file;
77d27200 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 386 file->private_data = sock;
28407630 387 return file;
39d8c1b6 388}
56b31d1c 389EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 390
56b31d1c 391static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
392{
393 struct file *newfile;
28407630
AV
394 int fd = get_unused_fd_flags(flags);
395 if (unlikely(fd < 0))
396 return fd;
39d8c1b6 397
aab174f0 398 newfile = sock_alloc_file(sock, flags, NULL);
28407630 399 if (likely(!IS_ERR(newfile))) {
39d8c1b6 400 fd_install(fd, newfile);
28407630
AV
401 return fd;
402 }
7cbe66b6 403
28407630
AV
404 put_unused_fd(fd);
405 return PTR_ERR(newfile);
1da177e4
LT
406}
407
406a3c63 408struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca 415}
406a3c63 416EXPORT_SYMBOL(sock_from_file);
6cb153ca 417
1da177e4 418/**
c6d409cf 419 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
c6d409cf 447EXPORT_SYMBOL(sockfd_lookup);
1da177e4 448
6cb153ca
BL
449static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
450{
00e188ef 451 struct fd f = fdget(fd);
6cb153ca
BL
452 struct socket *sock;
453
3672558c 454 *err = -EBADF;
00e188ef
AV
455 if (f.file) {
456 sock = sock_from_file(f.file, err);
457 if (likely(sock)) {
458 *fput_needed = f.flags;
6cb153ca 459 return sock;
00e188ef
AV
460 }
461 fdput(f);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
600e1779
MY
466#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
467#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
468#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
469static ssize_t sockfs_getxattr(struct dentry *dentry,
470 const char *name, void *value, size_t size)
471{
472 const char *proto_name;
473 size_t proto_size;
474 int error;
475
476 error = -ENODATA;
477 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
478 proto_name = dentry->d_name.name;
479 proto_size = strlen(proto_name);
480
481 if (value) {
482 error = -ERANGE;
483 if (proto_size + 1 > size)
484 goto out;
485
486 strncpy(value, proto_name, proto_size + 1);
487 }
488 error = proto_size + 1;
489 }
490
491out:
492 return error;
493}
494
495static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
496 size_t size)
497{
498 ssize_t len;
499 ssize_t used = 0;
500
c5ef6035 501 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
502 if (len < 0)
503 return len;
504 used += len;
505 if (buffer) {
506 if (size < used)
507 return -ERANGE;
508 buffer += len;
509 }
510
511 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
512 used += len;
513 if (buffer) {
514 if (size < used)
515 return -ERANGE;
516 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
517 buffer += len;
518 }
519
520 return used;
521}
522
523static const struct inode_operations sockfs_inode_ops = {
524 .getxattr = sockfs_getxattr,
525 .listxattr = sockfs_listxattr,
526};
527
1da177e4
LT
528/**
529 * sock_alloc - allocate a socket
89bddce5 530 *
1da177e4
LT
531 * Allocate a new inode and socket object. The two are bound together
532 * and initialised. The socket is then returned. If we are out of inodes
533 * NULL is returned.
534 */
535
f4a00aac 536struct socket *sock_alloc(void)
1da177e4 537{
89bddce5
SH
538 struct inode *inode;
539 struct socket *sock;
1da177e4 540
a209dfc7 541 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
542 if (!inode)
543 return NULL;
544
545 sock = SOCKET_I(inode);
546
29a020d3 547 kmemcheck_annotate_bitfield(sock, type);
85fe4025 548 inode->i_ino = get_next_ino();
89bddce5 549 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
550 inode->i_uid = current_fsuid();
551 inode->i_gid = current_fsgid();
600e1779 552 inode->i_op = &sockfs_inode_ops;
1da177e4 553
19e8d69c 554 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
555 return sock;
556}
f4a00aac 557EXPORT_SYMBOL(sock_alloc);
1da177e4 558
1da177e4
LT
559/**
560 * sock_release - close a socket
561 * @sock: socket to close
562 *
563 * The socket is released from the protocol stack if it has a release
564 * callback, and the inode is then released if the socket is bound to
89bddce5 565 * an inode not a file.
1da177e4 566 */
89bddce5 567
1da177e4
LT
568void sock_release(struct socket *sock)
569{
570 if (sock->ops) {
571 struct module *owner = sock->ops->owner;
572
573 sock->ops->release(sock);
574 sock->ops = NULL;
575 module_put(owner);
576 }
577
eaefd110 578 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 579 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 580
19e8d69c 581 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
582 if (!sock->file) {
583 iput(SOCK_INODE(sock));
584 return;
585 }
89bddce5 586 sock->file = NULL;
1da177e4 587}
c6d409cf 588EXPORT_SYMBOL(sock_release);
1da177e4 589
67cc0d40 590void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 591{
140c55d4
ED
592 u8 flags = *tx_flags;
593
b9f40e21 594 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
595 flags |= SKBTX_HW_TSTAMP;
596
b9f40e21 597 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
598 flags |= SKBTX_SW_TSTAMP;
599
e7fd2885 600 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
601 flags |= SKBTX_SCHED_TSTAMP;
602
e1c8a607 603 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 604 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 605
140c55d4 606 *tx_flags = flags;
20d49473 607}
67cc0d40 608EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 609
d8725c86 610static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 611{
01e97e65 612 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
613 BUG_ON(ret == -EIOCBQUEUED);
614 return ret;
1da177e4
LT
615}
616
/*
 * Send @msg on @sock.  The security hook is consulted first; if it returns
 * non-zero that value is returned and nothing is sent.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
625
626int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
627 struct kvec *vec, size_t num, size_t size)
628{
6aa24814 629 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 630 return sock_sendmsg(sock, msg);
1da177e4 631}
c6d409cf 632EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 633
92f37fd2
ED
634/*
635 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
636 */
637void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
638 struct sk_buff *skb)
639{
20d49473 640 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 641 struct scm_timestamping tss;
20d49473
PO
642 int empty = 1;
643 struct skb_shared_hwtstamps *shhwtstamps =
644 skb_hwtstamps(skb);
645
646 /* Race occurred between timestamp enabling and packet
647 receiving. Fill in the current time for now. */
648 if (need_software_tstamp && skb->tstamp.tv64 == 0)
649 __net_timestamp(skb);
650
651 if (need_software_tstamp) {
652 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
653 struct timeval tv;
654 skb_get_timestamp(skb, &tv);
655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
656 sizeof(tv), &tv);
657 } else {
f24b9be5
WB
658 struct timespec ts;
659 skb_get_timestampns(skb, &ts);
20d49473 660 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 661 sizeof(ts), &ts);
20d49473
PO
662 }
663 }
664
f24b9be5 665 memset(&tss, 0, sizeof(tss));
c199105d 666 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 667 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 668 empty = 0;
4d276eb6 669 if (shhwtstamps &&
b9f40e21 670 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 671 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 672 empty = 0;
20d49473
PO
673 if (!empty)
674 put_cmsg(msg, SOL_SOCKET,
f24b9be5 675 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 676}
7c81fd8b
ACM
677EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
678
6e3e939f
JB
679void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
680 struct sk_buff *skb)
681{
682 int ack;
683
684 if (!sock_flag(sk, SOCK_WIFI_STATUS))
685 return;
686 if (!skb->wifi_acked_valid)
687 return;
688
689 ack = skb->wifi_acked;
690
691 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
692}
693EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
694
11165f14 695static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
696 struct sk_buff *skb)
3b885787 697{
744d5a3e 698 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 699 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 700 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
701}
702
/* Emit the timestamp control messages first, then the drop counter. */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 710
1b784140
YX
711static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
712 size_t size, int flags)
1da177e4 713{
1b784140 714 return sock->ops->recvmsg(sock, msg, size, flags);
1da177e4
LT
715}
716
1b784140
YX
717int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
718 int flags)
a2e27255
ACM
719{
720 int err = security_socket_recvmsg(sock, msg, size, flags);
721
1b784140 722 return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
1da177e4 723}
c6d409cf 724EXPORT_SYMBOL(sock_recvmsg);
1da177e4 725
c1249c0a
ML
726/**
727 * kernel_recvmsg - Receive a message from a socket (kernel space)
728 * @sock: The socket to receive the message from
729 * @msg: Received message
730 * @vec: Input s/g array for message data
731 * @num: Size of input s/g array
732 * @size: Number of bytes to read
733 * @flags: Message flags (MSG_DONTWAIT, etc...)
734 *
735 * On return the msg structure contains the scatter/gather array passed in the
736 * vec argument. The array is modified so that it consists of the unfilled
737 * portion of the original array.
738 *
739 * The returned value is the total number of bytes received, or an error.
740 */
89bddce5
SH
741int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
742 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
743{
744 mm_segment_t oldfs = get_fs();
745 int result;
746
6aa24814 747 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 748 set_fs(KERNEL_DS);
1da177e4
LT
749 result = sock_recvmsg(sock, msg, size, flags);
750 set_fs(oldfs);
751 return result;
752}
c6d409cf 753EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 754
ce1d4d3e
CH
755static ssize_t sock_sendpage(struct file *file, struct page *page,
756 int offset, size_t size, loff_t *ppos, int more)
1da177e4 757{
1da177e4
LT
758 struct socket *sock;
759 int flags;
760
ce1d4d3e
CH
761 sock = file->private_data;
762
35f9c09f
ED
763 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
764 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
765 flags |= more;
ce1d4d3e 766
e6949583 767 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 768}
1da177e4 769
9c55e01c 770static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 771 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
772 unsigned int flags)
773{
774 struct socket *sock = file->private_data;
775
997b37da
RDC
776 if (unlikely(!sock->ops->splice_read))
777 return -EINVAL;
778
9c55e01c
JA
779 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
780}
781
8ae5e030 782static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 783{
6d652330
AV
784 struct file *file = iocb->ki_filp;
785 struct socket *sock = file->private_data;
0345f931 786 struct msghdr msg = {.msg_iter = *to,
787 .msg_iocb = iocb};
8ae5e030 788 ssize_t res;
ce1d4d3e 789
8ae5e030
AV
790 if (file->f_flags & O_NONBLOCK)
791 msg.msg_flags = MSG_DONTWAIT;
792
793 if (iocb->ki_pos != 0)
1da177e4 794 return -ESPIPE;
027445c3 795
66ee59af 796 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
797 return 0;
798
237dae88 799 res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
8ae5e030
AV
800 *to = msg.msg_iter;
801 return res;
1da177e4
LT
802}
803
8ae5e030 804static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 805{
6d652330
AV
806 struct file *file = iocb->ki_filp;
807 struct socket *sock = file->private_data;
0345f931 808 struct msghdr msg = {.msg_iter = *from,
809 .msg_iocb = iocb};
8ae5e030 810 ssize_t res;
1da177e4 811
8ae5e030 812 if (iocb->ki_pos != 0)
ce1d4d3e 813 return -ESPIPE;
027445c3 814
8ae5e030
AV
815 if (file->f_flags & O_NONBLOCK)
816 msg.msg_flags = MSG_DONTWAIT;
817
6d652330
AV
818 if (sock->type == SOCK_SEQPACKET)
819 msg.msg_flags |= MSG_EOR;
820
d8725c86 821 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
822 *from = msg.msg_iter;
823 return res;
1da177e4
LT
824}
825
1da177e4
LT
826/*
827 * Atomic setting of ioctl hooks to avoid race
828 * with module unload.
829 */
830
4a3e2f71 831static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 832static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 833
881d966b 834void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 835{
4a3e2f71 836 mutex_lock(&br_ioctl_mutex);
1da177e4 837 br_ioctl_hook = hook;
4a3e2f71 838 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
839}
840EXPORT_SYMBOL(brioctl_set);
841
4a3e2f71 842static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 843static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 844
881d966b 845void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 846{
4a3e2f71 847 mutex_lock(&vlan_ioctl_mutex);
1da177e4 848 vlan_ioctl_hook = hook;
4a3e2f71 849 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
850}
851EXPORT_SYMBOL(vlan_ioctl_set);
852
4a3e2f71 853static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 854static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 855
89bddce5 856void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 857{
4a3e2f71 858 mutex_lock(&dlci_ioctl_mutex);
1da177e4 859 dlci_ioctl_hook = hook;
4a3e2f71 860 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
861}
862EXPORT_SYMBOL(dlci_ioctl_set);
863
6b96018b
AB
864static long sock_do_ioctl(struct net *net, struct socket *sock,
865 unsigned int cmd, unsigned long arg)
866{
867 int err;
868 void __user *argp = (void __user *)arg;
869
870 err = sock->ops->ioctl(sock, cmd, arg);
871
872 /*
873 * If this ioctl is unknown try to hand it down
874 * to the NIC driver.
875 */
876 if (err == -ENOIOCTLCMD)
877 err = dev_ioctl(net, cmd, argp);
878
879 return err;
880}
881
1da177e4
LT
882/*
883 * With an ioctl, arg may well be a user mode pointer, but we don't know
884 * what to do with it - that's up to the protocol still.
885 */
886
887static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
888{
889 struct socket *sock;
881d966b 890 struct sock *sk;
1da177e4
LT
891 void __user *argp = (void __user *)arg;
892 int pid, err;
881d966b 893 struct net *net;
1da177e4 894
b69aee04 895 sock = file->private_data;
881d966b 896 sk = sock->sk;
3b1e0a65 897 net = sock_net(sk);
1da177e4 898 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 899 err = dev_ioctl(net, cmd, argp);
1da177e4 900 } else
3d23e349 901#ifdef CONFIG_WEXT_CORE
1da177e4 902 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 903 err = dev_ioctl(net, cmd, argp);
1da177e4 904 } else
3d23e349 905#endif
89bddce5 906 switch (cmd) {
1da177e4
LT
907 case FIOSETOWN:
908 case SIOCSPGRP:
909 err = -EFAULT;
910 if (get_user(pid, (int __user *)argp))
911 break;
e0b93edd
JL
912 f_setown(sock->file, pid, 1);
913 err = 0;
1da177e4
LT
914 break;
915 case FIOGETOWN:
916 case SIOCGPGRP:
609d7fa9 917 err = put_user(f_getown(sock->file),
89bddce5 918 (int __user *)argp);
1da177e4
LT
919 break;
920 case SIOCGIFBR:
921 case SIOCSIFBR:
922 case SIOCBRADDBR:
923 case SIOCBRDELBR:
924 err = -ENOPKG;
925 if (!br_ioctl_hook)
926 request_module("bridge");
927
4a3e2f71 928 mutex_lock(&br_ioctl_mutex);
89bddce5 929 if (br_ioctl_hook)
881d966b 930 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 931 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
932 break;
933 case SIOCGIFVLAN:
934 case SIOCSIFVLAN:
935 err = -ENOPKG;
936 if (!vlan_ioctl_hook)
937 request_module("8021q");
938
4a3e2f71 939 mutex_lock(&vlan_ioctl_mutex);
1da177e4 940 if (vlan_ioctl_hook)
881d966b 941 err = vlan_ioctl_hook(net, argp);
4a3e2f71 942 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 943 break;
1da177e4
LT
944 case SIOCADDDLCI:
945 case SIOCDELDLCI:
946 err = -ENOPKG;
947 if (!dlci_ioctl_hook)
948 request_module("dlci");
949
7512cbf6
PE
950 mutex_lock(&dlci_ioctl_mutex);
951 if (dlci_ioctl_hook)
1da177e4 952 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 953 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
954 break;
955 default:
6b96018b 956 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 957 break;
89bddce5 958 }
1da177e4
LT
959 return err;
960}
961
962int sock_create_lite(int family, int type, int protocol, struct socket **res)
963{
964 int err;
965 struct socket *sock = NULL;
89bddce5 966
1da177e4
LT
967 err = security_socket_create(family, type, protocol, 1);
968 if (err)
969 goto out;
970
971 sock = sock_alloc();
972 if (!sock) {
973 err = -ENOMEM;
974 goto out;
975 }
976
1da177e4 977 sock->type = type;
7420ed23
VY
978 err = security_socket_post_create(sock, family, type, protocol, 1);
979 if (err)
980 goto out_release;
981
1da177e4
LT
982out:
983 *res = sock;
984 return err;
7420ed23
VY
985out_release:
986 sock_release(sock);
987 sock = NULL;
988 goto out;
1da177e4 989}
c6d409cf 990EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
991
992/* No kernel lock held - perfect */
89bddce5 993static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 994{
cbf55001 995 unsigned int busy_flag = 0;
1da177e4
LT
996 struct socket *sock;
997
998 /*
89bddce5 999 * We can't return errors to poll, so it's either yes or no.
1da177e4 1000 */
b69aee04 1001 sock = file->private_data;
2d48d67f 1002
cbf55001 1003 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1004 /* this socket can poll_ll so tell the system call */
cbf55001 1005 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1006
1007 /* once, only if requested by syscall */
cbf55001
ET
1008 if (wait && (wait->_key & POLL_BUSY_LOOP))
1009 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1010 }
1011
cbf55001 1012 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1013}
1014
89bddce5 1015static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1016{
b69aee04 1017 struct socket *sock = file->private_data;
1da177e4
LT
1018
1019 return sock->ops->mmap(file, sock, vma);
1020}
1021
/*
 * Release the socket embedded in @inode.  @filp is not used.
 * Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1027
1028/*
1029 * Update the socket async list
1030 *
1031 * Fasync_list locking strategy.
1032 *
1033 * 1. fasync_list is modified only under process context socket lock
1034 * i.e. under semaphore.
1035 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1036 * or under socket lock
1da177e4
LT
1037 */
1038
1039static int sock_fasync(int fd, struct file *filp, int on)
1040{
989a2979
ED
1041 struct socket *sock = filp->private_data;
1042 struct sock *sk = sock->sk;
eaefd110 1043 struct socket_wq *wq;
1da177e4 1044
989a2979 1045 if (sk == NULL)
1da177e4 1046 return -EINVAL;
1da177e4
LT
1047
1048 lock_sock(sk);
eaefd110
ED
1049 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1050 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1051
eaefd110 1052 if (!wq->fasync_list)
989a2979
ED
1053 sock_reset_flag(sk, SOCK_FASYNC);
1054 else
bcdce719 1055 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1056
989a2979 1057 release_sock(sk);
1da177e4
LT
1058 return 0;
1059}
1060
ceb5d58b 1061/* This function may be called only under rcu_lock */
1da177e4 1062
ceb5d58b 1063int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1064{
ceb5d58b 1065 if (!wq || !wq->fasync_list)
1da177e4 1066 return -1;
ceb5d58b 1067
89bddce5 1068 switch (how) {
8d8ad9d7 1069 case SOCK_WAKE_WAITD:
ceb5d58b 1070 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1071 break;
1072 goto call_kill;
8d8ad9d7 1073 case SOCK_WAKE_SPACE:
ceb5d58b 1074 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1075 break;
1076 /* fall through */
8d8ad9d7 1077 case SOCK_WAKE_IO:
89bddce5 1078call_kill:
43815482 1079 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1080 break;
8d8ad9d7 1081 case SOCK_WAKE_URG:
43815482 1082 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1083 }
ceb5d58b 1084
1da177e4
LT
1085 return 0;
1086}
c6d409cf 1087EXPORT_SYMBOL(sock_wake_async);
1da177e4 1088
721db93a 1089int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1090 struct socket **res, int kern)
1da177e4
LT
1091{
1092 int err;
1093 struct socket *sock;
55737fda 1094 const struct net_proto_family *pf;
1da177e4
LT
1095
1096 /*
89bddce5 1097 * Check protocol is in range
1da177e4
LT
1098 */
1099 if (family < 0 || family >= NPROTO)
1100 return -EAFNOSUPPORT;
1101 if (type < 0 || type >= SOCK_MAX)
1102 return -EINVAL;
1103
1104 /* Compatibility.
1105
1106 This uglymoron is moved from INET layer to here to avoid
1107 deadlock in module load.
1108 */
1109 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1110 static int warned;
1da177e4
LT
1111 if (!warned) {
1112 warned = 1;
3410f22e
YY
1113 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1114 current->comm);
1da177e4
LT
1115 }
1116 family = PF_PACKET;
1117 }
1118
1119 err = security_socket_create(family, type, protocol, kern);
1120 if (err)
1121 return err;
89bddce5 1122
55737fda
SH
1123 /*
1124 * Allocate the socket and allow the family to set things up. if
1125 * the protocol is 0, the family is instructed to select an appropriate
1126 * default.
1127 */
1128 sock = sock_alloc();
1129 if (!sock) {
e87cc472 1130 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1131 return -ENFILE; /* Not exactly a match, but its the
1132 closest posix thing */
1133 }
1134
1135 sock->type = type;
1136
95a5afca 1137#ifdef CONFIG_MODULES
89bddce5
SH
1138 /* Attempt to load a protocol module if the find failed.
1139 *
1140 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1141 * requested real, full-featured networking support upon configuration.
1142 * Otherwise module support will break!
1143 */
190683a9 1144 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1145 request_module("net-pf-%d", family);
1da177e4
LT
1146#endif
1147
55737fda
SH
1148 rcu_read_lock();
1149 pf = rcu_dereference(net_families[family]);
1150 err = -EAFNOSUPPORT;
1151 if (!pf)
1152 goto out_release;
1da177e4
LT
1153
1154 /*
1155 * We will call the ->create function, that possibly is in a loadable
1156 * module, so we have to bump that loadable module refcnt first.
1157 */
55737fda 1158 if (!try_module_get(pf->owner))
1da177e4
LT
1159 goto out_release;
1160
55737fda
SH
1161 /* Now protected by module ref count */
1162 rcu_read_unlock();
1163
3f378b68 1164 err = pf->create(net, sock, protocol, kern);
55737fda 1165 if (err < 0)
1da177e4 1166 goto out_module_put;
a79af59e 1167
1da177e4
LT
1168 /*
1169 * Now to bump the refcnt of the [loadable] module that owns this
1170 * socket at sock_release time we decrement its refcnt.
1171 */
55737fda
SH
1172 if (!try_module_get(sock->ops->owner))
1173 goto out_module_busy;
1174
1da177e4
LT
1175 /*
1176 * Now that we're done with the ->create function, the [loadable]
1177 * module can have its refcnt decremented
1178 */
55737fda 1179 module_put(pf->owner);
7420ed23
VY
1180 err = security_socket_post_create(sock, family, type, protocol, kern);
1181 if (err)
3b185525 1182 goto out_sock_release;
55737fda 1183 *res = sock;
1da177e4 1184
55737fda
SH
1185 return 0;
1186
1187out_module_busy:
1188 err = -EAFNOSUPPORT;
1da177e4 1189out_module_put:
55737fda
SH
1190 sock->ops = NULL;
1191 module_put(pf->owner);
1192out_sock_release:
1da177e4 1193 sock_release(sock);
55737fda
SH
1194 return err;
1195
1196out_release:
1197 rcu_read_unlock();
1198 goto out_sock_release;
1da177e4 1199}
721db93a 1200EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1201
1202int sock_create(int family, int type, int protocol, struct socket **res)
1203{
1b8d7ae4 1204 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1205}
c6d409cf 1206EXPORT_SYMBOL(sock_create);
1da177e4 1207
/*
 * Create a socket in namespace @net, passing 1 as the 'kern' argument
 * of __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1213
3e0fa65f 1214SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1215{
1216 int retval;
1217 struct socket *sock;
a677a039
UD
1218 int flags;
1219
e38b36f3
UD
1220 /* Check the SOCK_* constants for consistency. */
1221 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1222 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1223 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1224 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1225
a677a039 1226 flags = type & ~SOCK_TYPE_MASK;
77d27200 1227 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1228 return -EINVAL;
1229 type &= SOCK_TYPE_MASK;
1da177e4 1230
aaca0bdc
UD
1231 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1232 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1233
1da177e4
LT
1234 retval = sock_create(family, type, protocol, &sock);
1235 if (retval < 0)
1236 goto out;
1237
77d27200 1238 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1239 if (retval < 0)
1240 goto out_release;
1241
1242out:
1243 /* It may be already another descriptor 8) Not kernel problem. */
1244 return retval;
1245
1246out_release:
1247 sock_release(sock);
1248 return retval;
1249}
1250
1251/*
1252 * Create a pair of connected sockets.
1253 */
1254
3e0fa65f
HC
1255SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1256 int __user *, usockvec)
1da177e4
LT
1257{
1258 struct socket *sock1, *sock2;
1259 int fd1, fd2, err;
db349509 1260 struct file *newfile1, *newfile2;
a677a039
UD
1261 int flags;
1262
1263 flags = type & ~SOCK_TYPE_MASK;
77d27200 1264 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1265 return -EINVAL;
1266 type &= SOCK_TYPE_MASK;
1da177e4 1267
aaca0bdc
UD
1268 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1269 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1270
1da177e4
LT
1271 /*
1272 * Obtain the first socket and check if the underlying protocol
1273 * supports the socketpair call.
1274 */
1275
1276 err = sock_create(family, type, protocol, &sock1);
1277 if (err < 0)
1278 goto out;
1279
1280 err = sock_create(family, type, protocol, &sock2);
1281 if (err < 0)
1282 goto out_release_1;
1283
1284 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1285 if (err < 0)
1da177e4
LT
1286 goto out_release_both;
1287
28407630 1288 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1289 if (unlikely(fd1 < 0)) {
1290 err = fd1;
db349509 1291 goto out_release_both;
bf3c23d1 1292 }
d73aa286 1293
28407630 1294 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1295 if (unlikely(fd2 < 0)) {
1296 err = fd2;
d73aa286 1297 goto out_put_unused_1;
28407630
AV
1298 }
1299
aab174f0 1300 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1301 if (IS_ERR(newfile1)) {
28407630 1302 err = PTR_ERR(newfile1);
d73aa286 1303 goto out_put_unused_both;
28407630
AV
1304 }
1305
aab174f0 1306 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1307 if (IS_ERR(newfile2)) {
1308 err = PTR_ERR(newfile2);
d73aa286 1309 goto out_fput_1;
db349509
AV
1310 }
1311
d73aa286
YD
1312 err = put_user(fd1, &usockvec[0]);
1313 if (err)
1314 goto out_fput_both;
1315
1316 err = put_user(fd2, &usockvec[1]);
1317 if (err)
1318 goto out_fput_both;
1319
157cf649 1320 audit_fd_pair(fd1, fd2);
d73aa286 1321
db349509
AV
1322 fd_install(fd1, newfile1);
1323 fd_install(fd2, newfile2);
1da177e4
LT
1324 /* fd1 and fd2 may be already another descriptors.
1325 * Not kernel problem.
1326 */
1327
d73aa286 1328 return 0;
1da177e4 1329
d73aa286
YD
1330out_fput_both:
1331 fput(newfile2);
1332 fput(newfile1);
1333 put_unused_fd(fd2);
1334 put_unused_fd(fd1);
1335 goto out;
1336
1337out_fput_1:
1338 fput(newfile1);
1339 put_unused_fd(fd2);
1340 put_unused_fd(fd1);
1341 sock_release(sock2);
1342 goto out;
1da177e4 1343
d73aa286
YD
1344out_put_unused_both:
1345 put_unused_fd(fd2);
1346out_put_unused_1:
1347 put_unused_fd(fd1);
1da177e4 1348out_release_both:
89bddce5 1349 sock_release(sock2);
1da177e4 1350out_release_1:
89bddce5 1351 sock_release(sock1);
1da177e4
LT
1352out:
1353 return err;
1354}
1355
1da177e4
LT
1356/*
1357 * Bind a name to a socket. Nothing much to do here since it's
1358 * the protocol's responsibility to handle the local address.
1359 *
1360 * We move the socket address to kernel space before we call
1361 * the protocol layer (having also checked the address is ok).
1362 */
1363
20f37034 1364SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1365{
1366 struct socket *sock;
230b1839 1367 struct sockaddr_storage address;
6cb153ca 1368 int err, fput_needed;
1da177e4 1369
89bddce5 1370 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1371 if (sock) {
43db362d 1372 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1373 if (err >= 0) {
1374 err = security_socket_bind(sock,
230b1839 1375 (struct sockaddr *)&address,
89bddce5 1376 addrlen);
6cb153ca
BL
1377 if (!err)
1378 err = sock->ops->bind(sock,
89bddce5 1379 (struct sockaddr *)
230b1839 1380 &address, addrlen);
1da177e4 1381 }
6cb153ca 1382 fput_light(sock->file, fput_needed);
89bddce5 1383 }
1da177e4
LT
1384 return err;
1385}
1386
1da177e4
LT
1387/*
1388 * Perform a listen. Basically, we allow the protocol to do anything
1389 * necessary for a listen, and if that works, we mark the socket as
1390 * ready for listening.
1391 */
1392
3e0fa65f 1393SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1394{
1395 struct socket *sock;
6cb153ca 1396 int err, fput_needed;
b8e1f9b5 1397 int somaxconn;
89bddce5
SH
1398
1399 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1400 if (sock) {
8efa6e93 1401 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1402 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1403 backlog = somaxconn;
1da177e4
LT
1404
1405 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1406 if (!err)
1407 err = sock->ops->listen(sock, backlog);
1da177e4 1408
6cb153ca 1409 fput_light(sock->file, fput_needed);
1da177e4
LT
1410 }
1411 return err;
1412}
1413
1da177e4
LT
1414/*
1415 * For accept, we attempt to create a new socket, set up the link
1416 * with the client, wake up the client, then return the new
1417 * connected fd. We collect the address of the connector in kernel
1418 * space and move it to user at the very end. This is unclean because
1419 * we open the socket then return an error.
1420 *
1421 * 1003.1g adds the ability to recvmsg() to query connection pending
1422 * status to recvmsg. We need to add that support in a way thats
1423 * clean when we restucture accept also.
1424 */
1425
20f37034
HC
1426SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1427 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1428{
1429 struct socket *sock, *newsock;
39d8c1b6 1430 struct file *newfile;
6cb153ca 1431 int err, len, newfd, fput_needed;
230b1839 1432 struct sockaddr_storage address;
1da177e4 1433
77d27200 1434 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1435 return -EINVAL;
1436
1437 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1438 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1439
6cb153ca 1440 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1441 if (!sock)
1442 goto out;
1443
1444 err = -ENFILE;
c6d409cf
ED
1445 newsock = sock_alloc();
1446 if (!newsock)
1da177e4
LT
1447 goto out_put;
1448
1449 newsock->type = sock->type;
1450 newsock->ops = sock->ops;
1451
1da177e4
LT
1452 /*
1453 * We don't need try_module_get here, as the listening socket (sock)
1454 * has the protocol module (sock->ops->owner) held.
1455 */
1456 __module_get(newsock->ops->owner);
1457
28407630 1458 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1459 if (unlikely(newfd < 0)) {
1460 err = newfd;
9a1875e6
DM
1461 sock_release(newsock);
1462 goto out_put;
39d8c1b6 1463 }
aab174f0 1464 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1465 if (IS_ERR(newfile)) {
28407630
AV
1466 err = PTR_ERR(newfile);
1467 put_unused_fd(newfd);
1468 sock_release(newsock);
1469 goto out_put;
1470 }
39d8c1b6 1471
a79af59e
FF
1472 err = security_socket_accept(sock, newsock);
1473 if (err)
39d8c1b6 1474 goto out_fd;
a79af59e 1475
1da177e4
LT
1476 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1477 if (err < 0)
39d8c1b6 1478 goto out_fd;
1da177e4
LT
1479
1480 if (upeer_sockaddr) {
230b1839 1481 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1482 &len, 2) < 0) {
1da177e4 1483 err = -ECONNABORTED;
39d8c1b6 1484 goto out_fd;
1da177e4 1485 }
43db362d 1486 err = move_addr_to_user(&address,
230b1839 1487 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1488 if (err < 0)
39d8c1b6 1489 goto out_fd;
1da177e4
LT
1490 }
1491
1492 /* File flags are not inherited via accept() unlike another OSes. */
1493
39d8c1b6
DM
1494 fd_install(newfd, newfile);
1495 err = newfd;
1da177e4 1496
1da177e4 1497out_put:
6cb153ca 1498 fput_light(sock->file, fput_needed);
1da177e4
LT
1499out:
1500 return err;
39d8c1b6 1501out_fd:
9606a216 1502 fput(newfile);
39d8c1b6 1503 put_unused_fd(newfd);
1da177e4
LT
1504 goto out_put;
1505}
1506
20f37034
HC
1507SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1508 int __user *, upeer_addrlen)
aaca0bdc 1509{
de11defe 1510 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1511}
1512
1da177e4
LT
1513/*
1514 * Attempt to connect to a socket with the server address. The address
1515 * is in user space so we verify it is OK and move it to kernel space.
1516 *
1517 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1518 * break bindings
1519 *
1520 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1521 * other SEQPACKET protocols that take time to connect() as it doesn't
1522 * include the -EINPROGRESS status for such sockets.
1523 */
1524
20f37034
HC
1525SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1526 int, addrlen)
1da177e4
LT
1527{
1528 struct socket *sock;
230b1839 1529 struct sockaddr_storage address;
6cb153ca 1530 int err, fput_needed;
1da177e4 1531
6cb153ca 1532 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1533 if (!sock)
1534 goto out;
43db362d 1535 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1536 if (err < 0)
1537 goto out_put;
1538
89bddce5 1539 err =
230b1839 1540 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1541 if (err)
1542 goto out_put;
1543
230b1839 1544 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1545 sock->file->f_flags);
1546out_put:
6cb153ca 1547 fput_light(sock->file, fput_needed);
1da177e4
LT
1548out:
1549 return err;
1550}
1551
1552/*
1553 * Get the local address ('name') of a socket object. Move the obtained
1554 * name to user space.
1555 */
1556
20f37034
HC
1557SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1558 int __user *, usockaddr_len)
1da177e4
LT
1559{
1560 struct socket *sock;
230b1839 1561 struct sockaddr_storage address;
6cb153ca 1562 int len, err, fput_needed;
89bddce5 1563
6cb153ca 1564 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1565 if (!sock)
1566 goto out;
1567
1568 err = security_socket_getsockname(sock);
1569 if (err)
1570 goto out_put;
1571
230b1839 1572 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1573 if (err)
1574 goto out_put;
43db362d 1575 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1576
1577out_put:
6cb153ca 1578 fput_light(sock->file, fput_needed);
1da177e4
LT
1579out:
1580 return err;
1581}
1582
1583/*
1584 * Get the remote address ('name') of a socket object. Move the obtained
1585 * name to user space.
1586 */
1587
20f37034
HC
1588SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1589 int __user *, usockaddr_len)
1da177e4
LT
1590{
1591 struct socket *sock;
230b1839 1592 struct sockaddr_storage address;
6cb153ca 1593 int len, err, fput_needed;
1da177e4 1594
89bddce5
SH
1595 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1596 if (sock != NULL) {
1da177e4
LT
1597 err = security_socket_getpeername(sock);
1598 if (err) {
6cb153ca 1599 fput_light(sock->file, fput_needed);
1da177e4
LT
1600 return err;
1601 }
1602
89bddce5 1603 err =
230b1839 1604 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1605 1);
1da177e4 1606 if (!err)
43db362d 1607 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1608 usockaddr_len);
6cb153ca 1609 fput_light(sock->file, fput_needed);
1da177e4
LT
1610 }
1611 return err;
1612}
1613
1614/*
1615 * Send a datagram to a given address. We move the address into kernel
1616 * space and check the user space data area is readable before invoking
1617 * the protocol.
1618 */
1619
3e0fa65f 1620SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1621 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1622 int, addr_len)
1da177e4
LT
1623{
1624 struct socket *sock;
230b1839 1625 struct sockaddr_storage address;
1da177e4
LT
1626 int err;
1627 struct msghdr msg;
1628 struct iovec iov;
6cb153ca 1629 int fput_needed;
6cb153ca 1630
602bd0e9
AV
1631 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1632 if (unlikely(err))
1633 return err;
de0fa95c
PE
1634 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1635 if (!sock)
4387ff75 1636 goto out;
6cb153ca 1637
89bddce5 1638 msg.msg_name = NULL;
89bddce5
SH
1639 msg.msg_control = NULL;
1640 msg.msg_controllen = 0;
1641 msg.msg_namelen = 0;
6cb153ca 1642 if (addr) {
43db362d 1643 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1644 if (err < 0)
1645 goto out_put;
230b1839 1646 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1647 msg.msg_namelen = addr_len;
1da177e4
LT
1648 }
1649 if (sock->file->f_flags & O_NONBLOCK)
1650 flags |= MSG_DONTWAIT;
1651 msg.msg_flags = flags;
d8725c86 1652 err = sock_sendmsg(sock, &msg);
1da177e4 1653
89bddce5 1654out_put:
de0fa95c 1655 fput_light(sock->file, fput_needed);
4387ff75 1656out:
1da177e4
LT
1657 return err;
1658}
1659
1660/*
89bddce5 1661 * Send a datagram down a socket.
1da177e4
LT
1662 */
1663
3e0fa65f 1664SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1665 unsigned int, flags)
1da177e4
LT
1666{
1667 return sys_sendto(fd, buff, len, flags, NULL, 0);
1668}
1669
1670/*
89bddce5 1671 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1672 * sender. We verify the buffers are writable and if needed move the
1673 * sender address from kernel to user space.
1674 */
1675
3e0fa65f 1676SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1677 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1678 int __user *, addr_len)
1da177e4
LT
1679{
1680 struct socket *sock;
1681 struct iovec iov;
1682 struct msghdr msg;
230b1839 1683 struct sockaddr_storage address;
89bddce5 1684 int err, err2;
6cb153ca
BL
1685 int fput_needed;
1686
602bd0e9
AV
1687 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1688 if (unlikely(err))
1689 return err;
de0fa95c 1690 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1691 if (!sock)
de0fa95c 1692 goto out;
1da177e4 1693
89bddce5
SH
1694 msg.msg_control = NULL;
1695 msg.msg_controllen = 0;
f3d33426
HFS
1696 /* Save some cycles and don't copy the address if not needed */
1697 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1698 /* We assume all kernel code knows the size of sockaddr_storage */
1699 msg.msg_namelen = 0;
130ed5d1 1700 msg.msg_iocb = NULL;
1da177e4
LT
1701 if (sock->file->f_flags & O_NONBLOCK)
1702 flags |= MSG_DONTWAIT;
602bd0e9 1703 err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
1da177e4 1704
89bddce5 1705 if (err >= 0 && addr != NULL) {
43db362d 1706 err2 = move_addr_to_user(&address,
230b1839 1707 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1708 if (err2 < 0)
1709 err = err2;
1da177e4 1710 }
de0fa95c
PE
1711
1712 fput_light(sock->file, fput_needed);
4387ff75 1713out:
1da177e4
LT
1714 return err;
1715}
1716
1717/*
89bddce5 1718 * Receive a datagram from a socket.
1da177e4
LT
1719 */
1720
b7c0ddf5
JG
1721SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1722 unsigned int, flags)
1da177e4
LT
1723{
1724 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1725}
1726
1727/*
1728 * Set a socket option. Because we don't know the option lengths we have
1729 * to pass the user mode parameter for the protocols to sort out.
1730 */
1731
20f37034
HC
1732SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1733 char __user *, optval, int, optlen)
1da177e4 1734{
6cb153ca 1735 int err, fput_needed;
1da177e4
LT
1736 struct socket *sock;
1737
1738 if (optlen < 0)
1739 return -EINVAL;
89bddce5
SH
1740
1741 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1742 if (sock != NULL) {
1743 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1744 if (err)
1745 goto out_put;
1da177e4
LT
1746
1747 if (level == SOL_SOCKET)
89bddce5
SH
1748 err =
1749 sock_setsockopt(sock, level, optname, optval,
1750 optlen);
1da177e4 1751 else
89bddce5
SH
1752 err =
1753 sock->ops->setsockopt(sock, level, optname, optval,
1754 optlen);
6cb153ca
BL
1755out_put:
1756 fput_light(sock->file, fput_needed);
1da177e4
LT
1757 }
1758 return err;
1759}
1760
1761/*
1762 * Get a socket option. Because we don't know the option lengths we have
1763 * to pass a user mode parameter for the protocols to sort out.
1764 */
1765
20f37034
HC
1766SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1767 char __user *, optval, int __user *, optlen)
1da177e4 1768{
6cb153ca 1769 int err, fput_needed;
1da177e4
LT
1770 struct socket *sock;
1771
89bddce5
SH
1772 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1773 if (sock != NULL) {
6cb153ca
BL
1774 err = security_socket_getsockopt(sock, level, optname);
1775 if (err)
1776 goto out_put;
1da177e4
LT
1777
1778 if (level == SOL_SOCKET)
89bddce5
SH
1779 err =
1780 sock_getsockopt(sock, level, optname, optval,
1781 optlen);
1da177e4 1782 else
89bddce5
SH
1783 err =
1784 sock->ops->getsockopt(sock, level, optname, optval,
1785 optlen);
6cb153ca
BL
1786out_put:
1787 fput_light(sock->file, fput_needed);
1da177e4
LT
1788 }
1789 return err;
1790}
1791
1da177e4
LT
1792/*
1793 * Shutdown a socket.
1794 */
1795
754fe8d2 1796SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1797{
6cb153ca 1798 int err, fput_needed;
1da177e4
LT
1799 struct socket *sock;
1800
89bddce5
SH
1801 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1802 if (sock != NULL) {
1da177e4 1803 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1804 if (!err)
1805 err = sock->ops->shutdown(sock, how);
1806 fput_light(sock->file, fput_needed);
1da177e4
LT
1807 }
1808 return err;
1809}
1810
/*
 * Helper macros yielding the address of a msghdr field in either the
 * native or the compat (32-bit) layout; the fields involved have the
 * same type (int / unsigned) in both on our platforms.
 *
 * They expand in the caller's scope and rely on three names being
 * visible there: 'flags' (tested for MSG_CMSG_COMPAT), <msg> and
 * <msg>_compat.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1817
c71d8ebe
TH
/*
 * Destination address of the last successful send, kept by sendmmsg so
 * that ___sys_sendmsg() can skip the LSM check for a repeated address.
 */
struct used_address {
	struct sockaddr_storage name;	/* copy of the address bytes */
	unsigned int name_len;		/* valid bytes in name; UINT_MAX = none yet */
};
1822
da184284
AV
1823static int copy_msghdr_from_user(struct msghdr *kmsg,
1824 struct user_msghdr __user *umsg,
1825 struct sockaddr __user **save_addr,
1826 struct iovec **iov)
1661bf36 1827{
08adb7da
AV
1828 struct sockaddr __user *uaddr;
1829 struct iovec __user *uiov;
c0371da6 1830 size_t nr_segs;
08adb7da
AV
1831 ssize_t err;
1832
1833 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1834 __get_user(uaddr, &umsg->msg_name) ||
1835 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1836 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1837 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1838 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1839 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1840 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1841 return -EFAULT;
dbb490b9 1842
08adb7da 1843 if (!uaddr)
6a2a2b3a
AS
1844 kmsg->msg_namelen = 0;
1845
dbb490b9
ML
1846 if (kmsg->msg_namelen < 0)
1847 return -EINVAL;
1848
1661bf36 1849 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1850 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1851
1852 if (save_addr)
1853 *save_addr = uaddr;
1854
1855 if (uaddr && kmsg->msg_namelen) {
1856 if (!save_addr) {
1857 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1858 kmsg->msg_name);
1859 if (err < 0)
1860 return err;
1861 }
1862 } else {
1863 kmsg->msg_name = NULL;
1864 kmsg->msg_namelen = 0;
1865 }
1866
c0371da6 1867 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1868 return -EMSGSIZE;
1869
0345f931 1870 kmsg->msg_iocb = NULL;
1871
da184284
AV
1872 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1873 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1874}
1875
666547ff 1876static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1877 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1878 struct used_address *used_address,
1879 unsigned int allowed_msghdr_flags)
1da177e4 1880{
89bddce5
SH
1881 struct compat_msghdr __user *msg_compat =
1882 (struct compat_msghdr __user *)msg;
230b1839 1883 struct sockaddr_storage address;
1da177e4 1884 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1885 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1886 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1887 /* 20 is size of ipv6_pktinfo */
1da177e4 1888 unsigned char *ctl_buf = ctl;
d8725c86 1889 int ctl_len;
08adb7da 1890 ssize_t err;
89bddce5 1891
08adb7da 1892 msg_sys->msg_name = &address;
1da177e4 1893
08449320 1894 if (MSG_CMSG_COMPAT & flags)
08adb7da 1895 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1896 else
08adb7da 1897 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1898 if (err < 0)
da184284 1899 return err;
1da177e4
LT
1900
1901 err = -ENOBUFS;
1902
228e548e 1903 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1904 goto out_freeiov;
28a94d8f 1905 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1906 ctl_len = msg_sys->msg_controllen;
1da177e4 1907 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1908 err =
228e548e 1909 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1910 sizeof(ctl));
1da177e4
LT
1911 if (err)
1912 goto out_freeiov;
228e548e
AB
1913 ctl_buf = msg_sys->msg_control;
1914 ctl_len = msg_sys->msg_controllen;
1da177e4 1915 } else if (ctl_len) {
89bddce5 1916 if (ctl_len > sizeof(ctl)) {
1da177e4 1917 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1918 if (ctl_buf == NULL)
1da177e4
LT
1919 goto out_freeiov;
1920 }
1921 err = -EFAULT;
1922 /*
228e548e 1923 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1924 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1925 * checking falls down on this.
1926 */
fb8621bb 1927 if (copy_from_user(ctl_buf,
228e548e 1928 (void __user __force *)msg_sys->msg_control,
89bddce5 1929 ctl_len))
1da177e4 1930 goto out_freectl;
228e548e 1931 msg_sys->msg_control = ctl_buf;
1da177e4 1932 }
228e548e 1933 msg_sys->msg_flags = flags;
1da177e4
LT
1934
1935 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1936 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1937 /*
1938 * If this is sendmmsg() and current destination address is same as
1939 * previously succeeded address, omit asking LSM's decision.
1940 * used_address->name_len is initialized to UINT_MAX so that the first
1941 * destination address never matches.
1942 */
bc909d9d
MD
1943 if (used_address && msg_sys->msg_name &&
1944 used_address->name_len == msg_sys->msg_namelen &&
1945 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1946 used_address->name_len)) {
d8725c86 1947 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1948 goto out_freectl;
1949 }
d8725c86 1950 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1951 /*
1952 * If this is sendmmsg() and sending to current destination address was
1953 * successful, remember it.
1954 */
1955 if (used_address && err >= 0) {
1956 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1957 if (msg_sys->msg_name)
1958 memcpy(&used_address->name, msg_sys->msg_name,
1959 used_address->name_len);
c71d8ebe 1960 }
1da177e4
LT
1961
1962out_freectl:
89bddce5 1963 if (ctl_buf != ctl)
1da177e4
LT
1964 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1965out_freeiov:
da184284 1966 kfree(iov);
228e548e
AB
1967 return err;
1968}
1969
1970/*
1971 * BSD sendmsg interface
1972 */
1973
666547ff 1974long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1975{
1976 int fput_needed, err;
1977 struct msghdr msg_sys;
1be374a0
AL
1978 struct socket *sock;
1979
1be374a0 1980 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1981 if (!sock)
1982 goto out;
1983
28a94d8f 1984 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 1985
6cb153ca 1986 fput_light(sock->file, fput_needed);
89bddce5 1987out:
1da177e4
LT
1988 return err;
1989}
1990
666547ff 1991SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1992{
1993 if (flags & MSG_CMSG_COMPAT)
1994 return -EINVAL;
1995 return __sys_sendmsg(fd, msg, flags);
1996}
1997
228e548e
AB
1998/*
1999 * Linux sendmmsg interface
2000 */
2001
2002int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2003 unsigned int flags)
2004{
2005 int fput_needed, err, datagrams;
2006 struct socket *sock;
2007 struct mmsghdr __user *entry;
2008 struct compat_mmsghdr __user *compat_entry;
2009 struct msghdr msg_sys;
c71d8ebe 2010 struct used_address used_address;
f092276d 2011 unsigned int oflags = flags;
228e548e 2012
98382f41
AB
2013 if (vlen > UIO_MAXIOV)
2014 vlen = UIO_MAXIOV;
228e548e
AB
2015
2016 datagrams = 0;
2017
2018 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2019 if (!sock)
2020 return err;
2021
c71d8ebe 2022 used_address.name_len = UINT_MAX;
228e548e
AB
2023 entry = mmsg;
2024 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2025 err = 0;
f092276d 2026 flags |= MSG_BATCH;
228e548e
AB
2027
2028 while (datagrams < vlen) {
f092276d
TH
2029 if (datagrams == vlen - 1)
2030 flags = oflags;
2031
228e548e 2032 if (MSG_CMSG_COMPAT & flags) {
666547ff 2033 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2034 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2035 if (err < 0)
2036 break;
2037 err = __put_user(err, &compat_entry->msg_len);
2038 ++compat_entry;
2039 } else {
a7526eb5 2040 err = ___sys_sendmsg(sock,
666547ff 2041 (struct user_msghdr __user *)entry,
28a94d8f 2042 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2043 if (err < 0)
2044 break;
2045 err = put_user(err, &entry->msg_len);
2046 ++entry;
2047 }
2048
2049 if (err)
2050 break;
2051 ++datagrams;
a78cb84c 2052 cond_resched();
228e548e
AB
2053 }
2054
228e548e
AB
2055 fput_light(sock->file, fput_needed);
2056
728ffb86
AB
2057 /* We only return an error if no datagrams were able to be sent */
2058 if (datagrams != 0)
228e548e
AB
2059 return datagrams;
2060
228e548e
AB
2061 return err;
2062}
2063
2064SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2065 unsigned int, vlen, unsigned int, flags)
2066{
1be374a0
AL
2067 if (flags & MSG_CMSG_COMPAT)
2068 return -EINVAL;
228e548e
AB
2069 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2070}
2071
666547ff 2072static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2073 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2074{
89bddce5
SH
2075 struct compat_msghdr __user *msg_compat =
2076 (struct compat_msghdr __user *)msg;
1da177e4 2077 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2078 struct iovec *iov = iovstack;
1da177e4 2079 unsigned long cmsg_ptr;
08adb7da
AV
2080 int total_len, len;
2081 ssize_t err;
1da177e4
LT
2082
2083 /* kernel mode address */
230b1839 2084 struct sockaddr_storage addr;
1da177e4
LT
2085
2086 /* user mode address pointers */
2087 struct sockaddr __user *uaddr;
08adb7da 2088 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2089
08adb7da 2090 msg_sys->msg_name = &addr;
1da177e4 2091
f3d33426 2092 if (MSG_CMSG_COMPAT & flags)
08adb7da 2093 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2094 else
08adb7da 2095 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2096 if (err < 0)
da184284
AV
2097 return err;
2098 total_len = iov_iter_count(&msg_sys->msg_iter);
1da177e4 2099
a2e27255
ACM
2100 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2101 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2102
f3d33426
HFS
2103 /* We assume all kernel code knows the size of sockaddr_storage */
2104 msg_sys->msg_namelen = 0;
2105
1da177e4
LT
2106 if (sock->file->f_flags & O_NONBLOCK)
2107 flags |= MSG_DONTWAIT;
a2e27255
ACM
2108 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2109 total_len, flags);
1da177e4
LT
2110 if (err < 0)
2111 goto out_freeiov;
2112 len = err;
2113
2114 if (uaddr != NULL) {
43db362d 2115 err = move_addr_to_user(&addr,
a2e27255 2116 msg_sys->msg_namelen, uaddr,
89bddce5 2117 uaddr_len);
1da177e4
LT
2118 if (err < 0)
2119 goto out_freeiov;
2120 }
a2e27255 2121 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2122 COMPAT_FLAGS(msg));
1da177e4
LT
2123 if (err)
2124 goto out_freeiov;
2125 if (MSG_CMSG_COMPAT & flags)
a2e27255 2126 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2127 &msg_compat->msg_controllen);
2128 else
a2e27255 2129 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2130 &msg->msg_controllen);
2131 if (err)
2132 goto out_freeiov;
2133 err = len;
2134
2135out_freeiov:
da184284 2136 kfree(iov);
a2e27255
ACM
2137 return err;
2138}
2139
2140/*
2141 * BSD recvmsg interface
2142 */
2143
666547ff 2144long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2145{
2146 int fput_needed, err;
2147 struct msghdr msg_sys;
1be374a0
AL
2148 struct socket *sock;
2149
1be374a0 2150 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2151 if (!sock)
2152 goto out;
2153
a7526eb5 2154 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2155
6cb153ca 2156 fput_light(sock->file, fput_needed);
1da177e4
LT
2157out:
2158 return err;
2159}
2160
666547ff 2161SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2162 unsigned int, flags)
2163{
2164 if (flags & MSG_CMSG_COMPAT)
2165 return -EINVAL;
2166 return __sys_recvmsg(fd, msg, flags);
2167}
2168
a2e27255
ACM
2169/*
2170 * Linux recvmmsg interface
2171 */
2172
2173int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2174 unsigned int flags, struct timespec *timeout)
2175{
2176 int fput_needed, err, datagrams;
2177 struct socket *sock;
2178 struct mmsghdr __user *entry;
d7256d0e 2179 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2180 struct msghdr msg_sys;
2181 struct timespec end_time;
2182
2183 if (timeout &&
2184 poll_select_set_timeout(&end_time, timeout->tv_sec,
2185 timeout->tv_nsec))
2186 return -EINVAL;
2187
2188 datagrams = 0;
2189
2190 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2191 if (!sock)
2192 return err;
2193
2194 err = sock_error(sock->sk);
2195 if (err)
2196 goto out_put;
2197
2198 entry = mmsg;
d7256d0e 2199 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2200
2201 while (datagrams < vlen) {
2202 /*
2203 * No need to ask LSM for more than the first datagram.
2204 */
d7256d0e 2205 if (MSG_CMSG_COMPAT & flags) {
666547ff 2206 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2207 &msg_sys, flags & ~MSG_WAITFORONE,
2208 datagrams);
d7256d0e
JMG
2209 if (err < 0)
2210 break;
2211 err = __put_user(err, &compat_entry->msg_len);
2212 ++compat_entry;
2213 } else {
a7526eb5 2214 err = ___sys_recvmsg(sock,
666547ff 2215 (struct user_msghdr __user *)entry,
a7526eb5
AL
2216 &msg_sys, flags & ~MSG_WAITFORONE,
2217 datagrams);
d7256d0e
JMG
2218 if (err < 0)
2219 break;
2220 err = put_user(err, &entry->msg_len);
2221 ++entry;
2222 }
2223
a2e27255
ACM
2224 if (err)
2225 break;
a2e27255
ACM
2226 ++datagrams;
2227
71c5c159
BB
2228 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2229 if (flags & MSG_WAITFORONE)
2230 flags |= MSG_DONTWAIT;
2231
a2e27255
ACM
2232 if (timeout) {
2233 ktime_get_ts(timeout);
2234 *timeout = timespec_sub(end_time, *timeout);
2235 if (timeout->tv_sec < 0) {
2236 timeout->tv_sec = timeout->tv_nsec = 0;
2237 break;
2238 }
2239
2240 /* Timeout, return less than vlen datagrams */
2241 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2242 break;
2243 }
2244
2245 /* Out of band data, return right away */
2246 if (msg_sys.msg_flags & MSG_OOB)
2247 break;
a78cb84c 2248 cond_resched();
a2e27255
ACM
2249 }
2250
2251out_put:
2252 fput_light(sock->file, fput_needed);
1da177e4 2253
a2e27255
ACM
2254 if (err == 0)
2255 return datagrams;
2256
2257 if (datagrams != 0) {
2258 /*
2259 * We may return less entries than requested (vlen) if the
2260 * sock is non block and there aren't enough datagrams...
2261 */
2262 if (err != -EAGAIN) {
2263 /*
2264 * ... or if recvmsg returns an error after we
2265 * received some datagrams, where we record the
2266 * error to return on the next call or if the
2267 * app asks about it using getsockopt(SO_ERROR).
2268 */
2269 sock->sk->sk_err = -err;
2270 }
2271
2272 return datagrams;
2273 }
2274
2275 return err;
2276}
2277
2278SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2279 unsigned int, vlen, unsigned int, flags,
2280 struct timespec __user *, timeout)
2281{
2282 int datagrams;
2283 struct timespec timeout_sys;
2284
1be374a0
AL
2285 if (flags & MSG_CMSG_COMPAT)
2286 return -EINVAL;
2287
a2e27255
ACM
2288 if (!timeout)
2289 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2290
2291 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2292 return -EFAULT;
2293
2294 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2295
2296 if (datagrams > 0 &&
2297 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2298 datagrams = -EFAULT;
2299
2300 return datagrams;
2301}
2302
2303#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number.  Slot 0 is a placeholder: call numbers below 1 are rejected
 * before the table is consulted.
 */
#define ARG_BYTES(n) ((n) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	ARG_BYTES(0),
	ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(2), ARG_BYTES(3),
	ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(4), ARG_BYTES(4), ARG_BYTES(4),
	ARG_BYTES(6), ARG_BYTES(6), ARG_BYTES(2), ARG_BYTES(5), ARG_BYTES(5),
	ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(4), ARG_BYTES(5), ARG_BYTES(4)
};

#undef ARG_BYTES
2314
2315/*
89bddce5 2316 * System call vectors.
1da177e4
LT
2317 *
2318 * Argument checking cleaned up. Saved 20% in size.
2319 * This function doesn't need to set the kernel lock because
89bddce5 2320 * it is set by the callees.
1da177e4
LT
2321 */
2322
3e0fa65f 2323SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2324{
2950fa9d 2325 unsigned long a[AUDITSC_ARGS];
89bddce5 2326 unsigned long a0, a1;
1da177e4 2327 int err;
47379052 2328 unsigned int len;
1da177e4 2329
228e548e 2330 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2331 return -EINVAL;
2332
47379052
AV
2333 len = nargs[call];
2334 if (len > sizeof(a))
2335 return -EINVAL;
2336
1da177e4 2337 /* copy_from_user should be SMP safe. */
47379052 2338 if (copy_from_user(a, args, len))
1da177e4 2339 return -EFAULT;
3ec3b2fb 2340
2950fa9d
CG
2341 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2342 if (err)
2343 return err;
3ec3b2fb 2344
89bddce5
SH
2345 a0 = a[0];
2346 a1 = a[1];
2347
2348 switch (call) {
2349 case SYS_SOCKET:
2350 err = sys_socket(a0, a1, a[2]);
2351 break;
2352 case SYS_BIND:
2353 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2354 break;
2355 case SYS_CONNECT:
2356 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2357 break;
2358 case SYS_LISTEN:
2359 err = sys_listen(a0, a1);
2360 break;
2361 case SYS_ACCEPT:
de11defe
UD
2362 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2363 (int __user *)a[2], 0);
89bddce5
SH
2364 break;
2365 case SYS_GETSOCKNAME:
2366 err =
2367 sys_getsockname(a0, (struct sockaddr __user *)a1,
2368 (int __user *)a[2]);
2369 break;
2370 case SYS_GETPEERNAME:
2371 err =
2372 sys_getpeername(a0, (struct sockaddr __user *)a1,
2373 (int __user *)a[2]);
2374 break;
2375 case SYS_SOCKETPAIR:
2376 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2377 break;
2378 case SYS_SEND:
2379 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2380 break;
2381 case SYS_SENDTO:
2382 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2383 (struct sockaddr __user *)a[4], a[5]);
2384 break;
2385 case SYS_RECV:
2386 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2387 break;
2388 case SYS_RECVFROM:
2389 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2390 (struct sockaddr __user *)a[4],
2391 (int __user *)a[5]);
2392 break;
2393 case SYS_SHUTDOWN:
2394 err = sys_shutdown(a0, a1);
2395 break;
2396 case SYS_SETSOCKOPT:
2397 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2398 break;
2399 case SYS_GETSOCKOPT:
2400 err =
2401 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2402 (int __user *)a[4]);
2403 break;
2404 case SYS_SENDMSG:
666547ff 2405 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2406 break;
228e548e
AB
2407 case SYS_SENDMMSG:
2408 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2409 break;
89bddce5 2410 case SYS_RECVMSG:
666547ff 2411 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2412 break;
a2e27255
ACM
2413 case SYS_RECVMMSG:
2414 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2415 (struct timespec __user *)a[4]);
2416 break;
de11defe
UD
2417 case SYS_ACCEPT4:
2418 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2419 (int __user *)a[2], a[3]);
aaca0bdc 2420 break;
89bddce5
SH
2421 default:
2422 err = -EINVAL;
2423 break;
1da177e4
LT
2424 }
2425 return err;
2426}
2427
89bddce5 2428#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2429
55737fda
SH
2430/**
2431 * sock_register - add a socket protocol handler
2432 * @ops: description of protocol
2433 *
1da177e4
LT
2434 * This function is called by a protocol handler that wants to
2435 * advertise its address family, and have it linked into the
e793c0f7 2436 * socket interface. The value ops->family corresponds to the
55737fda 2437 * socket system call protocol family.
1da177e4 2438 */
f0fd27d4 2439int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2440{
2441 int err;
2442
2443 if (ops->family >= NPROTO) {
3410f22e 2444 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2445 return -ENOBUFS;
2446 }
55737fda
SH
2447
2448 spin_lock(&net_family_lock);
190683a9
ED
2449 if (rcu_dereference_protected(net_families[ops->family],
2450 lockdep_is_held(&net_family_lock)))
55737fda
SH
2451 err = -EEXIST;
2452 else {
cf778b00 2453 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2454 err = 0;
2455 }
55737fda
SH
2456 spin_unlock(&net_family_lock);
2457
3410f22e 2458 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2459 return err;
2460}
c6d409cf 2461EXPORT_SYMBOL(sock_register);
1da177e4 2462
55737fda
SH
2463/**
2464 * sock_unregister - remove a protocol handler
2465 * @family: protocol family to remove
2466 *
1da177e4
LT
2467 * This function is called by a protocol handler that wants to
2468 * remove its address family, and have it unlinked from the
55737fda
SH
2469 * new socket creation.
2470 *
2471 * If protocol handler is a module, then it can use module reference
2472 * counts to protect against new references. If protocol handler is not
2473 * a module then it needs to provide its own protection in
2474 * the ops->create routine.
1da177e4 2475 */
f0fd27d4 2476void sock_unregister(int family)
1da177e4 2477{
f0fd27d4 2478 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2479
55737fda 2480 spin_lock(&net_family_lock);
a9b3cd7f 2481 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2482 spin_unlock(&net_family_lock);
2483
2484 synchronize_rcu();
2485
3410f22e 2486 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2487}
c6d409cf 2488EXPORT_SYMBOL(sock_unregister);
1da177e4 2489
77d76ea3 2490static int __init sock_init(void)
1da177e4 2491{
b3e19d92 2492 int err;
2ca794e5
EB
2493 /*
2494 * Initialize the network sysctl infrastructure.
2495 */
2496 err = net_sysctl_init();
2497 if (err)
2498 goto out;
b3e19d92 2499
1da177e4 2500 /*
89bddce5 2501 * Initialize skbuff SLAB cache
1da177e4
LT
2502 */
2503 skb_init();
1da177e4
LT
2504
2505 /*
89bddce5 2506 * Initialize the protocols module.
1da177e4
LT
2507 */
2508
2509 init_inodecache();
b3e19d92
NP
2510
2511 err = register_filesystem(&sock_fs_type);
2512 if (err)
2513 goto out_fs;
1da177e4 2514 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2515 if (IS_ERR(sock_mnt)) {
2516 err = PTR_ERR(sock_mnt);
2517 goto out_mount;
2518 }
77d76ea3
AK
2519
2520 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2521 */
2522
2523#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2524 err = netfilter_init();
2525 if (err)
2526 goto out;
1da177e4 2527#endif
cbeb321a 2528
408eccce 2529 ptp_classifier_init();
c1f19b51 2530
b3e19d92
NP
2531out:
2532 return err;
2533
2534out_mount:
2535 unregister_filesystem(&sock_fs_type);
2536out_fs:
2537 goto out;
1da177e4
LT
2538}
2539
77d76ea3
AK
2540core_initcall(sock_init); /* early initcall */
2541
1da177e4
LT
2542#ifdef CONFIG_PROC_FS
2543void socket_seq_show(struct seq_file *seq)
2544{
2545 int cpu;
2546 int counter = 0;
2547
6f912042 2548 for_each_possible_cpu(cpu)
89bddce5 2549 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2550
2551 /* It can be negative, by the way. 8) */
2552 if (counter < 0)
2553 counter = 0;
2554
2555 seq_printf(seq, "sockets: used %d\n", counter);
2556}
89bddce5 2557#endif /* CONFIG_PROC_FS */
1da177e4 2558
89bbfc95 2559#ifdef CONFIG_COMPAT
6b96018b 2560static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2561 unsigned int cmd, void __user *up)
7a229387 2562{
7a229387
AB
2563 mm_segment_t old_fs = get_fs();
2564 struct timeval ktv;
2565 int err;
2566
2567 set_fs(KERNEL_DS);
6b96018b 2568 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2569 set_fs(old_fs);
644595f8 2570 if (!err)
ed6fe9d6 2571 err = compat_put_timeval(&ktv, up);
644595f8 2572
7a229387
AB
2573 return err;
2574}
2575
6b96018b 2576static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2577 unsigned int cmd, void __user *up)
7a229387 2578{
7a229387
AB
2579 mm_segment_t old_fs = get_fs();
2580 struct timespec kts;
2581 int err;
2582
2583 set_fs(KERNEL_DS);
6b96018b 2584 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2585 set_fs(old_fs);
644595f8 2586 if (!err)
ed6fe9d6 2587 err = compat_put_timespec(&kts, up);
644595f8 2588
7a229387
AB
2589 return err;
2590}
2591
6b96018b 2592static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2593{
2594 struct ifreq __user *uifr;
2595 int err;
2596
2597 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2598 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2599 return -EFAULT;
2600
6b96018b 2601 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2602 if (err)
2603 return err;
2604
6b96018b 2605 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2606 return -EFAULT;
2607
2608 return 0;
2609}
2610
6b96018b 2611static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2612{
6b96018b 2613 struct compat_ifconf ifc32;
7a229387
AB
2614 struct ifconf ifc;
2615 struct ifconf __user *uifc;
6b96018b 2616 struct compat_ifreq __user *ifr32;
7a229387
AB
2617 struct ifreq __user *ifr;
2618 unsigned int i, j;
2619 int err;
2620
6b96018b 2621 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2622 return -EFAULT;
2623
43da5f2e 2624 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2625 if (ifc32.ifcbuf == 0) {
2626 ifc32.ifc_len = 0;
2627 ifc.ifc_len = 0;
2628 ifc.ifc_req = NULL;
2629 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2630 } else {
c6d409cf
ED
2631 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2632 sizeof(struct ifreq);
7a229387
AB
2633 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2634 ifc.ifc_len = len;
2635 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2636 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2637 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2638 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2639 return -EFAULT;
2640 ifr++;
2641 ifr32++;
2642 }
2643 }
2644 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2645 return -EFAULT;
2646
6b96018b 2647 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2648 if (err)
2649 return err;
2650
2651 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2652 return -EFAULT;
2653
2654 ifr = ifc.ifc_req;
2655 ifr32 = compat_ptr(ifc32.ifcbuf);
2656 for (i = 0, j = 0;
c6d409cf
ED
2657 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2658 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2659 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2660 return -EFAULT;
2661 ifr32++;
2662 ifr++;
2663 }
2664
2665 if (ifc32.ifcbuf == 0) {
2666 /* Translate from 64-bit structure multiple to
2667 * a 32-bit one.
2668 */
2669 i = ifc.ifc_len;
6b96018b 2670 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2671 ifc32.ifc_len = i;
2672 } else {
2673 ifc32.ifc_len = i;
2674 }
6b96018b 2675 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2676 return -EFAULT;
2677
2678 return 0;
2679}
2680
6b96018b 2681static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2682{
3a7da39d
BH
2683 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2684 bool convert_in = false, convert_out = false;
2685 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2686 struct ethtool_rxnfc __user *rxnfc;
7a229387 2687 struct ifreq __user *ifr;
3a7da39d
BH
2688 u32 rule_cnt = 0, actual_rule_cnt;
2689 u32 ethcmd;
7a229387 2690 u32 data;
3a7da39d 2691 int ret;
7a229387 2692
3a7da39d
BH
2693 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2694 return -EFAULT;
7a229387 2695
3a7da39d
BH
2696 compat_rxnfc = compat_ptr(data);
2697
2698 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2699 return -EFAULT;
2700
3a7da39d
BH
2701 /* Most ethtool structures are defined without padding.
2702 * Unfortunately struct ethtool_rxnfc is an exception.
2703 */
2704 switch (ethcmd) {
2705 default:
2706 break;
2707 case ETHTOOL_GRXCLSRLALL:
2708 /* Buffer size is variable */
2709 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2710 return -EFAULT;
2711 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2712 return -ENOMEM;
2713 buf_size += rule_cnt * sizeof(u32);
2714 /* fall through */
2715 case ETHTOOL_GRXRINGS:
2716 case ETHTOOL_GRXCLSRLCNT:
2717 case ETHTOOL_GRXCLSRULE:
55664f32 2718 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2719 convert_out = true;
2720 /* fall through */
2721 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2722 buf_size += sizeof(struct ethtool_rxnfc);
2723 convert_in = true;
2724 break;
2725 }
2726
2727 ifr = compat_alloc_user_space(buf_size);
954b1244 2728 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2729
2730 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2731 return -EFAULT;
2732
3a7da39d
BH
2733 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2734 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2735 return -EFAULT;
2736
3a7da39d 2737 if (convert_in) {
127fe533 2738 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2739 * fs.ring_cookie and at the end of fs, but nowhere else.
2740 */
127fe533
AD
2741 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2742 sizeof(compat_rxnfc->fs.m_ext) !=
2743 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2744 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2745 BUILD_BUG_ON(
2746 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2747 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2748 offsetof(struct ethtool_rxnfc, fs.location) -
2749 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2750
2751 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2752 (void __user *)(&rxnfc->fs.m_ext + 1) -
2753 (void __user *)rxnfc) ||
3a7da39d
BH
2754 copy_in_user(&rxnfc->fs.ring_cookie,
2755 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2756 (void __user *)(&rxnfc->fs.location + 1) -
2757 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2758 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2759 sizeof(rxnfc->rule_cnt)))
2760 return -EFAULT;
2761 }
2762
2763 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2764 if (ret)
2765 return ret;
2766
2767 if (convert_out) {
2768 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2769 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2770 (const void __user *)rxnfc) ||
3a7da39d
BH
2771 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2772 &rxnfc->fs.ring_cookie,
954b1244
SH
2773 (const void __user *)(&rxnfc->fs.location + 1) -
2774 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2775 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2776 sizeof(rxnfc->rule_cnt)))
2777 return -EFAULT;
2778
2779 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2780 /* As an optimisation, we only copy the actual
2781 * number of rules that the underlying
2782 * function returned. Since Mallory might
2783 * change the rule count in user memory, we
2784 * check that it is less than the rule count
2785 * originally given (as the user buffer size),
2786 * which has been range-checked.
2787 */
2788 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2789 return -EFAULT;
2790 if (actual_rule_cnt < rule_cnt)
2791 rule_cnt = actual_rule_cnt;
2792 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2793 &rxnfc->rule_locs[0],
2794 rule_cnt * sizeof(u32)))
2795 return -EFAULT;
2796 }
2797 }
2798
2799 return 0;
7a229387
AB
2800}
2801
7a50a240
AB
2802static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2803{
2804 void __user *uptr;
2805 compat_uptr_t uptr32;
2806 struct ifreq __user *uifr;
2807
c6d409cf 2808 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2809 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2810 return -EFAULT;
2811
2812 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2813 return -EFAULT;
2814
2815 uptr = compat_ptr(uptr32);
2816
2817 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2818 return -EFAULT;
2819
2820 return dev_ioctl(net, SIOCWANDEV, uifr);
2821}
2822
6b96018b
AB
2823static int bond_ioctl(struct net *net, unsigned int cmd,
2824 struct compat_ifreq __user *ifr32)
7a229387
AB
2825{
2826 struct ifreq kifr;
7a229387
AB
2827 mm_segment_t old_fs;
2828 int err;
7a229387
AB
2829
2830 switch (cmd) {
2831 case SIOCBONDENSLAVE:
2832 case SIOCBONDRELEASE:
2833 case SIOCBONDSETHWADDR:
2834 case SIOCBONDCHANGEACTIVE:
6b96018b 2835 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2836 return -EFAULT;
2837
2838 old_fs = get_fs();
c6d409cf 2839 set_fs(KERNEL_DS);
c3f52ae6 2840 err = dev_ioctl(net, cmd,
2841 (struct ifreq __user __force *) &kifr);
c6d409cf 2842 set_fs(old_fs);
7a229387
AB
2843
2844 return err;
7a229387 2845 default:
07d106d0 2846 return -ENOIOCTLCMD;
ccbd6a5a 2847 }
7a229387
AB
2848}
2849
590d4693
BH
2850/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2851static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2852 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2853{
2854 struct ifreq __user *u_ifreq64;
7a229387
AB
2855 char tmp_buf[IFNAMSIZ];
2856 void __user *data64;
2857 u32 data32;
2858
2859 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2860 IFNAMSIZ))
2861 return -EFAULT;
417c3522 2862 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2863 return -EFAULT;
2864 data64 = compat_ptr(data32);
2865
2866 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2867
7a229387
AB
2868 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2869 IFNAMSIZ))
2870 return -EFAULT;
417c3522 2871 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2872 return -EFAULT;
2873
6b96018b 2874 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2875}
2876
6b96018b
AB
2877static int dev_ifsioc(struct net *net, struct socket *sock,
2878 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2879{
a2116ed2 2880 struct ifreq __user *uifr;
7a229387
AB
2881 int err;
2882
a2116ed2
AB
2883 uifr = compat_alloc_user_space(sizeof(*uifr));
2884 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2885 return -EFAULT;
2886
2887 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2888
7a229387
AB
2889 if (!err) {
2890 switch (cmd) {
2891 case SIOCGIFFLAGS:
2892 case SIOCGIFMETRIC:
2893 case SIOCGIFMTU:
2894 case SIOCGIFMEM:
2895 case SIOCGIFHWADDR:
2896 case SIOCGIFINDEX:
2897 case SIOCGIFADDR:
2898 case SIOCGIFBRDADDR:
2899 case SIOCGIFDSTADDR:
2900 case SIOCGIFNETMASK:
fab2532b 2901 case SIOCGIFPFLAGS:
7a229387 2902 case SIOCGIFTXQLEN:
fab2532b
AB
2903 case SIOCGMIIPHY:
2904 case SIOCGMIIREG:
a2116ed2 2905 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2906 err = -EFAULT;
2907 break;
2908 }
2909 }
2910 return err;
2911}
2912
a2116ed2
AB
2913static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2914 struct compat_ifreq __user *uifr32)
2915{
2916 struct ifreq ifr;
2917 struct compat_ifmap __user *uifmap32;
2918 mm_segment_t old_fs;
2919 int err;
2920
2921 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2922 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2923 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2924 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2925 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2926 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2927 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2928 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2929 if (err)
2930 return -EFAULT;
2931
2932 old_fs = get_fs();
c6d409cf 2933 set_fs(KERNEL_DS);
c3f52ae6 2934 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2935 set_fs(old_fs);
a2116ed2
AB
2936
2937 if (cmd == SIOCGIFMAP && !err) {
2938 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2939 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2940 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2941 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2942 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2943 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2944 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2945 if (err)
2946 err = -EFAULT;
2947 }
2948 return err;
2949}
2950
7a229387 2951struct rtentry32 {
c6d409cf 2952 u32 rt_pad1;
7a229387
AB
2953 struct sockaddr rt_dst; /* target address */
2954 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2955 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2956 unsigned short rt_flags;
2957 short rt_pad2;
2958 u32 rt_pad3;
2959 unsigned char rt_tos;
2960 unsigned char rt_class;
2961 short rt_pad4;
2962 short rt_metric; /* +1 for binary compatibility! */
7a229387 2963 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2964 u32 rt_mtu; /* per route MTU/Window */
2965 u32 rt_window; /* Window clamping */
7a229387
AB
2966 unsigned short rt_irtt; /* Initial RTT */
2967};
2968
2969struct in6_rtmsg32 {
2970 struct in6_addr rtmsg_dst;
2971 struct in6_addr rtmsg_src;
2972 struct in6_addr rtmsg_gateway;
2973 u32 rtmsg_type;
2974 u16 rtmsg_dst_len;
2975 u16 rtmsg_src_len;
2976 u32 rtmsg_metric;
2977 u32 rtmsg_info;
2978 u32 rtmsg_flags;
2979 s32 rtmsg_ifindex;
2980};
2981
6b96018b
AB
2982static int routing_ioctl(struct net *net, struct socket *sock,
2983 unsigned int cmd, void __user *argp)
7a229387
AB
2984{
2985 int ret;
2986 void *r = NULL;
2987 struct in6_rtmsg r6;
2988 struct rtentry r4;
2989 char devname[16];
2990 u32 rtdev;
2991 mm_segment_t old_fs = get_fs();
2992
6b96018b
AB
2993 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2994 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2995 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2996 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2997 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2998 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2999 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3000 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3001 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3002 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3003 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3004
3005 r = (void *) &r6;
3006 } else { /* ipv4 */
6b96018b 3007 struct rtentry32 __user *ur4 = argp;
c6d409cf 3008 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3009 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3010 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3011 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3012 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3013 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3014 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3015 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3016 if (rtdev) {
c6d409cf 3017 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3018 r4.rt_dev = (char __user __force *)devname;
3019 devname[15] = 0;
7a229387
AB
3020 } else
3021 r4.rt_dev = NULL;
3022
3023 r = (void *) &r4;
3024 }
3025
3026 if (ret) {
3027 ret = -EFAULT;
3028 goto out;
3029 }
3030
c6d409cf 3031 set_fs(KERNEL_DS);
6b96018b 3032 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3033 set_fs(old_fs);
7a229387
AB
3034
3035out:
7a229387
AB
3036 return ret;
3037}
3038
3039/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3040 * for some operations; this forces use of the newer bridge-utils that
25985edc 3041 * use compatible ioctls
7a229387 3042 */
6b96018b 3043static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3044{
6b96018b 3045 compat_ulong_t tmp;
7a229387 3046
6b96018b 3047 if (get_user(tmp, argp))
7a229387
AB
3048 return -EFAULT;
3049 if (tmp == BRCTL_GET_VERSION)
3050 return BRCTL_VERSION + 1;
3051 return -EINVAL;
3052}
3053
6b96018b
AB
3054static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3055 unsigned int cmd, unsigned long arg)
3056{
3057 void __user *argp = compat_ptr(arg);
3058 struct sock *sk = sock->sk;
3059 struct net *net = sock_net(sk);
7a229387 3060
6b96018b 3061 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3062 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3063
3064 switch (cmd) {
3065 case SIOCSIFBR:
3066 case SIOCGIFBR:
3067 return old_bridge_ioctl(argp);
3068 case SIOCGIFNAME:
3069 return dev_ifname32(net, argp);
3070 case SIOCGIFCONF:
3071 return dev_ifconf(net, argp);
3072 case SIOCETHTOOL:
3073 return ethtool_ioctl(net, argp);
7a50a240
AB
3074 case SIOCWANDEV:
3075 return compat_siocwandev(net, argp);
a2116ed2
AB
3076 case SIOCGIFMAP:
3077 case SIOCSIFMAP:
3078 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3079 case SIOCBONDENSLAVE:
3080 case SIOCBONDRELEASE:
3081 case SIOCBONDSETHWADDR:
6b96018b
AB
3082 case SIOCBONDCHANGEACTIVE:
3083 return bond_ioctl(net, cmd, argp);
3084 case SIOCADDRT:
3085 case SIOCDELRT:
3086 return routing_ioctl(net, sock, cmd, argp);
3087 case SIOCGSTAMP:
3088 return do_siocgstamp(net, sock, cmd, argp);
3089 case SIOCGSTAMPNS:
3090 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3091 case SIOCBONDSLAVEINFOQUERY:
3092 case SIOCBONDINFOQUERY:
a2116ed2 3093 case SIOCSHWTSTAMP:
fd468c74 3094 case SIOCGHWTSTAMP:
590d4693 3095 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3096
3097 case FIOSETOWN:
3098 case SIOCSPGRP:
3099 case FIOGETOWN:
3100 case SIOCGPGRP:
3101 case SIOCBRADDBR:
3102 case SIOCBRDELBR:
3103 case SIOCGIFVLAN:
3104 case SIOCSIFVLAN:
3105 case SIOCADDDLCI:
3106 case SIOCDELDLCI:
3107 return sock_ioctl(file, cmd, arg);
3108
3109 case SIOCGIFFLAGS:
3110 case SIOCSIFFLAGS:
3111 case SIOCGIFMETRIC:
3112 case SIOCSIFMETRIC:
3113 case SIOCGIFMTU:
3114 case SIOCSIFMTU:
3115 case SIOCGIFMEM:
3116 case SIOCSIFMEM:
3117 case SIOCGIFHWADDR:
3118 case SIOCSIFHWADDR:
3119 case SIOCADDMULTI:
3120 case SIOCDELMULTI:
3121 case SIOCGIFINDEX:
6b96018b
AB
3122 case SIOCGIFADDR:
3123 case SIOCSIFADDR:
3124 case SIOCSIFHWBROADCAST:
6b96018b 3125 case SIOCDIFADDR:
6b96018b
AB
3126 case SIOCGIFBRDADDR:
3127 case SIOCSIFBRDADDR:
3128 case SIOCGIFDSTADDR:
3129 case SIOCSIFDSTADDR:
3130 case SIOCGIFNETMASK:
3131 case SIOCSIFNETMASK:
3132 case SIOCSIFPFLAGS:
3133 case SIOCGIFPFLAGS:
3134 case SIOCGIFTXQLEN:
3135 case SIOCSIFTXQLEN:
3136 case SIOCBRADDIF:
3137 case SIOCBRDELIF:
9177efd3
AB
3138 case SIOCSIFNAME:
3139 case SIOCGMIIPHY:
3140 case SIOCGMIIREG:
3141 case SIOCSMIIREG:
6b96018b 3142 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3143
6b96018b
AB
3144 case SIOCSARP:
3145 case SIOCGARP:
3146 case SIOCDARP:
6b96018b 3147 case SIOCATMARK:
9177efd3
AB
3148 return sock_do_ioctl(net, sock, cmd, arg);
3149 }
3150
6b96018b
AB
3151 return -ENOIOCTLCMD;
3152}
7a229387 3153
95c96174 3154static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3155 unsigned long arg)
89bbfc95
SP
3156{
3157 struct socket *sock = file->private_data;
3158 int ret = -ENOIOCTLCMD;
87de87d5
DM
3159 struct sock *sk;
3160 struct net *net;
3161
3162 sk = sock->sk;
3163 net = sock_net(sk);
89bbfc95
SP
3164
3165 if (sock->ops->compat_ioctl)
3166 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3167
87de87d5
DM
3168 if (ret == -ENOIOCTLCMD &&
3169 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3170 ret = compat_wext_handle_ioctl(net, cmd, arg);
3171
6b96018b
AB
3172 if (ret == -ENOIOCTLCMD)
3173 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3174
89bbfc95
SP
3175 return ret;
3176}
3177#endif
3178
ac5a488e
SS
3179int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3180{
3181 return sock->ops->bind(sock, addr, addrlen);
3182}
c6d409cf 3183EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3184
3185int kernel_listen(struct socket *sock, int backlog)
3186{
3187 return sock->ops->listen(sock, backlog);
3188}
c6d409cf 3189EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3190
3191int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3192{
3193 struct sock *sk = sock->sk;
3194 int err;
3195
3196 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3197 newsock);
3198 if (err < 0)
3199 goto done;
3200
3201 err = sock->ops->accept(sock, *newsock, flags);
3202 if (err < 0) {
3203 sock_release(*newsock);
fa8705b0 3204 *newsock = NULL;
ac5a488e
SS
3205 goto done;
3206 }
3207
3208 (*newsock)->ops = sock->ops;
1b08534e 3209 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3210
3211done:
3212 return err;
3213}
c6d409cf 3214EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3215
3216int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3217 int flags)
ac5a488e
SS
3218{
3219 return sock->ops->connect(sock, addr, addrlen, flags);
3220}
c6d409cf 3221EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3222
3223int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3224 int *addrlen)
3225{
3226 return sock->ops->getname(sock, addr, addrlen, 0);
3227}
c6d409cf 3228EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3229
3230int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3231 int *addrlen)
3232{
3233 return sock->ops->getname(sock, addr, addrlen, 1);
3234}
c6d409cf 3235EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3236
3237int kernel_getsockopt(struct socket *sock, int level, int optname,
3238 char *optval, int *optlen)
3239{
3240 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3241 char __user *uoptval;
3242 int __user *uoptlen;
ac5a488e
SS
3243 int err;
3244
fb8621bb
NK
3245 uoptval = (char __user __force *) optval;
3246 uoptlen = (int __user __force *) optlen;
3247
ac5a488e
SS
3248 set_fs(KERNEL_DS);
3249 if (level == SOL_SOCKET)
fb8621bb 3250 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3251 else
fb8621bb
NK
3252 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3253 uoptlen);
ac5a488e
SS
3254 set_fs(oldfs);
3255 return err;
3256}
c6d409cf 3257EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3258
3259int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3260 char *optval, unsigned int optlen)
ac5a488e
SS
3261{
3262 mm_segment_t oldfs = get_fs();
fb8621bb 3263 char __user *uoptval;
ac5a488e
SS
3264 int err;
3265
fb8621bb
NK
3266 uoptval = (char __user __force *) optval;
3267
ac5a488e
SS
3268 set_fs(KERNEL_DS);
3269 if (level == SOL_SOCKET)
fb8621bb 3270 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3271 else
fb8621bb 3272 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3273 optlen);
3274 set_fs(oldfs);
3275 return err;
3276}
c6d409cf 3277EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3278
3279int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3280 size_t size, int flags)
3281{
3282 if (sock->ops->sendpage)
3283 return sock->ops->sendpage(sock, page, offset, size, flags);
3284
3285 return sock_no_sendpage(sock, page, offset, size, flags);
3286}
c6d409cf 3287EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3288
3289int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3290{
3291 mm_segment_t oldfs = get_fs();
3292 int err;
3293
3294 set_fs(KERNEL_DS);
3295 err = sock->ops->ioctl(sock, cmd, arg);
3296 set_fs(oldfs);
3297
3298 return err;
3299}
c6d409cf 3300EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3301
91cf45f0
TM
3302int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3303{
3304 return sock->ops->shutdown(sock, how);
3305}
91cf45f0 3306EXPORT_SYMBOL(kernel_sock_shutdown);