]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/socket.c
net: Make sock_alloc exportable
[mirror_ubuntu-bionic-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll sysctl values; only defined when CONFIG_NET_RX_BUSY_POLL is set. */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
c74a1cbb
AV
323static struct dentry *sockfs_mount(struct file_system_type *fs_type,
324 int flags, const char *dev_name, void *data)
325{
326 return mount_pseudo(fs_type, "socket:", &sockfs_ops,
327 &sockfs_dentry_operations, SOCKFS_MAGIC);
328}
329
330static struct vfsmount *sock_mnt __read_mostly;
331
332static struct file_system_type sock_fs_type = {
333 .name = "sockfs",
334 .mount = sockfs_mount,
335 .kill_sb = kill_anon_super,
336};
337
1da177e4
LT
338/*
339 * Obtains the first available file descriptor and sets it up for use.
340 *
39d8c1b6
DM
341 * These functions create file structures and maps them to fd space
342 * of the current process. On success it returns file descriptor
1da177e4
LT
343 * and file struct implicitly stored in sock->file.
344 * Note that another thread may close file descriptor before we return
345 * from this function. We use the fact that now we do not refer
346 * to socket after mapping. If one day we will need it, this
347 * function will increment ref. count on file by 1.
348 *
349 * In any case returned fd MAY BE not valid!
350 * This race condition is unavoidable
351 * with shared fd spaces, we cannot solve it inside kernel,
352 * but we take care of internal coherence yet.
353 */
354
aab174f0 355struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 356{
7cbe66b6 357 struct qstr name = { .name = "" };
2c48b9c4 358 struct path path;
7cbe66b6 359 struct file *file;
1da177e4 360
600e1779
MY
361 if (dname) {
362 name.name = dname;
363 name.len = strlen(name.name);
364 } else if (sock->sk) {
365 name.name = sock->sk->sk_prot_creator->name;
366 name.len = strlen(name.name);
367 }
4b936885 368 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
369 if (unlikely(!path.dentry))
370 return ERR_PTR(-ENOMEM);
2c48b9c4 371 path.mnt = mntget(sock_mnt);
39d8c1b6 372
2c48b9c4 373 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 374
2c48b9c4 375 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 376 &socket_file_ops);
b5ffe634 377 if (IS_ERR(file)) {
cc3808f8 378 /* drop dentry, keep inode */
c5ef6035 379 ihold(d_inode(path.dentry));
2c48b9c4 380 path_put(&path);
39b65252 381 return file;
cc3808f8
AV
382 }
383
384 sock->file = file;
77d27200 385 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 386 file->private_data = sock;
28407630 387 return file;
39d8c1b6 388}
56b31d1c 389EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 390
56b31d1c 391static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
392{
393 struct file *newfile;
28407630
AV
394 int fd = get_unused_fd_flags(flags);
395 if (unlikely(fd < 0))
396 return fd;
39d8c1b6 397
aab174f0 398 newfile = sock_alloc_file(sock, flags, NULL);
28407630 399 if (likely(!IS_ERR(newfile))) {
39d8c1b6 400 fd_install(fd, newfile);
28407630
AV
401 return fd;
402 }
7cbe66b6 403
28407630
AV
404 put_unused_fd(fd);
405 return PTR_ERR(newfile);
1da177e4
LT
406}
407
406a3c63 408struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 409{
6cb153ca
BL
410 if (file->f_op == &socket_file_ops)
411 return file->private_data; /* set in sock_map_fd */
412
23bb80d2
ED
413 *err = -ENOTSOCK;
414 return NULL;
6cb153ca 415}
406a3c63 416EXPORT_SYMBOL(sock_from_file);
6cb153ca 417
1da177e4 418/**
c6d409cf 419 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
420 * @fd: file handle
421 * @err: pointer to an error code return
422 *
423 * The file handle passed in is locked and the socket it is bound
424 * too is returned. If an error occurs the err pointer is overwritten
425 * with a negative errno code and NULL is returned. The function checks
426 * for both invalid handles and passing a handle which is not a socket.
427 *
428 * On a success the socket object pointer is returned.
429 */
430
431struct socket *sockfd_lookup(int fd, int *err)
432{
433 struct file *file;
1da177e4
LT
434 struct socket *sock;
435
89bddce5
SH
436 file = fget(fd);
437 if (!file) {
1da177e4
LT
438 *err = -EBADF;
439 return NULL;
440 }
89bddce5 441
6cb153ca
BL
442 sock = sock_from_file(file, err);
443 if (!sock)
1da177e4 444 fput(file);
6cb153ca
BL
445 return sock;
446}
c6d409cf 447EXPORT_SYMBOL(sockfd_lookup);
1da177e4 448
6cb153ca
BL
449static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
450{
00e188ef 451 struct fd f = fdget(fd);
6cb153ca
BL
452 struct socket *sock;
453
3672558c 454 *err = -EBADF;
00e188ef
AV
455 if (f.file) {
456 sock = sock_from_file(f.file, err);
457 if (likely(sock)) {
458 *fput_needed = f.flags;
6cb153ca 459 return sock;
00e188ef
AV
460 }
461 fdput(f);
1da177e4 462 }
6cb153ca 463 return NULL;
1da177e4
LT
464}
465
600e1779
MY
466#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
467#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
468#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
469static ssize_t sockfs_getxattr(struct dentry *dentry,
470 const char *name, void *value, size_t size)
471{
472 const char *proto_name;
473 size_t proto_size;
474 int error;
475
476 error = -ENODATA;
477 if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
478 proto_name = dentry->d_name.name;
479 proto_size = strlen(proto_name);
480
481 if (value) {
482 error = -ERANGE;
483 if (proto_size + 1 > size)
484 goto out;
485
486 strncpy(value, proto_name, proto_size + 1);
487 }
488 error = proto_size + 1;
489 }
490
491out:
492 return error;
493}
494
495static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
496 size_t size)
497{
498 ssize_t len;
499 ssize_t used = 0;
500
c5ef6035 501 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
502 if (len < 0)
503 return len;
504 used += len;
505 if (buffer) {
506 if (size < used)
507 return -ERANGE;
508 buffer += len;
509 }
510
511 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
512 used += len;
513 if (buffer) {
514 if (size < used)
515 return -ERANGE;
516 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
517 buffer += len;
518 }
519
520 return used;
521}
522
523static const struct inode_operations sockfs_inode_ops = {
524 .getxattr = sockfs_getxattr,
525 .listxattr = sockfs_listxattr,
526};
527
1da177e4
LT
528/**
529 * sock_alloc - allocate a socket
89bddce5 530 *
1da177e4
LT
531 * Allocate a new inode and socket object. The two are bound together
532 * and initialised. The socket is then returned. If we are out of inodes
533 * NULL is returned.
534 */
535
f4a00aac 536struct socket *sock_alloc(void)
1da177e4 537{
89bddce5
SH
538 struct inode *inode;
539 struct socket *sock;
1da177e4 540
a209dfc7 541 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
542 if (!inode)
543 return NULL;
544
545 sock = SOCKET_I(inode);
546
29a020d3 547 kmemcheck_annotate_bitfield(sock, type);
85fe4025 548 inode->i_ino = get_next_ino();
89bddce5 549 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
550 inode->i_uid = current_fsuid();
551 inode->i_gid = current_fsgid();
600e1779 552 inode->i_op = &sockfs_inode_ops;
1da177e4 553
19e8d69c 554 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
555 return sock;
556}
f4a00aac 557EXPORT_SYMBOL(sock_alloc);
1da177e4 558
1da177e4
LT
559/**
560 * sock_release - close a socket
561 * @sock: socket to close
562 *
563 * The socket is released from the protocol stack if it has a release
564 * callback, and the inode is then released if the socket is bound to
89bddce5 565 * an inode not a file.
1da177e4 566 */
89bddce5 567
1da177e4
LT
568void sock_release(struct socket *sock)
569{
570 if (sock->ops) {
571 struct module *owner = sock->ops->owner;
572
573 sock->ops->release(sock);
574 sock->ops = NULL;
575 module_put(owner);
576 }
577
eaefd110 578 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 579 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 580
19e8d69c 581 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
582 if (!sock->file) {
583 iput(SOCK_INODE(sock));
584 return;
585 }
89bddce5 586 sock->file = NULL;
1da177e4 587}
c6d409cf 588EXPORT_SYMBOL(sock_release);
1da177e4 589
67cc0d40 590void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
20d49473 591{
140c55d4
ED
592 u8 flags = *tx_flags;
593
b9f40e21 594 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
595 flags |= SKBTX_HW_TSTAMP;
596
b9f40e21 597 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
598 flags |= SKBTX_SW_TSTAMP;
599
e7fd2885 600 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
601 flags |= SKBTX_SCHED_TSTAMP;
602
e1c8a607 603 if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
140c55d4 604 flags |= SKBTX_ACK_TSTAMP;
e7fd2885 605
140c55d4 606 *tx_flags = flags;
20d49473 607}
67cc0d40 608EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 609
d8725c86 610static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 611{
01e97e65 612 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
613 BUG_ON(ret == -EIOCBQUEUED);
614 return ret;
1da177e4
LT
615}
616
/*
 * Send @msg on @sock after the security_socket_sendmsg() hook has approved
 * it. Returns the hook's error if it refuses, otherwise the result of the
 * protocol's sendmsg operation.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
625
626int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
627 struct kvec *vec, size_t num, size_t size)
628{
6aa24814 629 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 630 return sock_sendmsg(sock, msg);
1da177e4 631}
c6d409cf 632EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 633
92f37fd2
ED
634/*
635 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
636 */
637void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
638 struct sk_buff *skb)
639{
20d49473 640 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 641 struct scm_timestamping tss;
20d49473
PO
642 int empty = 1;
643 struct skb_shared_hwtstamps *shhwtstamps =
644 skb_hwtstamps(skb);
645
646 /* Race occurred between timestamp enabling and packet
647 receiving. Fill in the current time for now. */
648 if (need_software_tstamp && skb->tstamp.tv64 == 0)
649 __net_timestamp(skb);
650
651 if (need_software_tstamp) {
652 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
653 struct timeval tv;
654 skb_get_timestamp(skb, &tv);
655 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
656 sizeof(tv), &tv);
657 } else {
f24b9be5
WB
658 struct timespec ts;
659 skb_get_timestampns(skb, &ts);
20d49473 660 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 661 sizeof(ts), &ts);
20d49473
PO
662 }
663 }
664
f24b9be5 665 memset(&tss, 0, sizeof(tss));
c199105d 666 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 667 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 668 empty = 0;
4d276eb6 669 if (shhwtstamps &&
b9f40e21 670 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 671 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 672 empty = 0;
20d49473
PO
673 if (!empty)
674 put_cmsg(msg, SOL_SOCKET,
f24b9be5 675 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 676}
7c81fd8b
ACM
677EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
678
6e3e939f
JB
679void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
680 struct sk_buff *skb)
681{
682 int ack;
683
684 if (!sock_flag(sk, SOCK_WIFI_STATUS))
685 return;
686 if (!skb->wifi_acked_valid)
687 return;
688
689 ack = skb->wifi_acked;
690
691 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
692}
693EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
694
11165f14 695static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
696 struct sk_buff *skb)
3b885787 697{
744d5a3e 698 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 699 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 700 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
701}
702
/*
 * Add both the receive timestamp and the drop-count control messages for
 * @skb to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
	struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 710
1b784140
YX
711static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
712 size_t size, int flags)
1da177e4 713{
1b784140 714 return sock->ops->recvmsg(sock, msg, size, flags);
1da177e4
LT
715}
716
1b784140
YX
717int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
718 int flags)
a2e27255
ACM
719{
720 int err = security_socket_recvmsg(sock, msg, size, flags);
721
1b784140 722 return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
1da177e4 723}
c6d409cf 724EXPORT_SYMBOL(sock_recvmsg);
1da177e4 725
c1249c0a
ML
726/**
727 * kernel_recvmsg - Receive a message from a socket (kernel space)
728 * @sock: The socket to receive the message from
729 * @msg: Received message
730 * @vec: Input s/g array for message data
731 * @num: Size of input s/g array
732 * @size: Number of bytes to read
733 * @flags: Message flags (MSG_DONTWAIT, etc...)
734 *
735 * On return the msg structure contains the scatter/gather array passed in the
736 * vec argument. The array is modified so that it consists of the unfilled
737 * portion of the original array.
738 *
739 * The returned value is the total number of bytes received, or an error.
740 */
89bddce5
SH
741int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
742 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
743{
744 mm_segment_t oldfs = get_fs();
745 int result;
746
6aa24814 747 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 748 set_fs(KERNEL_DS);
1da177e4
LT
749 result = sock_recvmsg(sock, msg, size, flags);
750 set_fs(oldfs);
751 return result;
752}
c6d409cf 753EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 754
ce1d4d3e
CH
755static ssize_t sock_sendpage(struct file *file, struct page *page,
756 int offset, size_t size, loff_t *ppos, int more)
1da177e4 757{
1da177e4
LT
758 struct socket *sock;
759 int flags;
760
ce1d4d3e
CH
761 sock = file->private_data;
762
35f9c09f
ED
763 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
764 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
765 flags |= more;
ce1d4d3e 766
e6949583 767 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 768}
1da177e4 769
9c55e01c 770static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 771 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
772 unsigned int flags)
773{
774 struct socket *sock = file->private_data;
775
997b37da
RDC
776 if (unlikely(!sock->ops->splice_read))
777 return -EINVAL;
778
9c55e01c
JA
779 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
780}
781
8ae5e030 782static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 783{
6d652330
AV
784 struct file *file = iocb->ki_filp;
785 struct socket *sock = file->private_data;
0345f931 786 struct msghdr msg = {.msg_iter = *to,
787 .msg_iocb = iocb};
8ae5e030 788 ssize_t res;
ce1d4d3e 789
8ae5e030
AV
790 if (file->f_flags & O_NONBLOCK)
791 msg.msg_flags = MSG_DONTWAIT;
792
793 if (iocb->ki_pos != 0)
1da177e4 794 return -ESPIPE;
027445c3 795
66ee59af 796 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
797 return 0;
798
237dae88 799 res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
8ae5e030
AV
800 *to = msg.msg_iter;
801 return res;
1da177e4
LT
802}
803
8ae5e030 804static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 805{
6d652330
AV
806 struct file *file = iocb->ki_filp;
807 struct socket *sock = file->private_data;
0345f931 808 struct msghdr msg = {.msg_iter = *from,
809 .msg_iocb = iocb};
8ae5e030 810 ssize_t res;
1da177e4 811
8ae5e030 812 if (iocb->ki_pos != 0)
ce1d4d3e 813 return -ESPIPE;
027445c3 814
8ae5e030
AV
815 if (file->f_flags & O_NONBLOCK)
816 msg.msg_flags = MSG_DONTWAIT;
817
6d652330
AV
818 if (sock->type == SOCK_SEQPACKET)
819 msg.msg_flags |= MSG_EOR;
820
d8725c86 821 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
822 *from = msg.msg_iter;
823 return res;
1da177e4
LT
824}
825
1da177e4
LT
826/*
827 * Atomic setting of ioctl hooks to avoid race
828 * with module unload.
829 */
830
4a3e2f71 831static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 832static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 833
881d966b 834void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 835{
4a3e2f71 836 mutex_lock(&br_ioctl_mutex);
1da177e4 837 br_ioctl_hook = hook;
4a3e2f71 838 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
839}
840EXPORT_SYMBOL(brioctl_set);
841
4a3e2f71 842static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 843static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 844
881d966b 845void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 846{
4a3e2f71 847 mutex_lock(&vlan_ioctl_mutex);
1da177e4 848 vlan_ioctl_hook = hook;
4a3e2f71 849 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
850}
851EXPORT_SYMBOL(vlan_ioctl_set);
852
4a3e2f71 853static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 854static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 855
89bddce5 856void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 857{
4a3e2f71 858 mutex_lock(&dlci_ioctl_mutex);
1da177e4 859 dlci_ioctl_hook = hook;
4a3e2f71 860 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
861}
862EXPORT_SYMBOL(dlci_ioctl_set);
863
6b96018b
AB
864static long sock_do_ioctl(struct net *net, struct socket *sock,
865 unsigned int cmd, unsigned long arg)
866{
867 int err;
868 void __user *argp = (void __user *)arg;
869
870 err = sock->ops->ioctl(sock, cmd, arg);
871
872 /*
873 * If this ioctl is unknown try to hand it down
874 * to the NIC driver.
875 */
876 if (err == -ENOIOCTLCMD)
877 err = dev_ioctl(net, cmd, argp);
878
879 return err;
880}
881
1da177e4
LT
882/*
883 * With an ioctl, arg may well be a user mode pointer, but we don't know
884 * what to do with it - that's up to the protocol still.
885 */
886
887static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
888{
889 struct socket *sock;
881d966b 890 struct sock *sk;
1da177e4
LT
891 void __user *argp = (void __user *)arg;
892 int pid, err;
881d966b 893 struct net *net;
1da177e4 894
b69aee04 895 sock = file->private_data;
881d966b 896 sk = sock->sk;
3b1e0a65 897 net = sock_net(sk);
1da177e4 898 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 899 err = dev_ioctl(net, cmd, argp);
1da177e4 900 } else
3d23e349 901#ifdef CONFIG_WEXT_CORE
1da177e4 902 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 903 err = dev_ioctl(net, cmd, argp);
1da177e4 904 } else
3d23e349 905#endif
89bddce5 906 switch (cmd) {
1da177e4
LT
907 case FIOSETOWN:
908 case SIOCSPGRP:
909 err = -EFAULT;
910 if (get_user(pid, (int __user *)argp))
911 break;
e0b93edd
JL
912 f_setown(sock->file, pid, 1);
913 err = 0;
1da177e4
LT
914 break;
915 case FIOGETOWN:
916 case SIOCGPGRP:
609d7fa9 917 err = put_user(f_getown(sock->file),
89bddce5 918 (int __user *)argp);
1da177e4
LT
919 break;
920 case SIOCGIFBR:
921 case SIOCSIFBR:
922 case SIOCBRADDBR:
923 case SIOCBRDELBR:
924 err = -ENOPKG;
925 if (!br_ioctl_hook)
926 request_module("bridge");
927
4a3e2f71 928 mutex_lock(&br_ioctl_mutex);
89bddce5 929 if (br_ioctl_hook)
881d966b 930 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 931 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
932 break;
933 case SIOCGIFVLAN:
934 case SIOCSIFVLAN:
935 err = -ENOPKG;
936 if (!vlan_ioctl_hook)
937 request_module("8021q");
938
4a3e2f71 939 mutex_lock(&vlan_ioctl_mutex);
1da177e4 940 if (vlan_ioctl_hook)
881d966b 941 err = vlan_ioctl_hook(net, argp);
4a3e2f71 942 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 943 break;
1da177e4
LT
944 case SIOCADDDLCI:
945 case SIOCDELDLCI:
946 err = -ENOPKG;
947 if (!dlci_ioctl_hook)
948 request_module("dlci");
949
7512cbf6
PE
950 mutex_lock(&dlci_ioctl_mutex);
951 if (dlci_ioctl_hook)
1da177e4 952 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 953 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
954 break;
955 default:
6b96018b 956 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 957 break;
89bddce5 958 }
1da177e4
LT
959 return err;
960}
961
962int sock_create_lite(int family, int type, int protocol, struct socket **res)
963{
964 int err;
965 struct socket *sock = NULL;
89bddce5 966
1da177e4
LT
967 err = security_socket_create(family, type, protocol, 1);
968 if (err)
969 goto out;
970
971 sock = sock_alloc();
972 if (!sock) {
973 err = -ENOMEM;
974 goto out;
975 }
976
1da177e4 977 sock->type = type;
7420ed23
VY
978 err = security_socket_post_create(sock, family, type, protocol, 1);
979 if (err)
980 goto out_release;
981
1da177e4
LT
982out:
983 *res = sock;
984 return err;
7420ed23
VY
985out_release:
986 sock_release(sock);
987 sock = NULL;
988 goto out;
1da177e4 989}
c6d409cf 990EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
991
992/* No kernel lock held - perfect */
89bddce5 993static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 994{
cbf55001 995 unsigned int busy_flag = 0;
1da177e4
LT
996 struct socket *sock;
997
998 /*
89bddce5 999 * We can't return errors to poll, so it's either yes or no.
1da177e4 1000 */
b69aee04 1001 sock = file->private_data;
2d48d67f 1002
cbf55001 1003 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1004 /* this socket can poll_ll so tell the system call */
cbf55001 1005 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1006
1007 /* once, only if requested by syscall */
cbf55001
ET
1008 if (wait && (wait->_key & POLL_BUSY_LOOP))
1009 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1010 }
1011
cbf55001 1012 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1013}
1014
89bddce5 1015static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1016{
b69aee04 1017 struct socket *sock = file->private_data;
1da177e4
LT
1018
1019 return sock->ops->mmap(file, sock, vma);
1020}
1021
/*
 * ->release() for socket files: release the socket obtained from the
 * inode via SOCKET_I().  Always reports success.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1027
1028/*
1029 * Update the socket async list
1030 *
1031 * Fasync_list locking strategy.
1032 *
1033 * 1. fasync_list is modified only under process context socket lock
1034 * i.e. under semaphore.
1035 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1036 * or under socket lock
1da177e4
LT
1037 */
1038
1039static int sock_fasync(int fd, struct file *filp, int on)
1040{
989a2979
ED
1041 struct socket *sock = filp->private_data;
1042 struct sock *sk = sock->sk;
eaefd110 1043 struct socket_wq *wq;
1da177e4 1044
989a2979 1045 if (sk == NULL)
1da177e4 1046 return -EINVAL;
1da177e4
LT
1047
1048 lock_sock(sk);
eaefd110
ED
1049 wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1050 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1051
eaefd110 1052 if (!wq->fasync_list)
989a2979
ED
1053 sock_reset_flag(sk, SOCK_FASYNC);
1054 else
bcdce719 1055 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1056
989a2979 1057 release_sock(sk);
1da177e4
LT
1058 return 0;
1059}
1060
ceb5d58b 1061/* This function may be called only under rcu_lock */
1da177e4 1062
ceb5d58b 1063int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1064{
ceb5d58b 1065 if (!wq || !wq->fasync_list)
1da177e4 1066 return -1;
ceb5d58b 1067
89bddce5 1068 switch (how) {
8d8ad9d7 1069 case SOCK_WAKE_WAITD:
ceb5d58b 1070 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1071 break;
1072 goto call_kill;
8d8ad9d7 1073 case SOCK_WAKE_SPACE:
ceb5d58b 1074 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1075 break;
1076 /* fall through */
8d8ad9d7 1077 case SOCK_WAKE_IO:
89bddce5 1078call_kill:
43815482 1079 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1080 break;
8d8ad9d7 1081 case SOCK_WAKE_URG:
43815482 1082 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1083 }
ceb5d58b 1084
1da177e4
LT
1085 return 0;
1086}
c6d409cf 1087EXPORT_SYMBOL(sock_wake_async);
1da177e4 1088
721db93a 1089int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1090 struct socket **res, int kern)
1da177e4
LT
1091{
1092 int err;
1093 struct socket *sock;
55737fda 1094 const struct net_proto_family *pf;
1da177e4
LT
1095
1096 /*
89bddce5 1097 * Check protocol is in range
1da177e4
LT
1098 */
1099 if (family < 0 || family >= NPROTO)
1100 return -EAFNOSUPPORT;
1101 if (type < 0 || type >= SOCK_MAX)
1102 return -EINVAL;
1103
1104 /* Compatibility.
1105
1106 This uglymoron is moved from INET layer to here to avoid
1107 deadlock in module load.
1108 */
1109 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1110 static int warned;
1da177e4
LT
1111 if (!warned) {
1112 warned = 1;
3410f22e
YY
1113 pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1114 current->comm);
1da177e4
LT
1115 }
1116 family = PF_PACKET;
1117 }
1118
1119 err = security_socket_create(family, type, protocol, kern);
1120 if (err)
1121 return err;
89bddce5 1122
55737fda
SH
1123 /*
1124 * Allocate the socket and allow the family to set things up. if
1125 * the protocol is 0, the family is instructed to select an appropriate
1126 * default.
1127 */
1128 sock = sock_alloc();
1129 if (!sock) {
e87cc472 1130 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1131 return -ENFILE; /* Not exactly a match, but its the
1132 closest posix thing */
1133 }
1134
1135 sock->type = type;
1136
95a5afca 1137#ifdef CONFIG_MODULES
89bddce5
SH
1138 /* Attempt to load a protocol module if the find failed.
1139 *
1140 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1141 * requested real, full-featured networking support upon configuration.
1142 * Otherwise module support will break!
1143 */
190683a9 1144 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1145 request_module("net-pf-%d", family);
1da177e4
LT
1146#endif
1147
55737fda
SH
1148 rcu_read_lock();
1149 pf = rcu_dereference(net_families[family]);
1150 err = -EAFNOSUPPORT;
1151 if (!pf)
1152 goto out_release;
1da177e4
LT
1153
1154 /*
1155 * We will call the ->create function, that possibly is in a loadable
1156 * module, so we have to bump that loadable module refcnt first.
1157 */
55737fda 1158 if (!try_module_get(pf->owner))
1da177e4
LT
1159 goto out_release;
1160
55737fda
SH
1161 /* Now protected by module ref count */
1162 rcu_read_unlock();
1163
3f378b68 1164 err = pf->create(net, sock, protocol, kern);
55737fda 1165 if (err < 0)
1da177e4 1166 goto out_module_put;
a79af59e 1167
1da177e4
LT
1168 /*
1169 * Now to bump the refcnt of the [loadable] module that owns this
1170 * socket at sock_release time we decrement its refcnt.
1171 */
55737fda
SH
1172 if (!try_module_get(sock->ops->owner))
1173 goto out_module_busy;
1174
1da177e4
LT
1175 /*
1176 * Now that we're done with the ->create function, the [loadable]
1177 * module can have its refcnt decremented
1178 */
55737fda 1179 module_put(pf->owner);
7420ed23
VY
1180 err = security_socket_post_create(sock, family, type, protocol, kern);
1181 if (err)
3b185525 1182 goto out_sock_release;
55737fda 1183 *res = sock;
1da177e4 1184
55737fda
SH
1185 return 0;
1186
1187out_module_busy:
1188 err = -EAFNOSUPPORT;
1da177e4 1189out_module_put:
55737fda
SH
1190 sock->ops = NULL;
1191 module_put(pf->owner);
1192out_sock_release:
1da177e4 1193 sock_release(sock);
55737fda
SH
1194 return err;
1195
1196out_release:
1197 rcu_read_unlock();
1198 goto out_sock_release;
1da177e4 1199}
721db93a 1200EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1201
1202int sock_create(int family, int type, int protocol, struct socket **res)
1203{
1b8d7ae4 1204 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1205}
c6d409cf 1206EXPORT_SYMBOL(sock_create);
1da177e4 1207
/*
 * Create a socket in namespace @net with kern == 1.
 * Return value and *res semantics are those of __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1213
3e0fa65f 1214SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1215{
1216 int retval;
1217 struct socket *sock;
a677a039
UD
1218 int flags;
1219
e38b36f3
UD
1220 /* Check the SOCK_* constants for consistency. */
1221 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1222 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1223 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1224 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1225
a677a039 1226 flags = type & ~SOCK_TYPE_MASK;
77d27200 1227 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1228 return -EINVAL;
1229 type &= SOCK_TYPE_MASK;
1da177e4 1230
aaca0bdc
UD
1231 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1232 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1233
1da177e4
LT
1234 retval = sock_create(family, type, protocol, &sock);
1235 if (retval < 0)
1236 goto out;
1237
77d27200 1238 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1239 if (retval < 0)
1240 goto out_release;
1241
1242out:
1243 /* It may be already another descriptor 8) Not kernel problem. */
1244 return retval;
1245
1246out_release:
1247 sock_release(sock);
1248 return retval;
1249}
1250
1251/*
1252 * Create a pair of connected sockets.
1253 */
1254
3e0fa65f
HC
1255SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1256 int __user *, usockvec)
1da177e4
LT
1257{
1258 struct socket *sock1, *sock2;
1259 int fd1, fd2, err;
db349509 1260 struct file *newfile1, *newfile2;
a677a039
UD
1261 int flags;
1262
1263 flags = type & ~SOCK_TYPE_MASK;
77d27200 1264 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1265 return -EINVAL;
1266 type &= SOCK_TYPE_MASK;
1da177e4 1267
aaca0bdc
UD
1268 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1269 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1270
1da177e4
LT
1271 /*
1272 * Obtain the first socket and check if the underlying protocol
1273 * supports the socketpair call.
1274 */
1275
1276 err = sock_create(family, type, protocol, &sock1);
1277 if (err < 0)
1278 goto out;
1279
1280 err = sock_create(family, type, protocol, &sock2);
1281 if (err < 0)
1282 goto out_release_1;
1283
1284 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1285 if (err < 0)
1da177e4
LT
1286 goto out_release_both;
1287
28407630 1288 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1289 if (unlikely(fd1 < 0)) {
1290 err = fd1;
db349509 1291 goto out_release_both;
bf3c23d1 1292 }
d73aa286 1293
28407630 1294 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1295 if (unlikely(fd2 < 0)) {
1296 err = fd2;
d73aa286 1297 goto out_put_unused_1;
28407630
AV
1298 }
1299
aab174f0 1300 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1301 if (IS_ERR(newfile1)) {
28407630 1302 err = PTR_ERR(newfile1);
d73aa286 1303 goto out_put_unused_both;
28407630
AV
1304 }
1305
aab174f0 1306 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1307 if (IS_ERR(newfile2)) {
1308 err = PTR_ERR(newfile2);
d73aa286 1309 goto out_fput_1;
db349509
AV
1310 }
1311
d73aa286
YD
1312 err = put_user(fd1, &usockvec[0]);
1313 if (err)
1314 goto out_fput_both;
1315
1316 err = put_user(fd2, &usockvec[1]);
1317 if (err)
1318 goto out_fput_both;
1319
157cf649 1320 audit_fd_pair(fd1, fd2);
d73aa286 1321
db349509
AV
1322 fd_install(fd1, newfile1);
1323 fd_install(fd2, newfile2);
1da177e4
LT
1324 /* fd1 and fd2 may be already another descriptors.
1325 * Not kernel problem.
1326 */
1327
d73aa286 1328 return 0;
1da177e4 1329
d73aa286
YD
1330out_fput_both:
1331 fput(newfile2);
1332 fput(newfile1);
1333 put_unused_fd(fd2);
1334 put_unused_fd(fd1);
1335 goto out;
1336
1337out_fput_1:
1338 fput(newfile1);
1339 put_unused_fd(fd2);
1340 put_unused_fd(fd1);
1341 sock_release(sock2);
1342 goto out;
1da177e4 1343
d73aa286
YD
1344out_put_unused_both:
1345 put_unused_fd(fd2);
1346out_put_unused_1:
1347 put_unused_fd(fd1);
1da177e4 1348out_release_both:
89bddce5 1349 sock_release(sock2);
1da177e4 1350out_release_1:
89bddce5 1351 sock_release(sock1);
1da177e4
LT
1352out:
1353 return err;
1354}
1355
1da177e4
LT
1356/*
1357 * Bind a name to a socket. Nothing much to do here since it's
1358 * the protocol's responsibility to handle the local address.
1359 *
1360 * We move the socket address to kernel space before we call
1361 * the protocol layer (having also checked the address is ok).
1362 */
1363
20f37034 1364SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1365{
1366 struct socket *sock;
230b1839 1367 struct sockaddr_storage address;
6cb153ca 1368 int err, fput_needed;
1da177e4 1369
89bddce5 1370 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1371 if (sock) {
43db362d 1372 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1373 if (err >= 0) {
1374 err = security_socket_bind(sock,
230b1839 1375 (struct sockaddr *)&address,
89bddce5 1376 addrlen);
6cb153ca
BL
1377 if (!err)
1378 err = sock->ops->bind(sock,
89bddce5 1379 (struct sockaddr *)
230b1839 1380 &address, addrlen);
1da177e4 1381 }
6cb153ca 1382 fput_light(sock->file, fput_needed);
89bddce5 1383 }
1da177e4
LT
1384 return err;
1385}
1386
1da177e4
LT
1387/*
1388 * Perform a listen. Basically, we allow the protocol to do anything
1389 * necessary for a listen, and if that works, we mark the socket as
1390 * ready for listening.
1391 */
1392
3e0fa65f 1393SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1394{
1395 struct socket *sock;
6cb153ca 1396 int err, fput_needed;
b8e1f9b5 1397 int somaxconn;
89bddce5
SH
1398
1399 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1400 if (sock) {
8efa6e93 1401 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1402 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1403 backlog = somaxconn;
1da177e4
LT
1404
1405 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1406 if (!err)
1407 err = sock->ops->listen(sock, backlog);
1da177e4 1408
6cb153ca 1409 fput_light(sock->file, fput_needed);
1da177e4
LT
1410 }
1411 return err;
1412}
1413
1da177e4
LT
1414/*
1415 * For accept, we attempt to create a new socket, set up the link
1416 * with the client, wake up the client, then return the new
1417 * connected fd. We collect the address of the connector in kernel
1418 * space and move it to user at the very end. This is unclean because
1419 * we open the socket then return an error.
1420 *
1421 * 1003.1g adds the ability to recvmsg() to query connection pending
1422 * status to recvmsg. We need to add that support in a way thats
1423 * clean when we restucture accept also.
1424 */
1425
20f37034
HC
1426SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1427 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1428{
1429 struct socket *sock, *newsock;
39d8c1b6 1430 struct file *newfile;
6cb153ca 1431 int err, len, newfd, fput_needed;
230b1839 1432 struct sockaddr_storage address;
1da177e4 1433
77d27200 1434 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1435 return -EINVAL;
1436
1437 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1438 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1439
6cb153ca 1440 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1441 if (!sock)
1442 goto out;
1443
1444 err = -ENFILE;
c6d409cf
ED
1445 newsock = sock_alloc();
1446 if (!newsock)
1da177e4
LT
1447 goto out_put;
1448
1449 newsock->type = sock->type;
1450 newsock->ops = sock->ops;
1451
1da177e4
LT
1452 /*
1453 * We don't need try_module_get here, as the listening socket (sock)
1454 * has the protocol module (sock->ops->owner) held.
1455 */
1456 __module_get(newsock->ops->owner);
1457
28407630 1458 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1459 if (unlikely(newfd < 0)) {
1460 err = newfd;
9a1875e6
DM
1461 sock_release(newsock);
1462 goto out_put;
39d8c1b6 1463 }
aab174f0 1464 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1465 if (IS_ERR(newfile)) {
28407630
AV
1466 err = PTR_ERR(newfile);
1467 put_unused_fd(newfd);
1468 sock_release(newsock);
1469 goto out_put;
1470 }
39d8c1b6 1471
a79af59e
FF
1472 err = security_socket_accept(sock, newsock);
1473 if (err)
39d8c1b6 1474 goto out_fd;
a79af59e 1475
1da177e4
LT
1476 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1477 if (err < 0)
39d8c1b6 1478 goto out_fd;
1da177e4
LT
1479
1480 if (upeer_sockaddr) {
230b1839 1481 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1482 &len, 2) < 0) {
1da177e4 1483 err = -ECONNABORTED;
39d8c1b6 1484 goto out_fd;
1da177e4 1485 }
43db362d 1486 err = move_addr_to_user(&address,
230b1839 1487 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1488 if (err < 0)
39d8c1b6 1489 goto out_fd;
1da177e4
LT
1490 }
1491
1492 /* File flags are not inherited via accept() unlike another OSes. */
1493
39d8c1b6
DM
1494 fd_install(newfd, newfile);
1495 err = newfd;
1da177e4 1496
1da177e4 1497out_put:
6cb153ca 1498 fput_light(sock->file, fput_needed);
1da177e4
LT
1499out:
1500 return err;
39d8c1b6 1501out_fd:
9606a216 1502 fput(newfile);
39d8c1b6 1503 put_unused_fd(newfd);
1da177e4
LT
1504 goto out_put;
1505}
1506
20f37034
HC
1507SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1508 int __user *, upeer_addrlen)
aaca0bdc 1509{
de11defe 1510 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1511}
1512
1da177e4
LT
1513/*
1514 * Attempt to connect to a socket with the server address. The address
1515 * is in user space so we verify it is OK and move it to kernel space.
1516 *
1517 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1518 * break bindings
1519 *
1520 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1521 * other SEQPACKET protocols that take time to connect() as it doesn't
1522 * include the -EINPROGRESS status for such sockets.
1523 */
1524
20f37034
HC
1525SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1526 int, addrlen)
1da177e4
LT
1527{
1528 struct socket *sock;
230b1839 1529 struct sockaddr_storage address;
6cb153ca 1530 int err, fput_needed;
1da177e4 1531
6cb153ca 1532 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1533 if (!sock)
1534 goto out;
43db362d 1535 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1536 if (err < 0)
1537 goto out_put;
1538
89bddce5 1539 err =
230b1839 1540 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1541 if (err)
1542 goto out_put;
1543
230b1839 1544 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1545 sock->file->f_flags);
1546out_put:
6cb153ca 1547 fput_light(sock->file, fput_needed);
1da177e4
LT
1548out:
1549 return err;
1550}
1551
1552/*
1553 * Get the local address ('name') of a socket object. Move the obtained
1554 * name to user space.
1555 */
1556
20f37034
HC
1557SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1558 int __user *, usockaddr_len)
1da177e4
LT
1559{
1560 struct socket *sock;
230b1839 1561 struct sockaddr_storage address;
6cb153ca 1562 int len, err, fput_needed;
89bddce5 1563
6cb153ca 1564 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1565 if (!sock)
1566 goto out;
1567
1568 err = security_socket_getsockname(sock);
1569 if (err)
1570 goto out_put;
1571
230b1839 1572 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1573 if (err)
1574 goto out_put;
43db362d 1575 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1576
1577out_put:
6cb153ca 1578 fput_light(sock->file, fput_needed);
1da177e4
LT
1579out:
1580 return err;
1581}
1582
1583/*
1584 * Get the remote address ('name') of a socket object. Move the obtained
1585 * name to user space.
1586 */
1587
20f37034
HC
1588SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1589 int __user *, usockaddr_len)
1da177e4
LT
1590{
1591 struct socket *sock;
230b1839 1592 struct sockaddr_storage address;
6cb153ca 1593 int len, err, fput_needed;
1da177e4 1594
89bddce5
SH
1595 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1596 if (sock != NULL) {
1da177e4
LT
1597 err = security_socket_getpeername(sock);
1598 if (err) {
6cb153ca 1599 fput_light(sock->file, fput_needed);
1da177e4
LT
1600 return err;
1601 }
1602
89bddce5 1603 err =
230b1839 1604 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1605 1);
1da177e4 1606 if (!err)
43db362d 1607 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1608 usockaddr_len);
6cb153ca 1609 fput_light(sock->file, fput_needed);
1da177e4
LT
1610 }
1611 return err;
1612}
1613
1614/*
1615 * Send a datagram to a given address. We move the address into kernel
1616 * space and check the user space data area is readable before invoking
1617 * the protocol.
1618 */
1619
3e0fa65f 1620SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1621 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1622 int, addr_len)
1da177e4
LT
1623{
1624 struct socket *sock;
230b1839 1625 struct sockaddr_storage address;
1da177e4
LT
1626 int err;
1627 struct msghdr msg;
1628 struct iovec iov;
6cb153ca 1629 int fput_needed;
6cb153ca 1630
602bd0e9
AV
1631 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1632 if (unlikely(err))
1633 return err;
de0fa95c
PE
1634 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1635 if (!sock)
4387ff75 1636 goto out;
6cb153ca 1637
89bddce5 1638 msg.msg_name = NULL;
89bddce5
SH
1639 msg.msg_control = NULL;
1640 msg.msg_controllen = 0;
1641 msg.msg_namelen = 0;
6cb153ca 1642 if (addr) {
43db362d 1643 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1644 if (err < 0)
1645 goto out_put;
230b1839 1646 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1647 msg.msg_namelen = addr_len;
1da177e4
LT
1648 }
1649 if (sock->file->f_flags & O_NONBLOCK)
1650 flags |= MSG_DONTWAIT;
1651 msg.msg_flags = flags;
d8725c86 1652 err = sock_sendmsg(sock, &msg);
1da177e4 1653
89bddce5 1654out_put:
de0fa95c 1655 fput_light(sock->file, fput_needed);
4387ff75 1656out:
1da177e4
LT
1657 return err;
1658}
1659
1660/*
89bddce5 1661 * Send a datagram down a socket.
1da177e4
LT
1662 */
1663
3e0fa65f 1664SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1665 unsigned int, flags)
1da177e4
LT
1666{
1667 return sys_sendto(fd, buff, len, flags, NULL, 0);
1668}
1669
1670/*
89bddce5 1671 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1672 * sender. We verify the buffers are writable and if needed move the
1673 * sender address from kernel to user space.
1674 */
1675
3e0fa65f 1676SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1677 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1678 int __user *, addr_len)
1da177e4
LT
1679{
1680 struct socket *sock;
1681 struct iovec iov;
1682 struct msghdr msg;
230b1839 1683 struct sockaddr_storage address;
89bddce5 1684 int err, err2;
6cb153ca
BL
1685 int fput_needed;
1686
602bd0e9
AV
1687 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1688 if (unlikely(err))
1689 return err;
de0fa95c 1690 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1691 if (!sock)
de0fa95c 1692 goto out;
1da177e4 1693
89bddce5
SH
1694 msg.msg_control = NULL;
1695 msg.msg_controllen = 0;
f3d33426
HFS
1696 /* Save some cycles and don't copy the address if not needed */
1697 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1698 /* We assume all kernel code knows the size of sockaddr_storage */
1699 msg.msg_namelen = 0;
130ed5d1 1700 msg.msg_iocb = NULL;
1da177e4
LT
1701 if (sock->file->f_flags & O_NONBLOCK)
1702 flags |= MSG_DONTWAIT;
602bd0e9 1703 err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
1da177e4 1704
89bddce5 1705 if (err >= 0 && addr != NULL) {
43db362d 1706 err2 = move_addr_to_user(&address,
230b1839 1707 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1708 if (err2 < 0)
1709 err = err2;
1da177e4 1710 }
de0fa95c
PE
1711
1712 fput_light(sock->file, fput_needed);
4387ff75 1713out:
1da177e4
LT
1714 return err;
1715}
1716
1717/*
89bddce5 1718 * Receive a datagram from a socket.
1da177e4
LT
1719 */
1720
b7c0ddf5
JG
1721SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1722 unsigned int, flags)
1da177e4
LT
1723{
1724 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1725}
1726
1727/*
1728 * Set a socket option. Because we don't know the option lengths we have
1729 * to pass the user mode parameter for the protocols to sort out.
1730 */
1731
20f37034
HC
1732SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1733 char __user *, optval, int, optlen)
1da177e4 1734{
6cb153ca 1735 int err, fput_needed;
1da177e4
LT
1736 struct socket *sock;
1737
1738 if (optlen < 0)
1739 return -EINVAL;
89bddce5
SH
1740
1741 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1742 if (sock != NULL) {
1743 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1744 if (err)
1745 goto out_put;
1da177e4
LT
1746
1747 if (level == SOL_SOCKET)
89bddce5
SH
1748 err =
1749 sock_setsockopt(sock, level, optname, optval,
1750 optlen);
1da177e4 1751 else
89bddce5
SH
1752 err =
1753 sock->ops->setsockopt(sock, level, optname, optval,
1754 optlen);
6cb153ca
BL
1755out_put:
1756 fput_light(sock->file, fput_needed);
1da177e4
LT
1757 }
1758 return err;
1759}
1760
1761/*
1762 * Get a socket option. Because we don't know the option lengths we have
1763 * to pass a user mode parameter for the protocols to sort out.
1764 */
1765
20f37034
HC
1766SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1767 char __user *, optval, int __user *, optlen)
1da177e4 1768{
6cb153ca 1769 int err, fput_needed;
1da177e4
LT
1770 struct socket *sock;
1771
89bddce5
SH
1772 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1773 if (sock != NULL) {
6cb153ca
BL
1774 err = security_socket_getsockopt(sock, level, optname);
1775 if (err)
1776 goto out_put;
1da177e4
LT
1777
1778 if (level == SOL_SOCKET)
89bddce5
SH
1779 err =
1780 sock_getsockopt(sock, level, optname, optval,
1781 optlen);
1da177e4 1782 else
89bddce5
SH
1783 err =
1784 sock->ops->getsockopt(sock, level, optname, optval,
1785 optlen);
6cb153ca
BL
1786out_put:
1787 fput_light(sock->file, fput_needed);
1da177e4
LT
1788 }
1789 return err;
1790}
1791
1da177e4
LT
1792/*
1793 * Shutdown a socket.
1794 */
1795
754fe8d2 1796SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1797{
6cb153ca 1798 int err, fput_needed;
1da177e4
LT
1799 struct socket *sock;
1800
89bddce5
SH
1801 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1802 if (sock != NULL) {
1da177e4 1803 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1804 if (!err)
1805 err = sock->ops->shutdown(sock, how);
1806 fput_light(sock->file, fput_needed);
1da177e4
LT
1807 }
1808 return err;
1809}
1810
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * These expand in the caller's scope: they rely on a local `flags` and,
 * for an argument `msg`, on both `msg` and `msg_compat` being visible.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1817
c71d8ebe
TH
1818struct used_address {
1819 struct sockaddr_storage name;
1820 unsigned int name_len;
1821};
1822
da184284
AV
1823static int copy_msghdr_from_user(struct msghdr *kmsg,
1824 struct user_msghdr __user *umsg,
1825 struct sockaddr __user **save_addr,
1826 struct iovec **iov)
1661bf36 1827{
08adb7da
AV
1828 struct sockaddr __user *uaddr;
1829 struct iovec __user *uiov;
c0371da6 1830 size_t nr_segs;
08adb7da
AV
1831 ssize_t err;
1832
1833 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1834 __get_user(uaddr, &umsg->msg_name) ||
1835 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1836 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1837 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1838 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1839 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1840 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1841 return -EFAULT;
dbb490b9 1842
08adb7da 1843 if (!uaddr)
6a2a2b3a
AS
1844 kmsg->msg_namelen = 0;
1845
dbb490b9
ML
1846 if (kmsg->msg_namelen < 0)
1847 return -EINVAL;
1848
1661bf36 1849 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1850 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1851
1852 if (save_addr)
1853 *save_addr = uaddr;
1854
1855 if (uaddr && kmsg->msg_namelen) {
1856 if (!save_addr) {
1857 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1858 kmsg->msg_name);
1859 if (err < 0)
1860 return err;
1861 }
1862 } else {
1863 kmsg->msg_name = NULL;
1864 kmsg->msg_namelen = 0;
1865 }
1866
c0371da6 1867 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1868 return -EMSGSIZE;
1869
0345f931 1870 kmsg->msg_iocb = NULL;
1871
da184284
AV
1872 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1873 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1874}
1875
666547ff 1876static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1877 struct msghdr *msg_sys, unsigned int flags,
c71d8ebe 1878 struct used_address *used_address)
1da177e4 1879{
89bddce5
SH
1880 struct compat_msghdr __user *msg_compat =
1881 (struct compat_msghdr __user *)msg;
230b1839 1882 struct sockaddr_storage address;
1da177e4 1883 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1884 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1885 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1886 /* 20 is size of ipv6_pktinfo */
1da177e4 1887 unsigned char *ctl_buf = ctl;
d8725c86 1888 int ctl_len;
08adb7da 1889 ssize_t err;
89bddce5 1890
08adb7da 1891 msg_sys->msg_name = &address;
1da177e4 1892
08449320 1893 if (MSG_CMSG_COMPAT & flags)
08adb7da 1894 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1895 else
08adb7da 1896 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1897 if (err < 0)
da184284 1898 return err;
1da177e4
LT
1899
1900 err = -ENOBUFS;
1901
228e548e 1902 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1903 goto out_freeiov;
228e548e 1904 ctl_len = msg_sys->msg_controllen;
1da177e4 1905 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1906 err =
228e548e 1907 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1908 sizeof(ctl));
1da177e4
LT
1909 if (err)
1910 goto out_freeiov;
228e548e
AB
1911 ctl_buf = msg_sys->msg_control;
1912 ctl_len = msg_sys->msg_controllen;
1da177e4 1913 } else if (ctl_len) {
89bddce5 1914 if (ctl_len > sizeof(ctl)) {
1da177e4 1915 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1916 if (ctl_buf == NULL)
1da177e4
LT
1917 goto out_freeiov;
1918 }
1919 err = -EFAULT;
1920 /*
228e548e 1921 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1922 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1923 * checking falls down on this.
1924 */
fb8621bb 1925 if (copy_from_user(ctl_buf,
228e548e 1926 (void __user __force *)msg_sys->msg_control,
89bddce5 1927 ctl_len))
1da177e4 1928 goto out_freectl;
228e548e 1929 msg_sys->msg_control = ctl_buf;
1da177e4 1930 }
228e548e 1931 msg_sys->msg_flags = flags;
1da177e4
LT
1932
1933 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1934 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1935 /*
1936 * If this is sendmmsg() and current destination address is same as
1937 * previously succeeded address, omit asking LSM's decision.
1938 * used_address->name_len is initialized to UINT_MAX so that the first
1939 * destination address never matches.
1940 */
bc909d9d
MD
1941 if (used_address && msg_sys->msg_name &&
1942 used_address->name_len == msg_sys->msg_namelen &&
1943 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1944 used_address->name_len)) {
d8725c86 1945 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1946 goto out_freectl;
1947 }
d8725c86 1948 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1949 /*
1950 * If this is sendmmsg() and sending to current destination address was
1951 * successful, remember it.
1952 */
1953 if (used_address && err >= 0) {
1954 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1955 if (msg_sys->msg_name)
1956 memcpy(&used_address->name, msg_sys->msg_name,
1957 used_address->name_len);
c71d8ebe 1958 }
1da177e4
LT
1959
1960out_freectl:
89bddce5 1961 if (ctl_buf != ctl)
1da177e4
LT
1962 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1963out_freeiov:
da184284 1964 kfree(iov);
228e548e
AB
1965 return err;
1966}
1967
1968/*
1969 * BSD sendmsg interface
1970 */
1971
666547ff 1972long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1973{
1974 int fput_needed, err;
1975 struct msghdr msg_sys;
1be374a0
AL
1976 struct socket *sock;
1977
1be374a0 1978 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1979 if (!sock)
1980 goto out;
1981
a7526eb5 1982 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
228e548e 1983
6cb153ca 1984 fput_light(sock->file, fput_needed);
89bddce5 1985out:
1da177e4
LT
1986 return err;
1987}
1988
666547ff 1989SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1990{
1991 if (flags & MSG_CMSG_COMPAT)
1992 return -EINVAL;
1993 return __sys_sendmsg(fd, msg, flags);
1994}
1995
228e548e
AB
1996/*
1997 * Linux sendmmsg interface
1998 */
1999
2000int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2001 unsigned int flags)
2002{
2003 int fput_needed, err, datagrams;
2004 struct socket *sock;
2005 struct mmsghdr __user *entry;
2006 struct compat_mmsghdr __user *compat_entry;
2007 struct msghdr msg_sys;
c71d8ebe 2008 struct used_address used_address;
228e548e 2009
98382f41
AB
2010 if (vlen > UIO_MAXIOV)
2011 vlen = UIO_MAXIOV;
228e548e
AB
2012
2013 datagrams = 0;
2014
2015 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2016 if (!sock)
2017 return err;
2018
c71d8ebe 2019 used_address.name_len = UINT_MAX;
228e548e
AB
2020 entry = mmsg;
2021 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2022 err = 0;
228e548e
AB
2023
2024 while (datagrams < vlen) {
228e548e 2025 if (MSG_CMSG_COMPAT & flags) {
666547ff 2026 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5 2027 &msg_sys, flags, &used_address);
228e548e
AB
2028 if (err < 0)
2029 break;
2030 err = __put_user(err, &compat_entry->msg_len);
2031 ++compat_entry;
2032 } else {
a7526eb5 2033 err = ___sys_sendmsg(sock,
666547ff 2034 (struct user_msghdr __user *)entry,
a7526eb5 2035 &msg_sys, flags, &used_address);
228e548e
AB
2036 if (err < 0)
2037 break;
2038 err = put_user(err, &entry->msg_len);
2039 ++entry;
2040 }
2041
2042 if (err)
2043 break;
2044 ++datagrams;
a78cb84c 2045 cond_resched();
228e548e
AB
2046 }
2047
228e548e
AB
2048 fput_light(sock->file, fput_needed);
2049
728ffb86
AB
2050 /* We only return an error if no datagrams were able to be sent */
2051 if (datagrams != 0)
228e548e
AB
2052 return datagrams;
2053
228e548e
AB
2054 return err;
2055}
2056
2057SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2058 unsigned int, vlen, unsigned int, flags)
2059{
1be374a0
AL
2060 if (flags & MSG_CMSG_COMPAT)
2061 return -EINVAL;
228e548e
AB
2062 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2063}
2064
666547ff 2065static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2066 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2067{
89bddce5
SH
2068 struct compat_msghdr __user *msg_compat =
2069 (struct compat_msghdr __user *)msg;
1da177e4 2070 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2071 struct iovec *iov = iovstack;
1da177e4 2072 unsigned long cmsg_ptr;
08adb7da
AV
2073 int total_len, len;
2074 ssize_t err;
1da177e4
LT
2075
2076 /* kernel mode address */
230b1839 2077 struct sockaddr_storage addr;
1da177e4
LT
2078
2079 /* user mode address pointers */
2080 struct sockaddr __user *uaddr;
08adb7da 2081 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2082
08adb7da 2083 msg_sys->msg_name = &addr;
1da177e4 2084
f3d33426 2085 if (MSG_CMSG_COMPAT & flags)
08adb7da 2086 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2087 else
08adb7da 2088 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2089 if (err < 0)
da184284
AV
2090 return err;
2091 total_len = iov_iter_count(&msg_sys->msg_iter);
1da177e4 2092
a2e27255
ACM
2093 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2094 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2095
f3d33426
HFS
2096 /* We assume all kernel code knows the size of sockaddr_storage */
2097 msg_sys->msg_namelen = 0;
2098
1da177e4
LT
2099 if (sock->file->f_flags & O_NONBLOCK)
2100 flags |= MSG_DONTWAIT;
a2e27255
ACM
2101 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2102 total_len, flags);
1da177e4
LT
2103 if (err < 0)
2104 goto out_freeiov;
2105 len = err;
2106
2107 if (uaddr != NULL) {
43db362d 2108 err = move_addr_to_user(&addr,
a2e27255 2109 msg_sys->msg_namelen, uaddr,
89bddce5 2110 uaddr_len);
1da177e4
LT
2111 if (err < 0)
2112 goto out_freeiov;
2113 }
a2e27255 2114 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2115 COMPAT_FLAGS(msg));
1da177e4
LT
2116 if (err)
2117 goto out_freeiov;
2118 if (MSG_CMSG_COMPAT & flags)
a2e27255 2119 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2120 &msg_compat->msg_controllen);
2121 else
a2e27255 2122 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2123 &msg->msg_controllen);
2124 if (err)
2125 goto out_freeiov;
2126 err = len;
2127
2128out_freeiov:
da184284 2129 kfree(iov);
a2e27255
ACM
2130 return err;
2131}
2132
2133/*
2134 * BSD recvmsg interface
2135 */
2136
666547ff 2137long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2138{
2139 int fput_needed, err;
2140 struct msghdr msg_sys;
1be374a0
AL
2141 struct socket *sock;
2142
1be374a0 2143 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2144 if (!sock)
2145 goto out;
2146
a7526eb5 2147 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2148
6cb153ca 2149 fput_light(sock->file, fput_needed);
1da177e4
LT
2150out:
2151 return err;
2152}
2153
666547ff 2154SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2155 unsigned int, flags)
2156{
2157 if (flags & MSG_CMSG_COMPAT)
2158 return -EINVAL;
2159 return __sys_recvmsg(fd, msg, flags);
2160}
2161
a2e27255
ACM
2162/*
2163 * Linux recvmmsg interface
2164 */
2165
2166int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2167 unsigned int flags, struct timespec *timeout)
2168{
2169 int fput_needed, err, datagrams;
2170 struct socket *sock;
2171 struct mmsghdr __user *entry;
d7256d0e 2172 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2173 struct msghdr msg_sys;
2174 struct timespec end_time;
2175
2176 if (timeout &&
2177 poll_select_set_timeout(&end_time, timeout->tv_sec,
2178 timeout->tv_nsec))
2179 return -EINVAL;
2180
2181 datagrams = 0;
2182
2183 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2184 if (!sock)
2185 return err;
2186
2187 err = sock_error(sock->sk);
2188 if (err)
2189 goto out_put;
2190
2191 entry = mmsg;
d7256d0e 2192 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2193
2194 while (datagrams < vlen) {
2195 /*
2196 * No need to ask LSM for more than the first datagram.
2197 */
d7256d0e 2198 if (MSG_CMSG_COMPAT & flags) {
666547ff 2199 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2200 &msg_sys, flags & ~MSG_WAITFORONE,
2201 datagrams);
d7256d0e
JMG
2202 if (err < 0)
2203 break;
2204 err = __put_user(err, &compat_entry->msg_len);
2205 ++compat_entry;
2206 } else {
a7526eb5 2207 err = ___sys_recvmsg(sock,
666547ff 2208 (struct user_msghdr __user *)entry,
a7526eb5
AL
2209 &msg_sys, flags & ~MSG_WAITFORONE,
2210 datagrams);
d7256d0e
JMG
2211 if (err < 0)
2212 break;
2213 err = put_user(err, &entry->msg_len);
2214 ++entry;
2215 }
2216
a2e27255
ACM
2217 if (err)
2218 break;
a2e27255
ACM
2219 ++datagrams;
2220
71c5c159
BB
2221 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2222 if (flags & MSG_WAITFORONE)
2223 flags |= MSG_DONTWAIT;
2224
a2e27255
ACM
2225 if (timeout) {
2226 ktime_get_ts(timeout);
2227 *timeout = timespec_sub(end_time, *timeout);
2228 if (timeout->tv_sec < 0) {
2229 timeout->tv_sec = timeout->tv_nsec = 0;
2230 break;
2231 }
2232
2233 /* Timeout, return less than vlen datagrams */
2234 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2235 break;
2236 }
2237
2238 /* Out of band data, return right away */
2239 if (msg_sys.msg_flags & MSG_OOB)
2240 break;
a78cb84c 2241 cond_resched();
a2e27255
ACM
2242 }
2243
2244out_put:
2245 fput_light(sock->file, fput_needed);
1da177e4 2246
a2e27255
ACM
2247 if (err == 0)
2248 return datagrams;
2249
2250 if (datagrams != 0) {
2251 /*
2252 * We may return less entries than requested (vlen) if the
2253 * sock is non block and there aren't enough datagrams...
2254 */
2255 if (err != -EAGAIN) {
2256 /*
2257 * ... or if recvmsg returns an error after we
2258 * received some datagrams, where we record the
2259 * error to return on the next call or if the
2260 * app asks about it using getsockopt(SO_ERROR).
2261 */
2262 sock->sk->sk_err = -err;
2263 }
2264
2265 return datagrams;
2266 }
2267
2268 return err;
2269}
2270
2271SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2272 unsigned int, vlen, unsigned int, flags,
2273 struct timespec __user *, timeout)
2274{
2275 int datagrams;
2276 struct timespec timeout_sys;
2277
1be374a0
AL
2278 if (flags & MSG_CMSG_COMPAT)
2279 return -EINVAL;
2280
a2e27255
ACM
2281 if (!timeout)
2282 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2283
2284 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2285 return -EFAULT;
2286
2287 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2288
2289 if (datagrams > 0 &&
2290 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2291 datagrams = -EFAULT;
2292
2293 return datagrams;
2294}
2295
2296#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument list for each sys_socketcall call number
 * (the table is indexed by the call number; entry 0 is unused).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2307
2308/*
89bddce5 2309 * System call vectors.
1da177e4
LT
2310 *
2311 * Argument checking cleaned up. Saved 20% in size.
2312 * This function doesn't need to set the kernel lock because
89bddce5 2313 * it is set by the callees.
1da177e4
LT
2314 */
2315
3e0fa65f 2316SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2317{
2950fa9d 2318 unsigned long a[AUDITSC_ARGS];
89bddce5 2319 unsigned long a0, a1;
1da177e4 2320 int err;
47379052 2321 unsigned int len;
1da177e4 2322
228e548e 2323 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2324 return -EINVAL;
2325
47379052
AV
2326 len = nargs[call];
2327 if (len > sizeof(a))
2328 return -EINVAL;
2329
1da177e4 2330 /* copy_from_user should be SMP safe. */
47379052 2331 if (copy_from_user(a, args, len))
1da177e4 2332 return -EFAULT;
3ec3b2fb 2333
2950fa9d
CG
2334 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2335 if (err)
2336 return err;
3ec3b2fb 2337
89bddce5
SH
2338 a0 = a[0];
2339 a1 = a[1];
2340
2341 switch (call) {
2342 case SYS_SOCKET:
2343 err = sys_socket(a0, a1, a[2]);
2344 break;
2345 case SYS_BIND:
2346 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2347 break;
2348 case SYS_CONNECT:
2349 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2350 break;
2351 case SYS_LISTEN:
2352 err = sys_listen(a0, a1);
2353 break;
2354 case SYS_ACCEPT:
de11defe
UD
2355 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2356 (int __user *)a[2], 0);
89bddce5
SH
2357 break;
2358 case SYS_GETSOCKNAME:
2359 err =
2360 sys_getsockname(a0, (struct sockaddr __user *)a1,
2361 (int __user *)a[2]);
2362 break;
2363 case SYS_GETPEERNAME:
2364 err =
2365 sys_getpeername(a0, (struct sockaddr __user *)a1,
2366 (int __user *)a[2]);
2367 break;
2368 case SYS_SOCKETPAIR:
2369 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2370 break;
2371 case SYS_SEND:
2372 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2373 break;
2374 case SYS_SENDTO:
2375 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2376 (struct sockaddr __user *)a[4], a[5]);
2377 break;
2378 case SYS_RECV:
2379 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2380 break;
2381 case SYS_RECVFROM:
2382 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2383 (struct sockaddr __user *)a[4],
2384 (int __user *)a[5]);
2385 break;
2386 case SYS_SHUTDOWN:
2387 err = sys_shutdown(a0, a1);
2388 break;
2389 case SYS_SETSOCKOPT:
2390 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2391 break;
2392 case SYS_GETSOCKOPT:
2393 err =
2394 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2395 (int __user *)a[4]);
2396 break;
2397 case SYS_SENDMSG:
666547ff 2398 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2399 break;
228e548e
AB
2400 case SYS_SENDMMSG:
2401 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2402 break;
89bddce5 2403 case SYS_RECVMSG:
666547ff 2404 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2405 break;
a2e27255
ACM
2406 case SYS_RECVMMSG:
2407 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2408 (struct timespec __user *)a[4]);
2409 break;
de11defe
UD
2410 case SYS_ACCEPT4:
2411 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2412 (int __user *)a[2], a[3]);
aaca0bdc 2413 break;
89bddce5
SH
2414 default:
2415 err = -EINVAL;
2416 break;
1da177e4
LT
2417 }
2418 return err;
2419}
2420
89bddce5 2421#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2422
55737fda
SH
2423/**
2424 * sock_register - add a socket protocol handler
2425 * @ops: description of protocol
2426 *
1da177e4
LT
2427 * This function is called by a protocol handler that wants to
2428 * advertise its address family, and have it linked into the
e793c0f7 2429 * socket interface. The value ops->family corresponds to the
55737fda 2430 * socket system call protocol family.
1da177e4 2431 */
f0fd27d4 2432int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2433{
2434 int err;
2435
2436 if (ops->family >= NPROTO) {
3410f22e 2437 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2438 return -ENOBUFS;
2439 }
55737fda
SH
2440
2441 spin_lock(&net_family_lock);
190683a9
ED
2442 if (rcu_dereference_protected(net_families[ops->family],
2443 lockdep_is_held(&net_family_lock)))
55737fda
SH
2444 err = -EEXIST;
2445 else {
cf778b00 2446 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2447 err = 0;
2448 }
55737fda
SH
2449 spin_unlock(&net_family_lock);
2450
3410f22e 2451 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2452 return err;
2453}
c6d409cf 2454EXPORT_SYMBOL(sock_register);
1da177e4 2455
55737fda
SH
2456/**
2457 * sock_unregister - remove a protocol handler
2458 * @family: protocol family to remove
2459 *
1da177e4
LT
2460 * This function is called by a protocol handler that wants to
2461 * remove its address family, and have it unlinked from the
55737fda
SH
2462 * new socket creation.
2463 *
2464 * If protocol handler is a module, then it can use module reference
2465 * counts to protect against new references. If protocol handler is not
2466 * a module then it needs to provide its own protection in
2467 * the ops->create routine.
1da177e4 2468 */
f0fd27d4 2469void sock_unregister(int family)
1da177e4 2470{
f0fd27d4 2471 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2472
55737fda 2473 spin_lock(&net_family_lock);
a9b3cd7f 2474 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2475 spin_unlock(&net_family_lock);
2476
2477 synchronize_rcu();
2478
3410f22e 2479 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2480}
c6d409cf 2481EXPORT_SYMBOL(sock_unregister);
1da177e4 2482
77d76ea3 2483static int __init sock_init(void)
1da177e4 2484{
b3e19d92 2485 int err;
2ca794e5
EB
2486 /*
2487 * Initialize the network sysctl infrastructure.
2488 */
2489 err = net_sysctl_init();
2490 if (err)
2491 goto out;
b3e19d92 2492
1da177e4 2493 /*
89bddce5 2494 * Initialize skbuff SLAB cache
1da177e4
LT
2495 */
2496 skb_init();
1da177e4
LT
2497
2498 /*
89bddce5 2499 * Initialize the protocols module.
1da177e4
LT
2500 */
2501
2502 init_inodecache();
b3e19d92
NP
2503
2504 err = register_filesystem(&sock_fs_type);
2505 if (err)
2506 goto out_fs;
1da177e4 2507 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2508 if (IS_ERR(sock_mnt)) {
2509 err = PTR_ERR(sock_mnt);
2510 goto out_mount;
2511 }
77d76ea3
AK
2512
2513 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2514 */
2515
2516#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2517 err = netfilter_init();
2518 if (err)
2519 goto out;
1da177e4 2520#endif
cbeb321a 2521
408eccce 2522 ptp_classifier_init();
c1f19b51 2523
b3e19d92
NP
2524out:
2525 return err;
2526
2527out_mount:
2528 unregister_filesystem(&sock_fs_type);
2529out_fs:
2530 goto out;
1da177e4
LT
2531}
2532
77d76ea3
AK
2533core_initcall(sock_init); /* early initcall */
2534
1da177e4
LT
2535#ifdef CONFIG_PROC_FS
2536void socket_seq_show(struct seq_file *seq)
2537{
2538 int cpu;
2539 int counter = 0;
2540
6f912042 2541 for_each_possible_cpu(cpu)
89bddce5 2542 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2543
2544 /* It can be negative, by the way. 8) */
2545 if (counter < 0)
2546 counter = 0;
2547
2548 seq_printf(seq, "sockets: used %d\n", counter);
2549}
89bddce5 2550#endif /* CONFIG_PROC_FS */
1da177e4 2551
89bbfc95 2552#ifdef CONFIG_COMPAT
6b96018b 2553static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2554 unsigned int cmd, void __user *up)
7a229387 2555{
7a229387
AB
2556 mm_segment_t old_fs = get_fs();
2557 struct timeval ktv;
2558 int err;
2559
2560 set_fs(KERNEL_DS);
6b96018b 2561 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2562 set_fs(old_fs);
644595f8 2563 if (!err)
ed6fe9d6 2564 err = compat_put_timeval(&ktv, up);
644595f8 2565
7a229387
AB
2566 return err;
2567}
2568
6b96018b 2569static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2570 unsigned int cmd, void __user *up)
7a229387 2571{
7a229387
AB
2572 mm_segment_t old_fs = get_fs();
2573 struct timespec kts;
2574 int err;
2575
2576 set_fs(KERNEL_DS);
6b96018b 2577 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2578 set_fs(old_fs);
644595f8 2579 if (!err)
ed6fe9d6 2580 err = compat_put_timespec(&kts, up);
644595f8 2581
7a229387
AB
2582 return err;
2583}
2584
6b96018b 2585static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2586{
2587 struct ifreq __user *uifr;
2588 int err;
2589
2590 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2591 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2592 return -EFAULT;
2593
6b96018b 2594 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2595 if (err)
2596 return err;
2597
6b96018b 2598 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2599 return -EFAULT;
2600
2601 return 0;
2602}
2603
6b96018b 2604static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2605{
6b96018b 2606 struct compat_ifconf ifc32;
7a229387
AB
2607 struct ifconf ifc;
2608 struct ifconf __user *uifc;
6b96018b 2609 struct compat_ifreq __user *ifr32;
7a229387
AB
2610 struct ifreq __user *ifr;
2611 unsigned int i, j;
2612 int err;
2613
6b96018b 2614 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2615 return -EFAULT;
2616
43da5f2e 2617 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2618 if (ifc32.ifcbuf == 0) {
2619 ifc32.ifc_len = 0;
2620 ifc.ifc_len = 0;
2621 ifc.ifc_req = NULL;
2622 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2623 } else {
c6d409cf
ED
2624 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2625 sizeof(struct ifreq);
7a229387
AB
2626 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2627 ifc.ifc_len = len;
2628 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2629 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2630 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2631 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2632 return -EFAULT;
2633 ifr++;
2634 ifr32++;
2635 }
2636 }
2637 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2638 return -EFAULT;
2639
6b96018b 2640 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2641 if (err)
2642 return err;
2643
2644 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2645 return -EFAULT;
2646
2647 ifr = ifc.ifc_req;
2648 ifr32 = compat_ptr(ifc32.ifcbuf);
2649 for (i = 0, j = 0;
c6d409cf
ED
2650 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2651 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2652 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2653 return -EFAULT;
2654 ifr32++;
2655 ifr++;
2656 }
2657
2658 if (ifc32.ifcbuf == 0) {
2659 /* Translate from 64-bit structure multiple to
2660 * a 32-bit one.
2661 */
2662 i = ifc.ifc_len;
6b96018b 2663 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2664 ifc32.ifc_len = i;
2665 } else {
2666 ifc32.ifc_len = i;
2667 }
6b96018b 2668 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2669 return -EFAULT;
2670
2671 return 0;
2672}
2673
6b96018b 2674static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2675{
3a7da39d
BH
2676 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2677 bool convert_in = false, convert_out = false;
2678 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2679 struct ethtool_rxnfc __user *rxnfc;
7a229387 2680 struct ifreq __user *ifr;
3a7da39d
BH
2681 u32 rule_cnt = 0, actual_rule_cnt;
2682 u32 ethcmd;
7a229387 2683 u32 data;
3a7da39d 2684 int ret;
7a229387 2685
3a7da39d
BH
2686 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2687 return -EFAULT;
7a229387 2688
3a7da39d
BH
2689 compat_rxnfc = compat_ptr(data);
2690
2691 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2692 return -EFAULT;
2693
3a7da39d
BH
2694 /* Most ethtool structures are defined without padding.
2695 * Unfortunately struct ethtool_rxnfc is an exception.
2696 */
2697 switch (ethcmd) {
2698 default:
2699 break;
2700 case ETHTOOL_GRXCLSRLALL:
2701 /* Buffer size is variable */
2702 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2703 return -EFAULT;
2704 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2705 return -ENOMEM;
2706 buf_size += rule_cnt * sizeof(u32);
2707 /* fall through */
2708 case ETHTOOL_GRXRINGS:
2709 case ETHTOOL_GRXCLSRLCNT:
2710 case ETHTOOL_GRXCLSRULE:
55664f32 2711 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2712 convert_out = true;
2713 /* fall through */
2714 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2715 buf_size += sizeof(struct ethtool_rxnfc);
2716 convert_in = true;
2717 break;
2718 }
2719
2720 ifr = compat_alloc_user_space(buf_size);
954b1244 2721 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2722
2723 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2724 return -EFAULT;
2725
3a7da39d
BH
2726 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2727 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2728 return -EFAULT;
2729
3a7da39d 2730 if (convert_in) {
127fe533 2731 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2732 * fs.ring_cookie and at the end of fs, but nowhere else.
2733 */
127fe533
AD
2734 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2735 sizeof(compat_rxnfc->fs.m_ext) !=
2736 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2737 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2738 BUILD_BUG_ON(
2739 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2740 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2741 offsetof(struct ethtool_rxnfc, fs.location) -
2742 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2743
2744 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2745 (void __user *)(&rxnfc->fs.m_ext + 1) -
2746 (void __user *)rxnfc) ||
3a7da39d
BH
2747 copy_in_user(&rxnfc->fs.ring_cookie,
2748 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2749 (void __user *)(&rxnfc->fs.location + 1) -
2750 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2751 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2752 sizeof(rxnfc->rule_cnt)))
2753 return -EFAULT;
2754 }
2755
2756 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2757 if (ret)
2758 return ret;
2759
2760 if (convert_out) {
2761 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2762 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2763 (const void __user *)rxnfc) ||
3a7da39d
BH
2764 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2765 &rxnfc->fs.ring_cookie,
954b1244
SH
2766 (const void __user *)(&rxnfc->fs.location + 1) -
2767 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2768 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2769 sizeof(rxnfc->rule_cnt)))
2770 return -EFAULT;
2771
2772 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2773 /* As an optimisation, we only copy the actual
2774 * number of rules that the underlying
2775 * function returned. Since Mallory might
2776 * change the rule count in user memory, we
2777 * check that it is less than the rule count
2778 * originally given (as the user buffer size),
2779 * which has been range-checked.
2780 */
2781 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2782 return -EFAULT;
2783 if (actual_rule_cnt < rule_cnt)
2784 rule_cnt = actual_rule_cnt;
2785 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2786 &rxnfc->rule_locs[0],
2787 rule_cnt * sizeof(u32)))
2788 return -EFAULT;
2789 }
2790 }
2791
2792 return 0;
7a229387
AB
2793}
2794
7a50a240
AB
2795static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2796{
2797 void __user *uptr;
2798 compat_uptr_t uptr32;
2799 struct ifreq __user *uifr;
2800
c6d409cf 2801 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2802 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2803 return -EFAULT;
2804
2805 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2806 return -EFAULT;
2807
2808 uptr = compat_ptr(uptr32);
2809
2810 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2811 return -EFAULT;
2812
2813 return dev_ioctl(net, SIOCWANDEV, uifr);
2814}
2815
6b96018b
AB
2816static int bond_ioctl(struct net *net, unsigned int cmd,
2817 struct compat_ifreq __user *ifr32)
7a229387
AB
2818{
2819 struct ifreq kifr;
7a229387
AB
2820 mm_segment_t old_fs;
2821 int err;
7a229387
AB
2822
2823 switch (cmd) {
2824 case SIOCBONDENSLAVE:
2825 case SIOCBONDRELEASE:
2826 case SIOCBONDSETHWADDR:
2827 case SIOCBONDCHANGEACTIVE:
6b96018b 2828 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2829 return -EFAULT;
2830
2831 old_fs = get_fs();
c6d409cf 2832 set_fs(KERNEL_DS);
c3f52ae6 2833 err = dev_ioctl(net, cmd,
2834 (struct ifreq __user __force *) &kifr);
c6d409cf 2835 set_fs(old_fs);
7a229387
AB
2836
2837 return err;
7a229387 2838 default:
07d106d0 2839 return -ENOIOCTLCMD;
ccbd6a5a 2840 }
7a229387
AB
2841}
2842
590d4693
BH
2843/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2844static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2845 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2846{
2847 struct ifreq __user *u_ifreq64;
7a229387
AB
2848 char tmp_buf[IFNAMSIZ];
2849 void __user *data64;
2850 u32 data32;
2851
2852 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2853 IFNAMSIZ))
2854 return -EFAULT;
417c3522 2855 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2856 return -EFAULT;
2857 data64 = compat_ptr(data32);
2858
2859 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2860
7a229387
AB
2861 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2862 IFNAMSIZ))
2863 return -EFAULT;
417c3522 2864 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2865 return -EFAULT;
2866
6b96018b 2867 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2868}
2869
6b96018b
AB
2870static int dev_ifsioc(struct net *net, struct socket *sock,
2871 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2872{
a2116ed2 2873 struct ifreq __user *uifr;
7a229387
AB
2874 int err;
2875
a2116ed2
AB
2876 uifr = compat_alloc_user_space(sizeof(*uifr));
2877 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2878 return -EFAULT;
2879
2880 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2881
7a229387
AB
2882 if (!err) {
2883 switch (cmd) {
2884 case SIOCGIFFLAGS:
2885 case SIOCGIFMETRIC:
2886 case SIOCGIFMTU:
2887 case SIOCGIFMEM:
2888 case SIOCGIFHWADDR:
2889 case SIOCGIFINDEX:
2890 case SIOCGIFADDR:
2891 case SIOCGIFBRDADDR:
2892 case SIOCGIFDSTADDR:
2893 case SIOCGIFNETMASK:
fab2532b 2894 case SIOCGIFPFLAGS:
7a229387 2895 case SIOCGIFTXQLEN:
fab2532b
AB
2896 case SIOCGMIIPHY:
2897 case SIOCGMIIREG:
a2116ed2 2898 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2899 err = -EFAULT;
2900 break;
2901 }
2902 }
2903 return err;
2904}
2905
a2116ed2
AB
2906static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2907 struct compat_ifreq __user *uifr32)
2908{
2909 struct ifreq ifr;
2910 struct compat_ifmap __user *uifmap32;
2911 mm_segment_t old_fs;
2912 int err;
2913
2914 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2915 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2916 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2917 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2918 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2919 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2920 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2921 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2922 if (err)
2923 return -EFAULT;
2924
2925 old_fs = get_fs();
c6d409cf 2926 set_fs(KERNEL_DS);
c3f52ae6 2927 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2928 set_fs(old_fs);
a2116ed2
AB
2929
2930 if (cmd == SIOCGIFMAP && !err) {
2931 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2932 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2933 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2934 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2935 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2936 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2937 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2938 if (err)
2939 err = -EFAULT;
2940 }
2941 return err;
2942}
2943
7a229387 2944struct rtentry32 {
c6d409cf 2945 u32 rt_pad1;
7a229387
AB
2946 struct sockaddr rt_dst; /* target address */
2947 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2948 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2949 unsigned short rt_flags;
2950 short rt_pad2;
2951 u32 rt_pad3;
2952 unsigned char rt_tos;
2953 unsigned char rt_class;
2954 short rt_pad4;
2955 short rt_metric; /* +1 for binary compatibility! */
7a229387 2956 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2957 u32 rt_mtu; /* per route MTU/Window */
2958 u32 rt_window; /* Window clamping */
7a229387
AB
2959 unsigned short rt_irtt; /* Initial RTT */
2960};
2961
2962struct in6_rtmsg32 {
2963 struct in6_addr rtmsg_dst;
2964 struct in6_addr rtmsg_src;
2965 struct in6_addr rtmsg_gateway;
2966 u32 rtmsg_type;
2967 u16 rtmsg_dst_len;
2968 u16 rtmsg_src_len;
2969 u32 rtmsg_metric;
2970 u32 rtmsg_info;
2971 u32 rtmsg_flags;
2972 s32 rtmsg_ifindex;
2973};
2974
6b96018b
AB
2975static int routing_ioctl(struct net *net, struct socket *sock,
2976 unsigned int cmd, void __user *argp)
7a229387
AB
2977{
2978 int ret;
2979 void *r = NULL;
2980 struct in6_rtmsg r6;
2981 struct rtentry r4;
2982 char devname[16];
2983 u32 rtdev;
2984 mm_segment_t old_fs = get_fs();
2985
6b96018b
AB
2986 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2987 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2988 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2989 3 * sizeof(struct in6_addr));
3ddc5b46
MD
2990 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2991 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2992 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2993 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2994 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2995 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2996 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
2997
2998 r = (void *) &r6;
2999 } else { /* ipv4 */
6b96018b 3000 struct rtentry32 __user *ur4 = argp;
c6d409cf 3001 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3002 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3003 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3004 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3005 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3006 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3007 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3008 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3009 if (rtdev) {
c6d409cf 3010 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3011 r4.rt_dev = (char __user __force *)devname;
3012 devname[15] = 0;
7a229387
AB
3013 } else
3014 r4.rt_dev = NULL;
3015
3016 r = (void *) &r4;
3017 }
3018
3019 if (ret) {
3020 ret = -EFAULT;
3021 goto out;
3022 }
3023
c6d409cf 3024 set_fs(KERNEL_DS);
6b96018b 3025 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3026 set_fs(old_fs);
7a229387
AB
3027
3028out:
7a229387
AB
3029 return ret;
3030}
3031
3032/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3033 * for some operations; this forces use of the newer bridge-utils that
25985edc 3034 * use compatible ioctls
7a229387 3035 */
6b96018b 3036static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3037{
6b96018b 3038 compat_ulong_t tmp;
7a229387 3039
6b96018b 3040 if (get_user(tmp, argp))
7a229387
AB
3041 return -EFAULT;
3042 if (tmp == BRCTL_GET_VERSION)
3043 return BRCTL_VERSION + 1;
3044 return -EINVAL;
3045}
3046
6b96018b
AB
3047static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3048 unsigned int cmd, unsigned long arg)
3049{
3050 void __user *argp = compat_ptr(arg);
3051 struct sock *sk = sock->sk;
3052 struct net *net = sock_net(sk);
7a229387 3053
6b96018b 3054 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3055 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3056
3057 switch (cmd) {
3058 case SIOCSIFBR:
3059 case SIOCGIFBR:
3060 return old_bridge_ioctl(argp);
3061 case SIOCGIFNAME:
3062 return dev_ifname32(net, argp);
3063 case SIOCGIFCONF:
3064 return dev_ifconf(net, argp);
3065 case SIOCETHTOOL:
3066 return ethtool_ioctl(net, argp);
7a50a240
AB
3067 case SIOCWANDEV:
3068 return compat_siocwandev(net, argp);
a2116ed2
AB
3069 case SIOCGIFMAP:
3070 case SIOCSIFMAP:
3071 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3072 case SIOCBONDENSLAVE:
3073 case SIOCBONDRELEASE:
3074 case SIOCBONDSETHWADDR:
6b96018b
AB
3075 case SIOCBONDCHANGEACTIVE:
3076 return bond_ioctl(net, cmd, argp);
3077 case SIOCADDRT:
3078 case SIOCDELRT:
3079 return routing_ioctl(net, sock, cmd, argp);
3080 case SIOCGSTAMP:
3081 return do_siocgstamp(net, sock, cmd, argp);
3082 case SIOCGSTAMPNS:
3083 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3084 case SIOCBONDSLAVEINFOQUERY:
3085 case SIOCBONDINFOQUERY:
a2116ed2 3086 case SIOCSHWTSTAMP:
fd468c74 3087 case SIOCGHWTSTAMP:
590d4693 3088 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3089
3090 case FIOSETOWN:
3091 case SIOCSPGRP:
3092 case FIOGETOWN:
3093 case SIOCGPGRP:
3094 case SIOCBRADDBR:
3095 case SIOCBRDELBR:
3096 case SIOCGIFVLAN:
3097 case SIOCSIFVLAN:
3098 case SIOCADDDLCI:
3099 case SIOCDELDLCI:
3100 return sock_ioctl(file, cmd, arg);
3101
3102 case SIOCGIFFLAGS:
3103 case SIOCSIFFLAGS:
3104 case SIOCGIFMETRIC:
3105 case SIOCSIFMETRIC:
3106 case SIOCGIFMTU:
3107 case SIOCSIFMTU:
3108 case SIOCGIFMEM:
3109 case SIOCSIFMEM:
3110 case SIOCGIFHWADDR:
3111 case SIOCSIFHWADDR:
3112 case SIOCADDMULTI:
3113 case SIOCDELMULTI:
3114 case SIOCGIFINDEX:
6b96018b
AB
3115 case SIOCGIFADDR:
3116 case SIOCSIFADDR:
3117 case SIOCSIFHWBROADCAST:
6b96018b 3118 case SIOCDIFADDR:
6b96018b
AB
3119 case SIOCGIFBRDADDR:
3120 case SIOCSIFBRDADDR:
3121 case SIOCGIFDSTADDR:
3122 case SIOCSIFDSTADDR:
3123 case SIOCGIFNETMASK:
3124 case SIOCSIFNETMASK:
3125 case SIOCSIFPFLAGS:
3126 case SIOCGIFPFLAGS:
3127 case SIOCGIFTXQLEN:
3128 case SIOCSIFTXQLEN:
3129 case SIOCBRADDIF:
3130 case SIOCBRDELIF:
9177efd3
AB
3131 case SIOCSIFNAME:
3132 case SIOCGMIIPHY:
3133 case SIOCGMIIREG:
3134 case SIOCSMIIREG:
6b96018b 3135 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3136
6b96018b
AB
3137 case SIOCSARP:
3138 case SIOCGARP:
3139 case SIOCDARP:
6b96018b 3140 case SIOCATMARK:
9177efd3
AB
3141 return sock_do_ioctl(net, sock, cmd, arg);
3142 }
3143
6b96018b
AB
3144 return -ENOIOCTLCMD;
3145}
7a229387 3146
95c96174 3147static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3148 unsigned long arg)
89bbfc95
SP
3149{
3150 struct socket *sock = file->private_data;
3151 int ret = -ENOIOCTLCMD;
87de87d5
DM
3152 struct sock *sk;
3153 struct net *net;
3154
3155 sk = sock->sk;
3156 net = sock_net(sk);
89bbfc95
SP
3157
3158 if (sock->ops->compat_ioctl)
3159 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3160
87de87d5
DM
3161 if (ret == -ENOIOCTLCMD &&
3162 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3163 ret = compat_wext_handle_ioctl(net, cmd, arg);
3164
6b96018b
AB
3165 if (ret == -ENOIOCTLCMD)
3166 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3167
89bbfc95
SP
3168 return ret;
3169}
3170#endif
3171
ac5a488e
SS
3172int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3173{
3174 return sock->ops->bind(sock, addr, addrlen);
3175}
c6d409cf 3176EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3177
3178int kernel_listen(struct socket *sock, int backlog)
3179{
3180 return sock->ops->listen(sock, backlog);
3181}
c6d409cf 3182EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3183
3184int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3185{
3186 struct sock *sk = sock->sk;
3187 int err;
3188
3189 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3190 newsock);
3191 if (err < 0)
3192 goto done;
3193
3194 err = sock->ops->accept(sock, *newsock, flags);
3195 if (err < 0) {
3196 sock_release(*newsock);
fa8705b0 3197 *newsock = NULL;
ac5a488e
SS
3198 goto done;
3199 }
3200
3201 (*newsock)->ops = sock->ops;
1b08534e 3202 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3203
3204done:
3205 return err;
3206}
c6d409cf 3207EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3208
3209int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3210 int flags)
ac5a488e
SS
3211{
3212 return sock->ops->connect(sock, addr, addrlen, flags);
3213}
c6d409cf 3214EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3215
3216int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3217 int *addrlen)
3218{
3219 return sock->ops->getname(sock, addr, addrlen, 0);
3220}
c6d409cf 3221EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3222
3223int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3224 int *addrlen)
3225{
3226 return sock->ops->getname(sock, addr, addrlen, 1);
3227}
c6d409cf 3228EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3229
3230int kernel_getsockopt(struct socket *sock, int level, int optname,
3231 char *optval, int *optlen)
3232{
3233 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3234 char __user *uoptval;
3235 int __user *uoptlen;
ac5a488e
SS
3236 int err;
3237
fb8621bb
NK
3238 uoptval = (char __user __force *) optval;
3239 uoptlen = (int __user __force *) optlen;
3240
ac5a488e
SS
3241 set_fs(KERNEL_DS);
3242 if (level == SOL_SOCKET)
fb8621bb 3243 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3244 else
fb8621bb
NK
3245 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3246 uoptlen);
ac5a488e
SS
3247 set_fs(oldfs);
3248 return err;
3249}
c6d409cf 3250EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3251
3252int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3253 char *optval, unsigned int optlen)
ac5a488e
SS
3254{
3255 mm_segment_t oldfs = get_fs();
fb8621bb 3256 char __user *uoptval;
ac5a488e
SS
3257 int err;
3258
fb8621bb
NK
3259 uoptval = (char __user __force *) optval;
3260
ac5a488e
SS
3261 set_fs(KERNEL_DS);
3262 if (level == SOL_SOCKET)
fb8621bb 3263 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3264 else
fb8621bb 3265 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3266 optlen);
3267 set_fs(oldfs);
3268 return err;
3269}
c6d409cf 3270EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3271
3272int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3273 size_t size, int flags)
3274{
3275 if (sock->ops->sendpage)
3276 return sock->ops->sendpage(sock, page, offset, size, flags);
3277
3278 return sock_no_sendpage(sock, page, offset, size, flags);
3279}
c6d409cf 3280EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3281
3282int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3283{
3284 mm_segment_t oldfs = get_fs();
3285 int err;
3286
3287 set_fs(KERNEL_DS);
3288 err = sock->ops->ioctl(sock, cmd, arg);
3289 set_fs(oldfs);
3290
3291 return err;
3292}
c6d409cf 3293EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3294
91cf45f0
TM
3295int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3296{
3297 return sock->ops->shutdown(sock, how);
3298}
91cf45f0 3299EXPORT_SYMBOL(kernel_sock_shutdown);