]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/socket.c
libceph: don't set weight to IN when OSD is destroyed
[mirror_ubuntu-zesty-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
bba0bd31
AG
323static int sockfs_xattr_get(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, void *value, size_t size)
326{
327 if (value) {
328 if (dentry->d_name.len + 1 > size)
329 return -ERANGE;
330 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
331 }
332 return dentry->d_name.len + 1;
333}
334
335#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
336#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
337#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
338
339static const struct xattr_handler sockfs_xattr_handler = {
340 .name = XATTR_NAME_SOCKPROTONAME,
341 .get = sockfs_xattr_get,
342};
343
4a590153
AG
344static int sockfs_security_xattr_set(const struct xattr_handler *handler,
345 struct dentry *dentry, struct inode *inode,
346 const char *suffix, const void *value,
347 size_t size, int flags)
348{
349 /* Handled by LSM. */
350 return -EAGAIN;
351}
352
353static const struct xattr_handler sockfs_security_xattr_handler = {
354 .prefix = XATTR_SECURITY_PREFIX,
355 .set = sockfs_security_xattr_set,
356};
357
bba0bd31
AG
358static const struct xattr_handler *sockfs_xattr_handlers[] = {
359 &sockfs_xattr_handler,
4a590153 360 &sockfs_security_xattr_handler,
bba0bd31
AG
361 NULL
362};
363
c74a1cbb
AV
364static struct dentry *sockfs_mount(struct file_system_type *fs_type,
365 int flags, const char *dev_name, void *data)
366{
bba0bd31
AG
367 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
368 sockfs_xattr_handlers,
369 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
370}
371
372static struct vfsmount *sock_mnt __read_mostly;
373
374static struct file_system_type sock_fs_type = {
375 .name = "sockfs",
376 .mount = sockfs_mount,
377 .kill_sb = kill_anon_super,
378};
379
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success a file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this function
 *	will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
396
aab174f0 397struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 398{
7cbe66b6 399 struct qstr name = { .name = "" };
2c48b9c4 400 struct path path;
7cbe66b6 401 struct file *file;
1da177e4 402
600e1779
MY
403 if (dname) {
404 name.name = dname;
405 name.len = strlen(name.name);
406 } else if (sock->sk) {
407 name.name = sock->sk->sk_prot_creator->name;
408 name.len = strlen(name.name);
409 }
4b936885 410 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
411 if (unlikely(!path.dentry))
412 return ERR_PTR(-ENOMEM);
2c48b9c4 413 path.mnt = mntget(sock_mnt);
39d8c1b6 414
2c48b9c4 415 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 416
2c48b9c4 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 418 &socket_file_ops);
b5ffe634 419 if (IS_ERR(file)) {
cc3808f8 420 /* drop dentry, keep inode */
c5ef6035 421 ihold(d_inode(path.dentry));
2c48b9c4 422 path_put(&path);
39b65252 423 return file;
cc3808f8
AV
424 }
425
426 sock->file = file;
77d27200 427 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 428 file->private_data = sock;
28407630 429 return file;
39d8c1b6 430}
56b31d1c 431EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 432
56b31d1c 433static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
434{
435 struct file *newfile;
28407630
AV
436 int fd = get_unused_fd_flags(flags);
437 if (unlikely(fd < 0))
438 return fd;
39d8c1b6 439
aab174f0 440 newfile = sock_alloc_file(sock, flags, NULL);
28407630 441 if (likely(!IS_ERR(newfile))) {
39d8c1b6 442 fd_install(fd, newfile);
28407630
AV
443 return fd;
444 }
7cbe66b6 445
28407630
AV
446 put_unused_fd(fd);
447 return PTR_ERR(newfile);
1da177e4
LT
448}
449
406a3c63 450struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 451{
6cb153ca
BL
452 if (file->f_op == &socket_file_ops)
453 return file->private_data; /* set in sock_map_fd */
454
23bb80d2
ED
455 *err = -ENOTSOCK;
456 return NULL;
6cb153ca 457}
406a3c63 458EXPORT_SYMBOL(sock_from_file);
6cb153ca 459
1da177e4 460/**
c6d409cf 461 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
462 * @fd: file handle
463 * @err: pointer to an error code return
464 *
465 * The file handle passed in is locked and the socket it is bound
466 * too is returned. If an error occurs the err pointer is overwritten
467 * with a negative errno code and NULL is returned. The function checks
468 * for both invalid handles and passing a handle which is not a socket.
469 *
470 * On a success the socket object pointer is returned.
471 */
472
473struct socket *sockfd_lookup(int fd, int *err)
474{
475 struct file *file;
1da177e4
LT
476 struct socket *sock;
477
89bddce5
SH
478 file = fget(fd);
479 if (!file) {
1da177e4
LT
480 *err = -EBADF;
481 return NULL;
482 }
89bddce5 483
6cb153ca
BL
484 sock = sock_from_file(file, err);
485 if (!sock)
1da177e4 486 fput(file);
6cb153ca
BL
487 return sock;
488}
c6d409cf 489EXPORT_SYMBOL(sockfd_lookup);
1da177e4 490
6cb153ca
BL
491static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
492{
00e188ef 493 struct fd f = fdget(fd);
6cb153ca
BL
494 struct socket *sock;
495
3672558c 496 *err = -EBADF;
00e188ef
AV
497 if (f.file) {
498 sock = sock_from_file(f.file, err);
499 if (likely(sock)) {
500 *fput_needed = f.flags;
6cb153ca 501 return sock;
00e188ef
AV
502 }
503 fdput(f);
1da177e4 504 }
6cb153ca 505 return NULL;
1da177e4
LT
506}
507
600e1779
MY
508static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
509 size_t size)
510{
511 ssize_t len;
512 ssize_t used = 0;
513
c5ef6035 514 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
515 if (len < 0)
516 return len;
517 used += len;
518 if (buffer) {
519 if (size < used)
520 return -ERANGE;
521 buffer += len;
522 }
523
524 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
525 used += len;
526 if (buffer) {
527 if (size < used)
528 return -ERANGE;
529 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
530 buffer += len;
531 }
532
533 return used;
534}
535
dc647ec8 536static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
537{
538 int err = simple_setattr(dentry, iattr);
539
e1a3a60a 540 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
541 struct socket *sock = SOCKET_I(d_inode(dentry));
542
543 sock->sk->sk_uid = iattr->ia_uid;
544 }
545
546 return err;
547}
548
600e1779 549static const struct inode_operations sockfs_inode_ops = {
600e1779 550 .listxattr = sockfs_listxattr,
86741ec2 551 .setattr = sockfs_setattr,
600e1779
MY
552};
553
1da177e4
LT
554/**
555 * sock_alloc - allocate a socket
89bddce5 556 *
1da177e4
LT
557 * Allocate a new inode and socket object. The two are bound together
558 * and initialised. The socket is then returned. If we are out of inodes
559 * NULL is returned.
560 */
561
f4a00aac 562struct socket *sock_alloc(void)
1da177e4 563{
89bddce5
SH
564 struct inode *inode;
565 struct socket *sock;
1da177e4 566
a209dfc7 567 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
568 if (!inode)
569 return NULL;
570
571 sock = SOCKET_I(inode);
572
29a020d3 573 kmemcheck_annotate_bitfield(sock, type);
85fe4025 574 inode->i_ino = get_next_ino();
89bddce5 575 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
576 inode->i_uid = current_fsuid();
577 inode->i_gid = current_fsgid();
600e1779 578 inode->i_op = &sockfs_inode_ops;
1da177e4 579
19e8d69c 580 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
581 return sock;
582}
f4a00aac 583EXPORT_SYMBOL(sock_alloc);
1da177e4 584
1da177e4
LT
585/**
586 * sock_release - close a socket
587 * @sock: socket to close
588 *
589 * The socket is released from the protocol stack if it has a release
590 * callback, and the inode is then released if the socket is bound to
89bddce5 591 * an inode not a file.
1da177e4 592 */
89bddce5 593
1da177e4
LT
594void sock_release(struct socket *sock)
595{
596 if (sock->ops) {
597 struct module *owner = sock->ops->owner;
598
599 sock->ops->release(sock);
600 sock->ops = NULL;
601 module_put(owner);
602 }
603
eaefd110 604 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 605 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 606
19e8d69c 607 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
608 if (!sock->file) {
609 iput(SOCK_INODE(sock));
610 return;
611 }
89bddce5 612 sock->file = NULL;
1da177e4 613}
c6d409cf 614EXPORT_SYMBOL(sock_release);
1da177e4 615
c14ac945 616void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 617{
140c55d4
ED
618 u8 flags = *tx_flags;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
621 flags |= SKBTX_HW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
624 flags |= SKBTX_SW_TSTAMP;
625
c14ac945 626 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
627 flags |= SKBTX_SCHED_TSTAMP;
628
140c55d4 629 *tx_flags = flags;
20d49473 630}
67cc0d40 631EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 632
d8725c86 633static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 634{
01e97e65 635 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
636 BUG_ON(ret == -EIOCBQUEUED);
637 return ret;
1da177e4
LT
638}
639
/*
 * Send @msg on @sock after consulting security_socket_sendmsg().
 * Returns the hook's error if it is non-zero, otherwise the result of
 * the protocol's sendmsg operation.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
648
649int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
650 struct kvec *vec, size_t num, size_t size)
651{
6aa24814 652 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 653 return sock_sendmsg(sock, msg);
1da177e4 654}
c6d409cf 655EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 656
92f37fd2
ED
657/*
658 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
659 */
660void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
661 struct sk_buff *skb)
662{
20d49473 663 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 664 struct scm_timestamping tss;
20d49473
PO
665 int empty = 1;
666 struct skb_shared_hwtstamps *shhwtstamps =
667 skb_hwtstamps(skb);
668
669 /* Race occurred between timestamp enabling and packet
670 receiving. Fill in the current time for now. */
2456e855 671 if (need_software_tstamp && skb->tstamp == 0)
20d49473
PO
672 __net_timestamp(skb);
673
674 if (need_software_tstamp) {
675 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
676 struct timeval tv;
677 skb_get_timestamp(skb, &tv);
678 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
679 sizeof(tv), &tv);
680 } else {
f24b9be5
WB
681 struct timespec ts;
682 skb_get_timestampns(skb, &ts);
20d49473 683 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 684 sizeof(ts), &ts);
20d49473
PO
685 }
686 }
687
f24b9be5 688 memset(&tss, 0, sizeof(tss));
c199105d 689 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 690 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 691 empty = 0;
4d276eb6 692 if (shhwtstamps &&
b9f40e21 693 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 694 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 695 empty = 0;
1c885808 696 if (!empty) {
20d49473 697 put_cmsg(msg, SOL_SOCKET,
f24b9be5 698 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808
FY
699
700 if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
701 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
702 skb->len, skb->data);
703 }
92f37fd2 704}
7c81fd8b
ACM
705EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
706
6e3e939f
JB
707void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
708 struct sk_buff *skb)
709{
710 int ack;
711
712 if (!sock_flag(sk, SOCK_WIFI_STATUS))
713 return;
714 if (!skb->wifi_acked_valid)
715 return;
716
717 ack = skb->wifi_acked;
718
719 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
720}
721EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
722
11165f14 723static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
724 struct sk_buff *skb)
3b885787 725{
744d5a3e 726 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 727 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 728 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
729}
730
/*
 * Emit the timestamp control messages first, then the drop-count one.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 738
1b784140 739static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 740 int flags)
1da177e4 741{
2da62906 742 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
743}
744
/*
 * Receive into @msg from @sock after consulting
 * security_socket_recvmsg().  Returns the hook's error if it is non-zero,
 * otherwise the result of the protocol's recvmsg operation.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 752
c1249c0a
ML
753/**
754 * kernel_recvmsg - Receive a message from a socket (kernel space)
755 * @sock: The socket to receive the message from
756 * @msg: Received message
757 * @vec: Input s/g array for message data
758 * @num: Size of input s/g array
759 * @size: Number of bytes to read
760 * @flags: Message flags (MSG_DONTWAIT, etc...)
761 *
762 * On return the msg structure contains the scatter/gather array passed in the
763 * vec argument. The array is modified so that it consists of the unfilled
764 * portion of the original array.
765 *
766 * The returned value is the total number of bytes received, or an error.
767 */
89bddce5
SH
768int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
769 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
770{
771 mm_segment_t oldfs = get_fs();
772 int result;
773
6aa24814 774 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 775 set_fs(KERNEL_DS);
2da62906 776 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
777 set_fs(oldfs);
778 return result;
779}
c6d409cf 780EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 781
ce1d4d3e
CH
782static ssize_t sock_sendpage(struct file *file, struct page *page,
783 int offset, size_t size, loff_t *ppos, int more)
1da177e4 784{
1da177e4
LT
785 struct socket *sock;
786 int flags;
787
ce1d4d3e
CH
788 sock = file->private_data;
789
35f9c09f
ED
790 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
791 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
792 flags |= more;
ce1d4d3e 793
e6949583 794 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 795}
1da177e4 796
9c55e01c 797static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 798 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
799 unsigned int flags)
800{
801 struct socket *sock = file->private_data;
802
997b37da
RDC
803 if (unlikely(!sock->ops->splice_read))
804 return -EINVAL;
805
9c55e01c
JA
806 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
807}
808
8ae5e030 809static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 810{
6d652330
AV
811 struct file *file = iocb->ki_filp;
812 struct socket *sock = file->private_data;
0345f931 813 struct msghdr msg = {.msg_iter = *to,
814 .msg_iocb = iocb};
8ae5e030 815 ssize_t res;
ce1d4d3e 816
8ae5e030
AV
817 if (file->f_flags & O_NONBLOCK)
818 msg.msg_flags = MSG_DONTWAIT;
819
820 if (iocb->ki_pos != 0)
1da177e4 821 return -ESPIPE;
027445c3 822
66ee59af 823 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
824 return 0;
825
2da62906 826 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
827 *to = msg.msg_iter;
828 return res;
1da177e4
LT
829}
830
8ae5e030 831static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 832{
6d652330
AV
833 struct file *file = iocb->ki_filp;
834 struct socket *sock = file->private_data;
0345f931 835 struct msghdr msg = {.msg_iter = *from,
836 .msg_iocb = iocb};
8ae5e030 837 ssize_t res;
1da177e4 838
8ae5e030 839 if (iocb->ki_pos != 0)
ce1d4d3e 840 return -ESPIPE;
027445c3 841
8ae5e030
AV
842 if (file->f_flags & O_NONBLOCK)
843 msg.msg_flags = MSG_DONTWAIT;
844
6d652330
AV
845 if (sock->type == SOCK_SEQPACKET)
846 msg.msg_flags |= MSG_EOR;
847
d8725c86 848 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
849 *from = msg.msg_iter;
850 return res;
1da177e4
LT
851}
852
1da177e4
LT
853/*
854 * Atomic setting of ioctl hooks to avoid race
855 * with module unload.
856 */
857
4a3e2f71 858static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 859static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 860
881d966b 861void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 862{
4a3e2f71 863 mutex_lock(&br_ioctl_mutex);
1da177e4 864 br_ioctl_hook = hook;
4a3e2f71 865 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
866}
867EXPORT_SYMBOL(brioctl_set);
868
4a3e2f71 869static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 870static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 871
881d966b 872void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 873{
4a3e2f71 874 mutex_lock(&vlan_ioctl_mutex);
1da177e4 875 vlan_ioctl_hook = hook;
4a3e2f71 876 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
877}
878EXPORT_SYMBOL(vlan_ioctl_set);
879
4a3e2f71 880static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 881static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 882
89bddce5 883void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 884{
4a3e2f71 885 mutex_lock(&dlci_ioctl_mutex);
1da177e4 886 dlci_ioctl_hook = hook;
4a3e2f71 887 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
888}
889EXPORT_SYMBOL(dlci_ioctl_set);
890
6b96018b
AB
891static long sock_do_ioctl(struct net *net, struct socket *sock,
892 unsigned int cmd, unsigned long arg)
893{
894 int err;
895 void __user *argp = (void __user *)arg;
896
897 err = sock->ops->ioctl(sock, cmd, arg);
898
899 /*
900 * If this ioctl is unknown try to hand it down
901 * to the NIC driver.
902 */
903 if (err == -ENOIOCTLCMD)
904 err = dev_ioctl(net, cmd, argp);
905
906 return err;
907}
908
1da177e4
LT
909/*
910 * With an ioctl, arg may well be a user mode pointer, but we don't know
911 * what to do with it - that's up to the protocol still.
912 */
913
c62cce2c
AV
914static struct ns_common *get_net_ns(struct ns_common *ns)
915{
916 return &get_net(container_of(ns, struct net, ns))->ns;
917}
918
1da177e4
LT
919static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
920{
921 struct socket *sock;
881d966b 922 struct sock *sk;
1da177e4
LT
923 void __user *argp = (void __user *)arg;
924 int pid, err;
881d966b 925 struct net *net;
1da177e4 926
b69aee04 927 sock = file->private_data;
881d966b 928 sk = sock->sk;
3b1e0a65 929 net = sock_net(sk);
1da177e4 930 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 931 err = dev_ioctl(net, cmd, argp);
1da177e4 932 } else
3d23e349 933#ifdef CONFIG_WEXT_CORE
1da177e4 934 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 935 err = dev_ioctl(net, cmd, argp);
1da177e4 936 } else
3d23e349 937#endif
89bddce5 938 switch (cmd) {
1da177e4
LT
939 case FIOSETOWN:
940 case SIOCSPGRP:
941 err = -EFAULT;
942 if (get_user(pid, (int __user *)argp))
943 break;
e0b93edd
JL
944 f_setown(sock->file, pid, 1);
945 err = 0;
1da177e4
LT
946 break;
947 case FIOGETOWN:
948 case SIOCGPGRP:
609d7fa9 949 err = put_user(f_getown(sock->file),
89bddce5 950 (int __user *)argp);
1da177e4
LT
951 break;
952 case SIOCGIFBR:
953 case SIOCSIFBR:
954 case SIOCBRADDBR:
955 case SIOCBRDELBR:
956 err = -ENOPKG;
957 if (!br_ioctl_hook)
958 request_module("bridge");
959
4a3e2f71 960 mutex_lock(&br_ioctl_mutex);
89bddce5 961 if (br_ioctl_hook)
881d966b 962 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 963 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
964 break;
965 case SIOCGIFVLAN:
966 case SIOCSIFVLAN:
967 err = -ENOPKG;
968 if (!vlan_ioctl_hook)
969 request_module("8021q");
970
4a3e2f71 971 mutex_lock(&vlan_ioctl_mutex);
1da177e4 972 if (vlan_ioctl_hook)
881d966b 973 err = vlan_ioctl_hook(net, argp);
4a3e2f71 974 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 975 break;
1da177e4
LT
976 case SIOCADDDLCI:
977 case SIOCDELDLCI:
978 err = -ENOPKG;
979 if (!dlci_ioctl_hook)
980 request_module("dlci");
981
7512cbf6
PE
982 mutex_lock(&dlci_ioctl_mutex);
983 if (dlci_ioctl_hook)
1da177e4 984 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 985 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 986 break;
c62cce2c
AV
987 case SIOCGSKNS:
988 err = -EPERM;
989 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
990 break;
991
992 err = open_related_ns(&net->ns, get_net_ns);
993 break;
1da177e4 994 default:
6b96018b 995 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 996 break;
89bddce5 997 }
1da177e4
LT
998 return err;
999}
1000
1001int sock_create_lite(int family, int type, int protocol, struct socket **res)
1002{
1003 int err;
1004 struct socket *sock = NULL;
89bddce5 1005
1da177e4
LT
1006 err = security_socket_create(family, type, protocol, 1);
1007 if (err)
1008 goto out;
1009
1010 sock = sock_alloc();
1011 if (!sock) {
1012 err = -ENOMEM;
1013 goto out;
1014 }
1015
1da177e4 1016 sock->type = type;
7420ed23
VY
1017 err = security_socket_post_create(sock, family, type, protocol, 1);
1018 if (err)
1019 goto out_release;
1020
1da177e4
LT
1021out:
1022 *res = sock;
1023 return err;
7420ed23
VY
1024out_release:
1025 sock_release(sock);
1026 sock = NULL;
1027 goto out;
1da177e4 1028}
c6d409cf 1029EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1030
1031/* No kernel lock held - perfect */
89bddce5 1032static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1033{
cbf55001 1034 unsigned int busy_flag = 0;
1da177e4
LT
1035 struct socket *sock;
1036
1037 /*
89bddce5 1038 * We can't return errors to poll, so it's either yes or no.
1da177e4 1039 */
b69aee04 1040 sock = file->private_data;
2d48d67f 1041
cbf55001 1042 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1043 /* this socket can poll_ll so tell the system call */
cbf55001 1044 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1045
1046 /* once, only if requested by syscall */
cbf55001
ET
1047 if (wait && (wait->_key & POLL_BUSY_LOOP))
1048 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1049 }
1050
cbf55001 1051 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1052}
1053
89bddce5 1054static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1055{
b69aee04 1056 struct socket *sock = file->private_data;
1da177e4
LT
1057
1058 return sock->ops->mmap(file, sock, vma);
1059}
1060
/*
 * release() handler for socket files: release the socket that lives in
 * @inode.  @filp is unused and the call always reports success.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1066
1067/*
1068 * Update the socket async list
1069 *
1070 * Fasync_list locking strategy.
1071 *
1072 * 1. fasync_list is modified only under process context socket lock
1073 * i.e. under semaphore.
1074 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1075 * or under socket lock
1da177e4
LT
1076 */
1077
1078static int sock_fasync(int fd, struct file *filp, int on)
1079{
989a2979
ED
1080 struct socket *sock = filp->private_data;
1081 struct sock *sk = sock->sk;
eaefd110 1082 struct socket_wq *wq;
1da177e4 1083
989a2979 1084 if (sk == NULL)
1da177e4 1085 return -EINVAL;
1da177e4
LT
1086
1087 lock_sock(sk);
1e1d04e6 1088 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1089 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1090
eaefd110 1091 if (!wq->fasync_list)
989a2979
ED
1092 sock_reset_flag(sk, SOCK_FASYNC);
1093 else
bcdce719 1094 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1095
989a2979 1096 release_sock(sk);
1da177e4
LT
1097 return 0;
1098}
1099
ceb5d58b 1100/* This function may be called only under rcu_lock */
1da177e4 1101
ceb5d58b 1102int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1103{
ceb5d58b 1104 if (!wq || !wq->fasync_list)
1da177e4 1105 return -1;
ceb5d58b 1106
89bddce5 1107 switch (how) {
8d8ad9d7 1108 case SOCK_WAKE_WAITD:
ceb5d58b 1109 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1110 break;
1111 goto call_kill;
8d8ad9d7 1112 case SOCK_WAKE_SPACE:
ceb5d58b 1113 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1114 break;
1115 /* fall through */
8d8ad9d7 1116 case SOCK_WAKE_IO:
89bddce5 1117call_kill:
43815482 1118 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1119 break;
8d8ad9d7 1120 case SOCK_WAKE_URG:
43815482 1121 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1122 }
ceb5d58b 1123
1da177e4
LT
1124 return 0;
1125}
c6d409cf 1126EXPORT_SYMBOL(sock_wake_async);
1da177e4 1127
721db93a 1128int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1129 struct socket **res, int kern)
1da177e4
LT
1130{
1131 int err;
1132 struct socket *sock;
55737fda 1133 const struct net_proto_family *pf;
1da177e4
LT
1134
1135 /*
89bddce5 1136 * Check protocol is in range
1da177e4
LT
1137 */
1138 if (family < 0 || family >= NPROTO)
1139 return -EAFNOSUPPORT;
1140 if (type < 0 || type >= SOCK_MAX)
1141 return -EINVAL;
1142
1143 /* Compatibility.
1144
1145 This uglymoron is moved from INET layer to here to avoid
1146 deadlock in module load.
1147 */
1148 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1149 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1150 current->comm);
1da177e4
LT
1151 family = PF_PACKET;
1152 }
1153
1154 err = security_socket_create(family, type, protocol, kern);
1155 if (err)
1156 return err;
89bddce5 1157
55737fda
SH
1158 /*
1159 * Allocate the socket and allow the family to set things up. if
1160 * the protocol is 0, the family is instructed to select an appropriate
1161 * default.
1162 */
1163 sock = sock_alloc();
1164 if (!sock) {
e87cc472 1165 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1166 return -ENFILE; /* Not exactly a match, but its the
1167 closest posix thing */
1168 }
1169
1170 sock->type = type;
1171
95a5afca 1172#ifdef CONFIG_MODULES
89bddce5
SH
1173 /* Attempt to load a protocol module if the find failed.
1174 *
1175 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1176 * requested real, full-featured networking support upon configuration.
1177 * Otherwise module support will break!
1178 */
190683a9 1179 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1180 request_module("net-pf-%d", family);
1da177e4
LT
1181#endif
1182
55737fda
SH
1183 rcu_read_lock();
1184 pf = rcu_dereference(net_families[family]);
1185 err = -EAFNOSUPPORT;
1186 if (!pf)
1187 goto out_release;
1da177e4
LT
1188
1189 /*
1190 * We will call the ->create function, that possibly is in a loadable
1191 * module, so we have to bump that loadable module refcnt first.
1192 */
55737fda 1193 if (!try_module_get(pf->owner))
1da177e4
LT
1194 goto out_release;
1195
55737fda
SH
1196 /* Now protected by module ref count */
1197 rcu_read_unlock();
1198
3f378b68 1199 err = pf->create(net, sock, protocol, kern);
55737fda 1200 if (err < 0)
1da177e4 1201 goto out_module_put;
a79af59e 1202
1da177e4
LT
1203 /*
1204 * Now to bump the refcnt of the [loadable] module that owns this
1205 * socket at sock_release time we decrement its refcnt.
1206 */
55737fda
SH
1207 if (!try_module_get(sock->ops->owner))
1208 goto out_module_busy;
1209
1da177e4
LT
1210 /*
1211 * Now that we're done with the ->create function, the [loadable]
1212 * module can have its refcnt decremented
1213 */
55737fda 1214 module_put(pf->owner);
7420ed23
VY
1215 err = security_socket_post_create(sock, family, type, protocol, kern);
1216 if (err)
3b185525 1217 goto out_sock_release;
55737fda 1218 *res = sock;
1da177e4 1219
55737fda
SH
1220 return 0;
1221
1222out_module_busy:
1223 err = -EAFNOSUPPORT;
1da177e4 1224out_module_put:
55737fda
SH
1225 sock->ops = NULL;
1226 module_put(pf->owner);
1227out_sock_release:
1da177e4 1228 sock_release(sock);
55737fda
SH
1229 return err;
1230
1231out_release:
1232 rcu_read_unlock();
1233 goto out_sock_release;
1da177e4 1234}
721db93a 1235EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1236
1237int sock_create(int family, int type, int protocol, struct socket **res)
1238{
1b8d7ae4 1239 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1240}
c6d409cf 1241EXPORT_SYMBOL(sock_create);
1da177e4 1242
/*
 * Create a socket in the namespace @net, passing kern == 1 to
 * __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1248
3e0fa65f 1249SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1250{
1251 int retval;
1252 struct socket *sock;
a677a039
UD
1253 int flags;
1254
e38b36f3
UD
1255 /* Check the SOCK_* constants for consistency. */
1256 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1257 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1258 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1259 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1260
a677a039 1261 flags = type & ~SOCK_TYPE_MASK;
77d27200 1262 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1263 return -EINVAL;
1264 type &= SOCK_TYPE_MASK;
1da177e4 1265
aaca0bdc
UD
1266 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1267 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1268
1da177e4
LT
1269 retval = sock_create(family, type, protocol, &sock);
1270 if (retval < 0)
1271 goto out;
1272
77d27200 1273 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1274 if (retval < 0)
1275 goto out_release;
1276
1277out:
1278 /* It may be already another descriptor 8) Not kernel problem. */
1279 return retval;
1280
1281out_release:
1282 sock_release(sock);
1283 return retval;
1284}
1285
1286/*
1287 * Create a pair of connected sockets.
1288 */
1289
3e0fa65f
HC
1290SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1291 int __user *, usockvec)
1da177e4
LT
1292{
1293 struct socket *sock1, *sock2;
1294 int fd1, fd2, err;
db349509 1295 struct file *newfile1, *newfile2;
a677a039
UD
1296 int flags;
1297
1298 flags = type & ~SOCK_TYPE_MASK;
77d27200 1299 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1300 return -EINVAL;
1301 type &= SOCK_TYPE_MASK;
1da177e4 1302
aaca0bdc
UD
1303 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1304 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1305
1da177e4
LT
1306 /*
1307 * Obtain the first socket and check if the underlying protocol
1308 * supports the socketpair call.
1309 */
1310
1311 err = sock_create(family, type, protocol, &sock1);
1312 if (err < 0)
1313 goto out;
1314
1315 err = sock_create(family, type, protocol, &sock2);
1316 if (err < 0)
1317 goto out_release_1;
1318
1319 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1320 if (err < 0)
1da177e4
LT
1321 goto out_release_both;
1322
28407630 1323 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1324 if (unlikely(fd1 < 0)) {
1325 err = fd1;
db349509 1326 goto out_release_both;
bf3c23d1 1327 }
d73aa286 1328
28407630 1329 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1330 if (unlikely(fd2 < 0)) {
1331 err = fd2;
d73aa286 1332 goto out_put_unused_1;
28407630
AV
1333 }
1334
aab174f0 1335 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1336 if (IS_ERR(newfile1)) {
28407630 1337 err = PTR_ERR(newfile1);
d73aa286 1338 goto out_put_unused_both;
28407630
AV
1339 }
1340
aab174f0 1341 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1342 if (IS_ERR(newfile2)) {
1343 err = PTR_ERR(newfile2);
d73aa286 1344 goto out_fput_1;
db349509
AV
1345 }
1346
d73aa286
YD
1347 err = put_user(fd1, &usockvec[0]);
1348 if (err)
1349 goto out_fput_both;
1350
1351 err = put_user(fd2, &usockvec[1]);
1352 if (err)
1353 goto out_fput_both;
1354
157cf649 1355 audit_fd_pair(fd1, fd2);
d73aa286 1356
db349509
AV
1357 fd_install(fd1, newfile1);
1358 fd_install(fd2, newfile2);
1da177e4
LT
1359 /* fd1 and fd2 may be already another descriptors.
1360 * Not kernel problem.
1361 */
1362
d73aa286 1363 return 0;
1da177e4 1364
d73aa286
YD
1365out_fput_both:
1366 fput(newfile2);
1367 fput(newfile1);
1368 put_unused_fd(fd2);
1369 put_unused_fd(fd1);
1370 goto out;
1371
1372out_fput_1:
1373 fput(newfile1);
1374 put_unused_fd(fd2);
1375 put_unused_fd(fd1);
1376 sock_release(sock2);
1377 goto out;
1da177e4 1378
d73aa286
YD
1379out_put_unused_both:
1380 put_unused_fd(fd2);
1381out_put_unused_1:
1382 put_unused_fd(fd1);
1da177e4 1383out_release_both:
89bddce5 1384 sock_release(sock2);
1da177e4 1385out_release_1:
89bddce5 1386 sock_release(sock1);
1da177e4
LT
1387out:
1388 return err;
1389}
1390
1da177e4
LT
1391/*
1392 * Bind a name to a socket. Nothing much to do here since it's
1393 * the protocol's responsibility to handle the local address.
1394 *
1395 * We move the socket address to kernel space before we call
1396 * the protocol layer (having also checked the address is ok).
1397 */
1398
20f37034 1399SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1400{
1401 struct socket *sock;
230b1839 1402 struct sockaddr_storage address;
6cb153ca 1403 int err, fput_needed;
1da177e4 1404
89bddce5 1405 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1406 if (sock) {
43db362d 1407 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1408 if (err >= 0) {
1409 err = security_socket_bind(sock,
230b1839 1410 (struct sockaddr *)&address,
89bddce5 1411 addrlen);
6cb153ca
BL
1412 if (!err)
1413 err = sock->ops->bind(sock,
89bddce5 1414 (struct sockaddr *)
230b1839 1415 &address, addrlen);
1da177e4 1416 }
6cb153ca 1417 fput_light(sock->file, fput_needed);
89bddce5 1418 }
1da177e4
LT
1419 return err;
1420}
1421
1da177e4
LT
1422/*
1423 * Perform a listen. Basically, we allow the protocol to do anything
1424 * necessary for a listen, and if that works, we mark the socket as
1425 * ready for listening.
1426 */
1427
3e0fa65f 1428SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1429{
1430 struct socket *sock;
6cb153ca 1431 int err, fput_needed;
b8e1f9b5 1432 int somaxconn;
89bddce5
SH
1433
1434 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1435 if (sock) {
8efa6e93 1436 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1437 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1438 backlog = somaxconn;
1da177e4
LT
1439
1440 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1441 if (!err)
1442 err = sock->ops->listen(sock, backlog);
1da177e4 1443
6cb153ca 1444 fput_light(sock->file, fput_needed);
1da177e4
LT
1445 }
1446 return err;
1447}
1448
1da177e4
LT
1449/*
1450 * For accept, we attempt to create a new socket, set up the link
1451 * with the client, wake up the client, then return the new
1452 * connected fd. We collect the address of the connector in kernel
1453 * space and move it to user at the very end. This is unclean because
1454 * we open the socket then return an error.
1455 *
1456 * 1003.1g adds the ability to recvmsg() to query connection pending
1457 * status to recvmsg. We need to add that support in a way thats
1458 * clean when we restucture accept also.
1459 */
1460
20f37034
HC
1461SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1462 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1463{
1464 struct socket *sock, *newsock;
39d8c1b6 1465 struct file *newfile;
6cb153ca 1466 int err, len, newfd, fput_needed;
230b1839 1467 struct sockaddr_storage address;
1da177e4 1468
77d27200 1469 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1470 return -EINVAL;
1471
1472 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1473 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1474
6cb153ca 1475 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1476 if (!sock)
1477 goto out;
1478
1479 err = -ENFILE;
c6d409cf
ED
1480 newsock = sock_alloc();
1481 if (!newsock)
1da177e4
LT
1482 goto out_put;
1483
1484 newsock->type = sock->type;
1485 newsock->ops = sock->ops;
1486
1da177e4
LT
1487 /*
1488 * We don't need try_module_get here, as the listening socket (sock)
1489 * has the protocol module (sock->ops->owner) held.
1490 */
1491 __module_get(newsock->ops->owner);
1492
28407630 1493 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1494 if (unlikely(newfd < 0)) {
1495 err = newfd;
9a1875e6
DM
1496 sock_release(newsock);
1497 goto out_put;
39d8c1b6 1498 }
aab174f0 1499 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1500 if (IS_ERR(newfile)) {
28407630
AV
1501 err = PTR_ERR(newfile);
1502 put_unused_fd(newfd);
1503 sock_release(newsock);
1504 goto out_put;
1505 }
39d8c1b6 1506
a79af59e
FF
1507 err = security_socket_accept(sock, newsock);
1508 if (err)
39d8c1b6 1509 goto out_fd;
a79af59e 1510
1da177e4
LT
1511 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1512 if (err < 0)
39d8c1b6 1513 goto out_fd;
1da177e4
LT
1514
1515 if (upeer_sockaddr) {
230b1839 1516 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1517 &len, 2) < 0) {
1da177e4 1518 err = -ECONNABORTED;
39d8c1b6 1519 goto out_fd;
1da177e4 1520 }
43db362d 1521 err = move_addr_to_user(&address,
230b1839 1522 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1523 if (err < 0)
39d8c1b6 1524 goto out_fd;
1da177e4
LT
1525 }
1526
1527 /* File flags are not inherited via accept() unlike another OSes. */
1528
39d8c1b6
DM
1529 fd_install(newfd, newfile);
1530 err = newfd;
1da177e4 1531
1da177e4 1532out_put:
6cb153ca 1533 fput_light(sock->file, fput_needed);
1da177e4
LT
1534out:
1535 return err;
39d8c1b6 1536out_fd:
9606a216 1537 fput(newfile);
39d8c1b6 1538 put_unused_fd(newfd);
1da177e4
LT
1539 goto out_put;
1540}
1541
20f37034
HC
1542SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1543 int __user *, upeer_addrlen)
aaca0bdc 1544{
de11defe 1545 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1546}
1547
1da177e4
LT
1548/*
1549 * Attempt to connect to a socket with the server address. The address
1550 * is in user space so we verify it is OK and move it to kernel space.
1551 *
1552 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1553 * break bindings
1554 *
1555 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1556 * other SEQPACKET protocols that take time to connect() as it doesn't
1557 * include the -EINPROGRESS status for such sockets.
1558 */
1559
20f37034
HC
1560SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1561 int, addrlen)
1da177e4
LT
1562{
1563 struct socket *sock;
230b1839 1564 struct sockaddr_storage address;
6cb153ca 1565 int err, fput_needed;
1da177e4 1566
6cb153ca 1567 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1568 if (!sock)
1569 goto out;
43db362d 1570 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1571 if (err < 0)
1572 goto out_put;
1573
89bddce5 1574 err =
230b1839 1575 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1576 if (err)
1577 goto out_put;
1578
230b1839 1579 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1580 sock->file->f_flags);
1581out_put:
6cb153ca 1582 fput_light(sock->file, fput_needed);
1da177e4
LT
1583out:
1584 return err;
1585}
1586
1587/*
1588 * Get the local address ('name') of a socket object. Move the obtained
1589 * name to user space.
1590 */
1591
20f37034
HC
1592SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1593 int __user *, usockaddr_len)
1da177e4
LT
1594{
1595 struct socket *sock;
230b1839 1596 struct sockaddr_storage address;
6cb153ca 1597 int len, err, fput_needed;
89bddce5 1598
6cb153ca 1599 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1600 if (!sock)
1601 goto out;
1602
1603 err = security_socket_getsockname(sock);
1604 if (err)
1605 goto out_put;
1606
230b1839 1607 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1608 if (err)
1609 goto out_put;
43db362d 1610 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1611
1612out_put:
6cb153ca 1613 fput_light(sock->file, fput_needed);
1da177e4
LT
1614out:
1615 return err;
1616}
1617
1618/*
1619 * Get the remote address ('name') of a socket object. Move the obtained
1620 * name to user space.
1621 */
1622
20f37034
HC
1623SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1624 int __user *, usockaddr_len)
1da177e4
LT
1625{
1626 struct socket *sock;
230b1839 1627 struct sockaddr_storage address;
6cb153ca 1628 int len, err, fput_needed;
1da177e4 1629
89bddce5
SH
1630 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1631 if (sock != NULL) {
1da177e4
LT
1632 err = security_socket_getpeername(sock);
1633 if (err) {
6cb153ca 1634 fput_light(sock->file, fput_needed);
1da177e4
LT
1635 return err;
1636 }
1637
89bddce5 1638 err =
230b1839 1639 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1640 1);
1da177e4 1641 if (!err)
43db362d 1642 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1643 usockaddr_len);
6cb153ca 1644 fput_light(sock->file, fput_needed);
1da177e4
LT
1645 }
1646 return err;
1647}
1648
1649/*
1650 * Send a datagram to a given address. We move the address into kernel
1651 * space and check the user space data area is readable before invoking
1652 * the protocol.
1653 */
1654
3e0fa65f 1655SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1656 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1657 int, addr_len)
1da177e4
LT
1658{
1659 struct socket *sock;
230b1839 1660 struct sockaddr_storage address;
1da177e4
LT
1661 int err;
1662 struct msghdr msg;
1663 struct iovec iov;
6cb153ca 1664 int fput_needed;
6cb153ca 1665
602bd0e9
AV
1666 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1667 if (unlikely(err))
1668 return err;
de0fa95c
PE
1669 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1670 if (!sock)
4387ff75 1671 goto out;
6cb153ca 1672
89bddce5 1673 msg.msg_name = NULL;
89bddce5
SH
1674 msg.msg_control = NULL;
1675 msg.msg_controllen = 0;
1676 msg.msg_namelen = 0;
6cb153ca 1677 if (addr) {
43db362d 1678 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1679 if (err < 0)
1680 goto out_put;
230b1839 1681 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1682 msg.msg_namelen = addr_len;
1da177e4
LT
1683 }
1684 if (sock->file->f_flags & O_NONBLOCK)
1685 flags |= MSG_DONTWAIT;
1686 msg.msg_flags = flags;
d8725c86 1687 err = sock_sendmsg(sock, &msg);
1da177e4 1688
89bddce5 1689out_put:
de0fa95c 1690 fput_light(sock->file, fput_needed);
4387ff75 1691out:
1da177e4
LT
1692 return err;
1693}
1694
1695/*
89bddce5 1696 * Send a datagram down a socket.
1da177e4
LT
1697 */
1698
3e0fa65f 1699SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1700 unsigned int, flags)
1da177e4
LT
1701{
1702 return sys_sendto(fd, buff, len, flags, NULL, 0);
1703}
1704
1705/*
89bddce5 1706 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1707 * sender. We verify the buffers are writable and if needed move the
1708 * sender address from kernel to user space.
1709 */
1710
3e0fa65f 1711SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1712 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1713 int __user *, addr_len)
1da177e4
LT
1714{
1715 struct socket *sock;
1716 struct iovec iov;
1717 struct msghdr msg;
230b1839 1718 struct sockaddr_storage address;
89bddce5 1719 int err, err2;
6cb153ca
BL
1720 int fput_needed;
1721
602bd0e9
AV
1722 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1723 if (unlikely(err))
1724 return err;
de0fa95c 1725 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1726 if (!sock)
de0fa95c 1727 goto out;
1da177e4 1728
89bddce5
SH
1729 msg.msg_control = NULL;
1730 msg.msg_controllen = 0;
f3d33426
HFS
1731 /* Save some cycles and don't copy the address if not needed */
1732 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1733 /* We assume all kernel code knows the size of sockaddr_storage */
1734 msg.msg_namelen = 0;
130ed5d1 1735 msg.msg_iocb = NULL;
1da177e4
LT
1736 if (sock->file->f_flags & O_NONBLOCK)
1737 flags |= MSG_DONTWAIT;
2da62906 1738 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1739
89bddce5 1740 if (err >= 0 && addr != NULL) {
43db362d 1741 err2 = move_addr_to_user(&address,
230b1839 1742 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1743 if (err2 < 0)
1744 err = err2;
1da177e4 1745 }
de0fa95c
PE
1746
1747 fput_light(sock->file, fput_needed);
4387ff75 1748out:
1da177e4
LT
1749 return err;
1750}
1751
1752/*
89bddce5 1753 * Receive a datagram from a socket.
1da177e4
LT
1754 */
1755
b7c0ddf5
JG
1756SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1757 unsigned int, flags)
1da177e4
LT
1758{
1759 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1760}
1761
1762/*
1763 * Set a socket option. Because we don't know the option lengths we have
1764 * to pass the user mode parameter for the protocols to sort out.
1765 */
1766
20f37034
HC
1767SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1768 char __user *, optval, int, optlen)
1da177e4 1769{
6cb153ca 1770 int err, fput_needed;
1da177e4
LT
1771 struct socket *sock;
1772
1773 if (optlen < 0)
1774 return -EINVAL;
89bddce5
SH
1775
1776 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1777 if (sock != NULL) {
1778 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1779 if (err)
1780 goto out_put;
1da177e4
LT
1781
1782 if (level == SOL_SOCKET)
89bddce5
SH
1783 err =
1784 sock_setsockopt(sock, level, optname, optval,
1785 optlen);
1da177e4 1786 else
89bddce5
SH
1787 err =
1788 sock->ops->setsockopt(sock, level, optname, optval,
1789 optlen);
6cb153ca
BL
1790out_put:
1791 fput_light(sock->file, fput_needed);
1da177e4
LT
1792 }
1793 return err;
1794}
1795
1796/*
1797 * Get a socket option. Because we don't know the option lengths we have
1798 * to pass a user mode parameter for the protocols to sort out.
1799 */
1800
20f37034
HC
1801SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1802 char __user *, optval, int __user *, optlen)
1da177e4 1803{
6cb153ca 1804 int err, fput_needed;
1da177e4
LT
1805 struct socket *sock;
1806
89bddce5
SH
1807 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1808 if (sock != NULL) {
6cb153ca
BL
1809 err = security_socket_getsockopt(sock, level, optname);
1810 if (err)
1811 goto out_put;
1da177e4
LT
1812
1813 if (level == SOL_SOCKET)
89bddce5
SH
1814 err =
1815 sock_getsockopt(sock, level, optname, optval,
1816 optlen);
1da177e4 1817 else
89bddce5
SH
1818 err =
1819 sock->ops->getsockopt(sock, level, optname, optval,
1820 optlen);
6cb153ca
BL
1821out_put:
1822 fput_light(sock->file, fput_needed);
1da177e4
LT
1823 }
1824 return err;
1825}
1826
1da177e4
LT
1827/*
1828 * Shutdown a socket.
1829 */
1830
754fe8d2 1831SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1832{
6cb153ca 1833 int err, fput_needed;
1da177e4
LT
1834 struct socket *sock;
1835
89bddce5
SH
1836 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1837 if (sock != NULL) {
1da177e4 1838 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1839 if (!err)
1840 err = sock->ops->shutdown(sock, how);
1841 fput_light(sock->file, fput_needed);
1da177e4
LT
1842 }
1843 return err;
1844}
1845
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and rely on a local `flags` plus, for
 * an argument `msg`, a matching local pointer named `msg_compat`.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1852
c71d8ebe
TH
1853struct used_address {
1854 struct sockaddr_storage name;
1855 unsigned int name_len;
1856};
1857
da184284
AV
1858static int copy_msghdr_from_user(struct msghdr *kmsg,
1859 struct user_msghdr __user *umsg,
1860 struct sockaddr __user **save_addr,
1861 struct iovec **iov)
1661bf36 1862{
08adb7da
AV
1863 struct sockaddr __user *uaddr;
1864 struct iovec __user *uiov;
c0371da6 1865 size_t nr_segs;
08adb7da
AV
1866 ssize_t err;
1867
1868 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1869 __get_user(uaddr, &umsg->msg_name) ||
1870 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1871 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1872 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1873 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1874 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1875 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1876 return -EFAULT;
dbb490b9 1877
08adb7da 1878 if (!uaddr)
6a2a2b3a
AS
1879 kmsg->msg_namelen = 0;
1880
dbb490b9
ML
1881 if (kmsg->msg_namelen < 0)
1882 return -EINVAL;
1883
1661bf36 1884 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1885 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1886
1887 if (save_addr)
1888 *save_addr = uaddr;
1889
1890 if (uaddr && kmsg->msg_namelen) {
1891 if (!save_addr) {
1892 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1893 kmsg->msg_name);
1894 if (err < 0)
1895 return err;
1896 }
1897 } else {
1898 kmsg->msg_name = NULL;
1899 kmsg->msg_namelen = 0;
1900 }
1901
c0371da6 1902 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1903 return -EMSGSIZE;
1904
0345f931 1905 kmsg->msg_iocb = NULL;
1906
da184284
AV
1907 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1908 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1909}
1910
666547ff 1911static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1912 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1913 struct used_address *used_address,
1914 unsigned int allowed_msghdr_flags)
1da177e4 1915{
89bddce5
SH
1916 struct compat_msghdr __user *msg_compat =
1917 (struct compat_msghdr __user *)msg;
230b1839 1918 struct sockaddr_storage address;
1da177e4 1919 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1920 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1921 __aligned(sizeof(__kernel_size_t));
89bddce5 1922 /* 20 is size of ipv6_pktinfo */
1da177e4 1923 unsigned char *ctl_buf = ctl;
d8725c86 1924 int ctl_len;
08adb7da 1925 ssize_t err;
89bddce5 1926
08adb7da 1927 msg_sys->msg_name = &address;
1da177e4 1928
08449320 1929 if (MSG_CMSG_COMPAT & flags)
08adb7da 1930 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1931 else
08adb7da 1932 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1933 if (err < 0)
da184284 1934 return err;
1da177e4
LT
1935
1936 err = -ENOBUFS;
1937
228e548e 1938 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1939 goto out_freeiov;
28a94d8f 1940 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1941 ctl_len = msg_sys->msg_controllen;
1da177e4 1942 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1943 err =
228e548e 1944 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1945 sizeof(ctl));
1da177e4
LT
1946 if (err)
1947 goto out_freeiov;
228e548e
AB
1948 ctl_buf = msg_sys->msg_control;
1949 ctl_len = msg_sys->msg_controllen;
1da177e4 1950 } else if (ctl_len) {
89bddce5 1951 if (ctl_len > sizeof(ctl)) {
1da177e4 1952 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1953 if (ctl_buf == NULL)
1da177e4
LT
1954 goto out_freeiov;
1955 }
1956 err = -EFAULT;
1957 /*
228e548e 1958 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1959 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1960 * checking falls down on this.
1961 */
fb8621bb 1962 if (copy_from_user(ctl_buf,
228e548e 1963 (void __user __force *)msg_sys->msg_control,
89bddce5 1964 ctl_len))
1da177e4 1965 goto out_freectl;
228e548e 1966 msg_sys->msg_control = ctl_buf;
1da177e4 1967 }
228e548e 1968 msg_sys->msg_flags = flags;
1da177e4
LT
1969
1970 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1971 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1972 /*
1973 * If this is sendmmsg() and current destination address is same as
1974 * previously succeeded address, omit asking LSM's decision.
1975 * used_address->name_len is initialized to UINT_MAX so that the first
1976 * destination address never matches.
1977 */
bc909d9d
MD
1978 if (used_address && msg_sys->msg_name &&
1979 used_address->name_len == msg_sys->msg_namelen &&
1980 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1981 used_address->name_len)) {
d8725c86 1982 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1983 goto out_freectl;
1984 }
d8725c86 1985 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1986 /*
1987 * If this is sendmmsg() and sending to current destination address was
1988 * successful, remember it.
1989 */
1990 if (used_address && err >= 0) {
1991 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1992 if (msg_sys->msg_name)
1993 memcpy(&used_address->name, msg_sys->msg_name,
1994 used_address->name_len);
c71d8ebe 1995 }
1da177e4
LT
1996
1997out_freectl:
89bddce5 1998 if (ctl_buf != ctl)
1da177e4
LT
1999 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2000out_freeiov:
da184284 2001 kfree(iov);
228e548e
AB
2002 return err;
2003}
2004
2005/*
2006 * BSD sendmsg interface
2007 */
2008
666547ff 2009long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2010{
2011 int fput_needed, err;
2012 struct msghdr msg_sys;
1be374a0
AL
2013 struct socket *sock;
2014
1be374a0 2015 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2016 if (!sock)
2017 goto out;
2018
28a94d8f 2019 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2020
6cb153ca 2021 fput_light(sock->file, fput_needed);
89bddce5 2022out:
1da177e4
LT
2023 return err;
2024}
2025
666547ff 2026SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2027{
2028 if (flags & MSG_CMSG_COMPAT)
2029 return -EINVAL;
2030 return __sys_sendmsg(fd, msg, flags);
2031}
2032
228e548e
AB
2033/*
2034 * Linux sendmmsg interface
2035 */
2036
2037int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2038 unsigned int flags)
2039{
2040 int fput_needed, err, datagrams;
2041 struct socket *sock;
2042 struct mmsghdr __user *entry;
2043 struct compat_mmsghdr __user *compat_entry;
2044 struct msghdr msg_sys;
c71d8ebe 2045 struct used_address used_address;
f092276d 2046 unsigned int oflags = flags;
228e548e 2047
98382f41
AB
2048 if (vlen > UIO_MAXIOV)
2049 vlen = UIO_MAXIOV;
228e548e
AB
2050
2051 datagrams = 0;
2052
2053 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2054 if (!sock)
2055 return err;
2056
c71d8ebe 2057 used_address.name_len = UINT_MAX;
228e548e
AB
2058 entry = mmsg;
2059 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2060 err = 0;
f092276d 2061 flags |= MSG_BATCH;
228e548e
AB
2062
2063 while (datagrams < vlen) {
f092276d
TH
2064 if (datagrams == vlen - 1)
2065 flags = oflags;
2066
228e548e 2067 if (MSG_CMSG_COMPAT & flags) {
666547ff 2068 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2069 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2070 if (err < 0)
2071 break;
2072 err = __put_user(err, &compat_entry->msg_len);
2073 ++compat_entry;
2074 } else {
a7526eb5 2075 err = ___sys_sendmsg(sock,
666547ff 2076 (struct user_msghdr __user *)entry,
28a94d8f 2077 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2078 if (err < 0)
2079 break;
2080 err = put_user(err, &entry->msg_len);
2081 ++entry;
2082 }
2083
2084 if (err)
2085 break;
2086 ++datagrams;
3023898b
SHY
2087 if (msg_data_left(&msg_sys))
2088 break;
a78cb84c 2089 cond_resched();
228e548e
AB
2090 }
2091
228e548e
AB
2092 fput_light(sock->file, fput_needed);
2093
728ffb86
AB
2094 /* We only return an error if no datagrams were able to be sent */
2095 if (datagrams != 0)
228e548e
AB
2096 return datagrams;
2097
228e548e
AB
2098 return err;
2099}
2100
2101SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2102 unsigned int, vlen, unsigned int, flags)
2103{
1be374a0
AL
2104 if (flags & MSG_CMSG_COMPAT)
2105 return -EINVAL;
228e548e
AB
2106 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2107}
2108
666547ff 2109static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2110 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2111{
89bddce5
SH
2112 struct compat_msghdr __user *msg_compat =
2113 (struct compat_msghdr __user *)msg;
1da177e4 2114 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2115 struct iovec *iov = iovstack;
1da177e4 2116 unsigned long cmsg_ptr;
2da62906 2117 int len;
08adb7da 2118 ssize_t err;
1da177e4
LT
2119
2120 /* kernel mode address */
230b1839 2121 struct sockaddr_storage addr;
1da177e4
LT
2122
2123 /* user mode address pointers */
2124 struct sockaddr __user *uaddr;
08adb7da 2125 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2126
08adb7da 2127 msg_sys->msg_name = &addr;
1da177e4 2128
f3d33426 2129 if (MSG_CMSG_COMPAT & flags)
08adb7da 2130 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2131 else
08adb7da 2132 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2133 if (err < 0)
da184284 2134 return err;
1da177e4 2135
a2e27255
ACM
2136 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2137 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2138
f3d33426
HFS
2139 /* We assume all kernel code knows the size of sockaddr_storage */
2140 msg_sys->msg_namelen = 0;
2141
1da177e4
LT
2142 if (sock->file->f_flags & O_NONBLOCK)
2143 flags |= MSG_DONTWAIT;
2da62906 2144 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2145 if (err < 0)
2146 goto out_freeiov;
2147 len = err;
2148
2149 if (uaddr != NULL) {
43db362d 2150 err = move_addr_to_user(&addr,
a2e27255 2151 msg_sys->msg_namelen, uaddr,
89bddce5 2152 uaddr_len);
1da177e4
LT
2153 if (err < 0)
2154 goto out_freeiov;
2155 }
a2e27255 2156 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2157 COMPAT_FLAGS(msg));
1da177e4
LT
2158 if (err)
2159 goto out_freeiov;
2160 if (MSG_CMSG_COMPAT & flags)
a2e27255 2161 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2162 &msg_compat->msg_controllen);
2163 else
a2e27255 2164 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2165 &msg->msg_controllen);
2166 if (err)
2167 goto out_freeiov;
2168 err = len;
2169
2170out_freeiov:
da184284 2171 kfree(iov);
a2e27255
ACM
2172 return err;
2173}
2174
2175/*
2176 * BSD recvmsg interface
2177 */
2178
666547ff 2179long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2180{
2181 int fput_needed, err;
2182 struct msghdr msg_sys;
1be374a0
AL
2183 struct socket *sock;
2184
1be374a0 2185 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2186 if (!sock)
2187 goto out;
2188
a7526eb5 2189 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2190
6cb153ca 2191 fput_light(sock->file, fput_needed);
1da177e4
LT
2192out:
2193 return err;
2194}
2195
666547ff 2196SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2197 unsigned int, flags)
2198{
2199 if (flags & MSG_CMSG_COMPAT)
2200 return -EINVAL;
2201 return __sys_recvmsg(fd, msg, flags);
2202}
2203
a2e27255
ACM
2204/*
2205 * Linux recvmmsg interface
2206 */
2207
2208int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2209 unsigned int flags, struct timespec *timeout)
2210{
2211 int fput_needed, err, datagrams;
2212 struct socket *sock;
2213 struct mmsghdr __user *entry;
d7256d0e 2214 struct compat_mmsghdr __user *compat_entry;
a2e27255 2215 struct msghdr msg_sys;
766b9f92
DD
2216 struct timespec64 end_time;
2217 struct timespec64 timeout64;
a2e27255
ACM
2218
2219 if (timeout &&
2220 poll_select_set_timeout(&end_time, timeout->tv_sec,
2221 timeout->tv_nsec))
2222 return -EINVAL;
2223
2224 datagrams = 0;
2225
2226 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2227 if (!sock)
2228 return err;
2229
2230 err = sock_error(sock->sk);
5c537dff
MJ
2231 if (err) {
2232 datagrams = err;
a2e27255 2233 goto out_put;
5c537dff 2234 }
a2e27255
ACM
2235
2236 entry = mmsg;
d7256d0e 2237 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2238
2239 while (datagrams < vlen) {
2240 /*
2241 * No need to ask LSM for more than the first datagram.
2242 */
d7256d0e 2243 if (MSG_CMSG_COMPAT & flags) {
666547ff 2244 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2245 &msg_sys, flags & ~MSG_WAITFORONE,
2246 datagrams);
d7256d0e
JMG
2247 if (err < 0)
2248 break;
2249 err = __put_user(err, &compat_entry->msg_len);
2250 ++compat_entry;
2251 } else {
a7526eb5 2252 err = ___sys_recvmsg(sock,
666547ff 2253 (struct user_msghdr __user *)entry,
a7526eb5
AL
2254 &msg_sys, flags & ~MSG_WAITFORONE,
2255 datagrams);
d7256d0e
JMG
2256 if (err < 0)
2257 break;
2258 err = put_user(err, &entry->msg_len);
2259 ++entry;
2260 }
2261
a2e27255
ACM
2262 if (err)
2263 break;
a2e27255
ACM
2264 ++datagrams;
2265
71c5c159
BB
2266 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2267 if (flags & MSG_WAITFORONE)
2268 flags |= MSG_DONTWAIT;
2269
a2e27255 2270 if (timeout) {
766b9f92
DD
2271 ktime_get_ts64(&timeout64);
2272 *timeout = timespec64_to_timespec(
2273 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2274 if (timeout->tv_sec < 0) {
2275 timeout->tv_sec = timeout->tv_nsec = 0;
2276 break;
2277 }
2278
2279 /* Timeout, return less than vlen datagrams */
2280 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2281 break;
2282 }
2283
2284 /* Out of band data, return right away */
2285 if (msg_sys.msg_flags & MSG_OOB)
2286 break;
a78cb84c 2287 cond_resched();
a2e27255
ACM
2288 }
2289
a2e27255 2290 if (err == 0)
34b88a68
ACM
2291 goto out_put;
2292
2293 if (datagrams == 0) {
2294 datagrams = err;
2295 goto out_put;
2296 }
a2e27255 2297
34b88a68
ACM
2298 /*
2299 * We may return less entries than requested (vlen) if the
2300 * sock is non block and there aren't enough datagrams...
2301 */
2302 if (err != -EAGAIN) {
a2e27255 2303 /*
34b88a68
ACM
2304 * ... or if recvmsg returns an error after we
2305 * received some datagrams, where we record the
2306 * error to return on the next call or if the
2307 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2308 */
34b88a68 2309 sock->sk->sk_err = -err;
a2e27255 2310 }
34b88a68
ACM
2311out_put:
2312 fput_light(sock->file, fput_needed);
a2e27255 2313
34b88a68 2314 return datagrams;
a2e27255
ACM
2315}
2316
2317SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2318 unsigned int, vlen, unsigned int, flags,
2319 struct timespec __user *, timeout)
2320{
2321 int datagrams;
2322 struct timespec timeout_sys;
2323
1be374a0
AL
2324 if (flags & MSG_CMSG_COMPAT)
2325 return -EINVAL;
2326
a2e27255
ACM
2327 if (!timeout)
2328 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2329
2330 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2331 return -EFAULT;
2332
2333 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2334
2335 if (datagrams > 0 &&
2336 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2337 datagrams = -EFAULT;
2338
2339 return datagrams;
2340}
2341
2342#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number.  Slot 0 is unused (size 0).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0.. 6 */ AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(3),
	/*  7..13 */ AL(3), AL(4), AL(4), AL(4), AL(6), AL(6), AL(2),
	/* 14..20 */ AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2353
2354/*
89bddce5 2355 * System call vectors.
1da177e4
LT
2356 *
2357 * Argument checking cleaned up. Saved 20% in size.
2358 * This function doesn't need to set the kernel lock because
89bddce5 2359 * it is set by the callees.
1da177e4
LT
2360 */
2361
3e0fa65f 2362SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2363{
2950fa9d 2364 unsigned long a[AUDITSC_ARGS];
89bddce5 2365 unsigned long a0, a1;
1da177e4 2366 int err;
47379052 2367 unsigned int len;
1da177e4 2368
228e548e 2369 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2370 return -EINVAL;
2371
47379052
AV
2372 len = nargs[call];
2373 if (len > sizeof(a))
2374 return -EINVAL;
2375
1da177e4 2376 /* copy_from_user should be SMP safe. */
47379052 2377 if (copy_from_user(a, args, len))
1da177e4 2378 return -EFAULT;
3ec3b2fb 2379
2950fa9d
CG
2380 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2381 if (err)
2382 return err;
3ec3b2fb 2383
89bddce5
SH
2384 a0 = a[0];
2385 a1 = a[1];
2386
2387 switch (call) {
2388 case SYS_SOCKET:
2389 err = sys_socket(a0, a1, a[2]);
2390 break;
2391 case SYS_BIND:
2392 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2393 break;
2394 case SYS_CONNECT:
2395 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2396 break;
2397 case SYS_LISTEN:
2398 err = sys_listen(a0, a1);
2399 break;
2400 case SYS_ACCEPT:
de11defe
UD
2401 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2402 (int __user *)a[2], 0);
89bddce5
SH
2403 break;
2404 case SYS_GETSOCKNAME:
2405 err =
2406 sys_getsockname(a0, (struct sockaddr __user *)a1,
2407 (int __user *)a[2]);
2408 break;
2409 case SYS_GETPEERNAME:
2410 err =
2411 sys_getpeername(a0, (struct sockaddr __user *)a1,
2412 (int __user *)a[2]);
2413 break;
2414 case SYS_SOCKETPAIR:
2415 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2416 break;
2417 case SYS_SEND:
2418 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2419 break;
2420 case SYS_SENDTO:
2421 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2422 (struct sockaddr __user *)a[4], a[5]);
2423 break;
2424 case SYS_RECV:
2425 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2426 break;
2427 case SYS_RECVFROM:
2428 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2429 (struct sockaddr __user *)a[4],
2430 (int __user *)a[5]);
2431 break;
2432 case SYS_SHUTDOWN:
2433 err = sys_shutdown(a0, a1);
2434 break;
2435 case SYS_SETSOCKOPT:
2436 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2437 break;
2438 case SYS_GETSOCKOPT:
2439 err =
2440 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2441 (int __user *)a[4]);
2442 break;
2443 case SYS_SENDMSG:
666547ff 2444 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2445 break;
228e548e
AB
2446 case SYS_SENDMMSG:
2447 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2448 break;
89bddce5 2449 case SYS_RECVMSG:
666547ff 2450 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2451 break;
a2e27255
ACM
2452 case SYS_RECVMMSG:
2453 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2454 (struct timespec __user *)a[4]);
2455 break;
de11defe
UD
2456 case SYS_ACCEPT4:
2457 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2458 (int __user *)a[2], a[3]);
aaca0bdc 2459 break;
89bddce5
SH
2460 default:
2461 err = -EINVAL;
2462 break;
1da177e4
LT
2463 }
2464 return err;
2465}
2466
89bddce5 2467#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2468
55737fda
SH
2469/**
2470 * sock_register - add a socket protocol handler
2471 * @ops: description of protocol
2472 *
1da177e4
LT
2473 * This function is called by a protocol handler that wants to
2474 * advertise its address family, and have it linked into the
e793c0f7 2475 * socket interface. The value ops->family corresponds to the
55737fda 2476 * socket system call protocol family.
1da177e4 2477 */
f0fd27d4 2478int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2479{
2480 int err;
2481
2482 if (ops->family >= NPROTO) {
3410f22e 2483 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2484 return -ENOBUFS;
2485 }
55737fda
SH
2486
2487 spin_lock(&net_family_lock);
190683a9
ED
2488 if (rcu_dereference_protected(net_families[ops->family],
2489 lockdep_is_held(&net_family_lock)))
55737fda
SH
2490 err = -EEXIST;
2491 else {
cf778b00 2492 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2493 err = 0;
2494 }
55737fda
SH
2495 spin_unlock(&net_family_lock);
2496
3410f22e 2497 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2498 return err;
2499}
c6d409cf 2500EXPORT_SYMBOL(sock_register);
1da177e4 2501
55737fda
SH
2502/**
2503 * sock_unregister - remove a protocol handler
2504 * @family: protocol family to remove
2505 *
1da177e4
LT
2506 * This function is called by a protocol handler that wants to
2507 * remove its address family, and have it unlinked from the
55737fda
SH
2508 * new socket creation.
2509 *
2510 * If protocol handler is a module, then it can use module reference
2511 * counts to protect against new references. If protocol handler is not
2512 * a module then it needs to provide its own protection in
2513 * the ops->create routine.
1da177e4 2514 */
f0fd27d4 2515void sock_unregister(int family)
1da177e4 2516{
f0fd27d4 2517 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2518
55737fda 2519 spin_lock(&net_family_lock);
a9b3cd7f 2520 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2521 spin_unlock(&net_family_lock);
2522
2523 synchronize_rcu();
2524
3410f22e 2525 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2526}
c6d409cf 2527EXPORT_SYMBOL(sock_unregister);
1da177e4 2528
77d76ea3 2529static int __init sock_init(void)
1da177e4 2530{
b3e19d92 2531 int err;
2ca794e5
EB
2532 /*
2533 * Initialize the network sysctl infrastructure.
2534 */
2535 err = net_sysctl_init();
2536 if (err)
2537 goto out;
b3e19d92 2538
1da177e4 2539 /*
89bddce5 2540 * Initialize skbuff SLAB cache
1da177e4
LT
2541 */
2542 skb_init();
1da177e4
LT
2543
2544 /*
89bddce5 2545 * Initialize the protocols module.
1da177e4
LT
2546 */
2547
2548 init_inodecache();
b3e19d92
NP
2549
2550 err = register_filesystem(&sock_fs_type);
2551 if (err)
2552 goto out_fs;
1da177e4 2553 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2554 if (IS_ERR(sock_mnt)) {
2555 err = PTR_ERR(sock_mnt);
2556 goto out_mount;
2557 }
77d76ea3
AK
2558
2559 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2560 */
2561
2562#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2563 err = netfilter_init();
2564 if (err)
2565 goto out;
1da177e4 2566#endif
cbeb321a 2567
408eccce 2568 ptp_classifier_init();
c1f19b51 2569
b3e19d92
NP
2570out:
2571 return err;
2572
2573out_mount:
2574 unregister_filesystem(&sock_fs_type);
2575out_fs:
2576 goto out;
1da177e4
LT
2577}
2578
77d76ea3
AK
2579core_initcall(sock_init); /* early initcall */
2580
1da177e4
LT
2581#ifdef CONFIG_PROC_FS
2582void socket_seq_show(struct seq_file *seq)
2583{
2584 int cpu;
2585 int counter = 0;
2586
6f912042 2587 for_each_possible_cpu(cpu)
89bddce5 2588 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2589
2590 /* It can be negative, by the way. 8) */
2591 if (counter < 0)
2592 counter = 0;
2593
2594 seq_printf(seq, "sockets: used %d\n", counter);
2595}
89bddce5 2596#endif /* CONFIG_PROC_FS */
1da177e4 2597
89bbfc95 2598#ifdef CONFIG_COMPAT
6b96018b 2599static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2600 unsigned int cmd, void __user *up)
7a229387 2601{
7a229387
AB
2602 mm_segment_t old_fs = get_fs();
2603 struct timeval ktv;
2604 int err;
2605
2606 set_fs(KERNEL_DS);
6b96018b 2607 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2608 set_fs(old_fs);
644595f8 2609 if (!err)
ed6fe9d6 2610 err = compat_put_timeval(&ktv, up);
644595f8 2611
7a229387
AB
2612 return err;
2613}
2614
6b96018b 2615static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2616 unsigned int cmd, void __user *up)
7a229387 2617{
7a229387
AB
2618 mm_segment_t old_fs = get_fs();
2619 struct timespec kts;
2620 int err;
2621
2622 set_fs(KERNEL_DS);
6b96018b 2623 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2624 set_fs(old_fs);
644595f8 2625 if (!err)
ed6fe9d6 2626 err = compat_put_timespec(&kts, up);
644595f8 2627
7a229387
AB
2628 return err;
2629}
2630
6b96018b 2631static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2632{
2633 struct ifreq __user *uifr;
2634 int err;
2635
2636 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2637 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2638 return -EFAULT;
2639
6b96018b 2640 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2641 if (err)
2642 return err;
2643
6b96018b 2644 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2645 return -EFAULT;
2646
2647 return 0;
2648}
2649
6b96018b 2650static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2651{
6b96018b 2652 struct compat_ifconf ifc32;
7a229387
AB
2653 struct ifconf ifc;
2654 struct ifconf __user *uifc;
6b96018b 2655 struct compat_ifreq __user *ifr32;
7a229387
AB
2656 struct ifreq __user *ifr;
2657 unsigned int i, j;
2658 int err;
2659
6b96018b 2660 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2661 return -EFAULT;
2662
43da5f2e 2663 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2664 if (ifc32.ifcbuf == 0) {
2665 ifc32.ifc_len = 0;
2666 ifc.ifc_len = 0;
2667 ifc.ifc_req = NULL;
2668 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2669 } else {
c6d409cf
ED
2670 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2671 sizeof(struct ifreq);
7a229387
AB
2672 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2673 ifc.ifc_len = len;
2674 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2675 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2676 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2677 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2678 return -EFAULT;
2679 ifr++;
2680 ifr32++;
2681 }
2682 }
2683 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2684 return -EFAULT;
2685
6b96018b 2686 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2687 if (err)
2688 return err;
2689
2690 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2691 return -EFAULT;
2692
2693 ifr = ifc.ifc_req;
2694 ifr32 = compat_ptr(ifc32.ifcbuf);
2695 for (i = 0, j = 0;
c6d409cf
ED
2696 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2697 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2698 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2699 return -EFAULT;
2700 ifr32++;
2701 ifr++;
2702 }
2703
2704 if (ifc32.ifcbuf == 0) {
2705 /* Translate from 64-bit structure multiple to
2706 * a 32-bit one.
2707 */
2708 i = ifc.ifc_len;
6b96018b 2709 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2710 ifc32.ifc_len = i;
2711 } else {
2712 ifc32.ifc_len = i;
2713 }
6b96018b 2714 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2715 return -EFAULT;
2716
2717 return 0;
2718}
2719
6b96018b 2720static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2721{
3a7da39d
BH
2722 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2723 bool convert_in = false, convert_out = false;
2724 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2725 struct ethtool_rxnfc __user *rxnfc;
7a229387 2726 struct ifreq __user *ifr;
3a7da39d
BH
2727 u32 rule_cnt = 0, actual_rule_cnt;
2728 u32 ethcmd;
7a229387 2729 u32 data;
3a7da39d 2730 int ret;
7a229387 2731
3a7da39d
BH
2732 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2733 return -EFAULT;
7a229387 2734
3a7da39d
BH
2735 compat_rxnfc = compat_ptr(data);
2736
2737 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2738 return -EFAULT;
2739
3a7da39d
BH
2740 /* Most ethtool structures are defined without padding.
2741 * Unfortunately struct ethtool_rxnfc is an exception.
2742 */
2743 switch (ethcmd) {
2744 default:
2745 break;
2746 case ETHTOOL_GRXCLSRLALL:
2747 /* Buffer size is variable */
2748 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2749 return -EFAULT;
2750 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2751 return -ENOMEM;
2752 buf_size += rule_cnt * sizeof(u32);
2753 /* fall through */
2754 case ETHTOOL_GRXRINGS:
2755 case ETHTOOL_GRXCLSRLCNT:
2756 case ETHTOOL_GRXCLSRULE:
55664f32 2757 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2758 convert_out = true;
2759 /* fall through */
2760 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2761 buf_size += sizeof(struct ethtool_rxnfc);
2762 convert_in = true;
2763 break;
2764 }
2765
2766 ifr = compat_alloc_user_space(buf_size);
954b1244 2767 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2768
2769 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2770 return -EFAULT;
2771
3a7da39d
BH
2772 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2773 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2774 return -EFAULT;
2775
3a7da39d 2776 if (convert_in) {
127fe533 2777 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2778 * fs.ring_cookie and at the end of fs, but nowhere else.
2779 */
127fe533
AD
2780 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2781 sizeof(compat_rxnfc->fs.m_ext) !=
2782 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2783 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2784 BUILD_BUG_ON(
2785 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2786 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2787 offsetof(struct ethtool_rxnfc, fs.location) -
2788 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2789
2790 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2791 (void __user *)(&rxnfc->fs.m_ext + 1) -
2792 (void __user *)rxnfc) ||
3a7da39d
BH
2793 copy_in_user(&rxnfc->fs.ring_cookie,
2794 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2795 (void __user *)(&rxnfc->fs.location + 1) -
2796 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2797 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2798 sizeof(rxnfc->rule_cnt)))
2799 return -EFAULT;
2800 }
2801
2802 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2803 if (ret)
2804 return ret;
2805
2806 if (convert_out) {
2807 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2808 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2809 (const void __user *)rxnfc) ||
3a7da39d
BH
2810 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2811 &rxnfc->fs.ring_cookie,
954b1244
SH
2812 (const void __user *)(&rxnfc->fs.location + 1) -
2813 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2814 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2815 sizeof(rxnfc->rule_cnt)))
2816 return -EFAULT;
2817
2818 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2819 /* As an optimisation, we only copy the actual
2820 * number of rules that the underlying
2821 * function returned. Since Mallory might
2822 * change the rule count in user memory, we
2823 * check that it is less than the rule count
2824 * originally given (as the user buffer size),
2825 * which has been range-checked.
2826 */
2827 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2828 return -EFAULT;
2829 if (actual_rule_cnt < rule_cnt)
2830 rule_cnt = actual_rule_cnt;
2831 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2832 &rxnfc->rule_locs[0],
2833 rule_cnt * sizeof(u32)))
2834 return -EFAULT;
2835 }
2836 }
2837
2838 return 0;
7a229387
AB
2839}
2840
7a50a240
AB
2841static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2842{
2843 void __user *uptr;
2844 compat_uptr_t uptr32;
2845 struct ifreq __user *uifr;
2846
c6d409cf 2847 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2848 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2849 return -EFAULT;
2850
2851 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2852 return -EFAULT;
2853
2854 uptr = compat_ptr(uptr32);
2855
2856 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2857 return -EFAULT;
2858
2859 return dev_ioctl(net, SIOCWANDEV, uifr);
2860}
2861
6b96018b
AB
2862static int bond_ioctl(struct net *net, unsigned int cmd,
2863 struct compat_ifreq __user *ifr32)
7a229387
AB
2864{
2865 struct ifreq kifr;
7a229387
AB
2866 mm_segment_t old_fs;
2867 int err;
7a229387
AB
2868
2869 switch (cmd) {
2870 case SIOCBONDENSLAVE:
2871 case SIOCBONDRELEASE:
2872 case SIOCBONDSETHWADDR:
2873 case SIOCBONDCHANGEACTIVE:
6b96018b 2874 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2875 return -EFAULT;
2876
2877 old_fs = get_fs();
c6d409cf 2878 set_fs(KERNEL_DS);
c3f52ae6 2879 err = dev_ioctl(net, cmd,
2880 (struct ifreq __user __force *) &kifr);
c6d409cf 2881 set_fs(old_fs);
7a229387
AB
2882
2883 return err;
7a229387 2884 default:
07d106d0 2885 return -ENOIOCTLCMD;
ccbd6a5a 2886 }
7a229387
AB
2887}
2888
590d4693
BH
2889/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2890static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2891 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2892{
2893 struct ifreq __user *u_ifreq64;
7a229387
AB
2894 char tmp_buf[IFNAMSIZ];
2895 void __user *data64;
2896 u32 data32;
2897
2898 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2899 IFNAMSIZ))
2900 return -EFAULT;
417c3522 2901 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2902 return -EFAULT;
2903 data64 = compat_ptr(data32);
2904
2905 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2906
7a229387
AB
2907 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2908 IFNAMSIZ))
2909 return -EFAULT;
417c3522 2910 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2911 return -EFAULT;
2912
6b96018b 2913 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2914}
2915
6b96018b
AB
2916static int dev_ifsioc(struct net *net, struct socket *sock,
2917 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2918{
a2116ed2 2919 struct ifreq __user *uifr;
7a229387
AB
2920 int err;
2921
a2116ed2
AB
2922 uifr = compat_alloc_user_space(sizeof(*uifr));
2923 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2924 return -EFAULT;
2925
2926 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2927
7a229387
AB
2928 if (!err) {
2929 switch (cmd) {
2930 case SIOCGIFFLAGS:
2931 case SIOCGIFMETRIC:
2932 case SIOCGIFMTU:
2933 case SIOCGIFMEM:
2934 case SIOCGIFHWADDR:
2935 case SIOCGIFINDEX:
2936 case SIOCGIFADDR:
2937 case SIOCGIFBRDADDR:
2938 case SIOCGIFDSTADDR:
2939 case SIOCGIFNETMASK:
fab2532b 2940 case SIOCGIFPFLAGS:
7a229387 2941 case SIOCGIFTXQLEN:
fab2532b
AB
2942 case SIOCGMIIPHY:
2943 case SIOCGMIIREG:
a2116ed2 2944 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2945 err = -EFAULT;
2946 break;
2947 }
2948 }
2949 return err;
2950}
2951
a2116ed2
AB
2952static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2953 struct compat_ifreq __user *uifr32)
2954{
2955 struct ifreq ifr;
2956 struct compat_ifmap __user *uifmap32;
2957 mm_segment_t old_fs;
2958 int err;
2959
2960 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2961 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2962 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2963 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2964 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2965 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2966 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2967 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2968 if (err)
2969 return -EFAULT;
2970
2971 old_fs = get_fs();
c6d409cf 2972 set_fs(KERNEL_DS);
c3f52ae6 2973 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2974 set_fs(old_fs);
a2116ed2
AB
2975
2976 if (cmd == SIOCGIFMAP && !err) {
2977 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2978 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2979 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2980 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2981 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2982 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2983 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2984 if (err)
2985 err = -EFAULT;
2986 }
2987 return err;
2988}
2989
7a229387 2990struct rtentry32 {
c6d409cf 2991 u32 rt_pad1;
7a229387
AB
2992 struct sockaddr rt_dst; /* target address */
2993 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2994 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2995 unsigned short rt_flags;
2996 short rt_pad2;
2997 u32 rt_pad3;
2998 unsigned char rt_tos;
2999 unsigned char rt_class;
3000 short rt_pad4;
3001 short rt_metric; /* +1 for binary compatibility! */
7a229387 3002 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3003 u32 rt_mtu; /* per route MTU/Window */
3004 u32 rt_window; /* Window clamping */
7a229387
AB
3005 unsigned short rt_irtt; /* Initial RTT */
3006};
3007
3008struct in6_rtmsg32 {
3009 struct in6_addr rtmsg_dst;
3010 struct in6_addr rtmsg_src;
3011 struct in6_addr rtmsg_gateway;
3012 u32 rtmsg_type;
3013 u16 rtmsg_dst_len;
3014 u16 rtmsg_src_len;
3015 u32 rtmsg_metric;
3016 u32 rtmsg_info;
3017 u32 rtmsg_flags;
3018 s32 rtmsg_ifindex;
3019};
3020
6b96018b
AB
3021static int routing_ioctl(struct net *net, struct socket *sock,
3022 unsigned int cmd, void __user *argp)
7a229387
AB
3023{
3024 int ret;
3025 void *r = NULL;
3026 struct in6_rtmsg r6;
3027 struct rtentry r4;
3028 char devname[16];
3029 u32 rtdev;
3030 mm_segment_t old_fs = get_fs();
3031
6b96018b
AB
3032 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3033 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3034 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3035 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3036 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3037 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3038 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3039 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3040 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3041 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3042 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3043
3044 r = (void *) &r6;
3045 } else { /* ipv4 */
6b96018b 3046 struct rtentry32 __user *ur4 = argp;
c6d409cf 3047 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3048 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3049 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3050 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3051 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3052 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3053 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3054 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3055 if (rtdev) {
c6d409cf 3056 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3057 r4.rt_dev = (char __user __force *)devname;
3058 devname[15] = 0;
7a229387
AB
3059 } else
3060 r4.rt_dev = NULL;
3061
3062 r = (void *) &r4;
3063 }
3064
3065 if (ret) {
3066 ret = -EFAULT;
3067 goto out;
3068 }
3069
c6d409cf 3070 set_fs(KERNEL_DS);
6b96018b 3071 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3072 set_fs(old_fs);
7a229387
AB
3073
3074out:
7a229387
AB
3075 return ret;
3076}
3077
3078/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3079 * for some operations; this forces use of the newer bridge-utils that
25985edc 3080 * use compatible ioctls
7a229387 3081 */
6b96018b 3082static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3083{
6b96018b 3084 compat_ulong_t tmp;
7a229387 3085
6b96018b 3086 if (get_user(tmp, argp))
7a229387
AB
3087 return -EFAULT;
3088 if (tmp == BRCTL_GET_VERSION)
3089 return BRCTL_VERSION + 1;
3090 return -EINVAL;
3091}
3092
6b96018b
AB
3093static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3094 unsigned int cmd, unsigned long arg)
3095{
3096 void __user *argp = compat_ptr(arg);
3097 struct sock *sk = sock->sk;
3098 struct net *net = sock_net(sk);
7a229387 3099
6b96018b 3100 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3101 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3102
3103 switch (cmd) {
3104 case SIOCSIFBR:
3105 case SIOCGIFBR:
3106 return old_bridge_ioctl(argp);
3107 case SIOCGIFNAME:
3108 return dev_ifname32(net, argp);
3109 case SIOCGIFCONF:
3110 return dev_ifconf(net, argp);
3111 case SIOCETHTOOL:
3112 return ethtool_ioctl(net, argp);
7a50a240
AB
3113 case SIOCWANDEV:
3114 return compat_siocwandev(net, argp);
a2116ed2
AB
3115 case SIOCGIFMAP:
3116 case SIOCSIFMAP:
3117 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3118 case SIOCBONDENSLAVE:
3119 case SIOCBONDRELEASE:
3120 case SIOCBONDSETHWADDR:
6b96018b
AB
3121 case SIOCBONDCHANGEACTIVE:
3122 return bond_ioctl(net, cmd, argp);
3123 case SIOCADDRT:
3124 case SIOCDELRT:
3125 return routing_ioctl(net, sock, cmd, argp);
3126 case SIOCGSTAMP:
3127 return do_siocgstamp(net, sock, cmd, argp);
3128 case SIOCGSTAMPNS:
3129 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3130 case SIOCBONDSLAVEINFOQUERY:
3131 case SIOCBONDINFOQUERY:
a2116ed2 3132 case SIOCSHWTSTAMP:
fd468c74 3133 case SIOCGHWTSTAMP:
590d4693 3134 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3135
3136 case FIOSETOWN:
3137 case SIOCSPGRP:
3138 case FIOGETOWN:
3139 case SIOCGPGRP:
3140 case SIOCBRADDBR:
3141 case SIOCBRDELBR:
3142 case SIOCGIFVLAN:
3143 case SIOCSIFVLAN:
3144 case SIOCADDDLCI:
3145 case SIOCDELDLCI:
c62cce2c 3146 case SIOCGSKNS:
6b96018b
AB
3147 return sock_ioctl(file, cmd, arg);
3148
3149 case SIOCGIFFLAGS:
3150 case SIOCSIFFLAGS:
3151 case SIOCGIFMETRIC:
3152 case SIOCSIFMETRIC:
3153 case SIOCGIFMTU:
3154 case SIOCSIFMTU:
3155 case SIOCGIFMEM:
3156 case SIOCSIFMEM:
3157 case SIOCGIFHWADDR:
3158 case SIOCSIFHWADDR:
3159 case SIOCADDMULTI:
3160 case SIOCDELMULTI:
3161 case SIOCGIFINDEX:
6b96018b
AB
3162 case SIOCGIFADDR:
3163 case SIOCSIFADDR:
3164 case SIOCSIFHWBROADCAST:
6b96018b 3165 case SIOCDIFADDR:
6b96018b
AB
3166 case SIOCGIFBRDADDR:
3167 case SIOCSIFBRDADDR:
3168 case SIOCGIFDSTADDR:
3169 case SIOCSIFDSTADDR:
3170 case SIOCGIFNETMASK:
3171 case SIOCSIFNETMASK:
3172 case SIOCSIFPFLAGS:
3173 case SIOCGIFPFLAGS:
3174 case SIOCGIFTXQLEN:
3175 case SIOCSIFTXQLEN:
3176 case SIOCBRADDIF:
3177 case SIOCBRDELIF:
9177efd3
AB
3178 case SIOCSIFNAME:
3179 case SIOCGMIIPHY:
3180 case SIOCGMIIREG:
3181 case SIOCSMIIREG:
6b96018b 3182 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3183
6b96018b
AB
3184 case SIOCSARP:
3185 case SIOCGARP:
3186 case SIOCDARP:
6b96018b 3187 case SIOCATMARK:
9177efd3
AB
3188 return sock_do_ioctl(net, sock, cmd, arg);
3189 }
3190
6b96018b
AB
3191 return -ENOIOCTLCMD;
3192}
7a229387 3193
95c96174 3194static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3195 unsigned long arg)
89bbfc95
SP
3196{
3197 struct socket *sock = file->private_data;
3198 int ret = -ENOIOCTLCMD;
87de87d5
DM
3199 struct sock *sk;
3200 struct net *net;
3201
3202 sk = sock->sk;
3203 net = sock_net(sk);
89bbfc95
SP
3204
3205 if (sock->ops->compat_ioctl)
3206 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3207
87de87d5
DM
3208 if (ret == -ENOIOCTLCMD &&
3209 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3210 ret = compat_wext_handle_ioctl(net, cmd, arg);
3211
6b96018b
AB
3212 if (ret == -ENOIOCTLCMD)
3213 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3214
89bbfc95
SP
3215 return ret;
3216}
3217#endif
3218
ac5a488e
SS
3219int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3220{
3221 return sock->ops->bind(sock, addr, addrlen);
3222}
c6d409cf 3223EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3224
3225int kernel_listen(struct socket *sock, int backlog)
3226{
3227 return sock->ops->listen(sock, backlog);
3228}
c6d409cf 3229EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3230
3231int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3232{
3233 struct sock *sk = sock->sk;
3234 int err;
3235
3236 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3237 newsock);
3238 if (err < 0)
3239 goto done;
3240
3241 err = sock->ops->accept(sock, *newsock, flags);
3242 if (err < 0) {
3243 sock_release(*newsock);
fa8705b0 3244 *newsock = NULL;
ac5a488e
SS
3245 goto done;
3246 }
3247
3248 (*newsock)->ops = sock->ops;
1b08534e 3249 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3250
3251done:
3252 return err;
3253}
c6d409cf 3254EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3255
3256int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3257 int flags)
ac5a488e
SS
3258{
3259 return sock->ops->connect(sock, addr, addrlen, flags);
3260}
c6d409cf 3261EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3262
3263int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3264 int *addrlen)
3265{
3266 return sock->ops->getname(sock, addr, addrlen, 0);
3267}
c6d409cf 3268EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3269
3270int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3271 int *addrlen)
3272{
3273 return sock->ops->getname(sock, addr, addrlen, 1);
3274}
c6d409cf 3275EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3276
3277int kernel_getsockopt(struct socket *sock, int level, int optname,
3278 char *optval, int *optlen)
3279{
3280 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3281 char __user *uoptval;
3282 int __user *uoptlen;
ac5a488e
SS
3283 int err;
3284
fb8621bb
NK
3285 uoptval = (char __user __force *) optval;
3286 uoptlen = (int __user __force *) optlen;
3287
ac5a488e
SS
3288 set_fs(KERNEL_DS);
3289 if (level == SOL_SOCKET)
fb8621bb 3290 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3291 else
fb8621bb
NK
3292 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3293 uoptlen);
ac5a488e
SS
3294 set_fs(oldfs);
3295 return err;
3296}
c6d409cf 3297EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3298
3299int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3300 char *optval, unsigned int optlen)
ac5a488e
SS
3301{
3302 mm_segment_t oldfs = get_fs();
fb8621bb 3303 char __user *uoptval;
ac5a488e
SS
3304 int err;
3305
fb8621bb
NK
3306 uoptval = (char __user __force *) optval;
3307
ac5a488e
SS
3308 set_fs(KERNEL_DS);
3309 if (level == SOL_SOCKET)
fb8621bb 3310 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3311 else
fb8621bb 3312 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3313 optlen);
3314 set_fs(oldfs);
3315 return err;
3316}
c6d409cf 3317EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3318
3319int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3320 size_t size, int flags)
3321{
3322 if (sock->ops->sendpage)
3323 return sock->ops->sendpage(sock, page, offset, size, flags);
3324
3325 return sock_no_sendpage(sock, page, offset, size, flags);
3326}
c6d409cf 3327EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3328
3329int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3330{
3331 mm_segment_t oldfs = get_fs();
3332 int err;
3333
3334 set_fs(KERNEL_DS);
3335 err = sock->ops->ioctl(sock, cmd, arg);
3336 set_fs(oldfs);
3337
3338 return err;
3339}
c6d409cf 3340EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3341
91cf45f0
TM
3342int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3343{
3344 return sock->ops->shutdown(sock, how);
3345}
91cf45f0 3346EXPORT_SYMBOL(kernel_sock_shutdown);