]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/socket.c
UBUNTU: Start new release
[mirror_ubuntu-zesty-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4 92
7c0f6ba6 93#include <linux/uaccess.h>
1da177e4
LT
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
/*
 *	Socket files have a set of 'special' operations as well as the generic
 *	file ones. These don't appear in the operation structures but are done
 *	directly via the socketcall() multiplexor.
 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
bba0bd31
AG
323static int sockfs_xattr_get(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, void *value, size_t size)
326{
327 if (value) {
328 if (dentry->d_name.len + 1 > size)
329 return -ERANGE;
330 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
331 }
332 return dentry->d_name.len + 1;
333}
334
335#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
336#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
337#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
338
339static const struct xattr_handler sockfs_xattr_handler = {
340 .name = XATTR_NAME_SOCKPROTONAME,
341 .get = sockfs_xattr_get,
342};
343
4a590153
AG
344static int sockfs_security_xattr_set(const struct xattr_handler *handler,
345 struct dentry *dentry, struct inode *inode,
346 const char *suffix, const void *value,
347 size_t size, int flags)
348{
349 /* Handled by LSM. */
350 return -EAGAIN;
351}
352
353static const struct xattr_handler sockfs_security_xattr_handler = {
354 .prefix = XATTR_SECURITY_PREFIX,
355 .set = sockfs_security_xattr_set,
356};
357
bba0bd31
AG
358static const struct xattr_handler *sockfs_xattr_handlers[] = {
359 &sockfs_xattr_handler,
4a590153 360 &sockfs_security_xattr_handler,
bba0bd31
AG
361 NULL
362};
363
c74a1cbb
AV
364static struct dentry *sockfs_mount(struct file_system_type *fs_type,
365 int flags, const char *dev_name, void *data)
366{
bba0bd31
AG
367 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
368 sockfs_xattr_handlers,
369 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
370}
371
372static struct vfsmount *sock_mnt __read_mostly;
373
374static struct file_system_type sock_fs_type = {
375 .name = "sockfs",
376 .mount = sockfs_mount,
377 .kill_sb = kill_anon_super,
378};
379
1da177e4
LT
380/*
381 * Obtains the first available file descriptor and sets it up for use.
382 *
39d8c1b6
DM
383 * These functions create file structures and maps them to fd space
384 * of the current process. On success it returns file descriptor
1da177e4
LT
385 * and file struct implicitly stored in sock->file.
386 * Note that another thread may close file descriptor before we return
387 * from this function. We use the fact that now we do not refer
388 * to socket after mapping. If one day we will need it, this
389 * function will increment ref. count on file by 1.
390 *
391 * In any case returned fd MAY BE not valid!
392 * This race condition is unavoidable
393 * with shared fd spaces, we cannot solve it inside kernel,
394 * but we take care of internal coherence yet.
395 */
396
aab174f0 397struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 398{
7cbe66b6 399 struct qstr name = { .name = "" };
2c48b9c4 400 struct path path;
7cbe66b6 401 struct file *file;
1da177e4 402
600e1779
MY
403 if (dname) {
404 name.name = dname;
405 name.len = strlen(name.name);
406 } else if (sock->sk) {
407 name.name = sock->sk->sk_prot_creator->name;
408 name.len = strlen(name.name);
409 }
4b936885 410 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
411 if (unlikely(!path.dentry))
412 return ERR_PTR(-ENOMEM);
2c48b9c4 413 path.mnt = mntget(sock_mnt);
39d8c1b6 414
2c48b9c4 415 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 416
2c48b9c4 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 418 &socket_file_ops);
b5ffe634 419 if (IS_ERR(file)) {
cc3808f8 420 /* drop dentry, keep inode */
c5ef6035 421 ihold(d_inode(path.dentry));
2c48b9c4 422 path_put(&path);
39b65252 423 return file;
cc3808f8
AV
424 }
425
426 sock->file = file;
77d27200 427 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 428 file->private_data = sock;
28407630 429 return file;
39d8c1b6 430}
56b31d1c 431EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 432
56b31d1c 433static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
434{
435 struct file *newfile;
28407630
AV
436 int fd = get_unused_fd_flags(flags);
437 if (unlikely(fd < 0))
438 return fd;
39d8c1b6 439
aab174f0 440 newfile = sock_alloc_file(sock, flags, NULL);
28407630 441 if (likely(!IS_ERR(newfile))) {
39d8c1b6 442 fd_install(fd, newfile);
28407630
AV
443 return fd;
444 }
7cbe66b6 445
28407630
AV
446 put_unused_fd(fd);
447 return PTR_ERR(newfile);
1da177e4
LT
448}
449
406a3c63 450struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 451{
6cb153ca
BL
452 if (file->f_op == &socket_file_ops)
453 return file->private_data; /* set in sock_map_fd */
454
23bb80d2
ED
455 *err = -ENOTSOCK;
456 return NULL;
6cb153ca 457}
406a3c63 458EXPORT_SYMBOL(sock_from_file);
6cb153ca 459
1da177e4 460/**
c6d409cf 461 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
462 * @fd: file handle
463 * @err: pointer to an error code return
464 *
465 * The file handle passed in is locked and the socket it is bound
466 * too is returned. If an error occurs the err pointer is overwritten
467 * with a negative errno code and NULL is returned. The function checks
468 * for both invalid handles and passing a handle which is not a socket.
469 *
470 * On a success the socket object pointer is returned.
471 */
472
473struct socket *sockfd_lookup(int fd, int *err)
474{
475 struct file *file;
1da177e4
LT
476 struct socket *sock;
477
89bddce5
SH
478 file = fget(fd);
479 if (!file) {
1da177e4
LT
480 *err = -EBADF;
481 return NULL;
482 }
89bddce5 483
6cb153ca
BL
484 sock = sock_from_file(file, err);
485 if (!sock)
1da177e4 486 fput(file);
6cb153ca
BL
487 return sock;
488}
c6d409cf 489EXPORT_SYMBOL(sockfd_lookup);
1da177e4 490
6cb153ca
BL
491static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
492{
00e188ef 493 struct fd f = fdget(fd);
6cb153ca
BL
494 struct socket *sock;
495
3672558c 496 *err = -EBADF;
00e188ef
AV
497 if (f.file) {
498 sock = sock_from_file(f.file, err);
499 if (likely(sock)) {
500 *fput_needed = f.flags;
6cb153ca 501 return sock;
00e188ef
AV
502 }
503 fdput(f);
1da177e4 504 }
6cb153ca 505 return NULL;
1da177e4
LT
506}
507
600e1779
MY
508static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
509 size_t size)
510{
511 ssize_t len;
512 ssize_t used = 0;
513
c5ef6035 514 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
515 if (len < 0)
516 return len;
517 used += len;
518 if (buffer) {
519 if (size < used)
520 return -ERANGE;
521 buffer += len;
522 }
523
524 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
525 used += len;
526 if (buffer) {
527 if (size < used)
528 return -ERANGE;
529 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
530 buffer += len;
531 }
532
533 return used;
534}
535
dc647ec8 536static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
537{
538 int err = simple_setattr(dentry, iattr);
539
e1a3a60a 540 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
541 struct socket *sock = SOCKET_I(d_inode(dentry));
542
543 sock->sk->sk_uid = iattr->ia_uid;
544 }
545
546 return err;
547}
548
600e1779 549static const struct inode_operations sockfs_inode_ops = {
600e1779 550 .listxattr = sockfs_listxattr,
86741ec2 551 .setattr = sockfs_setattr,
600e1779
MY
552};
553
1da177e4
LT
554/**
555 * sock_alloc - allocate a socket
89bddce5 556 *
1da177e4
LT
557 * Allocate a new inode and socket object. The two are bound together
558 * and initialised. The socket is then returned. If we are out of inodes
559 * NULL is returned.
560 */
561
f4a00aac 562struct socket *sock_alloc(void)
1da177e4 563{
89bddce5
SH
564 struct inode *inode;
565 struct socket *sock;
1da177e4 566
a209dfc7 567 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
568 if (!inode)
569 return NULL;
570
571 sock = SOCKET_I(inode);
572
29a020d3 573 kmemcheck_annotate_bitfield(sock, type);
85fe4025 574 inode->i_ino = get_next_ino();
89bddce5 575 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
576 inode->i_uid = current_fsuid();
577 inode->i_gid = current_fsgid();
600e1779 578 inode->i_op = &sockfs_inode_ops;
1da177e4 579
19e8d69c 580 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
581 return sock;
582}
f4a00aac 583EXPORT_SYMBOL(sock_alloc);
1da177e4 584
1da177e4
LT
585/**
586 * sock_release - close a socket
587 * @sock: socket to close
588 *
589 * The socket is released from the protocol stack if it has a release
590 * callback, and the inode is then released if the socket is bound to
89bddce5 591 * an inode not a file.
1da177e4 592 */
89bddce5 593
1da177e4
LT
594void sock_release(struct socket *sock)
595{
596 if (sock->ops) {
597 struct module *owner = sock->ops->owner;
598
599 sock->ops->release(sock);
600 sock->ops = NULL;
601 module_put(owner);
602 }
603
eaefd110 604 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 605 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 606
19e8d69c 607 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
608 if (!sock->file) {
609 iput(SOCK_INODE(sock));
610 return;
611 }
89bddce5 612 sock->file = NULL;
1da177e4 613}
c6d409cf 614EXPORT_SYMBOL(sock_release);
1da177e4 615
c14ac945 616void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 617{
140c55d4
ED
618 u8 flags = *tx_flags;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
621 flags |= SKBTX_HW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
624 flags |= SKBTX_SW_TSTAMP;
625
c14ac945 626 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
627 flags |= SKBTX_SCHED_TSTAMP;
628
140c55d4 629 *tx_flags = flags;
20d49473 630}
67cc0d40 631EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 632
d8725c86 633static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 634{
01e97e65 635 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
636 BUG_ON(ret == -EIOCBQUEUED);
637 return ret;
1da177e4
LT
638}
639
/*
 * Send @msg on @sock after consulting the security hook.
 * Returns the hook's error if it is non-zero, otherwise the result of the
 * protocol's sendmsg handler.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
648
649int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
650 struct kvec *vec, size_t num, size_t size)
651{
6aa24814 652 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 653 return sock_sendmsg(sock, msg);
1da177e4 654}
c6d409cf 655EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 656
1972c29b
SHY
657static bool skb_is_err_queue(const struct sk_buff *skb)
658{
659 /* pkt_type of skbs enqueued on the error queue are set to
660 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
661 * in recvmsg, since skbs received on a local socket will never
662 * have a pkt_type of PACKET_OUTGOING.
663 */
664 return skb->pkt_type == PACKET_OUTGOING;
665}
666
92f37fd2
ED
667/*
668 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
669 */
670void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
671 struct sk_buff *skb)
672{
20d49473 673 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 674 struct scm_timestamping tss;
20d49473
PO
675 int empty = 1;
676 struct skb_shared_hwtstamps *shhwtstamps =
677 skb_hwtstamps(skb);
678
679 /* Race occurred between timestamp enabling and packet
680 receiving. Fill in the current time for now. */
2456e855 681 if (need_software_tstamp && skb->tstamp == 0)
20d49473
PO
682 __net_timestamp(skb);
683
684 if (need_software_tstamp) {
685 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
686 struct timeval tv;
687 skb_get_timestamp(skb, &tv);
688 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
689 sizeof(tv), &tv);
690 } else {
f24b9be5
WB
691 struct timespec ts;
692 skb_get_timestampns(skb, &ts);
20d49473 693 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 694 sizeof(ts), &ts);
20d49473
PO
695 }
696 }
697
f24b9be5 698 memset(&tss, 0, sizeof(tss));
c199105d 699 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 700 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 701 empty = 0;
4d276eb6 702 if (shhwtstamps &&
b9f40e21 703 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 704 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 705 empty = 0;
1c885808 706 if (!empty) {
20d49473 707 put_cmsg(msg, SOL_SOCKET,
f24b9be5 708 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808 709
1972c29b 710 if (skb_is_err_queue(skb) && skb->len &&
9e80911c 711 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
712 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
713 skb->len, skb->data);
714 }
92f37fd2 715}
7c81fd8b
ACM
716EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
717
6e3e939f
JB
718void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
719 struct sk_buff *skb)
720{
721 int ack;
722
723 if (!sock_flag(sk, SOCK_WIFI_STATUS))
724 return;
725 if (!skb->wifi_acked_valid)
726 return;
727
728 ack = skb->wifi_acked;
729
730 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
731}
732EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
733
11165f14 734static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
735 struct sk_buff *skb)
3b885787 736{
744d5a3e 737 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 738 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 739 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
740}
741
/*
 * Attach both the timestamp and the drop-count control messages for @skb
 * to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 749
1b784140 750static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 751 int flags)
1da177e4 752{
2da62906 753 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
754}
755
/*
 * Receive into @msg from @sock after consulting the security hook.
 * Returns the hook's error if it is non-zero, otherwise the result of the
 * protocol's recvmsg handler.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 763
c1249c0a
ML
764/**
765 * kernel_recvmsg - Receive a message from a socket (kernel space)
766 * @sock: The socket to receive the message from
767 * @msg: Received message
768 * @vec: Input s/g array for message data
769 * @num: Size of input s/g array
770 * @size: Number of bytes to read
771 * @flags: Message flags (MSG_DONTWAIT, etc...)
772 *
773 * On return the msg structure contains the scatter/gather array passed in the
774 * vec argument. The array is modified so that it consists of the unfilled
775 * portion of the original array.
776 *
777 * The returned value is the total number of bytes received, or an error.
778 */
89bddce5
SH
779int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
780 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
781{
782 mm_segment_t oldfs = get_fs();
783 int result;
784
6aa24814 785 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 786 set_fs(KERNEL_DS);
2da62906 787 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
788 set_fs(oldfs);
789 return result;
790}
c6d409cf 791EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 792
ce1d4d3e
CH
793static ssize_t sock_sendpage(struct file *file, struct page *page,
794 int offset, size_t size, loff_t *ppos, int more)
1da177e4 795{
1da177e4
LT
796 struct socket *sock;
797 int flags;
798
ce1d4d3e
CH
799 sock = file->private_data;
800
35f9c09f
ED
801 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
802 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
803 flags |= more;
ce1d4d3e 804
e6949583 805 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 806}
1da177e4 807
9c55e01c 808static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 809 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
810 unsigned int flags)
811{
812 struct socket *sock = file->private_data;
813
997b37da
RDC
814 if (unlikely(!sock->ops->splice_read))
815 return -EINVAL;
816
9c55e01c
JA
817 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
818}
819
8ae5e030 820static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 821{
6d652330
AV
822 struct file *file = iocb->ki_filp;
823 struct socket *sock = file->private_data;
0345f931 824 struct msghdr msg = {.msg_iter = *to,
825 .msg_iocb = iocb};
8ae5e030 826 ssize_t res;
ce1d4d3e 827
8ae5e030
AV
828 if (file->f_flags & O_NONBLOCK)
829 msg.msg_flags = MSG_DONTWAIT;
830
831 if (iocb->ki_pos != 0)
1da177e4 832 return -ESPIPE;
027445c3 833
66ee59af 834 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
835 return 0;
836
2da62906 837 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
838 *to = msg.msg_iter;
839 return res;
1da177e4
LT
840}
841
8ae5e030 842static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 843{
6d652330
AV
844 struct file *file = iocb->ki_filp;
845 struct socket *sock = file->private_data;
0345f931 846 struct msghdr msg = {.msg_iter = *from,
847 .msg_iocb = iocb};
8ae5e030 848 ssize_t res;
1da177e4 849
8ae5e030 850 if (iocb->ki_pos != 0)
ce1d4d3e 851 return -ESPIPE;
027445c3 852
8ae5e030
AV
853 if (file->f_flags & O_NONBLOCK)
854 msg.msg_flags = MSG_DONTWAIT;
855
6d652330
AV
856 if (sock->type == SOCK_SEQPACKET)
857 msg.msg_flags |= MSG_EOR;
858
d8725c86 859 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
860 *from = msg.msg_iter;
861 return res;
1da177e4
LT
862}
863
1da177e4
LT
864/*
865 * Atomic setting of ioctl hooks to avoid race
866 * with module unload.
867 */
868
4a3e2f71 869static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 870static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 871
881d966b 872void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 873{
4a3e2f71 874 mutex_lock(&br_ioctl_mutex);
1da177e4 875 br_ioctl_hook = hook;
4a3e2f71 876 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
877}
878EXPORT_SYMBOL(brioctl_set);
879
4a3e2f71 880static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 881static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 882
881d966b 883void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 884{
4a3e2f71 885 mutex_lock(&vlan_ioctl_mutex);
1da177e4 886 vlan_ioctl_hook = hook;
4a3e2f71 887 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
888}
889EXPORT_SYMBOL(vlan_ioctl_set);
890
4a3e2f71 891static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 892static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 893
89bddce5 894void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 895{
4a3e2f71 896 mutex_lock(&dlci_ioctl_mutex);
1da177e4 897 dlci_ioctl_hook = hook;
4a3e2f71 898 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
899}
900EXPORT_SYMBOL(dlci_ioctl_set);
901
6b96018b
AB
902static long sock_do_ioctl(struct net *net, struct socket *sock,
903 unsigned int cmd, unsigned long arg)
904{
905 int err;
906 void __user *argp = (void __user *)arg;
907
908 err = sock->ops->ioctl(sock, cmd, arg);
909
910 /*
911 * If this ioctl is unknown try to hand it down
912 * to the NIC driver.
913 */
914 if (err == -ENOIOCTLCMD)
915 err = dev_ioctl(net, cmd, argp);
916
917 return err;
918}
919
1da177e4
LT
920/*
921 * With an ioctl, arg may well be a user mode pointer, but we don't know
922 * what to do with it - that's up to the protocol still.
923 */
924
c62cce2c
AV
925static struct ns_common *get_net_ns(struct ns_common *ns)
926{
927 return &get_net(container_of(ns, struct net, ns))->ns;
928}
929
1da177e4
LT
930static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
931{
932 struct socket *sock;
881d966b 933 struct sock *sk;
1da177e4
LT
934 void __user *argp = (void __user *)arg;
935 int pid, err;
881d966b 936 struct net *net;
1da177e4 937
b69aee04 938 sock = file->private_data;
881d966b 939 sk = sock->sk;
3b1e0a65 940 net = sock_net(sk);
1da177e4 941 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 942 err = dev_ioctl(net, cmd, argp);
1da177e4 943 } else
3d23e349 944#ifdef CONFIG_WEXT_CORE
1da177e4 945 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 946 err = dev_ioctl(net, cmd, argp);
1da177e4 947 } else
3d23e349 948#endif
89bddce5 949 switch (cmd) {
1da177e4
LT
950 case FIOSETOWN:
951 case SIOCSPGRP:
952 err = -EFAULT;
953 if (get_user(pid, (int __user *)argp))
954 break;
e0b93edd
JL
955 f_setown(sock->file, pid, 1);
956 err = 0;
1da177e4
LT
957 break;
958 case FIOGETOWN:
959 case SIOCGPGRP:
609d7fa9 960 err = put_user(f_getown(sock->file),
89bddce5 961 (int __user *)argp);
1da177e4
LT
962 break;
963 case SIOCGIFBR:
964 case SIOCSIFBR:
965 case SIOCBRADDBR:
966 case SIOCBRDELBR:
967 err = -ENOPKG;
968 if (!br_ioctl_hook)
969 request_module("bridge");
970
4a3e2f71 971 mutex_lock(&br_ioctl_mutex);
89bddce5 972 if (br_ioctl_hook)
881d966b 973 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 974 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
975 break;
976 case SIOCGIFVLAN:
977 case SIOCSIFVLAN:
978 err = -ENOPKG;
979 if (!vlan_ioctl_hook)
980 request_module("8021q");
981
4a3e2f71 982 mutex_lock(&vlan_ioctl_mutex);
1da177e4 983 if (vlan_ioctl_hook)
881d966b 984 err = vlan_ioctl_hook(net, argp);
4a3e2f71 985 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 986 break;
1da177e4
LT
987 case SIOCADDDLCI:
988 case SIOCDELDLCI:
989 err = -ENOPKG;
990 if (!dlci_ioctl_hook)
991 request_module("dlci");
992
7512cbf6
PE
993 mutex_lock(&dlci_ioctl_mutex);
994 if (dlci_ioctl_hook)
1da177e4 995 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 996 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 997 break;
c62cce2c
AV
998 case SIOCGSKNS:
999 err = -EPERM;
1000 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1001 break;
1002
1003 err = open_related_ns(&net->ns, get_net_ns);
1004 break;
1da177e4 1005 default:
6b96018b 1006 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1007 break;
89bddce5 1008 }
1da177e4
LT
1009 return err;
1010}
1011
1012int sock_create_lite(int family, int type, int protocol, struct socket **res)
1013{
1014 int err;
1015 struct socket *sock = NULL;
89bddce5 1016
1da177e4
LT
1017 err = security_socket_create(family, type, protocol, 1);
1018 if (err)
1019 goto out;
1020
1021 sock = sock_alloc();
1022 if (!sock) {
1023 err = -ENOMEM;
1024 goto out;
1025 }
1026
1da177e4 1027 sock->type = type;
7420ed23
VY
1028 err = security_socket_post_create(sock, family, type, protocol, 1);
1029 if (err)
1030 goto out_release;
1031
1da177e4
LT
1032out:
1033 *res = sock;
1034 return err;
7420ed23
VY
1035out_release:
1036 sock_release(sock);
1037 sock = NULL;
1038 goto out;
1da177e4 1039}
c6d409cf 1040EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1041
1042/* No kernel lock held - perfect */
89bddce5 1043static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1044{
cbf55001 1045 unsigned int busy_flag = 0;
1da177e4
LT
1046 struct socket *sock;
1047
1048 /*
89bddce5 1049 * We can't return errors to poll, so it's either yes or no.
1da177e4 1050 */
b69aee04 1051 sock = file->private_data;
2d48d67f 1052
cbf55001 1053 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1054 /* this socket can poll_ll so tell the system call */
cbf55001 1055 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1056
1057 /* once, only if requested by syscall */
cbf55001
ET
1058 if (wait && (wait->_key & POLL_BUSY_LOOP))
1059 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1060 }
1061
cbf55001 1062 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1063}
1064
89bddce5 1065static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1066{
b69aee04 1067 struct socket *sock = file->private_data;
1da177e4
LT
1068
1069 return sock->ops->mmap(file, sock, vma);
1070}
1071
/* Release callback for socket files: releases the inode's socket, returns 0. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1077
1078/*
1079 * Update the socket async list
1080 *
1081 * Fasync_list locking strategy.
1082 *
1083 * 1. fasync_list is modified only under process context socket lock
1084 * i.e. under semaphore.
1085 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1086 * or under socket lock
1da177e4
LT
1087 */
1088
1089static int sock_fasync(int fd, struct file *filp, int on)
1090{
989a2979
ED
1091 struct socket *sock = filp->private_data;
1092 struct sock *sk = sock->sk;
eaefd110 1093 struct socket_wq *wq;
1da177e4 1094
989a2979 1095 if (sk == NULL)
1da177e4 1096 return -EINVAL;
1da177e4
LT
1097
1098 lock_sock(sk);
1e1d04e6 1099 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1100 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1101
eaefd110 1102 if (!wq->fasync_list)
989a2979
ED
1103 sock_reset_flag(sk, SOCK_FASYNC);
1104 else
bcdce719 1105 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1106
989a2979 1107 release_sock(sk);
1da177e4
LT
1108 return 0;
1109}
1110
ceb5d58b 1111/* This function may be called only under rcu_lock */
1da177e4 1112
ceb5d58b 1113int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1114{
ceb5d58b 1115 if (!wq || !wq->fasync_list)
1da177e4 1116 return -1;
ceb5d58b 1117
89bddce5 1118 switch (how) {
8d8ad9d7 1119 case SOCK_WAKE_WAITD:
ceb5d58b 1120 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1121 break;
1122 goto call_kill;
8d8ad9d7 1123 case SOCK_WAKE_SPACE:
ceb5d58b 1124 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1125 break;
1126 /* fall through */
8d8ad9d7 1127 case SOCK_WAKE_IO:
89bddce5 1128call_kill:
43815482 1129 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1130 break;
8d8ad9d7 1131 case SOCK_WAKE_URG:
43815482 1132 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1133 }
ceb5d58b 1134
1da177e4
LT
1135 return 0;
1136}
c6d409cf 1137EXPORT_SYMBOL(sock_wake_async);
1da177e4 1138
721db93a 1139int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1140 struct socket **res, int kern)
1da177e4
LT
1141{
1142 int err;
1143 struct socket *sock;
55737fda 1144 const struct net_proto_family *pf;
1da177e4
LT
1145
1146 /*
89bddce5 1147 * Check protocol is in range
1da177e4
LT
1148 */
1149 if (family < 0 || family >= NPROTO)
1150 return -EAFNOSUPPORT;
1151 if (type < 0 || type >= SOCK_MAX)
1152 return -EINVAL;
1153
1154 /* Compatibility.
1155
1156 This uglymoron is moved from INET layer to here to avoid
1157 deadlock in module load.
1158 */
1159 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1160 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1161 current->comm);
1da177e4
LT
1162 family = PF_PACKET;
1163 }
1164
1165 err = security_socket_create(family, type, protocol, kern);
1166 if (err)
1167 return err;
89bddce5 1168
55737fda
SH
1169 /*
1170 * Allocate the socket and allow the family to set things up. if
1171 * the protocol is 0, the family is instructed to select an appropriate
1172 * default.
1173 */
1174 sock = sock_alloc();
1175 if (!sock) {
e87cc472 1176 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1177 return -ENFILE; /* Not exactly a match, but its the
1178 closest posix thing */
1179 }
1180
1181 sock->type = type;
1182
95a5afca 1183#ifdef CONFIG_MODULES
89bddce5
SH
1184 /* Attempt to load a protocol module if the find failed.
1185 *
1186 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1187 * requested real, full-featured networking support upon configuration.
1188 * Otherwise module support will break!
1189 */
190683a9 1190 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1191 request_module("net-pf-%d", family);
1da177e4
LT
1192#endif
1193
55737fda
SH
1194 rcu_read_lock();
1195 pf = rcu_dereference(net_families[family]);
1196 err = -EAFNOSUPPORT;
1197 if (!pf)
1198 goto out_release;
1da177e4
LT
1199
1200 /*
1201 * We will call the ->create function, that possibly is in a loadable
1202 * module, so we have to bump that loadable module refcnt first.
1203 */
55737fda 1204 if (!try_module_get(pf->owner))
1da177e4
LT
1205 goto out_release;
1206
55737fda
SH
1207 /* Now protected by module ref count */
1208 rcu_read_unlock();
1209
3f378b68 1210 err = pf->create(net, sock, protocol, kern);
55737fda 1211 if (err < 0)
1da177e4 1212 goto out_module_put;
a79af59e 1213
1da177e4
LT
1214 /*
1215 * Now to bump the refcnt of the [loadable] module that owns this
1216 * socket at sock_release time we decrement its refcnt.
1217 */
55737fda
SH
1218 if (!try_module_get(sock->ops->owner))
1219 goto out_module_busy;
1220
1da177e4
LT
1221 /*
1222 * Now that we're done with the ->create function, the [loadable]
1223 * module can have its refcnt decremented
1224 */
55737fda 1225 module_put(pf->owner);
7420ed23
VY
1226 err = security_socket_post_create(sock, family, type, protocol, kern);
1227 if (err)
3b185525 1228 goto out_sock_release;
55737fda 1229 *res = sock;
1da177e4 1230
55737fda
SH
1231 return 0;
1232
1233out_module_busy:
1234 err = -EAFNOSUPPORT;
1da177e4 1235out_module_put:
55737fda
SH
1236 sock->ops = NULL;
1237 module_put(pf->owner);
1238out_sock_release:
1da177e4 1239 sock_release(sock);
55737fda
SH
1240 return err;
1241
1242out_release:
1243 rcu_read_unlock();
1244 goto out_sock_release;
1da177e4 1245}
721db93a 1246EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1247
1248int sock_create(int family, int type, int protocol, struct socket **res)
1249{
1b8d7ae4 1250 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1251}
c6d409cf 1252EXPORT_SYMBOL(sock_create);
1da177e4 1253
/*
 *	Create a socket in the namespace @net on behalf of the kernel.
 *	Wrapper around __sock_create() with kern == 1.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1259
3e0fa65f 1260SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1261{
1262 int retval;
1263 struct socket *sock;
a677a039
UD
1264 int flags;
1265
e38b36f3
UD
1266 /* Check the SOCK_* constants for consistency. */
1267 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1268 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1269 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1270 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1271
a677a039 1272 flags = type & ~SOCK_TYPE_MASK;
77d27200 1273 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1274 return -EINVAL;
1275 type &= SOCK_TYPE_MASK;
1da177e4 1276
aaca0bdc
UD
1277 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1278 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1279
1da177e4
LT
1280 retval = sock_create(family, type, protocol, &sock);
1281 if (retval < 0)
1282 goto out;
1283
77d27200 1284 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1285 if (retval < 0)
1286 goto out_release;
1287
1288out:
1289 /* It may be already another descriptor 8) Not kernel problem. */
1290 return retval;
1291
1292out_release:
1293 sock_release(sock);
1294 return retval;
1295}
1296
1297/*
1298 * Create a pair of connected sockets.
1299 */
1300
3e0fa65f
HC
1301SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1302 int __user *, usockvec)
1da177e4
LT
1303{
1304 struct socket *sock1, *sock2;
1305 int fd1, fd2, err;
db349509 1306 struct file *newfile1, *newfile2;
a677a039
UD
1307 int flags;
1308
1309 flags = type & ~SOCK_TYPE_MASK;
77d27200 1310 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1311 return -EINVAL;
1312 type &= SOCK_TYPE_MASK;
1da177e4 1313
aaca0bdc
UD
1314 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1315 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1316
1da177e4
LT
1317 /*
1318 * Obtain the first socket and check if the underlying protocol
1319 * supports the socketpair call.
1320 */
1321
1322 err = sock_create(family, type, protocol, &sock1);
1323 if (err < 0)
1324 goto out;
1325
1326 err = sock_create(family, type, protocol, &sock2);
1327 if (err < 0)
1328 goto out_release_1;
1329
1330 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1331 if (err < 0)
1da177e4
LT
1332 goto out_release_both;
1333
28407630 1334 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1335 if (unlikely(fd1 < 0)) {
1336 err = fd1;
db349509 1337 goto out_release_both;
bf3c23d1 1338 }
d73aa286 1339
28407630 1340 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1341 if (unlikely(fd2 < 0)) {
1342 err = fd2;
d73aa286 1343 goto out_put_unused_1;
28407630
AV
1344 }
1345
aab174f0 1346 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1347 if (IS_ERR(newfile1)) {
28407630 1348 err = PTR_ERR(newfile1);
d73aa286 1349 goto out_put_unused_both;
28407630
AV
1350 }
1351
aab174f0 1352 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1353 if (IS_ERR(newfile2)) {
1354 err = PTR_ERR(newfile2);
d73aa286 1355 goto out_fput_1;
db349509
AV
1356 }
1357
d73aa286
YD
1358 err = put_user(fd1, &usockvec[0]);
1359 if (err)
1360 goto out_fput_both;
1361
1362 err = put_user(fd2, &usockvec[1]);
1363 if (err)
1364 goto out_fput_both;
1365
157cf649 1366 audit_fd_pair(fd1, fd2);
d73aa286 1367
db349509
AV
1368 fd_install(fd1, newfile1);
1369 fd_install(fd2, newfile2);
1da177e4
LT
1370 /* fd1 and fd2 may be already another descriptors.
1371 * Not kernel problem.
1372 */
1373
d73aa286 1374 return 0;
1da177e4 1375
d73aa286
YD
1376out_fput_both:
1377 fput(newfile2);
1378 fput(newfile1);
1379 put_unused_fd(fd2);
1380 put_unused_fd(fd1);
1381 goto out;
1382
1383out_fput_1:
1384 fput(newfile1);
1385 put_unused_fd(fd2);
1386 put_unused_fd(fd1);
1387 sock_release(sock2);
1388 goto out;
1da177e4 1389
d73aa286
YD
1390out_put_unused_both:
1391 put_unused_fd(fd2);
1392out_put_unused_1:
1393 put_unused_fd(fd1);
1da177e4 1394out_release_both:
89bddce5 1395 sock_release(sock2);
1da177e4 1396out_release_1:
89bddce5 1397 sock_release(sock1);
1da177e4
LT
1398out:
1399 return err;
1400}
1401
1da177e4
LT
1402/*
1403 * Bind a name to a socket. Nothing much to do here since it's
1404 * the protocol's responsibility to handle the local address.
1405 *
1406 * We move the socket address to kernel space before we call
1407 * the protocol layer (having also checked the address is ok).
1408 */
1409
20f37034 1410SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1411{
1412 struct socket *sock;
230b1839 1413 struct sockaddr_storage address;
6cb153ca 1414 int err, fput_needed;
1da177e4 1415
89bddce5 1416 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1417 if (sock) {
43db362d 1418 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1419 if (err >= 0) {
1420 err = security_socket_bind(sock,
230b1839 1421 (struct sockaddr *)&address,
89bddce5 1422 addrlen);
6cb153ca
BL
1423 if (!err)
1424 err = sock->ops->bind(sock,
89bddce5 1425 (struct sockaddr *)
230b1839 1426 &address, addrlen);
1da177e4 1427 }
6cb153ca 1428 fput_light(sock->file, fput_needed);
89bddce5 1429 }
1da177e4
LT
1430 return err;
1431}
1432
1da177e4
LT
1433/*
1434 * Perform a listen. Basically, we allow the protocol to do anything
1435 * necessary for a listen, and if that works, we mark the socket as
1436 * ready for listening.
1437 */
1438
3e0fa65f 1439SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1440{
1441 struct socket *sock;
6cb153ca 1442 int err, fput_needed;
b8e1f9b5 1443 int somaxconn;
89bddce5
SH
1444
1445 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1446 if (sock) {
8efa6e93 1447 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1448 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1449 backlog = somaxconn;
1da177e4
LT
1450
1451 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1452 if (!err)
1453 err = sock->ops->listen(sock, backlog);
1da177e4 1454
6cb153ca 1455 fput_light(sock->file, fput_needed);
1da177e4
LT
1456 }
1457 return err;
1458}
1459
1da177e4
LT
1460/*
1461 * For accept, we attempt to create a new socket, set up the link
1462 * with the client, wake up the client, then return the new
1463 * connected fd. We collect the address of the connector in kernel
1464 * space and move it to user at the very end. This is unclean because
1465 * we open the socket then return an error.
1466 *
1467 * 1003.1g adds the ability to recvmsg() to query connection pending
1468 * status to recvmsg. We need to add that support in a way thats
1469 * clean when we restucture accept also.
1470 */
1471
20f37034
HC
1472SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1473 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1474{
1475 struct socket *sock, *newsock;
39d8c1b6 1476 struct file *newfile;
6cb153ca 1477 int err, len, newfd, fput_needed;
230b1839 1478 struct sockaddr_storage address;
1da177e4 1479
77d27200 1480 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1481 return -EINVAL;
1482
1483 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1484 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1485
6cb153ca 1486 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1487 if (!sock)
1488 goto out;
1489
1490 err = -ENFILE;
c6d409cf
ED
1491 newsock = sock_alloc();
1492 if (!newsock)
1da177e4
LT
1493 goto out_put;
1494
1495 newsock->type = sock->type;
1496 newsock->ops = sock->ops;
1497
1da177e4
LT
1498 /*
1499 * We don't need try_module_get here, as the listening socket (sock)
1500 * has the protocol module (sock->ops->owner) held.
1501 */
1502 __module_get(newsock->ops->owner);
1503
28407630 1504 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1505 if (unlikely(newfd < 0)) {
1506 err = newfd;
9a1875e6
DM
1507 sock_release(newsock);
1508 goto out_put;
39d8c1b6 1509 }
aab174f0 1510 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1511 if (IS_ERR(newfile)) {
28407630
AV
1512 err = PTR_ERR(newfile);
1513 put_unused_fd(newfd);
1514 sock_release(newsock);
1515 goto out_put;
1516 }
39d8c1b6 1517
a79af59e
FF
1518 err = security_socket_accept(sock, newsock);
1519 if (err)
39d8c1b6 1520 goto out_fd;
a79af59e 1521
1da177e4
LT
1522 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1523 if (err < 0)
39d8c1b6 1524 goto out_fd;
1da177e4
LT
1525
1526 if (upeer_sockaddr) {
230b1839 1527 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1528 &len, 2) < 0) {
1da177e4 1529 err = -ECONNABORTED;
39d8c1b6 1530 goto out_fd;
1da177e4 1531 }
43db362d 1532 err = move_addr_to_user(&address,
230b1839 1533 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1534 if (err < 0)
39d8c1b6 1535 goto out_fd;
1da177e4
LT
1536 }
1537
1538 /* File flags are not inherited via accept() unlike another OSes. */
1539
39d8c1b6
DM
1540 fd_install(newfd, newfile);
1541 err = newfd;
1da177e4 1542
1da177e4 1543out_put:
6cb153ca 1544 fput_light(sock->file, fput_needed);
1da177e4
LT
1545out:
1546 return err;
39d8c1b6 1547out_fd:
9606a216 1548 fput(newfile);
39d8c1b6 1549 put_unused_fd(newfd);
1da177e4
LT
1550 goto out_put;
1551}
1552
20f37034
HC
/*
 *	accept(2): identical to accept4() called with flags == 0.
 */
SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
		int __user *, upeer_addrlen)
{
	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
}
1558
1da177e4
LT
1559/*
1560 * Attempt to connect to a socket with the server address. The address
1561 * is in user space so we verify it is OK and move it to kernel space.
1562 *
1563 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1564 * break bindings
1565 *
1566 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1567 * other SEQPACKET protocols that take time to connect() as it doesn't
1568 * include the -EINPROGRESS status for such sockets.
1569 */
1570
20f37034
HC
1571SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1572 int, addrlen)
1da177e4
LT
1573{
1574 struct socket *sock;
230b1839 1575 struct sockaddr_storage address;
6cb153ca 1576 int err, fput_needed;
1da177e4 1577
6cb153ca 1578 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1579 if (!sock)
1580 goto out;
43db362d 1581 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1582 if (err < 0)
1583 goto out_put;
1584
89bddce5 1585 err =
230b1839 1586 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1587 if (err)
1588 goto out_put;
1589
230b1839 1590 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1591 sock->file->f_flags);
1592out_put:
6cb153ca 1593 fput_light(sock->file, fput_needed);
1da177e4
LT
1594out:
1595 return err;
1596}
1597
1598/*
1599 * Get the local address ('name') of a socket object. Move the obtained
1600 * name to user space.
1601 */
1602
20f37034
HC
1603SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1604 int __user *, usockaddr_len)
1da177e4
LT
1605{
1606 struct socket *sock;
230b1839 1607 struct sockaddr_storage address;
6cb153ca 1608 int len, err, fput_needed;
89bddce5 1609
6cb153ca 1610 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1611 if (!sock)
1612 goto out;
1613
1614 err = security_socket_getsockname(sock);
1615 if (err)
1616 goto out_put;
1617
230b1839 1618 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1619 if (err)
1620 goto out_put;
43db362d 1621 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1622
1623out_put:
6cb153ca 1624 fput_light(sock->file, fput_needed);
1da177e4
LT
1625out:
1626 return err;
1627}
1628
1629/*
1630 * Get the remote address ('name') of a socket object. Move the obtained
1631 * name to user space.
1632 */
1633
20f37034
HC
1634SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1635 int __user *, usockaddr_len)
1da177e4
LT
1636{
1637 struct socket *sock;
230b1839 1638 struct sockaddr_storage address;
6cb153ca 1639 int len, err, fput_needed;
1da177e4 1640
89bddce5
SH
1641 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1642 if (sock != NULL) {
1da177e4
LT
1643 err = security_socket_getpeername(sock);
1644 if (err) {
6cb153ca 1645 fput_light(sock->file, fput_needed);
1da177e4
LT
1646 return err;
1647 }
1648
89bddce5 1649 err =
230b1839 1650 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1651 1);
1da177e4 1652 if (!err)
43db362d 1653 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1654 usockaddr_len);
6cb153ca 1655 fput_light(sock->file, fput_needed);
1da177e4
LT
1656 }
1657 return err;
1658}
1659
1660/*
1661 * Send a datagram to a given address. We move the address into kernel
1662 * space and check the user space data area is readable before invoking
1663 * the protocol.
1664 */
1665
3e0fa65f 1666SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1667 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1668 int, addr_len)
1da177e4
LT
1669{
1670 struct socket *sock;
230b1839 1671 struct sockaddr_storage address;
1da177e4
LT
1672 int err;
1673 struct msghdr msg;
1674 struct iovec iov;
6cb153ca 1675 int fput_needed;
6cb153ca 1676
602bd0e9
AV
1677 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1678 if (unlikely(err))
1679 return err;
de0fa95c
PE
1680 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1681 if (!sock)
4387ff75 1682 goto out;
6cb153ca 1683
89bddce5 1684 msg.msg_name = NULL;
89bddce5
SH
1685 msg.msg_control = NULL;
1686 msg.msg_controllen = 0;
1687 msg.msg_namelen = 0;
6cb153ca 1688 if (addr) {
43db362d 1689 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1690 if (err < 0)
1691 goto out_put;
230b1839 1692 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1693 msg.msg_namelen = addr_len;
1da177e4
LT
1694 }
1695 if (sock->file->f_flags & O_NONBLOCK)
1696 flags |= MSG_DONTWAIT;
1697 msg.msg_flags = flags;
d8725c86 1698 err = sock_sendmsg(sock, &msg);
1da177e4 1699
89bddce5 1700out_put:
de0fa95c 1701 fput_light(sock->file, fput_needed);
4387ff75 1702out:
1da177e4
LT
1703 return err;
1704}
1705
/*
 *	Send a datagram down a socket.  Identical to sendto() with a NULL
 *	destination address and an address length of 0.
 */

SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
		unsigned int, flags)
{
	return sys_sendto(fd, buff, len, flags, NULL, 0);
}
1715
1716/*
89bddce5 1717 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1718 * sender. We verify the buffers are writable and if needed move the
1719 * sender address from kernel to user space.
1720 */
1721
3e0fa65f 1722SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1723 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1724 int __user *, addr_len)
1da177e4
LT
1725{
1726 struct socket *sock;
1727 struct iovec iov;
1728 struct msghdr msg;
230b1839 1729 struct sockaddr_storage address;
89bddce5 1730 int err, err2;
6cb153ca
BL
1731 int fput_needed;
1732
602bd0e9
AV
1733 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1734 if (unlikely(err))
1735 return err;
de0fa95c 1736 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1737 if (!sock)
de0fa95c 1738 goto out;
1da177e4 1739
89bddce5
SH
1740 msg.msg_control = NULL;
1741 msg.msg_controllen = 0;
f3d33426
HFS
1742 /* Save some cycles and don't copy the address if not needed */
1743 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1744 /* We assume all kernel code knows the size of sockaddr_storage */
1745 msg.msg_namelen = 0;
130ed5d1 1746 msg.msg_iocb = NULL;
1da177e4
LT
1747 if (sock->file->f_flags & O_NONBLOCK)
1748 flags |= MSG_DONTWAIT;
2da62906 1749 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1750
89bddce5 1751 if (err >= 0 && addr != NULL) {
43db362d 1752 err2 = move_addr_to_user(&address,
230b1839 1753 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1754 if (err2 < 0)
1755 err = err2;
1da177e4 1756 }
de0fa95c
PE
1757
1758 fput_light(sock->file, fput_needed);
4387ff75 1759out:
1da177e4
LT
1760 return err;
1761}
1762
/*
 *	Receive a datagram from a socket.  Identical to recvfrom() with
 *	both the address and the address-length pointers set to NULL.
 */

SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
		unsigned int, flags)
{
	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
}
1772
1773/*
1774 * Set a socket option. Because we don't know the option lengths we have
1775 * to pass the user mode parameter for the protocols to sort out.
1776 */
1777
20f37034
HC
1778SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1779 char __user *, optval, int, optlen)
1da177e4 1780{
6cb153ca 1781 int err, fput_needed;
1da177e4
LT
1782 struct socket *sock;
1783
1784 if (optlen < 0)
1785 return -EINVAL;
89bddce5
SH
1786
1787 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1788 if (sock != NULL) {
1789 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1790 if (err)
1791 goto out_put;
1da177e4
LT
1792
1793 if (level == SOL_SOCKET)
89bddce5
SH
1794 err =
1795 sock_setsockopt(sock, level, optname, optval,
1796 optlen);
1da177e4 1797 else
89bddce5
SH
1798 err =
1799 sock->ops->setsockopt(sock, level, optname, optval,
1800 optlen);
6cb153ca
BL
1801out_put:
1802 fput_light(sock->file, fput_needed);
1da177e4
LT
1803 }
1804 return err;
1805}
1806
1807/*
1808 * Get a socket option. Because we don't know the option lengths we have
1809 * to pass a user mode parameter for the protocols to sort out.
1810 */
1811
20f37034
HC
1812SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1813 char __user *, optval, int __user *, optlen)
1da177e4 1814{
6cb153ca 1815 int err, fput_needed;
1da177e4
LT
1816 struct socket *sock;
1817
89bddce5
SH
1818 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1819 if (sock != NULL) {
6cb153ca
BL
1820 err = security_socket_getsockopt(sock, level, optname);
1821 if (err)
1822 goto out_put;
1da177e4
LT
1823
1824 if (level == SOL_SOCKET)
89bddce5
SH
1825 err =
1826 sock_getsockopt(sock, level, optname, optval,
1827 optlen);
1da177e4 1828 else
89bddce5
SH
1829 err =
1830 sock->ops->getsockopt(sock, level, optname, optval,
1831 optlen);
6cb153ca
BL
1832out_put:
1833 fput_light(sock->file, fput_needed);
1da177e4
LT
1834 }
1835 return err;
1836}
1837
1da177e4
LT
1838/*
1839 * Shutdown a socket.
1840 */
1841
754fe8d2 1842SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1843{
6cb153ca 1844 int err, fput_needed;
1da177e4
LT
1845 struct socket *sock;
1846
89bddce5
SH
1847 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1848 if (sock != NULL) {
1da177e4 1849 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1850 if (!err)
1851 err = sock->ops->shutdown(sock, how);
1852 fput_light(sock->file, fput_needed);
1da177e4
LT
1853 }
1854 return err;
1855}
1856
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG(msg, member) yields &msg_compat->member when MSG_CMSG_COMPAT
 * is set in `flags`, and &msg->member otherwise.  It therefore needs, at
 * the point of use, a variable named `flags` and a pointer whose name is
 * the msg argument with `_compat` appended; msg must be a plain identifier
 * because it is token-pasted.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1863
c71d8ebe
TH
/*
 * Destination address of the last message that ___sys_sendmsg() sent
 * successfully on behalf of sendmmsg().  A later message to the same
 * address is sent with sock_sendmsg_nosec() instead of sock_sendmsg().
 */
struct used_address {
	struct sockaddr_storage name;	/* copy of the remembered address */
	unsigned int name_len;		/* number of valid bytes in name */
};
1868
da184284
AV
1869static int copy_msghdr_from_user(struct msghdr *kmsg,
1870 struct user_msghdr __user *umsg,
1871 struct sockaddr __user **save_addr,
1872 struct iovec **iov)
1661bf36 1873{
08adb7da
AV
1874 struct sockaddr __user *uaddr;
1875 struct iovec __user *uiov;
c0371da6 1876 size_t nr_segs;
08adb7da
AV
1877 ssize_t err;
1878
1879 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1880 __get_user(uaddr, &umsg->msg_name) ||
1881 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1882 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1883 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1884 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1885 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1886 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1887 return -EFAULT;
dbb490b9 1888
08adb7da 1889 if (!uaddr)
6a2a2b3a
AS
1890 kmsg->msg_namelen = 0;
1891
dbb490b9
ML
1892 if (kmsg->msg_namelen < 0)
1893 return -EINVAL;
1894
1661bf36 1895 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1896 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1897
1898 if (save_addr)
1899 *save_addr = uaddr;
1900
1901 if (uaddr && kmsg->msg_namelen) {
1902 if (!save_addr) {
1903 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1904 kmsg->msg_name);
1905 if (err < 0)
1906 return err;
1907 }
1908 } else {
1909 kmsg->msg_name = NULL;
1910 kmsg->msg_namelen = 0;
1911 }
1912
c0371da6 1913 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1914 return -EMSGSIZE;
1915
0345f931 1916 kmsg->msg_iocb = NULL;
1917
da184284
AV
1918 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1919 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1920}
1921
666547ff 1922static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1923 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1924 struct used_address *used_address,
1925 unsigned int allowed_msghdr_flags)
1da177e4 1926{
89bddce5
SH
1927 struct compat_msghdr __user *msg_compat =
1928 (struct compat_msghdr __user *)msg;
230b1839 1929 struct sockaddr_storage address;
1da177e4 1930 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1931 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1932 __aligned(sizeof(__kernel_size_t));
89bddce5 1933 /* 20 is size of ipv6_pktinfo */
1da177e4 1934 unsigned char *ctl_buf = ctl;
d8725c86 1935 int ctl_len;
08adb7da 1936 ssize_t err;
89bddce5 1937
08adb7da 1938 msg_sys->msg_name = &address;
1da177e4 1939
08449320 1940 if (MSG_CMSG_COMPAT & flags)
08adb7da 1941 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1942 else
08adb7da 1943 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1944 if (err < 0)
da184284 1945 return err;
1da177e4
LT
1946
1947 err = -ENOBUFS;
1948
228e548e 1949 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1950 goto out_freeiov;
28a94d8f 1951 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1952 ctl_len = msg_sys->msg_controllen;
1da177e4 1953 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1954 err =
228e548e 1955 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1956 sizeof(ctl));
1da177e4
LT
1957 if (err)
1958 goto out_freeiov;
228e548e
AB
1959 ctl_buf = msg_sys->msg_control;
1960 ctl_len = msg_sys->msg_controllen;
1da177e4 1961 } else if (ctl_len) {
89bddce5 1962 if (ctl_len > sizeof(ctl)) {
1da177e4 1963 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1964 if (ctl_buf == NULL)
1da177e4
LT
1965 goto out_freeiov;
1966 }
1967 err = -EFAULT;
1968 /*
228e548e 1969 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1970 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1971 * checking falls down on this.
1972 */
fb8621bb 1973 if (copy_from_user(ctl_buf,
228e548e 1974 (void __user __force *)msg_sys->msg_control,
89bddce5 1975 ctl_len))
1da177e4 1976 goto out_freectl;
228e548e 1977 msg_sys->msg_control = ctl_buf;
1da177e4 1978 }
228e548e 1979 msg_sys->msg_flags = flags;
1da177e4
LT
1980
1981 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1982 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1983 /*
1984 * If this is sendmmsg() and current destination address is same as
1985 * previously succeeded address, omit asking LSM's decision.
1986 * used_address->name_len is initialized to UINT_MAX so that the first
1987 * destination address never matches.
1988 */
bc909d9d
MD
1989 if (used_address && msg_sys->msg_name &&
1990 used_address->name_len == msg_sys->msg_namelen &&
1991 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1992 used_address->name_len)) {
d8725c86 1993 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1994 goto out_freectl;
1995 }
d8725c86 1996 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1997 /*
1998 * If this is sendmmsg() and sending to current destination address was
1999 * successful, remember it.
2000 */
2001 if (used_address && err >= 0) {
2002 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2003 if (msg_sys->msg_name)
2004 memcpy(&used_address->name, msg_sys->msg_name,
2005 used_address->name_len);
c71d8ebe 2006 }
1da177e4
LT
2007
2008out_freectl:
89bddce5 2009 if (ctl_buf != ctl)
1da177e4
LT
2010 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2011out_freeiov:
da184284 2012 kfree(iov);
228e548e
AB
2013 return err;
2014}
2015
2016/*
2017 * BSD sendmsg interface
2018 */
2019
666547ff 2020long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2021{
2022 int fput_needed, err;
2023 struct msghdr msg_sys;
1be374a0
AL
2024 struct socket *sock;
2025
1be374a0 2026 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2027 if (!sock)
2028 goto out;
2029
28a94d8f 2030 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2031
6cb153ca 2032 fput_light(sock->file, fput_needed);
89bddce5 2033out:
1da177e4
LT
2034 return err;
2035}
2036
666547ff 2037SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2038{
2039 if (flags & MSG_CMSG_COMPAT)
2040 return -EINVAL;
2041 return __sys_sendmsg(fd, msg, flags);
2042}
2043
228e548e
AB
2044/*
2045 * Linux sendmmsg interface
2046 */
2047
2048int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2049 unsigned int flags)
2050{
2051 int fput_needed, err, datagrams;
2052 struct socket *sock;
2053 struct mmsghdr __user *entry;
2054 struct compat_mmsghdr __user *compat_entry;
2055 struct msghdr msg_sys;
c71d8ebe 2056 struct used_address used_address;
f092276d 2057 unsigned int oflags = flags;
228e548e 2058
98382f41
AB
2059 if (vlen > UIO_MAXIOV)
2060 vlen = UIO_MAXIOV;
228e548e
AB
2061
2062 datagrams = 0;
2063
2064 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2065 if (!sock)
2066 return err;
2067
c71d8ebe 2068 used_address.name_len = UINT_MAX;
228e548e
AB
2069 entry = mmsg;
2070 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2071 err = 0;
f092276d 2072 flags |= MSG_BATCH;
228e548e
AB
2073
2074 while (datagrams < vlen) {
f092276d
TH
2075 if (datagrams == vlen - 1)
2076 flags = oflags;
2077
228e548e 2078 if (MSG_CMSG_COMPAT & flags) {
666547ff 2079 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2080 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2081 if (err < 0)
2082 break;
2083 err = __put_user(err, &compat_entry->msg_len);
2084 ++compat_entry;
2085 } else {
a7526eb5 2086 err = ___sys_sendmsg(sock,
666547ff 2087 (struct user_msghdr __user *)entry,
28a94d8f 2088 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2089 if (err < 0)
2090 break;
2091 err = put_user(err, &entry->msg_len);
2092 ++entry;
2093 }
2094
2095 if (err)
2096 break;
2097 ++datagrams;
3023898b
SHY
2098 if (msg_data_left(&msg_sys))
2099 break;
a78cb84c 2100 cond_resched();
228e548e
AB
2101 }
2102
228e548e
AB
2103 fput_light(sock->file, fput_needed);
2104
728ffb86
AB
2105 /* We only return an error if no datagrams were able to be sent */
2106 if (datagrams != 0)
228e548e
AB
2107 return datagrams;
2108
228e548e
AB
2109 return err;
2110}
2111
2112SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2113 unsigned int, vlen, unsigned int, flags)
2114{
1be374a0
AL
2115 if (flags & MSG_CMSG_COMPAT)
2116 return -EINVAL;
228e548e
AB
2117 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2118}
2119
666547ff 2120static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2121 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2122{
89bddce5
SH
2123 struct compat_msghdr __user *msg_compat =
2124 (struct compat_msghdr __user *)msg;
1da177e4 2125 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2126 struct iovec *iov = iovstack;
1da177e4 2127 unsigned long cmsg_ptr;
2da62906 2128 int len;
08adb7da 2129 ssize_t err;
1da177e4
LT
2130
2131 /* kernel mode address */
230b1839 2132 struct sockaddr_storage addr;
1da177e4
LT
2133
2134 /* user mode address pointers */
2135 struct sockaddr __user *uaddr;
08adb7da 2136 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2137
08adb7da 2138 msg_sys->msg_name = &addr;
1da177e4 2139
f3d33426 2140 if (MSG_CMSG_COMPAT & flags)
08adb7da 2141 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2142 else
08adb7da 2143 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2144 if (err < 0)
da184284 2145 return err;
1da177e4 2146
a2e27255
ACM
2147 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2148 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2149
f3d33426
HFS
2150 /* We assume all kernel code knows the size of sockaddr_storage */
2151 msg_sys->msg_namelen = 0;
2152
1da177e4
LT
2153 if (sock->file->f_flags & O_NONBLOCK)
2154 flags |= MSG_DONTWAIT;
2da62906 2155 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2156 if (err < 0)
2157 goto out_freeiov;
2158 len = err;
2159
2160 if (uaddr != NULL) {
43db362d 2161 err = move_addr_to_user(&addr,
a2e27255 2162 msg_sys->msg_namelen, uaddr,
89bddce5 2163 uaddr_len);
1da177e4
LT
2164 if (err < 0)
2165 goto out_freeiov;
2166 }
a2e27255 2167 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2168 COMPAT_FLAGS(msg));
1da177e4
LT
2169 if (err)
2170 goto out_freeiov;
2171 if (MSG_CMSG_COMPAT & flags)
a2e27255 2172 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2173 &msg_compat->msg_controllen);
2174 else
a2e27255 2175 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2176 &msg->msg_controllen);
2177 if (err)
2178 goto out_freeiov;
2179 err = len;
2180
2181out_freeiov:
da184284 2182 kfree(iov);
a2e27255
ACM
2183 return err;
2184}
2185
2186/*
2187 * BSD recvmsg interface
2188 */
2189
666547ff 2190long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2191{
2192 int fput_needed, err;
2193 struct msghdr msg_sys;
1be374a0
AL
2194 struct socket *sock;
2195
1be374a0 2196 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2197 if (!sock)
2198 goto out;
2199
a7526eb5 2200 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2201
6cb153ca 2202 fput_light(sock->file, fput_needed);
1da177e4
LT
2203out:
2204 return err;
2205}
2206
666547ff 2207SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2208 unsigned int, flags)
2209{
2210 if (flags & MSG_CMSG_COMPAT)
2211 return -EINVAL;
2212 return __sys_recvmsg(fd, msg, flags);
2213}
2214
a2e27255
ACM
2215/*
2216 * Linux recvmmsg interface
2217 */
2218
2219int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2220 unsigned int flags, struct timespec *timeout)
2221{
2222 int fput_needed, err, datagrams;
2223 struct socket *sock;
2224 struct mmsghdr __user *entry;
d7256d0e 2225 struct compat_mmsghdr __user *compat_entry;
a2e27255 2226 struct msghdr msg_sys;
766b9f92
DD
2227 struct timespec64 end_time;
2228 struct timespec64 timeout64;
a2e27255
ACM
2229
2230 if (timeout &&
2231 poll_select_set_timeout(&end_time, timeout->tv_sec,
2232 timeout->tv_nsec))
2233 return -EINVAL;
2234
2235 datagrams = 0;
2236
2237 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2238 if (!sock)
2239 return err;
2240
2241 err = sock_error(sock->sk);
5c537dff
MJ
2242 if (err) {
2243 datagrams = err;
a2e27255 2244 goto out_put;
5c537dff 2245 }
a2e27255
ACM
2246
2247 entry = mmsg;
d7256d0e 2248 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2249
2250 while (datagrams < vlen) {
2251 /*
2252 * No need to ask LSM for more than the first datagram.
2253 */
d7256d0e 2254 if (MSG_CMSG_COMPAT & flags) {
666547ff 2255 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2256 &msg_sys, flags & ~MSG_WAITFORONE,
2257 datagrams);
d7256d0e
JMG
2258 if (err < 0)
2259 break;
2260 err = __put_user(err, &compat_entry->msg_len);
2261 ++compat_entry;
2262 } else {
a7526eb5 2263 err = ___sys_recvmsg(sock,
666547ff 2264 (struct user_msghdr __user *)entry,
a7526eb5
AL
2265 &msg_sys, flags & ~MSG_WAITFORONE,
2266 datagrams);
d7256d0e
JMG
2267 if (err < 0)
2268 break;
2269 err = put_user(err, &entry->msg_len);
2270 ++entry;
2271 }
2272
a2e27255
ACM
2273 if (err)
2274 break;
a2e27255
ACM
2275 ++datagrams;
2276
71c5c159
BB
2277 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2278 if (flags & MSG_WAITFORONE)
2279 flags |= MSG_DONTWAIT;
2280
a2e27255 2281 if (timeout) {
766b9f92
DD
2282 ktime_get_ts64(&timeout64);
2283 *timeout = timespec64_to_timespec(
2284 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2285 if (timeout->tv_sec < 0) {
2286 timeout->tv_sec = timeout->tv_nsec = 0;
2287 break;
2288 }
2289
2290 /* Timeout, return less than vlen datagrams */
2291 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2292 break;
2293 }
2294
2295 /* Out of band data, return right away */
2296 if (msg_sys.msg_flags & MSG_OOB)
2297 break;
a78cb84c 2298 cond_resched();
a2e27255
ACM
2299 }
2300
a2e27255 2301 if (err == 0)
34b88a68
ACM
2302 goto out_put;
2303
2304 if (datagrams == 0) {
2305 datagrams = err;
2306 goto out_put;
2307 }
a2e27255 2308
34b88a68
ACM
2309 /*
2310 * We may return less entries than requested (vlen) if the
2311 * sock is non block and there aren't enough datagrams...
2312 */
2313 if (err != -EAGAIN) {
a2e27255 2314 /*
34b88a68
ACM
2315 * ... or if recvmsg returns an error after we
2316 * received some datagrams, where we record the
2317 * error to return on the next call or if the
2318 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2319 */
34b88a68 2320 sock->sk->sk_err = -err;
a2e27255 2321 }
34b88a68
ACM
2322out_put:
2323 fput_light(sock->file, fput_needed);
a2e27255 2324
34b88a68 2325 return datagrams;
a2e27255
ACM
2326}
2327
2328SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2329 unsigned int, vlen, unsigned int, flags,
2330 struct timespec __user *, timeout)
2331{
2332 int datagrams;
2333 struct timespec timeout_sys;
2334
1be374a0
AL
2335 if (flags & MSG_CMSG_COMPAT)
2336 return -EINVAL;
2337
a2e27255
ACM
2338 if (!timeout)
2339 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2340
2341 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2342 return -EFAULT;
2343
2344 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2345
2346 if (datagrams > 0 &&
2347 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2348 datagrams = -EFAULT;
2349
2350 return datagrams;
2351}
2352
2353#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: for each call number, the number
 * of bytes of unsigned long arguments to fetch from user space.
 * The per-entry call names assume the usual SYS_* numbering from
 * <linux/net.h> - confirm against that header.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0: (unused)    */
	AL(3),	/*  1: socket      */
	AL(3),	/*  2: bind        */
	AL(3),	/*  3: connect     */
	AL(2),	/*  4: listen      */
	AL(3),	/*  5: accept      */
	AL(3),	/*  6: getsockname */
	AL(3),	/*  7: getpeername */
	AL(4),	/*  8: socketpair  */
	AL(4),	/*  9: send        */
	AL(4),	/* 10: recv        */
	AL(6),	/* 11: sendto      */
	AL(6),	/* 12: recvfrom    */
	AL(2),	/* 13: shutdown    */
	AL(5),	/* 14: setsockopt  */
	AL(5),	/* 15: getsockopt  */
	AL(3),	/* 16: sendmsg     */
	AL(3),	/* 17: recvmsg     */
	AL(4),	/* 18: accept4     */
	AL(5),	/* 19: recvmmsg    */
	AL(4),	/* 20: sendmmsg    */
};

#undef AL
2364
2365/*
89bddce5 2366 * System call vectors.
1da177e4
LT
2367 *
2368 * Argument checking cleaned up. Saved 20% in size.
2369 * This function doesn't need to set the kernel lock because
89bddce5 2370 * it is set by the callees.
1da177e4
LT
2371 */
2372
3e0fa65f 2373SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2374{
2950fa9d 2375 unsigned long a[AUDITSC_ARGS];
89bddce5 2376 unsigned long a0, a1;
1da177e4 2377 int err;
47379052 2378 unsigned int len;
1da177e4 2379
228e548e 2380 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2381 return -EINVAL;
2382
47379052
AV
2383 len = nargs[call];
2384 if (len > sizeof(a))
2385 return -EINVAL;
2386
1da177e4 2387 /* copy_from_user should be SMP safe. */
47379052 2388 if (copy_from_user(a, args, len))
1da177e4 2389 return -EFAULT;
3ec3b2fb 2390
2950fa9d
CG
2391 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2392 if (err)
2393 return err;
3ec3b2fb 2394
89bddce5
SH
2395 a0 = a[0];
2396 a1 = a[1];
2397
2398 switch (call) {
2399 case SYS_SOCKET:
2400 err = sys_socket(a0, a1, a[2]);
2401 break;
2402 case SYS_BIND:
2403 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2404 break;
2405 case SYS_CONNECT:
2406 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2407 break;
2408 case SYS_LISTEN:
2409 err = sys_listen(a0, a1);
2410 break;
2411 case SYS_ACCEPT:
de11defe
UD
2412 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2413 (int __user *)a[2], 0);
89bddce5
SH
2414 break;
2415 case SYS_GETSOCKNAME:
2416 err =
2417 sys_getsockname(a0, (struct sockaddr __user *)a1,
2418 (int __user *)a[2]);
2419 break;
2420 case SYS_GETPEERNAME:
2421 err =
2422 sys_getpeername(a0, (struct sockaddr __user *)a1,
2423 (int __user *)a[2]);
2424 break;
2425 case SYS_SOCKETPAIR:
2426 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2427 break;
2428 case SYS_SEND:
2429 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2430 break;
2431 case SYS_SENDTO:
2432 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2433 (struct sockaddr __user *)a[4], a[5]);
2434 break;
2435 case SYS_RECV:
2436 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2437 break;
2438 case SYS_RECVFROM:
2439 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2440 (struct sockaddr __user *)a[4],
2441 (int __user *)a[5]);
2442 break;
2443 case SYS_SHUTDOWN:
2444 err = sys_shutdown(a0, a1);
2445 break;
2446 case SYS_SETSOCKOPT:
2447 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2448 break;
2449 case SYS_GETSOCKOPT:
2450 err =
2451 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2452 (int __user *)a[4]);
2453 break;
2454 case SYS_SENDMSG:
666547ff 2455 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2456 break;
228e548e
AB
2457 case SYS_SENDMMSG:
2458 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2459 break;
89bddce5 2460 case SYS_RECVMSG:
666547ff 2461 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2462 break;
a2e27255
ACM
2463 case SYS_RECVMMSG:
2464 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2465 (struct timespec __user *)a[4]);
2466 break;
de11defe
UD
2467 case SYS_ACCEPT4:
2468 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2469 (int __user *)a[2], a[3]);
aaca0bdc 2470 break;
89bddce5
SH
2471 default:
2472 err = -EINVAL;
2473 break;
1da177e4
LT
2474 }
2475 return err;
2476}
2477
89bddce5 2478#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2479
55737fda
SH
2480/**
2481 * sock_register - add a socket protocol handler
2482 * @ops: description of protocol
2483 *
1da177e4
LT
2484 * This function is called by a protocol handler that wants to
2485 * advertise its address family, and have it linked into the
e793c0f7 2486 * socket interface. The value ops->family corresponds to the
55737fda 2487 * socket system call protocol family.
1da177e4 2488 */
f0fd27d4 2489int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2490{
2491 int err;
2492
2493 if (ops->family >= NPROTO) {
3410f22e 2494 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2495 return -ENOBUFS;
2496 }
55737fda
SH
2497
2498 spin_lock(&net_family_lock);
190683a9
ED
2499 if (rcu_dereference_protected(net_families[ops->family],
2500 lockdep_is_held(&net_family_lock)))
55737fda
SH
2501 err = -EEXIST;
2502 else {
cf778b00 2503 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2504 err = 0;
2505 }
55737fda
SH
2506 spin_unlock(&net_family_lock);
2507
3410f22e 2508 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2509 return err;
2510}
c6d409cf 2511EXPORT_SYMBOL(sock_register);
1da177e4 2512
55737fda
SH
2513/**
2514 * sock_unregister - remove a protocol handler
2515 * @family: protocol family to remove
2516 *
1da177e4
LT
2517 * This function is called by a protocol handler that wants to
2518 * remove its address family, and have it unlinked from the
55737fda
SH
2519 * new socket creation.
2520 *
2521 * If protocol handler is a module, then it can use module reference
2522 * counts to protect against new references. If protocol handler is not
2523 * a module then it needs to provide its own protection in
2524 * the ops->create routine.
1da177e4 2525 */
f0fd27d4 2526void sock_unregister(int family)
1da177e4 2527{
f0fd27d4 2528 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2529
55737fda 2530 spin_lock(&net_family_lock);
a9b3cd7f 2531 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2532 spin_unlock(&net_family_lock);
2533
2534 synchronize_rcu();
2535
3410f22e 2536 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2537}
c6d409cf 2538EXPORT_SYMBOL(sock_unregister);
1da177e4 2539
77d76ea3 2540static int __init sock_init(void)
1da177e4 2541{
b3e19d92 2542 int err;
2ca794e5
EB
2543 /*
2544 * Initialize the network sysctl infrastructure.
2545 */
2546 err = net_sysctl_init();
2547 if (err)
2548 goto out;
b3e19d92 2549
1da177e4 2550 /*
89bddce5 2551 * Initialize skbuff SLAB cache
1da177e4
LT
2552 */
2553 skb_init();
1da177e4
LT
2554
2555 /*
89bddce5 2556 * Initialize the protocols module.
1da177e4
LT
2557 */
2558
2559 init_inodecache();
b3e19d92
NP
2560
2561 err = register_filesystem(&sock_fs_type);
2562 if (err)
2563 goto out_fs;
1da177e4 2564 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2565 if (IS_ERR(sock_mnt)) {
2566 err = PTR_ERR(sock_mnt);
2567 goto out_mount;
2568 }
77d76ea3
AK
2569
2570 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2571 */
2572
2573#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2574 err = netfilter_init();
2575 if (err)
2576 goto out;
1da177e4 2577#endif
cbeb321a 2578
408eccce 2579 ptp_classifier_init();
c1f19b51 2580
b3e19d92
NP
2581out:
2582 return err;
2583
2584out_mount:
2585 unregister_filesystem(&sock_fs_type);
2586out_fs:
2587 goto out;
1da177e4
LT
2588}
2589
77d76ea3
AK
2590core_initcall(sock_init); /* early initcall */
2591
1da177e4
LT
2592#ifdef CONFIG_PROC_FS
2593void socket_seq_show(struct seq_file *seq)
2594{
2595 int cpu;
2596 int counter = 0;
2597
6f912042 2598 for_each_possible_cpu(cpu)
89bddce5 2599 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2600
2601 /* It can be negative, by the way. 8) */
2602 if (counter < 0)
2603 counter = 0;
2604
2605 seq_printf(seq, "sockets: used %d\n", counter);
2606}
89bddce5 2607#endif /* CONFIG_PROC_FS */
1da177e4 2608
89bbfc95 2609#ifdef CONFIG_COMPAT
6b96018b 2610static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2611 unsigned int cmd, void __user *up)
7a229387 2612{
7a229387
AB
2613 mm_segment_t old_fs = get_fs();
2614 struct timeval ktv;
2615 int err;
2616
2617 set_fs(KERNEL_DS);
6b96018b 2618 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2619 set_fs(old_fs);
644595f8 2620 if (!err)
ed6fe9d6 2621 err = compat_put_timeval(&ktv, up);
644595f8 2622
7a229387
AB
2623 return err;
2624}
2625
6b96018b 2626static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2627 unsigned int cmd, void __user *up)
7a229387 2628{
7a229387
AB
2629 mm_segment_t old_fs = get_fs();
2630 struct timespec kts;
2631 int err;
2632
2633 set_fs(KERNEL_DS);
6b96018b 2634 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2635 set_fs(old_fs);
644595f8 2636 if (!err)
ed6fe9d6 2637 err = compat_put_timespec(&kts, up);
644595f8 2638
7a229387
AB
2639 return err;
2640}
2641
6b96018b 2642static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2643{
2644 struct ifreq __user *uifr;
2645 int err;
2646
2647 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2648 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2649 return -EFAULT;
2650
6b96018b 2651 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2652 if (err)
2653 return err;
2654
6b96018b 2655 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2656 return -EFAULT;
2657
2658 return 0;
2659}
2660
6b96018b 2661static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2662{
6b96018b 2663 struct compat_ifconf ifc32;
7a229387
AB
2664 struct ifconf ifc;
2665 struct ifconf __user *uifc;
6b96018b 2666 struct compat_ifreq __user *ifr32;
7a229387
AB
2667 struct ifreq __user *ifr;
2668 unsigned int i, j;
2669 int err;
2670
6b96018b 2671 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2672 return -EFAULT;
2673
43da5f2e 2674 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2675 if (ifc32.ifcbuf == 0) {
2676 ifc32.ifc_len = 0;
2677 ifc.ifc_len = 0;
2678 ifc.ifc_req = NULL;
2679 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2680 } else {
c6d409cf
ED
2681 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2682 sizeof(struct ifreq);
7a229387
AB
2683 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2684 ifc.ifc_len = len;
2685 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2686 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2687 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2688 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2689 return -EFAULT;
2690 ifr++;
2691 ifr32++;
2692 }
2693 }
2694 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2695 return -EFAULT;
2696
6b96018b 2697 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2698 if (err)
2699 return err;
2700
2701 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2702 return -EFAULT;
2703
2704 ifr = ifc.ifc_req;
2705 ifr32 = compat_ptr(ifc32.ifcbuf);
2706 for (i = 0, j = 0;
c6d409cf
ED
2707 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2708 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2709 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2710 return -EFAULT;
2711 ifr32++;
2712 ifr++;
2713 }
2714
2715 if (ifc32.ifcbuf == 0) {
2716 /* Translate from 64-bit structure multiple to
2717 * a 32-bit one.
2718 */
2719 i = ifc.ifc_len;
6b96018b 2720 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2721 ifc32.ifc_len = i;
2722 } else {
2723 ifc32.ifc_len = i;
2724 }
6b96018b 2725 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2726 return -EFAULT;
2727
2728 return 0;
2729}
2730
6b96018b 2731static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2732{
3a7da39d
BH
2733 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2734 bool convert_in = false, convert_out = false;
2735 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2736 struct ethtool_rxnfc __user *rxnfc;
7a229387 2737 struct ifreq __user *ifr;
3a7da39d
BH
2738 u32 rule_cnt = 0, actual_rule_cnt;
2739 u32 ethcmd;
7a229387 2740 u32 data;
3a7da39d 2741 int ret;
7a229387 2742
3a7da39d
BH
2743 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2744 return -EFAULT;
7a229387 2745
3a7da39d
BH
2746 compat_rxnfc = compat_ptr(data);
2747
2748 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2749 return -EFAULT;
2750
3a7da39d
BH
2751 /* Most ethtool structures are defined without padding.
2752 * Unfortunately struct ethtool_rxnfc is an exception.
2753 */
2754 switch (ethcmd) {
2755 default:
2756 break;
2757 case ETHTOOL_GRXCLSRLALL:
2758 /* Buffer size is variable */
2759 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2760 return -EFAULT;
2761 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2762 return -ENOMEM;
2763 buf_size += rule_cnt * sizeof(u32);
2764 /* fall through */
2765 case ETHTOOL_GRXRINGS:
2766 case ETHTOOL_GRXCLSRLCNT:
2767 case ETHTOOL_GRXCLSRULE:
55664f32 2768 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2769 convert_out = true;
2770 /* fall through */
2771 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2772 buf_size += sizeof(struct ethtool_rxnfc);
2773 convert_in = true;
2774 break;
2775 }
2776
2777 ifr = compat_alloc_user_space(buf_size);
954b1244 2778 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2779
2780 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2781 return -EFAULT;
2782
3a7da39d
BH
2783 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2784 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2785 return -EFAULT;
2786
3a7da39d 2787 if (convert_in) {
127fe533 2788 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2789 * fs.ring_cookie and at the end of fs, but nowhere else.
2790 */
127fe533
AD
2791 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2792 sizeof(compat_rxnfc->fs.m_ext) !=
2793 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2794 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2795 BUILD_BUG_ON(
2796 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2797 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2798 offsetof(struct ethtool_rxnfc, fs.location) -
2799 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2800
2801 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2802 (void __user *)(&rxnfc->fs.m_ext + 1) -
2803 (void __user *)rxnfc) ||
3a7da39d
BH
2804 copy_in_user(&rxnfc->fs.ring_cookie,
2805 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2806 (void __user *)(&rxnfc->fs.location + 1) -
2807 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2808 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2809 sizeof(rxnfc->rule_cnt)))
2810 return -EFAULT;
2811 }
2812
2813 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2814 if (ret)
2815 return ret;
2816
2817 if (convert_out) {
2818 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2819 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2820 (const void __user *)rxnfc) ||
3a7da39d
BH
2821 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2822 &rxnfc->fs.ring_cookie,
954b1244
SH
2823 (const void __user *)(&rxnfc->fs.location + 1) -
2824 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2825 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2826 sizeof(rxnfc->rule_cnt)))
2827 return -EFAULT;
2828
2829 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2830 /* As an optimisation, we only copy the actual
2831 * number of rules that the underlying
2832 * function returned. Since Mallory might
2833 * change the rule count in user memory, we
2834 * check that it is less than the rule count
2835 * originally given (as the user buffer size),
2836 * which has been range-checked.
2837 */
2838 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2839 return -EFAULT;
2840 if (actual_rule_cnt < rule_cnt)
2841 rule_cnt = actual_rule_cnt;
2842 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2843 &rxnfc->rule_locs[0],
2844 rule_cnt * sizeof(u32)))
2845 return -EFAULT;
2846 }
2847 }
2848
2849 return 0;
7a229387
AB
2850}
2851
7a50a240
AB
2852static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2853{
2854 void __user *uptr;
2855 compat_uptr_t uptr32;
2856 struct ifreq __user *uifr;
2857
c6d409cf 2858 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2859 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2860 return -EFAULT;
2861
2862 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2863 return -EFAULT;
2864
2865 uptr = compat_ptr(uptr32);
2866
2867 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2868 return -EFAULT;
2869
2870 return dev_ioctl(net, SIOCWANDEV, uifr);
2871}
2872
6b96018b
AB
2873static int bond_ioctl(struct net *net, unsigned int cmd,
2874 struct compat_ifreq __user *ifr32)
7a229387
AB
2875{
2876 struct ifreq kifr;
7a229387
AB
2877 mm_segment_t old_fs;
2878 int err;
7a229387
AB
2879
2880 switch (cmd) {
2881 case SIOCBONDENSLAVE:
2882 case SIOCBONDRELEASE:
2883 case SIOCBONDSETHWADDR:
2884 case SIOCBONDCHANGEACTIVE:
6b96018b 2885 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2886 return -EFAULT;
2887
2888 old_fs = get_fs();
c6d409cf 2889 set_fs(KERNEL_DS);
c3f52ae6 2890 err = dev_ioctl(net, cmd,
2891 (struct ifreq __user __force *) &kifr);
c6d409cf 2892 set_fs(old_fs);
7a229387
AB
2893
2894 return err;
7a229387 2895 default:
07d106d0 2896 return -ENOIOCTLCMD;
ccbd6a5a 2897 }
7a229387
AB
2898}
2899
590d4693
BH
2900/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2901static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2902 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2903{
2904 struct ifreq __user *u_ifreq64;
7a229387
AB
2905 char tmp_buf[IFNAMSIZ];
2906 void __user *data64;
2907 u32 data32;
2908
2909 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2910 IFNAMSIZ))
2911 return -EFAULT;
417c3522 2912 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2913 return -EFAULT;
2914 data64 = compat_ptr(data32);
2915
2916 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2917
7a229387
AB
2918 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2919 IFNAMSIZ))
2920 return -EFAULT;
417c3522 2921 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2922 return -EFAULT;
2923
6b96018b 2924 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2925}
2926
6b96018b
AB
2927static int dev_ifsioc(struct net *net, struct socket *sock,
2928 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2929{
a2116ed2 2930 struct ifreq __user *uifr;
7a229387
AB
2931 int err;
2932
a2116ed2
AB
2933 uifr = compat_alloc_user_space(sizeof(*uifr));
2934 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2935 return -EFAULT;
2936
2937 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2938
7a229387
AB
2939 if (!err) {
2940 switch (cmd) {
2941 case SIOCGIFFLAGS:
2942 case SIOCGIFMETRIC:
2943 case SIOCGIFMTU:
2944 case SIOCGIFMEM:
2945 case SIOCGIFHWADDR:
2946 case SIOCGIFINDEX:
2947 case SIOCGIFADDR:
2948 case SIOCGIFBRDADDR:
2949 case SIOCGIFDSTADDR:
2950 case SIOCGIFNETMASK:
fab2532b 2951 case SIOCGIFPFLAGS:
7a229387 2952 case SIOCGIFTXQLEN:
fab2532b
AB
2953 case SIOCGMIIPHY:
2954 case SIOCGMIIREG:
a2116ed2 2955 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2956 err = -EFAULT;
2957 break;
2958 }
2959 }
2960 return err;
2961}
2962
a2116ed2
AB
2963static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2964 struct compat_ifreq __user *uifr32)
2965{
2966 struct ifreq ifr;
2967 struct compat_ifmap __user *uifmap32;
2968 mm_segment_t old_fs;
2969 int err;
2970
2971 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2972 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2973 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2974 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2975 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2976 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2977 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2978 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2979 if (err)
2980 return -EFAULT;
2981
2982 old_fs = get_fs();
c6d409cf 2983 set_fs(KERNEL_DS);
c3f52ae6 2984 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2985 set_fs(old_fs);
a2116ed2
AB
2986
2987 if (cmd == SIOCGIFMAP && !err) {
2988 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2989 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2990 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2991 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2992 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2993 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2994 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2995 if (err)
2996 err = -EFAULT;
2997 }
2998 return err;
2999}
3000
7a229387 3001struct rtentry32 {
c6d409cf 3002 u32 rt_pad1;
7a229387
AB
3003 struct sockaddr rt_dst; /* target address */
3004 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
3005 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
3006 unsigned short rt_flags;
3007 short rt_pad2;
3008 u32 rt_pad3;
3009 unsigned char rt_tos;
3010 unsigned char rt_class;
3011 short rt_pad4;
3012 short rt_metric; /* +1 for binary compatibility! */
7a229387 3013 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3014 u32 rt_mtu; /* per route MTU/Window */
3015 u32 rt_window; /* Window clamping */
7a229387
AB
3016 unsigned short rt_irtt; /* Initial RTT */
3017};
3018
3019struct in6_rtmsg32 {
3020 struct in6_addr rtmsg_dst;
3021 struct in6_addr rtmsg_src;
3022 struct in6_addr rtmsg_gateway;
3023 u32 rtmsg_type;
3024 u16 rtmsg_dst_len;
3025 u16 rtmsg_src_len;
3026 u32 rtmsg_metric;
3027 u32 rtmsg_info;
3028 u32 rtmsg_flags;
3029 s32 rtmsg_ifindex;
3030};
3031
6b96018b
AB
3032static int routing_ioctl(struct net *net, struct socket *sock,
3033 unsigned int cmd, void __user *argp)
7a229387
AB
3034{
3035 int ret;
3036 void *r = NULL;
3037 struct in6_rtmsg r6;
3038 struct rtentry r4;
3039 char devname[16];
3040 u32 rtdev;
3041 mm_segment_t old_fs = get_fs();
3042
6b96018b
AB
3043 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3044 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3045 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3046 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3047 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3048 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3049 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3050 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3051 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3052 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3053 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3054
3055 r = (void *) &r6;
3056 } else { /* ipv4 */
6b96018b 3057 struct rtentry32 __user *ur4 = argp;
c6d409cf 3058 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3059 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3060 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3061 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3062 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3063 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3064 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3065 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3066 if (rtdev) {
c6d409cf 3067 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3068 r4.rt_dev = (char __user __force *)devname;
3069 devname[15] = 0;
7a229387
AB
3070 } else
3071 r4.rt_dev = NULL;
3072
3073 r = (void *) &r4;
3074 }
3075
3076 if (ret) {
3077 ret = -EFAULT;
3078 goto out;
3079 }
3080
c6d409cf 3081 set_fs(KERNEL_DS);
6b96018b 3082 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3083 set_fs(old_fs);
7a229387
AB
3084
3085out:
7a229387
AB
3086 return ret;
3087}
3088
3089/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3090 * for some operations; this forces use of the newer bridge-utils that
25985edc 3091 * use compatible ioctls
7a229387 3092 */
6b96018b 3093static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3094{
6b96018b 3095 compat_ulong_t tmp;
7a229387 3096
6b96018b 3097 if (get_user(tmp, argp))
7a229387
AB
3098 return -EFAULT;
3099 if (tmp == BRCTL_GET_VERSION)
3100 return BRCTL_VERSION + 1;
3101 return -EINVAL;
3102}
3103
6b96018b
AB
3104static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3105 unsigned int cmd, unsigned long arg)
3106{
3107 void __user *argp = compat_ptr(arg);
3108 struct sock *sk = sock->sk;
3109 struct net *net = sock_net(sk);
7a229387 3110
6b96018b 3111 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3112 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3113
3114 switch (cmd) {
3115 case SIOCSIFBR:
3116 case SIOCGIFBR:
3117 return old_bridge_ioctl(argp);
3118 case SIOCGIFNAME:
3119 return dev_ifname32(net, argp);
3120 case SIOCGIFCONF:
3121 return dev_ifconf(net, argp);
3122 case SIOCETHTOOL:
3123 return ethtool_ioctl(net, argp);
7a50a240
AB
3124 case SIOCWANDEV:
3125 return compat_siocwandev(net, argp);
a2116ed2
AB
3126 case SIOCGIFMAP:
3127 case SIOCSIFMAP:
3128 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3129 case SIOCBONDENSLAVE:
3130 case SIOCBONDRELEASE:
3131 case SIOCBONDSETHWADDR:
6b96018b
AB
3132 case SIOCBONDCHANGEACTIVE:
3133 return bond_ioctl(net, cmd, argp);
3134 case SIOCADDRT:
3135 case SIOCDELRT:
3136 return routing_ioctl(net, sock, cmd, argp);
3137 case SIOCGSTAMP:
3138 return do_siocgstamp(net, sock, cmd, argp);
3139 case SIOCGSTAMPNS:
3140 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3141 case SIOCBONDSLAVEINFOQUERY:
3142 case SIOCBONDINFOQUERY:
a2116ed2 3143 case SIOCSHWTSTAMP:
fd468c74 3144 case SIOCGHWTSTAMP:
590d4693 3145 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3146
3147 case FIOSETOWN:
3148 case SIOCSPGRP:
3149 case FIOGETOWN:
3150 case SIOCGPGRP:
3151 case SIOCBRADDBR:
3152 case SIOCBRDELBR:
3153 case SIOCGIFVLAN:
3154 case SIOCSIFVLAN:
3155 case SIOCADDDLCI:
3156 case SIOCDELDLCI:
c62cce2c 3157 case SIOCGSKNS:
6b96018b
AB
3158 return sock_ioctl(file, cmd, arg);
3159
3160 case SIOCGIFFLAGS:
3161 case SIOCSIFFLAGS:
3162 case SIOCGIFMETRIC:
3163 case SIOCSIFMETRIC:
3164 case SIOCGIFMTU:
3165 case SIOCSIFMTU:
3166 case SIOCGIFMEM:
3167 case SIOCSIFMEM:
3168 case SIOCGIFHWADDR:
3169 case SIOCSIFHWADDR:
3170 case SIOCADDMULTI:
3171 case SIOCDELMULTI:
3172 case SIOCGIFINDEX:
6b96018b
AB
3173 case SIOCGIFADDR:
3174 case SIOCSIFADDR:
3175 case SIOCSIFHWBROADCAST:
6b96018b 3176 case SIOCDIFADDR:
6b96018b
AB
3177 case SIOCGIFBRDADDR:
3178 case SIOCSIFBRDADDR:
3179 case SIOCGIFDSTADDR:
3180 case SIOCSIFDSTADDR:
3181 case SIOCGIFNETMASK:
3182 case SIOCSIFNETMASK:
3183 case SIOCSIFPFLAGS:
3184 case SIOCGIFPFLAGS:
3185 case SIOCGIFTXQLEN:
3186 case SIOCSIFTXQLEN:
3187 case SIOCBRADDIF:
3188 case SIOCBRDELIF:
9177efd3
AB
3189 case SIOCSIFNAME:
3190 case SIOCGMIIPHY:
3191 case SIOCGMIIREG:
3192 case SIOCSMIIREG:
6b96018b 3193 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3194
6b96018b
AB
3195 case SIOCSARP:
3196 case SIOCGARP:
3197 case SIOCDARP:
6b96018b 3198 case SIOCATMARK:
9177efd3
AB
3199 return sock_do_ioctl(net, sock, cmd, arg);
3200 }
3201
6b96018b
AB
3202 return -ENOIOCTLCMD;
3203}
7a229387 3204
95c96174 3205static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3206 unsigned long arg)
89bbfc95
SP
3207{
3208 struct socket *sock = file->private_data;
3209 int ret = -ENOIOCTLCMD;
87de87d5
DM
3210 struct sock *sk;
3211 struct net *net;
3212
3213 sk = sock->sk;
3214 net = sock_net(sk);
89bbfc95
SP
3215
3216 if (sock->ops->compat_ioctl)
3217 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3218
87de87d5
DM
3219 if (ret == -ENOIOCTLCMD &&
3220 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3221 ret = compat_wext_handle_ioctl(net, cmd, arg);
3222
6b96018b
AB
3223 if (ret == -ENOIOCTLCMD)
3224 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3225
89bbfc95
SP
3226 return ret;
3227}
3228#endif
3229
ac5a488e
SS
3230int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3231{
3232 return sock->ops->bind(sock, addr, addrlen);
3233}
c6d409cf 3234EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3235
3236int kernel_listen(struct socket *sock, int backlog)
3237{
3238 return sock->ops->listen(sock, backlog);
3239}
c6d409cf 3240EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3241
3242int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3243{
3244 struct sock *sk = sock->sk;
3245 int err;
3246
3247 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3248 newsock);
3249 if (err < 0)
3250 goto done;
3251
3252 err = sock->ops->accept(sock, *newsock, flags);
3253 if (err < 0) {
3254 sock_release(*newsock);
fa8705b0 3255 *newsock = NULL;
ac5a488e
SS
3256 goto done;
3257 }
3258
3259 (*newsock)->ops = sock->ops;
1b08534e 3260 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3261
3262done:
3263 return err;
3264}
c6d409cf 3265EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3266
3267int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3268 int flags)
ac5a488e
SS
3269{
3270 return sock->ops->connect(sock, addr, addrlen, flags);
3271}
c6d409cf 3272EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3273
3274int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3275 int *addrlen)
3276{
3277 return sock->ops->getname(sock, addr, addrlen, 0);
3278}
c6d409cf 3279EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3280
3281int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3282 int *addrlen)
3283{
3284 return sock->ops->getname(sock, addr, addrlen, 1);
3285}
c6d409cf 3286EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3287
3288int kernel_getsockopt(struct socket *sock, int level, int optname,
3289 char *optval, int *optlen)
3290{
3291 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3292 char __user *uoptval;
3293 int __user *uoptlen;
ac5a488e
SS
3294 int err;
3295
fb8621bb
NK
3296 uoptval = (char __user __force *) optval;
3297 uoptlen = (int __user __force *) optlen;
3298
ac5a488e
SS
3299 set_fs(KERNEL_DS);
3300 if (level == SOL_SOCKET)
fb8621bb 3301 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3302 else
fb8621bb
NK
3303 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3304 uoptlen);
ac5a488e
SS
3305 set_fs(oldfs);
3306 return err;
3307}
c6d409cf 3308EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3309
3310int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3311 char *optval, unsigned int optlen)
ac5a488e
SS
3312{
3313 mm_segment_t oldfs = get_fs();
fb8621bb 3314 char __user *uoptval;
ac5a488e
SS
3315 int err;
3316
fb8621bb
NK
3317 uoptval = (char __user __force *) optval;
3318
ac5a488e
SS
3319 set_fs(KERNEL_DS);
3320 if (level == SOL_SOCKET)
fb8621bb 3321 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3322 else
fb8621bb 3323 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3324 optlen);
3325 set_fs(oldfs);
3326 return err;
3327}
c6d409cf 3328EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3329
3330int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3331 size_t size, int flags)
3332{
3333 if (sock->ops->sendpage)
3334 return sock->ops->sendpage(sock, page, offset, size, flags);
3335
3336 return sock_no_sendpage(sock, page, offset, size, flags);
3337}
c6d409cf 3338EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3339
3340int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3341{
3342 mm_segment_t oldfs = get_fs();
3343 int err;
3344
3345 set_fs(KERNEL_DS);
3346 err = sock->ops->ioctl(sock, cmd, arg);
3347 set_fs(oldfs);
3348
3349 return err;
3350}
c6d409cf 3351EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3352
91cf45f0
TM
3353int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3354{
3355 return sock->ops->shutdown(sock, how);
3356}
91cf45f0 3357EXPORT_SYMBOL(kernel_sock_shutdown);