]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
tcp: export sender limits chronographs to TCP_INFO
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll globals; they only exist when CONFIG_NET_RX_BUSY_POLL is set.
 * NOTE(review): judging by the sysctl_ prefix these are exposed as sysctls
 * elsewhere -- confirm against the sysctl table.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
bba0bd31
AG
323static int sockfs_xattr_get(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, void *value, size_t size)
326{
327 if (value) {
328 if (dentry->d_name.len + 1 > size)
329 return -ERANGE;
330 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
331 }
332 return dentry->d_name.len + 1;
333}
334
335#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
336#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
337#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
338
339static const struct xattr_handler sockfs_xattr_handler = {
340 .name = XATTR_NAME_SOCKPROTONAME,
341 .get = sockfs_xattr_get,
342};
343
4a590153
AG
344static int sockfs_security_xattr_set(const struct xattr_handler *handler,
345 struct dentry *dentry, struct inode *inode,
346 const char *suffix, const void *value,
347 size_t size, int flags)
348{
349 /* Handled by LSM. */
350 return -EAGAIN;
351}
352
353static const struct xattr_handler sockfs_security_xattr_handler = {
354 .prefix = XATTR_SECURITY_PREFIX,
355 .set = sockfs_security_xattr_set,
356};
357
bba0bd31
AG
358static const struct xattr_handler *sockfs_xattr_handlers[] = {
359 &sockfs_xattr_handler,
4a590153 360 &sockfs_security_xattr_handler,
bba0bd31
AG
361 NULL
362};
363
c74a1cbb
AV
364static struct dentry *sockfs_mount(struct file_system_type *fs_type,
365 int flags, const char *dev_name, void *data)
366{
bba0bd31
AG
367 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
368 sockfs_xattr_handlers,
369 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
370}
371
372static struct vfsmount *sock_mnt __read_mostly;
373
374static struct file_system_type sock_fs_type = {
375 .name = "sockfs",
376 .mount = sockfs_mount,
377 .kill_sb = kill_anon_super,
378};
379
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
396
aab174f0 397struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 398{
7cbe66b6 399 struct qstr name = { .name = "" };
2c48b9c4 400 struct path path;
7cbe66b6 401 struct file *file;
1da177e4 402
600e1779
MY
403 if (dname) {
404 name.name = dname;
405 name.len = strlen(name.name);
406 } else if (sock->sk) {
407 name.name = sock->sk->sk_prot_creator->name;
408 name.len = strlen(name.name);
409 }
4b936885 410 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
411 if (unlikely(!path.dentry))
412 return ERR_PTR(-ENOMEM);
2c48b9c4 413 path.mnt = mntget(sock_mnt);
39d8c1b6 414
2c48b9c4 415 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 416
2c48b9c4 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 418 &socket_file_ops);
b5ffe634 419 if (IS_ERR(file)) {
cc3808f8 420 /* drop dentry, keep inode */
c5ef6035 421 ihold(d_inode(path.dentry));
2c48b9c4 422 path_put(&path);
39b65252 423 return file;
cc3808f8
AV
424 }
425
426 sock->file = file;
77d27200 427 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 428 file->private_data = sock;
28407630 429 return file;
39d8c1b6 430}
56b31d1c 431EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 432
56b31d1c 433static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
434{
435 struct file *newfile;
28407630
AV
436 int fd = get_unused_fd_flags(flags);
437 if (unlikely(fd < 0))
438 return fd;
39d8c1b6 439
aab174f0 440 newfile = sock_alloc_file(sock, flags, NULL);
28407630 441 if (likely(!IS_ERR(newfile))) {
39d8c1b6 442 fd_install(fd, newfile);
28407630
AV
443 return fd;
444 }
7cbe66b6 445
28407630
AV
446 put_unused_fd(fd);
447 return PTR_ERR(newfile);
1da177e4
LT
448}
449
406a3c63 450struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 451{
6cb153ca
BL
452 if (file->f_op == &socket_file_ops)
453 return file->private_data; /* set in sock_map_fd */
454
23bb80d2
ED
455 *err = -ENOTSOCK;
456 return NULL;
6cb153ca 457}
406a3c63 458EXPORT_SYMBOL(sock_from_file);
6cb153ca 459
1da177e4 460/**
c6d409cf 461 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
462 * @fd: file handle
463 * @err: pointer to an error code return
464 *
465 * The file handle passed in is locked and the socket it is bound
466 * too is returned. If an error occurs the err pointer is overwritten
467 * with a negative errno code and NULL is returned. The function checks
468 * for both invalid handles and passing a handle which is not a socket.
469 *
470 * On a success the socket object pointer is returned.
471 */
472
473struct socket *sockfd_lookup(int fd, int *err)
474{
475 struct file *file;
1da177e4
LT
476 struct socket *sock;
477
89bddce5
SH
478 file = fget(fd);
479 if (!file) {
1da177e4
LT
480 *err = -EBADF;
481 return NULL;
482 }
89bddce5 483
6cb153ca
BL
484 sock = sock_from_file(file, err);
485 if (!sock)
1da177e4 486 fput(file);
6cb153ca
BL
487 return sock;
488}
c6d409cf 489EXPORT_SYMBOL(sockfd_lookup);
1da177e4 490
6cb153ca
BL
491static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
492{
00e188ef 493 struct fd f = fdget(fd);
6cb153ca
BL
494 struct socket *sock;
495
3672558c 496 *err = -EBADF;
00e188ef
AV
497 if (f.file) {
498 sock = sock_from_file(f.file, err);
499 if (likely(sock)) {
500 *fput_needed = f.flags;
6cb153ca 501 return sock;
00e188ef
AV
502 }
503 fdput(f);
1da177e4 504 }
6cb153ca 505 return NULL;
1da177e4
LT
506}
507
600e1779
MY
508static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
509 size_t size)
510{
511 ssize_t len;
512 ssize_t used = 0;
513
c5ef6035 514 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
515 if (len < 0)
516 return len;
517 used += len;
518 if (buffer) {
519 if (size < used)
520 return -ERANGE;
521 buffer += len;
522 }
523
524 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
525 used += len;
526 if (buffer) {
527 if (size < used)
528 return -ERANGE;
529 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
530 buffer += len;
531 }
532
533 return used;
534}
535
86741ec2
LC
536int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
537{
538 int err = simple_setattr(dentry, iattr);
539
540 if (!err) {
541 struct socket *sock = SOCKET_I(d_inode(dentry));
542
543 sock->sk->sk_uid = iattr->ia_uid;
544 }
545
546 return err;
547}
548
600e1779 549static const struct inode_operations sockfs_inode_ops = {
600e1779 550 .listxattr = sockfs_listxattr,
86741ec2 551 .setattr = sockfs_setattr,
600e1779
MY
552};
553
1da177e4
LT
554/**
555 * sock_alloc - allocate a socket
89bddce5 556 *
1da177e4
LT
557 * Allocate a new inode and socket object. The two are bound together
558 * and initialised. The socket is then returned. If we are out of inodes
559 * NULL is returned.
560 */
561
f4a00aac 562struct socket *sock_alloc(void)
1da177e4 563{
89bddce5
SH
564 struct inode *inode;
565 struct socket *sock;
1da177e4 566
a209dfc7 567 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
568 if (!inode)
569 return NULL;
570
571 sock = SOCKET_I(inode);
572
29a020d3 573 kmemcheck_annotate_bitfield(sock, type);
85fe4025 574 inode->i_ino = get_next_ino();
89bddce5 575 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
576 inode->i_uid = current_fsuid();
577 inode->i_gid = current_fsgid();
600e1779 578 inode->i_op = &sockfs_inode_ops;
1da177e4 579
19e8d69c 580 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
581 return sock;
582}
f4a00aac 583EXPORT_SYMBOL(sock_alloc);
1da177e4 584
1da177e4
LT
585/**
586 * sock_release - close a socket
587 * @sock: socket to close
588 *
589 * The socket is released from the protocol stack if it has a release
590 * callback, and the inode is then released if the socket is bound to
89bddce5 591 * an inode not a file.
1da177e4 592 */
89bddce5 593
1da177e4
LT
594void sock_release(struct socket *sock)
595{
596 if (sock->ops) {
597 struct module *owner = sock->ops->owner;
598
599 sock->ops->release(sock);
600 sock->ops = NULL;
601 module_put(owner);
602 }
603
eaefd110 604 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 605 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 606
19e8d69c 607 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
608 if (!sock->file) {
609 iput(SOCK_INODE(sock));
610 return;
611 }
89bddce5 612 sock->file = NULL;
1da177e4 613}
c6d409cf 614EXPORT_SYMBOL(sock_release);
1da177e4 615
c14ac945 616void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 617{
140c55d4
ED
618 u8 flags = *tx_flags;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
621 flags |= SKBTX_HW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
624 flags |= SKBTX_SW_TSTAMP;
625
c14ac945 626 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
627 flags |= SKBTX_SCHED_TSTAMP;
628
140c55d4 629 *tx_flags = flags;
20d49473 630}
67cc0d40 631EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 632
d8725c86 633static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 634{
01e97e65 635 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
636 BUG_ON(ret == -EIOCBQUEUED);
637 return ret;
1da177e4
LT
638}
639
/*
 * Send @msg on @sock after consulting the security_socket_sendmsg() hook.
 * Returns the hook's error if it is non-zero, otherwise the result of the
 * protocol's sendmsg operation.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err = security_socket_sendmsg(sock, msg, msg_data_left(msg));

	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
648
649int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
650 struct kvec *vec, size_t num, size_t size)
651{
6aa24814 652 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 653 return sock_sendmsg(sock, msg);
1da177e4 654}
c6d409cf 655EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 656
92f37fd2
ED
657/*
658 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
659 */
660void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
661 struct sk_buff *skb)
662{
20d49473 663 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 664 struct scm_timestamping tss;
20d49473
PO
665 int empty = 1;
666 struct skb_shared_hwtstamps *shhwtstamps =
667 skb_hwtstamps(skb);
668
669 /* Race occurred between timestamp enabling and packet
670 receiving. Fill in the current time for now. */
671 if (need_software_tstamp && skb->tstamp.tv64 == 0)
672 __net_timestamp(skb);
673
674 if (need_software_tstamp) {
675 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
676 struct timeval tv;
677 skb_get_timestamp(skb, &tv);
678 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
679 sizeof(tv), &tv);
680 } else {
f24b9be5
WB
681 struct timespec ts;
682 skb_get_timestampns(skb, &ts);
20d49473 683 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 684 sizeof(ts), &ts);
20d49473
PO
685 }
686 }
687
f24b9be5 688 memset(&tss, 0, sizeof(tss));
c199105d 689 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 690 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 691 empty = 0;
4d276eb6 692 if (shhwtstamps &&
b9f40e21 693 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 694 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 695 empty = 0;
20d49473
PO
696 if (!empty)
697 put_cmsg(msg, SOL_SOCKET,
f24b9be5 698 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 699}
7c81fd8b
ACM
700EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
701
6e3e939f
JB
702void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
703 struct sk_buff *skb)
704{
705 int ack;
706
707 if (!sock_flag(sk, SOCK_WIFI_STATUS))
708 return;
709 if (!skb->wifi_acked_valid)
710 return;
711
712 ack = skb->wifi_acked;
713
714 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
715}
716EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
717
11165f14 718static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
719 struct sk_buff *skb)
3b885787 720{
744d5a3e 721 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 722 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 723 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
724}
725
/*
 * Attach both kinds of receive metadata to @msg: first the timestamp
 * control messages, then the drop counter.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 733
1b784140 734static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 735 int flags)
1da177e4 736{
2da62906 737 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
738}
739
/*
 * Receive into @msg from @sock after consulting the
 * security_socket_recvmsg() hook.  Returns the hook's error if it is
 * non-zero, otherwise the result of the protocol's recvmsg operation.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);

	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 747
c1249c0a
ML
748/**
749 * kernel_recvmsg - Receive a message from a socket (kernel space)
750 * @sock: The socket to receive the message from
751 * @msg: Received message
752 * @vec: Input s/g array for message data
753 * @num: Size of input s/g array
754 * @size: Number of bytes to read
755 * @flags: Message flags (MSG_DONTWAIT, etc...)
756 *
757 * On return the msg structure contains the scatter/gather array passed in the
758 * vec argument. The array is modified so that it consists of the unfilled
759 * portion of the original array.
760 *
761 * The returned value is the total number of bytes received, or an error.
762 */
89bddce5
SH
763int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
764 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
765{
766 mm_segment_t oldfs = get_fs();
767 int result;
768
6aa24814 769 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 770 set_fs(KERNEL_DS);
2da62906 771 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
772 set_fs(oldfs);
773 return result;
774}
c6d409cf 775EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 776
ce1d4d3e
CH
777static ssize_t sock_sendpage(struct file *file, struct page *page,
778 int offset, size_t size, loff_t *ppos, int more)
1da177e4 779{
1da177e4
LT
780 struct socket *sock;
781 int flags;
782
ce1d4d3e
CH
783 sock = file->private_data;
784
35f9c09f
ED
785 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
786 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
787 flags |= more;
ce1d4d3e 788
e6949583 789 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 790}
1da177e4 791
9c55e01c 792static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 793 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
794 unsigned int flags)
795{
796 struct socket *sock = file->private_data;
797
997b37da
RDC
798 if (unlikely(!sock->ops->splice_read))
799 return -EINVAL;
800
9c55e01c
JA
801 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
802}
803
8ae5e030 804static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 805{
6d652330
AV
806 struct file *file = iocb->ki_filp;
807 struct socket *sock = file->private_data;
0345f931 808 struct msghdr msg = {.msg_iter = *to,
809 .msg_iocb = iocb};
8ae5e030 810 ssize_t res;
ce1d4d3e 811
8ae5e030
AV
812 if (file->f_flags & O_NONBLOCK)
813 msg.msg_flags = MSG_DONTWAIT;
814
815 if (iocb->ki_pos != 0)
1da177e4 816 return -ESPIPE;
027445c3 817
66ee59af 818 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
819 return 0;
820
2da62906 821 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
822 *to = msg.msg_iter;
823 return res;
1da177e4
LT
824}
825
8ae5e030 826static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 827{
6d652330
AV
828 struct file *file = iocb->ki_filp;
829 struct socket *sock = file->private_data;
0345f931 830 struct msghdr msg = {.msg_iter = *from,
831 .msg_iocb = iocb};
8ae5e030 832 ssize_t res;
1da177e4 833
8ae5e030 834 if (iocb->ki_pos != 0)
ce1d4d3e 835 return -ESPIPE;
027445c3 836
8ae5e030
AV
837 if (file->f_flags & O_NONBLOCK)
838 msg.msg_flags = MSG_DONTWAIT;
839
6d652330
AV
840 if (sock->type == SOCK_SEQPACKET)
841 msg.msg_flags |= MSG_EOR;
842
d8725c86 843 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
844 *from = msg.msg_iter;
845 return res;
1da177e4
LT
846}
847
1da177e4
LT
848/*
849 * Atomic setting of ioctl hooks to avoid race
850 * with module unload.
851 */
852
4a3e2f71 853static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 854static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 855
881d966b 856void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 857{
4a3e2f71 858 mutex_lock(&br_ioctl_mutex);
1da177e4 859 br_ioctl_hook = hook;
4a3e2f71 860 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
861}
862EXPORT_SYMBOL(brioctl_set);
863
4a3e2f71 864static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 865static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 866
881d966b 867void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 868{
4a3e2f71 869 mutex_lock(&vlan_ioctl_mutex);
1da177e4 870 vlan_ioctl_hook = hook;
4a3e2f71 871 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
872}
873EXPORT_SYMBOL(vlan_ioctl_set);
874
4a3e2f71 875static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 876static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 877
89bddce5 878void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 879{
4a3e2f71 880 mutex_lock(&dlci_ioctl_mutex);
1da177e4 881 dlci_ioctl_hook = hook;
4a3e2f71 882 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
883}
884EXPORT_SYMBOL(dlci_ioctl_set);
885
6b96018b
AB
886static long sock_do_ioctl(struct net *net, struct socket *sock,
887 unsigned int cmd, unsigned long arg)
888{
889 int err;
890 void __user *argp = (void __user *)arg;
891
892 err = sock->ops->ioctl(sock, cmd, arg);
893
894 /*
895 * If this ioctl is unknown try to hand it down
896 * to the NIC driver.
897 */
898 if (err == -ENOIOCTLCMD)
899 err = dev_ioctl(net, cmd, argp);
900
901 return err;
902}
903
1da177e4
LT
904/*
905 * With an ioctl, arg may well be a user mode pointer, but we don't know
906 * what to do with it - that's up to the protocol still.
907 */
908
c62cce2c
AV
909static struct ns_common *get_net_ns(struct ns_common *ns)
910{
911 return &get_net(container_of(ns, struct net, ns))->ns;
912}
913
1da177e4
LT
914static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
915{
916 struct socket *sock;
881d966b 917 struct sock *sk;
1da177e4
LT
918 void __user *argp = (void __user *)arg;
919 int pid, err;
881d966b 920 struct net *net;
1da177e4 921
b69aee04 922 sock = file->private_data;
881d966b 923 sk = sock->sk;
3b1e0a65 924 net = sock_net(sk);
1da177e4 925 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 926 err = dev_ioctl(net, cmd, argp);
1da177e4 927 } else
3d23e349 928#ifdef CONFIG_WEXT_CORE
1da177e4 929 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 930 err = dev_ioctl(net, cmd, argp);
1da177e4 931 } else
3d23e349 932#endif
89bddce5 933 switch (cmd) {
1da177e4
LT
934 case FIOSETOWN:
935 case SIOCSPGRP:
936 err = -EFAULT;
937 if (get_user(pid, (int __user *)argp))
938 break;
e0b93edd
JL
939 f_setown(sock->file, pid, 1);
940 err = 0;
1da177e4
LT
941 break;
942 case FIOGETOWN:
943 case SIOCGPGRP:
609d7fa9 944 err = put_user(f_getown(sock->file),
89bddce5 945 (int __user *)argp);
1da177e4
LT
946 break;
947 case SIOCGIFBR:
948 case SIOCSIFBR:
949 case SIOCBRADDBR:
950 case SIOCBRDELBR:
951 err = -ENOPKG;
952 if (!br_ioctl_hook)
953 request_module("bridge");
954
4a3e2f71 955 mutex_lock(&br_ioctl_mutex);
89bddce5 956 if (br_ioctl_hook)
881d966b 957 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 958 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
959 break;
960 case SIOCGIFVLAN:
961 case SIOCSIFVLAN:
962 err = -ENOPKG;
963 if (!vlan_ioctl_hook)
964 request_module("8021q");
965
4a3e2f71 966 mutex_lock(&vlan_ioctl_mutex);
1da177e4 967 if (vlan_ioctl_hook)
881d966b 968 err = vlan_ioctl_hook(net, argp);
4a3e2f71 969 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 970 break;
1da177e4
LT
971 case SIOCADDDLCI:
972 case SIOCDELDLCI:
973 err = -ENOPKG;
974 if (!dlci_ioctl_hook)
975 request_module("dlci");
976
7512cbf6
PE
977 mutex_lock(&dlci_ioctl_mutex);
978 if (dlci_ioctl_hook)
1da177e4 979 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 980 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 981 break;
c62cce2c
AV
982 case SIOCGSKNS:
983 err = -EPERM;
984 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
985 break;
986
987 err = open_related_ns(&net->ns, get_net_ns);
988 break;
1da177e4 989 default:
6b96018b 990 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 991 break;
89bddce5 992 }
1da177e4
LT
993 return err;
994}
995
996int sock_create_lite(int family, int type, int protocol, struct socket **res)
997{
998 int err;
999 struct socket *sock = NULL;
89bddce5 1000
1da177e4
LT
1001 err = security_socket_create(family, type, protocol, 1);
1002 if (err)
1003 goto out;
1004
1005 sock = sock_alloc();
1006 if (!sock) {
1007 err = -ENOMEM;
1008 goto out;
1009 }
1010
1da177e4 1011 sock->type = type;
7420ed23
VY
1012 err = security_socket_post_create(sock, family, type, protocol, 1);
1013 if (err)
1014 goto out_release;
1015
1da177e4
LT
1016out:
1017 *res = sock;
1018 return err;
7420ed23
VY
1019out_release:
1020 sock_release(sock);
1021 sock = NULL;
1022 goto out;
1da177e4 1023}
c6d409cf 1024EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1025
1026/* No kernel lock held - perfect */
89bddce5 1027static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1028{
cbf55001 1029 unsigned int busy_flag = 0;
1da177e4
LT
1030 struct socket *sock;
1031
1032 /*
89bddce5 1033 * We can't return errors to poll, so it's either yes or no.
1da177e4 1034 */
b69aee04 1035 sock = file->private_data;
2d48d67f 1036
cbf55001 1037 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1038 /* this socket can poll_ll so tell the system call */
cbf55001 1039 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1040
1041 /* once, only if requested by syscall */
cbf55001
ET
1042 if (wait && (wait->_key & POLL_BUSY_LOOP))
1043 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1044 }
1045
cbf55001 1046 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1047}
1048
89bddce5 1049static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1050{
b69aee04 1051 struct socket *sock = file->private_data;
1da177e4
LT
1052
1053 return sock->ops->mmap(file, sock, vma);
1054}
1055
/* release() for socket files: release the socket embedded in @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1061
1062/*
1063 * Update the socket async list
1064 *
1065 * Fasync_list locking strategy.
1066 *
1067 * 1. fasync_list is modified only under process context socket lock
1068 * i.e. under semaphore.
1069 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1070 * or under socket lock
1da177e4
LT
1071 */
1072
1073static int sock_fasync(int fd, struct file *filp, int on)
1074{
989a2979
ED
1075 struct socket *sock = filp->private_data;
1076 struct sock *sk = sock->sk;
eaefd110 1077 struct socket_wq *wq;
1da177e4 1078
989a2979 1079 if (sk == NULL)
1da177e4 1080 return -EINVAL;
1da177e4
LT
1081
1082 lock_sock(sk);
1e1d04e6 1083 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1084 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1085
eaefd110 1086 if (!wq->fasync_list)
989a2979
ED
1087 sock_reset_flag(sk, SOCK_FASYNC);
1088 else
bcdce719 1089 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1090
989a2979 1091 release_sock(sk);
1da177e4
LT
1092 return 0;
1093}
1094
ceb5d58b 1095/* This function may be called only under rcu_lock */
1da177e4 1096
ceb5d58b 1097int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1098{
ceb5d58b 1099 if (!wq || !wq->fasync_list)
1da177e4 1100 return -1;
ceb5d58b 1101
89bddce5 1102 switch (how) {
8d8ad9d7 1103 case SOCK_WAKE_WAITD:
ceb5d58b 1104 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1105 break;
1106 goto call_kill;
8d8ad9d7 1107 case SOCK_WAKE_SPACE:
ceb5d58b 1108 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1109 break;
1110 /* fall through */
8d8ad9d7 1111 case SOCK_WAKE_IO:
89bddce5 1112call_kill:
43815482 1113 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1114 break;
8d8ad9d7 1115 case SOCK_WAKE_URG:
43815482 1116 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1117 }
ceb5d58b 1118
1da177e4
LT
1119 return 0;
1120}
c6d409cf 1121EXPORT_SYMBOL(sock_wake_async);
1da177e4 1122
721db93a 1123int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1124 struct socket **res, int kern)
1da177e4
LT
1125{
1126 int err;
1127 struct socket *sock;
55737fda 1128 const struct net_proto_family *pf;
1da177e4
LT
1129
1130 /*
89bddce5 1131 * Check protocol is in range
1da177e4
LT
1132 */
1133 if (family < 0 || family >= NPROTO)
1134 return -EAFNOSUPPORT;
1135 if (type < 0 || type >= SOCK_MAX)
1136 return -EINVAL;
1137
1138 /* Compatibility.
1139
1140 This uglymoron is moved from INET layer to here to avoid
1141 deadlock in module load.
1142 */
1143 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1144 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1145 current->comm);
1da177e4
LT
1146 family = PF_PACKET;
1147 }
1148
1149 err = security_socket_create(family, type, protocol, kern);
1150 if (err)
1151 return err;
89bddce5 1152
55737fda
SH
1153 /*
1154 * Allocate the socket and allow the family to set things up. if
1155 * the protocol is 0, the family is instructed to select an appropriate
1156 * default.
1157 */
1158 sock = sock_alloc();
1159 if (!sock) {
e87cc472 1160 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1161 return -ENFILE; /* Not exactly a match, but its the
1162 closest posix thing */
1163 }
1164
1165 sock->type = type;
1166
95a5afca 1167#ifdef CONFIG_MODULES
89bddce5
SH
1168 /* Attempt to load a protocol module if the find failed.
1169 *
1170 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1171 * requested real, full-featured networking support upon configuration.
1172 * Otherwise module support will break!
1173 */
190683a9 1174 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1175 request_module("net-pf-%d", family);
1da177e4
LT
1176#endif
1177
55737fda
SH
1178 rcu_read_lock();
1179 pf = rcu_dereference(net_families[family]);
1180 err = -EAFNOSUPPORT;
1181 if (!pf)
1182 goto out_release;
1da177e4
LT
1183
1184 /*
1185 * We will call the ->create function, that possibly is in a loadable
1186 * module, so we have to bump that loadable module refcnt first.
1187 */
55737fda 1188 if (!try_module_get(pf->owner))
1da177e4
LT
1189 goto out_release;
1190
55737fda
SH
1191 /* Now protected by module ref count */
1192 rcu_read_unlock();
1193
3f378b68 1194 err = pf->create(net, sock, protocol, kern);
55737fda 1195 if (err < 0)
1da177e4 1196 goto out_module_put;
a79af59e 1197
1da177e4
LT
1198 /*
1199 * Now to bump the refcnt of the [loadable] module that owns this
1200 * socket at sock_release time we decrement its refcnt.
1201 */
55737fda
SH
1202 if (!try_module_get(sock->ops->owner))
1203 goto out_module_busy;
1204
1da177e4
LT
1205 /*
1206 * Now that we're done with the ->create function, the [loadable]
1207 * module can have its refcnt decremented
1208 */
55737fda 1209 module_put(pf->owner);
7420ed23
VY
1210 err = security_socket_post_create(sock, family, type, protocol, kern);
1211 if (err)
3b185525 1212 goto out_sock_release;
55737fda 1213 *res = sock;
1da177e4 1214
55737fda
SH
1215 return 0;
1216
1217out_module_busy:
1218 err = -EAFNOSUPPORT;
1da177e4 1219out_module_put:
55737fda
SH
1220 sock->ops = NULL;
1221 module_put(pf->owner);
1222out_sock_release:
1da177e4 1223 sock_release(sock);
55737fda
SH
1224 return err;
1225
1226out_release:
1227 rcu_read_unlock();
1228 goto out_sock_release;
1da177e4 1229}
721db93a 1230EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1231
1232int sock_create(int family, int type, int protocol, struct socket **res)
1233{
1b8d7ae4 1234 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1235}
c6d409cf 1236EXPORT_SYMBOL(sock_create);
1da177e4 1237
/*
 * Create a socket in namespace @net, with the kern argument of
 * __sock_create() set to 1.
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
c6d409cf 1242EXPORT_SYMBOL(sock_create_kern);
1da177e4 1243
3e0fa65f 1244SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1245{
1246 int retval;
1247 struct socket *sock;
a677a039
UD
1248 int flags;
1249
e38b36f3
UD
1250 /* Check the SOCK_* constants for consistency. */
1251 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1252 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1253 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1254 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1255
a677a039 1256 flags = type & ~SOCK_TYPE_MASK;
77d27200 1257 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1258 return -EINVAL;
1259 type &= SOCK_TYPE_MASK;
1da177e4 1260
aaca0bdc
UD
1261 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1262 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1263
1da177e4
LT
1264 retval = sock_create(family, type, protocol, &sock);
1265 if (retval < 0)
1266 goto out;
1267
77d27200 1268 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1269 if (retval < 0)
1270 goto out_release;
1271
1272out:
1273 /* It may be already another descriptor 8) Not kernel problem. */
1274 return retval;
1275
1276out_release:
1277 sock_release(sock);
1278 return retval;
1279}
1280
1281/*
1282 * Create a pair of connected sockets.
1283 */
1284
3e0fa65f
HC
1285SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1286 int __user *, usockvec)
1da177e4
LT
1287{
1288 struct socket *sock1, *sock2;
1289 int fd1, fd2, err;
db349509 1290 struct file *newfile1, *newfile2;
a677a039
UD
1291 int flags;
1292
1293 flags = type & ~SOCK_TYPE_MASK;
77d27200 1294 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1295 return -EINVAL;
1296 type &= SOCK_TYPE_MASK;
1da177e4 1297
aaca0bdc
UD
1298 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1299 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1300
1da177e4
LT
1301 /*
1302 * Obtain the first socket and check if the underlying protocol
1303 * supports the socketpair call.
1304 */
1305
1306 err = sock_create(family, type, protocol, &sock1);
1307 if (err < 0)
1308 goto out;
1309
1310 err = sock_create(family, type, protocol, &sock2);
1311 if (err < 0)
1312 goto out_release_1;
1313
1314 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1315 if (err < 0)
1da177e4
LT
1316 goto out_release_both;
1317
28407630 1318 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1319 if (unlikely(fd1 < 0)) {
1320 err = fd1;
db349509 1321 goto out_release_both;
bf3c23d1 1322 }
d73aa286 1323
28407630 1324 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1325 if (unlikely(fd2 < 0)) {
1326 err = fd2;
d73aa286 1327 goto out_put_unused_1;
28407630
AV
1328 }
1329
aab174f0 1330 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1331 if (IS_ERR(newfile1)) {
28407630 1332 err = PTR_ERR(newfile1);
d73aa286 1333 goto out_put_unused_both;
28407630
AV
1334 }
1335
aab174f0 1336 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1337 if (IS_ERR(newfile2)) {
1338 err = PTR_ERR(newfile2);
d73aa286 1339 goto out_fput_1;
db349509
AV
1340 }
1341
d73aa286
YD
1342 err = put_user(fd1, &usockvec[0]);
1343 if (err)
1344 goto out_fput_both;
1345
1346 err = put_user(fd2, &usockvec[1]);
1347 if (err)
1348 goto out_fput_both;
1349
157cf649 1350 audit_fd_pair(fd1, fd2);
d73aa286 1351
db349509
AV
1352 fd_install(fd1, newfile1);
1353 fd_install(fd2, newfile2);
1da177e4
LT
1354 /* fd1 and fd2 may be already another descriptors.
1355 * Not kernel problem.
1356 */
1357
d73aa286 1358 return 0;
1da177e4 1359
d73aa286
YD
1360out_fput_both:
1361 fput(newfile2);
1362 fput(newfile1);
1363 put_unused_fd(fd2);
1364 put_unused_fd(fd1);
1365 goto out;
1366
1367out_fput_1:
1368 fput(newfile1);
1369 put_unused_fd(fd2);
1370 put_unused_fd(fd1);
1371 sock_release(sock2);
1372 goto out;
1da177e4 1373
d73aa286
YD
1374out_put_unused_both:
1375 put_unused_fd(fd2);
1376out_put_unused_1:
1377 put_unused_fd(fd1);
1da177e4 1378out_release_both:
89bddce5 1379 sock_release(sock2);
1da177e4 1380out_release_1:
89bddce5 1381 sock_release(sock1);
1da177e4
LT
1382out:
1383 return err;
1384}
1385
1da177e4
LT
1386/*
1387 * Bind a name to a socket. Nothing much to do here since it's
1388 * the protocol's responsibility to handle the local address.
1389 *
1390 * We move the socket address to kernel space before we call
1391 * the protocol layer (having also checked the address is ok).
1392 */
1393
20f37034 1394SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1395{
1396 struct socket *sock;
230b1839 1397 struct sockaddr_storage address;
6cb153ca 1398 int err, fput_needed;
1da177e4 1399
89bddce5 1400 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1401 if (sock) {
43db362d 1402 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1403 if (err >= 0) {
1404 err = security_socket_bind(sock,
230b1839 1405 (struct sockaddr *)&address,
89bddce5 1406 addrlen);
6cb153ca
BL
1407 if (!err)
1408 err = sock->ops->bind(sock,
89bddce5 1409 (struct sockaddr *)
230b1839 1410 &address, addrlen);
1da177e4 1411 }
6cb153ca 1412 fput_light(sock->file, fput_needed);
89bddce5 1413 }
1da177e4
LT
1414 return err;
1415}
1416
1da177e4
LT
1417/*
1418 * Perform a listen. Basically, we allow the protocol to do anything
1419 * necessary for a listen, and if that works, we mark the socket as
1420 * ready for listening.
1421 */
1422
3e0fa65f 1423SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1424{
1425 struct socket *sock;
6cb153ca 1426 int err, fput_needed;
b8e1f9b5 1427 int somaxconn;
89bddce5
SH
1428
1429 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1430 if (sock) {
8efa6e93 1431 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1432 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1433 backlog = somaxconn;
1da177e4
LT
1434
1435 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1436 if (!err)
1437 err = sock->ops->listen(sock, backlog);
1da177e4 1438
6cb153ca 1439 fput_light(sock->file, fput_needed);
1da177e4
LT
1440 }
1441 return err;
1442}
1443
1da177e4
LT
1444/*
1445 * For accept, we attempt to create a new socket, set up the link
1446 * with the client, wake up the client, then return the new
1447 * connected fd. We collect the address of the connector in kernel
1448 * space and move it to user at the very end. This is unclean because
1449 * we open the socket then return an error.
1450 *
1451 * 1003.1g adds the ability to recvmsg() to query connection pending
1452 * status to recvmsg. We need to add that support in a way thats
1453 * clean when we restucture accept also.
1454 */
1455
20f37034
HC
1456SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1457 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1458{
1459 struct socket *sock, *newsock;
39d8c1b6 1460 struct file *newfile;
6cb153ca 1461 int err, len, newfd, fput_needed;
230b1839 1462 struct sockaddr_storage address;
1da177e4 1463
77d27200 1464 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1465 return -EINVAL;
1466
1467 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1468 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1469
6cb153ca 1470 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1471 if (!sock)
1472 goto out;
1473
1474 err = -ENFILE;
c6d409cf
ED
1475 newsock = sock_alloc();
1476 if (!newsock)
1da177e4
LT
1477 goto out_put;
1478
1479 newsock->type = sock->type;
1480 newsock->ops = sock->ops;
1481
1da177e4
LT
1482 /*
1483 * We don't need try_module_get here, as the listening socket (sock)
1484 * has the protocol module (sock->ops->owner) held.
1485 */
1486 __module_get(newsock->ops->owner);
1487
28407630 1488 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1489 if (unlikely(newfd < 0)) {
1490 err = newfd;
9a1875e6
DM
1491 sock_release(newsock);
1492 goto out_put;
39d8c1b6 1493 }
aab174f0 1494 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1495 if (IS_ERR(newfile)) {
28407630
AV
1496 err = PTR_ERR(newfile);
1497 put_unused_fd(newfd);
1498 sock_release(newsock);
1499 goto out_put;
1500 }
39d8c1b6 1501
a79af59e
FF
1502 err = security_socket_accept(sock, newsock);
1503 if (err)
39d8c1b6 1504 goto out_fd;
a79af59e 1505
1da177e4
LT
1506 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1507 if (err < 0)
39d8c1b6 1508 goto out_fd;
1da177e4
LT
1509
1510 if (upeer_sockaddr) {
230b1839 1511 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1512 &len, 2) < 0) {
1da177e4 1513 err = -ECONNABORTED;
39d8c1b6 1514 goto out_fd;
1da177e4 1515 }
43db362d 1516 err = move_addr_to_user(&address,
230b1839 1517 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1518 if (err < 0)
39d8c1b6 1519 goto out_fd;
1da177e4
LT
1520 }
1521
1522 /* File flags are not inherited via accept() unlike another OSes. */
1523
39d8c1b6
DM
1524 fd_install(newfd, newfile);
1525 err = newfd;
1da177e4 1526
1da177e4 1527out_put:
6cb153ca 1528 fput_light(sock->file, fput_needed);
1da177e4
LT
1529out:
1530 return err;
39d8c1b6 1531out_fd:
9606a216 1532 fput(newfile);
39d8c1b6 1533 put_unused_fd(newfd);
1da177e4
LT
1534 goto out_put;
1535}
1536
20f37034
HC
1537SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1538 int __user *, upeer_addrlen)
aaca0bdc 1539{
de11defe 1540 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1541}
1542
1da177e4
LT
1543/*
1544 * Attempt to connect to a socket with the server address. The address
1545 * is in user space so we verify it is OK and move it to kernel space.
1546 *
1547 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1548 * break bindings
1549 *
1550 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1551 * other SEQPACKET protocols that take time to connect() as it doesn't
1552 * include the -EINPROGRESS status for such sockets.
1553 */
1554
20f37034
HC
1555SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1556 int, addrlen)
1da177e4
LT
1557{
1558 struct socket *sock;
230b1839 1559 struct sockaddr_storage address;
6cb153ca 1560 int err, fput_needed;
1da177e4 1561
6cb153ca 1562 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1563 if (!sock)
1564 goto out;
43db362d 1565 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1566 if (err < 0)
1567 goto out_put;
1568
89bddce5 1569 err =
230b1839 1570 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1571 if (err)
1572 goto out_put;
1573
230b1839 1574 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1575 sock->file->f_flags);
1576out_put:
6cb153ca 1577 fput_light(sock->file, fput_needed);
1da177e4
LT
1578out:
1579 return err;
1580}
1581
1582/*
1583 * Get the local address ('name') of a socket object. Move the obtained
1584 * name to user space.
1585 */
1586
20f37034
HC
1587SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1588 int __user *, usockaddr_len)
1da177e4
LT
1589{
1590 struct socket *sock;
230b1839 1591 struct sockaddr_storage address;
6cb153ca 1592 int len, err, fput_needed;
89bddce5 1593
6cb153ca 1594 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1595 if (!sock)
1596 goto out;
1597
1598 err = security_socket_getsockname(sock);
1599 if (err)
1600 goto out_put;
1601
230b1839 1602 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1603 if (err)
1604 goto out_put;
43db362d 1605 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1606
1607out_put:
6cb153ca 1608 fput_light(sock->file, fput_needed);
1da177e4
LT
1609out:
1610 return err;
1611}
1612
1613/*
1614 * Get the remote address ('name') of a socket object. Move the obtained
1615 * name to user space.
1616 */
1617
20f37034
HC
1618SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1619 int __user *, usockaddr_len)
1da177e4
LT
1620{
1621 struct socket *sock;
230b1839 1622 struct sockaddr_storage address;
6cb153ca 1623 int len, err, fput_needed;
1da177e4 1624
89bddce5
SH
1625 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1626 if (sock != NULL) {
1da177e4
LT
1627 err = security_socket_getpeername(sock);
1628 if (err) {
6cb153ca 1629 fput_light(sock->file, fput_needed);
1da177e4
LT
1630 return err;
1631 }
1632
89bddce5 1633 err =
230b1839 1634 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1635 1);
1da177e4 1636 if (!err)
43db362d 1637 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1638 usockaddr_len);
6cb153ca 1639 fput_light(sock->file, fput_needed);
1da177e4
LT
1640 }
1641 return err;
1642}
1643
1644/*
1645 * Send a datagram to a given address. We move the address into kernel
1646 * space and check the user space data area is readable before invoking
1647 * the protocol.
1648 */
1649
3e0fa65f 1650SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1651 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1652 int, addr_len)
1da177e4
LT
1653{
1654 struct socket *sock;
230b1839 1655 struct sockaddr_storage address;
1da177e4
LT
1656 int err;
1657 struct msghdr msg;
1658 struct iovec iov;
6cb153ca 1659 int fput_needed;
6cb153ca 1660
602bd0e9
AV
1661 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1662 if (unlikely(err))
1663 return err;
de0fa95c
PE
1664 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1665 if (!sock)
4387ff75 1666 goto out;
6cb153ca 1667
89bddce5 1668 msg.msg_name = NULL;
89bddce5
SH
1669 msg.msg_control = NULL;
1670 msg.msg_controllen = 0;
1671 msg.msg_namelen = 0;
6cb153ca 1672 if (addr) {
43db362d 1673 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1674 if (err < 0)
1675 goto out_put;
230b1839 1676 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1677 msg.msg_namelen = addr_len;
1da177e4
LT
1678 }
1679 if (sock->file->f_flags & O_NONBLOCK)
1680 flags |= MSG_DONTWAIT;
1681 msg.msg_flags = flags;
d8725c86 1682 err = sock_sendmsg(sock, &msg);
1da177e4 1683
89bddce5 1684out_put:
de0fa95c 1685 fput_light(sock->file, fput_needed);
4387ff75 1686out:
1da177e4
LT
1687 return err;
1688}
1689
1690/*
89bddce5 1691 * Send a datagram down a socket.
1da177e4
LT
1692 */
1693
3e0fa65f 1694SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1695 unsigned int, flags)
1da177e4
LT
1696{
1697 return sys_sendto(fd, buff, len, flags, NULL, 0);
1698}
1699
1700/*
89bddce5 1701 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1702 * sender. We verify the buffers are writable and if needed move the
1703 * sender address from kernel to user space.
1704 */
1705
3e0fa65f 1706SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1707 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1708 int __user *, addr_len)
1da177e4
LT
1709{
1710 struct socket *sock;
1711 struct iovec iov;
1712 struct msghdr msg;
230b1839 1713 struct sockaddr_storage address;
89bddce5 1714 int err, err2;
6cb153ca
BL
1715 int fput_needed;
1716
602bd0e9
AV
1717 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1718 if (unlikely(err))
1719 return err;
de0fa95c 1720 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1721 if (!sock)
de0fa95c 1722 goto out;
1da177e4 1723
89bddce5
SH
1724 msg.msg_control = NULL;
1725 msg.msg_controllen = 0;
f3d33426
HFS
1726 /* Save some cycles and don't copy the address if not needed */
1727 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1728 /* We assume all kernel code knows the size of sockaddr_storage */
1729 msg.msg_namelen = 0;
130ed5d1 1730 msg.msg_iocb = NULL;
1da177e4
LT
1731 if (sock->file->f_flags & O_NONBLOCK)
1732 flags |= MSG_DONTWAIT;
2da62906 1733 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1734
89bddce5 1735 if (err >= 0 && addr != NULL) {
43db362d 1736 err2 = move_addr_to_user(&address,
230b1839 1737 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1738 if (err2 < 0)
1739 err = err2;
1da177e4 1740 }
de0fa95c
PE
1741
1742 fput_light(sock->file, fput_needed);
4387ff75 1743out:
1da177e4
LT
1744 return err;
1745}
1746
1747/*
89bddce5 1748 * Receive a datagram from a socket.
1da177e4
LT
1749 */
1750
b7c0ddf5
JG
1751SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1752 unsigned int, flags)
1da177e4
LT
1753{
1754 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1755}
1756
1757/*
1758 * Set a socket option. Because we don't know the option lengths we have
1759 * to pass the user mode parameter for the protocols to sort out.
1760 */
1761
20f37034
HC
1762SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1763 char __user *, optval, int, optlen)
1da177e4 1764{
6cb153ca 1765 int err, fput_needed;
1da177e4
LT
1766 struct socket *sock;
1767
1768 if (optlen < 0)
1769 return -EINVAL;
89bddce5
SH
1770
1771 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1772 if (sock != NULL) {
1773 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1774 if (err)
1775 goto out_put;
1da177e4
LT
1776
1777 if (level == SOL_SOCKET)
89bddce5
SH
1778 err =
1779 sock_setsockopt(sock, level, optname, optval,
1780 optlen);
1da177e4 1781 else
89bddce5
SH
1782 err =
1783 sock->ops->setsockopt(sock, level, optname, optval,
1784 optlen);
6cb153ca
BL
1785out_put:
1786 fput_light(sock->file, fput_needed);
1da177e4
LT
1787 }
1788 return err;
1789}
1790
1791/*
1792 * Get a socket option. Because we don't know the option lengths we have
1793 * to pass a user mode parameter for the protocols to sort out.
1794 */
1795
20f37034
HC
1796SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1797 char __user *, optval, int __user *, optlen)
1da177e4 1798{
6cb153ca 1799 int err, fput_needed;
1da177e4
LT
1800 struct socket *sock;
1801
89bddce5
SH
1802 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1803 if (sock != NULL) {
6cb153ca
BL
1804 err = security_socket_getsockopt(sock, level, optname);
1805 if (err)
1806 goto out_put;
1da177e4
LT
1807
1808 if (level == SOL_SOCKET)
89bddce5
SH
1809 err =
1810 sock_getsockopt(sock, level, optname, optval,
1811 optlen);
1da177e4 1812 else
89bddce5
SH
1813 err =
1814 sock->ops->getsockopt(sock, level, optname, optval,
1815 optlen);
6cb153ca
BL
1816out_put:
1817 fput_light(sock->file, fput_needed);
1da177e4
LT
1818 }
1819 return err;
1820}
1821
1da177e4
LT
1822/*
1823 * Shutdown a socket.
1824 */
1825
754fe8d2 1826SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1827{
6cb153ca 1828 int err, fput_needed;
1da177e4
LT
1829 struct socket *sock;
1830
89bddce5
SH
1831 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1832 if (sock != NULL) {
1da177e4 1833 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1834 if (!err)
1835 err = sock->ops->shutdown(sock, how);
1836 fput_light(sock->file, fput_needed);
1da177e4
LT
1837 }
1838 return err;
1839}
1840
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() expects a variable named "flags" and a pointer named
 * <msg>_compat to be in scope at the point of use; it picks the compat
 * layout when MSG_CMSG_COMPAT is set in flags.
 */
#define COMPAT_MSG(msg, member)	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1847
c71d8ebe
TH
1848struct used_address {
1849 struct sockaddr_storage name;
1850 unsigned int name_len;
1851};
1852
da184284
AV
1853static int copy_msghdr_from_user(struct msghdr *kmsg,
1854 struct user_msghdr __user *umsg,
1855 struct sockaddr __user **save_addr,
1856 struct iovec **iov)
1661bf36 1857{
08adb7da
AV
1858 struct sockaddr __user *uaddr;
1859 struct iovec __user *uiov;
c0371da6 1860 size_t nr_segs;
08adb7da
AV
1861 ssize_t err;
1862
1863 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1864 __get_user(uaddr, &umsg->msg_name) ||
1865 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1866 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1867 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1868 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1869 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1870 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1871 return -EFAULT;
dbb490b9 1872
08adb7da 1873 if (!uaddr)
6a2a2b3a
AS
1874 kmsg->msg_namelen = 0;
1875
dbb490b9
ML
1876 if (kmsg->msg_namelen < 0)
1877 return -EINVAL;
1878
1661bf36 1879 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1880 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1881
1882 if (save_addr)
1883 *save_addr = uaddr;
1884
1885 if (uaddr && kmsg->msg_namelen) {
1886 if (!save_addr) {
1887 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1888 kmsg->msg_name);
1889 if (err < 0)
1890 return err;
1891 }
1892 } else {
1893 kmsg->msg_name = NULL;
1894 kmsg->msg_namelen = 0;
1895 }
1896
c0371da6 1897 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1898 return -EMSGSIZE;
1899
0345f931 1900 kmsg->msg_iocb = NULL;
1901
da184284
AV
1902 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1903 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1904}
1905
666547ff 1906static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1907 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1908 struct used_address *used_address,
1909 unsigned int allowed_msghdr_flags)
1da177e4 1910{
89bddce5
SH
1911 struct compat_msghdr __user *msg_compat =
1912 (struct compat_msghdr __user *)msg;
230b1839 1913 struct sockaddr_storage address;
1da177e4 1914 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1915 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1916 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1917 /* 20 is size of ipv6_pktinfo */
1da177e4 1918 unsigned char *ctl_buf = ctl;
d8725c86 1919 int ctl_len;
08adb7da 1920 ssize_t err;
89bddce5 1921
08adb7da 1922 msg_sys->msg_name = &address;
1da177e4 1923
08449320 1924 if (MSG_CMSG_COMPAT & flags)
08adb7da 1925 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1926 else
08adb7da 1927 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1928 if (err < 0)
da184284 1929 return err;
1da177e4
LT
1930
1931 err = -ENOBUFS;
1932
228e548e 1933 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1934 goto out_freeiov;
28a94d8f 1935 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1936 ctl_len = msg_sys->msg_controllen;
1da177e4 1937 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1938 err =
228e548e 1939 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1940 sizeof(ctl));
1da177e4
LT
1941 if (err)
1942 goto out_freeiov;
228e548e
AB
1943 ctl_buf = msg_sys->msg_control;
1944 ctl_len = msg_sys->msg_controllen;
1da177e4 1945 } else if (ctl_len) {
89bddce5 1946 if (ctl_len > sizeof(ctl)) {
1da177e4 1947 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1948 if (ctl_buf == NULL)
1da177e4
LT
1949 goto out_freeiov;
1950 }
1951 err = -EFAULT;
1952 /*
228e548e 1953 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1954 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1955 * checking falls down on this.
1956 */
fb8621bb 1957 if (copy_from_user(ctl_buf,
228e548e 1958 (void __user __force *)msg_sys->msg_control,
89bddce5 1959 ctl_len))
1da177e4 1960 goto out_freectl;
228e548e 1961 msg_sys->msg_control = ctl_buf;
1da177e4 1962 }
228e548e 1963 msg_sys->msg_flags = flags;
1da177e4
LT
1964
1965 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1966 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1967 /*
1968 * If this is sendmmsg() and current destination address is same as
1969 * previously succeeded address, omit asking LSM's decision.
1970 * used_address->name_len is initialized to UINT_MAX so that the first
1971 * destination address never matches.
1972 */
bc909d9d
MD
1973 if (used_address && msg_sys->msg_name &&
1974 used_address->name_len == msg_sys->msg_namelen &&
1975 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1976 used_address->name_len)) {
d8725c86 1977 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1978 goto out_freectl;
1979 }
d8725c86 1980 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1981 /*
1982 * If this is sendmmsg() and sending to current destination address was
1983 * successful, remember it.
1984 */
1985 if (used_address && err >= 0) {
1986 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1987 if (msg_sys->msg_name)
1988 memcpy(&used_address->name, msg_sys->msg_name,
1989 used_address->name_len);
c71d8ebe 1990 }
1da177e4
LT
1991
1992out_freectl:
89bddce5 1993 if (ctl_buf != ctl)
1da177e4
LT
1994 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1995out_freeiov:
da184284 1996 kfree(iov);
228e548e
AB
1997 return err;
1998}
1999
2000/*
2001 * BSD sendmsg interface
2002 */
2003
666547ff 2004long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2005{
2006 int fput_needed, err;
2007 struct msghdr msg_sys;
1be374a0
AL
2008 struct socket *sock;
2009
1be374a0 2010 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2011 if (!sock)
2012 goto out;
2013
28a94d8f 2014 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2015
6cb153ca 2016 fput_light(sock->file, fput_needed);
89bddce5 2017out:
1da177e4
LT
2018 return err;
2019}
2020
666547ff 2021SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2022{
2023 if (flags & MSG_CMSG_COMPAT)
2024 return -EINVAL;
2025 return __sys_sendmsg(fd, msg, flags);
2026}
2027
228e548e
AB
2028/*
2029 * Linux sendmmsg interface
2030 */
2031
2032int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2033 unsigned int flags)
2034{
2035 int fput_needed, err, datagrams;
2036 struct socket *sock;
2037 struct mmsghdr __user *entry;
2038 struct compat_mmsghdr __user *compat_entry;
2039 struct msghdr msg_sys;
c71d8ebe 2040 struct used_address used_address;
f092276d 2041 unsigned int oflags = flags;
228e548e 2042
98382f41
AB
2043 if (vlen > UIO_MAXIOV)
2044 vlen = UIO_MAXIOV;
228e548e
AB
2045
2046 datagrams = 0;
2047
2048 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2049 if (!sock)
2050 return err;
2051
c71d8ebe 2052 used_address.name_len = UINT_MAX;
228e548e
AB
2053 entry = mmsg;
2054 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2055 err = 0;
f092276d 2056 flags |= MSG_BATCH;
228e548e
AB
2057
2058 while (datagrams < vlen) {
f092276d
TH
2059 if (datagrams == vlen - 1)
2060 flags = oflags;
2061
228e548e 2062 if (MSG_CMSG_COMPAT & flags) {
666547ff 2063 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2064 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2065 if (err < 0)
2066 break;
2067 err = __put_user(err, &compat_entry->msg_len);
2068 ++compat_entry;
2069 } else {
a7526eb5 2070 err = ___sys_sendmsg(sock,
666547ff 2071 (struct user_msghdr __user *)entry,
28a94d8f 2072 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2073 if (err < 0)
2074 break;
2075 err = put_user(err, &entry->msg_len);
2076 ++entry;
2077 }
2078
2079 if (err)
2080 break;
2081 ++datagrams;
3023898b
SHY
2082 if (msg_data_left(&msg_sys))
2083 break;
a78cb84c 2084 cond_resched();
228e548e
AB
2085 }
2086
228e548e
AB
2087 fput_light(sock->file, fput_needed);
2088
728ffb86
AB
2089 /* We only return an error if no datagrams were able to be sent */
2090 if (datagrams != 0)
228e548e
AB
2091 return datagrams;
2092
228e548e
AB
2093 return err;
2094}
2095
2096SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2097 unsigned int, vlen, unsigned int, flags)
2098{
1be374a0
AL
2099 if (flags & MSG_CMSG_COMPAT)
2100 return -EINVAL;
228e548e
AB
2101 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2102}
2103
666547ff 2104static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2105 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2106{
89bddce5
SH
2107 struct compat_msghdr __user *msg_compat =
2108 (struct compat_msghdr __user *)msg;
1da177e4 2109 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2110 struct iovec *iov = iovstack;
1da177e4 2111 unsigned long cmsg_ptr;
2da62906 2112 int len;
08adb7da 2113 ssize_t err;
1da177e4
LT
2114
2115 /* kernel mode address */
230b1839 2116 struct sockaddr_storage addr;
1da177e4
LT
2117
2118 /* user mode address pointers */
2119 struct sockaddr __user *uaddr;
08adb7da 2120 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2121
08adb7da 2122 msg_sys->msg_name = &addr;
1da177e4 2123
f3d33426 2124 if (MSG_CMSG_COMPAT & flags)
08adb7da 2125 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2126 else
08adb7da 2127 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2128 if (err < 0)
da184284 2129 return err;
1da177e4 2130
a2e27255
ACM
2131 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2132 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2133
f3d33426
HFS
2134 /* We assume all kernel code knows the size of sockaddr_storage */
2135 msg_sys->msg_namelen = 0;
2136
1da177e4
LT
2137 if (sock->file->f_flags & O_NONBLOCK)
2138 flags |= MSG_DONTWAIT;
2da62906 2139 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2140 if (err < 0)
2141 goto out_freeiov;
2142 len = err;
2143
2144 if (uaddr != NULL) {
43db362d 2145 err = move_addr_to_user(&addr,
a2e27255 2146 msg_sys->msg_namelen, uaddr,
89bddce5 2147 uaddr_len);
1da177e4
LT
2148 if (err < 0)
2149 goto out_freeiov;
2150 }
a2e27255 2151 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2152 COMPAT_FLAGS(msg));
1da177e4
LT
2153 if (err)
2154 goto out_freeiov;
2155 if (MSG_CMSG_COMPAT & flags)
a2e27255 2156 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2157 &msg_compat->msg_controllen);
2158 else
a2e27255 2159 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2160 &msg->msg_controllen);
2161 if (err)
2162 goto out_freeiov;
2163 err = len;
2164
2165out_freeiov:
da184284 2166 kfree(iov);
a2e27255
ACM
2167 return err;
2168}
2169
2170/*
2171 * BSD recvmsg interface
2172 */
2173
666547ff 2174long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2175{
2176 int fput_needed, err;
2177 struct msghdr msg_sys;
1be374a0
AL
2178 struct socket *sock;
2179
1be374a0 2180 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2181 if (!sock)
2182 goto out;
2183
a7526eb5 2184 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2185
6cb153ca 2186 fput_light(sock->file, fput_needed);
1da177e4
LT
2187out:
2188 return err;
2189}
2190
666547ff 2191SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2192 unsigned int, flags)
2193{
2194 if (flags & MSG_CMSG_COMPAT)
2195 return -EINVAL;
2196 return __sys_recvmsg(fd, msg, flags);
2197}
2198
a2e27255
ACM
2199/*
2200 * Linux recvmmsg interface
2201 */
2202
2203int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2204 unsigned int flags, struct timespec *timeout)
2205{
2206 int fput_needed, err, datagrams;
2207 struct socket *sock;
2208 struct mmsghdr __user *entry;
d7256d0e 2209 struct compat_mmsghdr __user *compat_entry;
a2e27255 2210 struct msghdr msg_sys;
766b9f92
DD
2211 struct timespec64 end_time;
2212 struct timespec64 timeout64;
a2e27255
ACM
2213
2214 if (timeout &&
2215 poll_select_set_timeout(&end_time, timeout->tv_sec,
2216 timeout->tv_nsec))
2217 return -EINVAL;
2218
2219 datagrams = 0;
2220
2221 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2222 if (!sock)
2223 return err;
2224
2225 err = sock_error(sock->sk);
2226 if (err)
2227 goto out_put;
2228
2229 entry = mmsg;
d7256d0e 2230 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2231
2232 while (datagrams < vlen) {
2233 /*
2234 * No need to ask LSM for more than the first datagram.
2235 */
d7256d0e 2236 if (MSG_CMSG_COMPAT & flags) {
666547ff 2237 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2238 &msg_sys, flags & ~MSG_WAITFORONE,
2239 datagrams);
d7256d0e
JMG
2240 if (err < 0)
2241 break;
2242 err = __put_user(err, &compat_entry->msg_len);
2243 ++compat_entry;
2244 } else {
a7526eb5 2245 err = ___sys_recvmsg(sock,
666547ff 2246 (struct user_msghdr __user *)entry,
a7526eb5
AL
2247 &msg_sys, flags & ~MSG_WAITFORONE,
2248 datagrams);
d7256d0e
JMG
2249 if (err < 0)
2250 break;
2251 err = put_user(err, &entry->msg_len);
2252 ++entry;
2253 }
2254
a2e27255
ACM
2255 if (err)
2256 break;
a2e27255
ACM
2257 ++datagrams;
2258
71c5c159
BB
2259 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2260 if (flags & MSG_WAITFORONE)
2261 flags |= MSG_DONTWAIT;
2262
a2e27255 2263 if (timeout) {
766b9f92
DD
2264 ktime_get_ts64(&timeout64);
2265 *timeout = timespec64_to_timespec(
2266 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2267 if (timeout->tv_sec < 0) {
2268 timeout->tv_sec = timeout->tv_nsec = 0;
2269 break;
2270 }
2271
2272 /* Timeout, return less than vlen datagrams */
2273 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2274 break;
2275 }
2276
2277 /* Out of band data, return right away */
2278 if (msg_sys.msg_flags & MSG_OOB)
2279 break;
a78cb84c 2280 cond_resched();
a2e27255
ACM
2281 }
2282
a2e27255 2283 if (err == 0)
34b88a68
ACM
2284 goto out_put;
2285
2286 if (datagrams == 0) {
2287 datagrams = err;
2288 goto out_put;
2289 }
a2e27255 2290
34b88a68
ACM
2291 /*
2292 * We may return less entries than requested (vlen) if the
2293 * sock is non block and there aren't enough datagrams...
2294 */
2295 if (err != -EAGAIN) {
a2e27255 2296 /*
34b88a68
ACM
2297 * ... or if recvmsg returns an error after we
2298 * received some datagrams, where we record the
2299 * error to return on the next call or if the
2300 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2301 */
34b88a68 2302 sock->sk->sk_err = -err;
a2e27255 2303 }
34b88a68
ACM
2304out_put:
2305 fput_light(sock->file, fput_needed);
a2e27255 2306
34b88a68 2307 return datagrams;
a2e27255
ACM
2308}
2309
2310SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2311 unsigned int, vlen, unsigned int, flags,
2312 struct timespec __user *, timeout)
2313{
2314 int datagrams;
2315 struct timespec timeout_sys;
2316
1be374a0
AL
2317 if (flags & MSG_CMSG_COMPAT)
2318 return -EINVAL;
2319
a2e27255
ACM
2320 if (!timeout)
2321 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2322
2323 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2324 return -EFAULT;
2325
2326 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2327
2328 if (datagrams > 0 &&
2329 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2330 datagrams = -EFAULT;
2331
2332 return datagrams;
2333}
2334
2335#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Size in bytes of the argument vector for each sys_socketcall number,
 * indexed by call number.  AL(n) stands for n unsigned longs.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),					/* index 0 */
	AL(3), AL(3), AL(3), AL(2), AL(3),	/* indices 1 - 5 */
	AL(3), AL(3), AL(4), AL(4), AL(4),	/* indices 6 - 10 */
	AL(6), AL(6), AL(2), AL(5), AL(5),	/* indices 11 - 15 */
	AL(3), AL(3), AL(4), AL(5), AL(4)	/* indices 16 - 20 */
};

#undef AL
2346
2347/*
89bddce5 2348 * System call vectors.
1da177e4
LT
2349 *
2350 * Argument checking cleaned up. Saved 20% in size.
2351 * This function doesn't need to set the kernel lock because
89bddce5 2352 * it is set by the callees.
1da177e4
LT
2353 */
2354
3e0fa65f 2355SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2356{
2950fa9d 2357 unsigned long a[AUDITSC_ARGS];
89bddce5 2358 unsigned long a0, a1;
1da177e4 2359 int err;
47379052 2360 unsigned int len;
1da177e4 2361
228e548e 2362 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2363 return -EINVAL;
2364
47379052
AV
2365 len = nargs[call];
2366 if (len > sizeof(a))
2367 return -EINVAL;
2368
1da177e4 2369 /* copy_from_user should be SMP safe. */
47379052 2370 if (copy_from_user(a, args, len))
1da177e4 2371 return -EFAULT;
3ec3b2fb 2372
2950fa9d
CG
2373 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2374 if (err)
2375 return err;
3ec3b2fb 2376
89bddce5
SH
2377 a0 = a[0];
2378 a1 = a[1];
2379
2380 switch (call) {
2381 case SYS_SOCKET:
2382 err = sys_socket(a0, a1, a[2]);
2383 break;
2384 case SYS_BIND:
2385 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2386 break;
2387 case SYS_CONNECT:
2388 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2389 break;
2390 case SYS_LISTEN:
2391 err = sys_listen(a0, a1);
2392 break;
2393 case SYS_ACCEPT:
de11defe
UD
2394 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2395 (int __user *)a[2], 0);
89bddce5
SH
2396 break;
2397 case SYS_GETSOCKNAME:
2398 err =
2399 sys_getsockname(a0, (struct sockaddr __user *)a1,
2400 (int __user *)a[2]);
2401 break;
2402 case SYS_GETPEERNAME:
2403 err =
2404 sys_getpeername(a0, (struct sockaddr __user *)a1,
2405 (int __user *)a[2]);
2406 break;
2407 case SYS_SOCKETPAIR:
2408 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2409 break;
2410 case SYS_SEND:
2411 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2412 break;
2413 case SYS_SENDTO:
2414 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2415 (struct sockaddr __user *)a[4], a[5]);
2416 break;
2417 case SYS_RECV:
2418 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2419 break;
2420 case SYS_RECVFROM:
2421 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2422 (struct sockaddr __user *)a[4],
2423 (int __user *)a[5]);
2424 break;
2425 case SYS_SHUTDOWN:
2426 err = sys_shutdown(a0, a1);
2427 break;
2428 case SYS_SETSOCKOPT:
2429 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2430 break;
2431 case SYS_GETSOCKOPT:
2432 err =
2433 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2434 (int __user *)a[4]);
2435 break;
2436 case SYS_SENDMSG:
666547ff 2437 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2438 break;
228e548e
AB
2439 case SYS_SENDMMSG:
2440 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2441 break;
89bddce5 2442 case SYS_RECVMSG:
666547ff 2443 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2444 break;
a2e27255
ACM
2445 case SYS_RECVMMSG:
2446 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2447 (struct timespec __user *)a[4]);
2448 break;
de11defe
UD
2449 case SYS_ACCEPT4:
2450 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2451 (int __user *)a[2], a[3]);
aaca0bdc 2452 break;
89bddce5
SH
2453 default:
2454 err = -EINVAL;
2455 break;
1da177e4
LT
2456 }
2457 return err;
2458}
2459
89bddce5 2460#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2461
55737fda
SH
2462/**
2463 * sock_register - add a socket protocol handler
2464 * @ops: description of protocol
2465 *
1da177e4
LT
2466 * This function is called by a protocol handler that wants to
2467 * advertise its address family, and have it linked into the
e793c0f7 2468 * socket interface. The value ops->family corresponds to the
55737fda 2469 * socket system call protocol family.
1da177e4 2470 */
f0fd27d4 2471int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2472{
2473 int err;
2474
2475 if (ops->family >= NPROTO) {
3410f22e 2476 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2477 return -ENOBUFS;
2478 }
55737fda
SH
2479
2480 spin_lock(&net_family_lock);
190683a9
ED
2481 if (rcu_dereference_protected(net_families[ops->family],
2482 lockdep_is_held(&net_family_lock)))
55737fda
SH
2483 err = -EEXIST;
2484 else {
cf778b00 2485 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2486 err = 0;
2487 }
55737fda
SH
2488 spin_unlock(&net_family_lock);
2489
3410f22e 2490 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2491 return err;
2492}
c6d409cf 2493EXPORT_SYMBOL(sock_register);
1da177e4 2494
55737fda
SH
2495/**
2496 * sock_unregister - remove a protocol handler
2497 * @family: protocol family to remove
2498 *
1da177e4
LT
2499 * This function is called by a protocol handler that wants to
2500 * remove its address family, and have it unlinked from the
55737fda
SH
2501 * new socket creation.
2502 *
2503 * If protocol handler is a module, then it can use module reference
2504 * counts to protect against new references. If protocol handler is not
2505 * a module then it needs to provide its own protection in
2506 * the ops->create routine.
1da177e4 2507 */
f0fd27d4 2508void sock_unregister(int family)
1da177e4 2509{
f0fd27d4 2510 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2511
55737fda 2512 spin_lock(&net_family_lock);
a9b3cd7f 2513 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2514 spin_unlock(&net_family_lock);
2515
2516 synchronize_rcu();
2517
3410f22e 2518 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2519}
c6d409cf 2520EXPORT_SYMBOL(sock_unregister);
1da177e4 2521
77d76ea3 2522static int __init sock_init(void)
1da177e4 2523{
b3e19d92 2524 int err;
2ca794e5
EB
2525 /*
2526 * Initialize the network sysctl infrastructure.
2527 */
2528 err = net_sysctl_init();
2529 if (err)
2530 goto out;
b3e19d92 2531
1da177e4 2532 /*
89bddce5 2533 * Initialize skbuff SLAB cache
1da177e4
LT
2534 */
2535 skb_init();
1da177e4
LT
2536
2537 /*
89bddce5 2538 * Initialize the protocols module.
1da177e4
LT
2539 */
2540
2541 init_inodecache();
b3e19d92
NP
2542
2543 err = register_filesystem(&sock_fs_type);
2544 if (err)
2545 goto out_fs;
1da177e4 2546 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2547 if (IS_ERR(sock_mnt)) {
2548 err = PTR_ERR(sock_mnt);
2549 goto out_mount;
2550 }
77d76ea3
AK
2551
2552 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2553 */
2554
2555#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2556 err = netfilter_init();
2557 if (err)
2558 goto out;
1da177e4 2559#endif
cbeb321a 2560
408eccce 2561 ptp_classifier_init();
c1f19b51 2562
b3e19d92
NP
2563out:
2564 return err;
2565
2566out_mount:
2567 unregister_filesystem(&sock_fs_type);
2568out_fs:
2569 goto out;
1da177e4
LT
2570}
2571
77d76ea3
AK
2572core_initcall(sock_init); /* early initcall */
2573
1da177e4
LT
2574#ifdef CONFIG_PROC_FS
2575void socket_seq_show(struct seq_file *seq)
2576{
2577 int cpu;
2578 int counter = 0;
2579
6f912042 2580 for_each_possible_cpu(cpu)
89bddce5 2581 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2582
2583 /* It can be negative, by the way. 8) */
2584 if (counter < 0)
2585 counter = 0;
2586
2587 seq_printf(seq, "sockets: used %d\n", counter);
2588}
89bddce5 2589#endif /* CONFIG_PROC_FS */
1da177e4 2590
89bbfc95 2591#ifdef CONFIG_COMPAT
6b96018b 2592static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2593 unsigned int cmd, void __user *up)
7a229387 2594{
7a229387
AB
2595 mm_segment_t old_fs = get_fs();
2596 struct timeval ktv;
2597 int err;
2598
2599 set_fs(KERNEL_DS);
6b96018b 2600 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2601 set_fs(old_fs);
644595f8 2602 if (!err)
ed6fe9d6 2603 err = compat_put_timeval(&ktv, up);
644595f8 2604
7a229387
AB
2605 return err;
2606}
2607
6b96018b 2608static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2609 unsigned int cmd, void __user *up)
7a229387 2610{
7a229387
AB
2611 mm_segment_t old_fs = get_fs();
2612 struct timespec kts;
2613 int err;
2614
2615 set_fs(KERNEL_DS);
6b96018b 2616 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2617 set_fs(old_fs);
644595f8 2618 if (!err)
ed6fe9d6 2619 err = compat_put_timespec(&kts, up);
644595f8 2620
7a229387
AB
2621 return err;
2622}
2623
6b96018b 2624static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2625{
2626 struct ifreq __user *uifr;
2627 int err;
2628
2629 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2630 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2631 return -EFAULT;
2632
6b96018b 2633 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2634 if (err)
2635 return err;
2636
6b96018b 2637 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2638 return -EFAULT;
2639
2640 return 0;
2641}
2642
6b96018b 2643static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2644{
6b96018b 2645 struct compat_ifconf ifc32;
7a229387
AB
2646 struct ifconf ifc;
2647 struct ifconf __user *uifc;
6b96018b 2648 struct compat_ifreq __user *ifr32;
7a229387
AB
2649 struct ifreq __user *ifr;
2650 unsigned int i, j;
2651 int err;
2652
6b96018b 2653 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2654 return -EFAULT;
2655
43da5f2e 2656 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2657 if (ifc32.ifcbuf == 0) {
2658 ifc32.ifc_len = 0;
2659 ifc.ifc_len = 0;
2660 ifc.ifc_req = NULL;
2661 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2662 } else {
c6d409cf
ED
2663 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2664 sizeof(struct ifreq);
7a229387
AB
2665 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2666 ifc.ifc_len = len;
2667 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2668 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2669 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2670 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2671 return -EFAULT;
2672 ifr++;
2673 ifr32++;
2674 }
2675 }
2676 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2677 return -EFAULT;
2678
6b96018b 2679 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2680 if (err)
2681 return err;
2682
2683 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2684 return -EFAULT;
2685
2686 ifr = ifc.ifc_req;
2687 ifr32 = compat_ptr(ifc32.ifcbuf);
2688 for (i = 0, j = 0;
c6d409cf
ED
2689 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2690 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2691 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2692 return -EFAULT;
2693 ifr32++;
2694 ifr++;
2695 }
2696
2697 if (ifc32.ifcbuf == 0) {
2698 /* Translate from 64-bit structure multiple to
2699 * a 32-bit one.
2700 */
2701 i = ifc.ifc_len;
6b96018b 2702 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2703 ifc32.ifc_len = i;
2704 } else {
2705 ifc32.ifc_len = i;
2706 }
6b96018b 2707 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2708 return -EFAULT;
2709
2710 return 0;
2711}
2712
6b96018b 2713static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2714{
3a7da39d
BH
2715 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2716 bool convert_in = false, convert_out = false;
2717 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2718 struct ethtool_rxnfc __user *rxnfc;
7a229387 2719 struct ifreq __user *ifr;
3a7da39d
BH
2720 u32 rule_cnt = 0, actual_rule_cnt;
2721 u32 ethcmd;
7a229387 2722 u32 data;
3a7da39d 2723 int ret;
7a229387 2724
3a7da39d
BH
2725 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2726 return -EFAULT;
7a229387 2727
3a7da39d
BH
2728 compat_rxnfc = compat_ptr(data);
2729
2730 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2731 return -EFAULT;
2732
3a7da39d
BH
2733 /* Most ethtool structures are defined without padding.
2734 * Unfortunately struct ethtool_rxnfc is an exception.
2735 */
2736 switch (ethcmd) {
2737 default:
2738 break;
2739 case ETHTOOL_GRXCLSRLALL:
2740 /* Buffer size is variable */
2741 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2742 return -EFAULT;
2743 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2744 return -ENOMEM;
2745 buf_size += rule_cnt * sizeof(u32);
2746 /* fall through */
2747 case ETHTOOL_GRXRINGS:
2748 case ETHTOOL_GRXCLSRLCNT:
2749 case ETHTOOL_GRXCLSRULE:
55664f32 2750 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2751 convert_out = true;
2752 /* fall through */
2753 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2754 buf_size += sizeof(struct ethtool_rxnfc);
2755 convert_in = true;
2756 break;
2757 }
2758
2759 ifr = compat_alloc_user_space(buf_size);
954b1244 2760 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2761
2762 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2763 return -EFAULT;
2764
3a7da39d
BH
2765 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2766 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2767 return -EFAULT;
2768
3a7da39d 2769 if (convert_in) {
127fe533 2770 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2771 * fs.ring_cookie and at the end of fs, but nowhere else.
2772 */
127fe533
AD
2773 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2774 sizeof(compat_rxnfc->fs.m_ext) !=
2775 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2776 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2777 BUILD_BUG_ON(
2778 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2779 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2780 offsetof(struct ethtool_rxnfc, fs.location) -
2781 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2782
2783 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2784 (void __user *)(&rxnfc->fs.m_ext + 1) -
2785 (void __user *)rxnfc) ||
3a7da39d
BH
2786 copy_in_user(&rxnfc->fs.ring_cookie,
2787 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2788 (void __user *)(&rxnfc->fs.location + 1) -
2789 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2790 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2791 sizeof(rxnfc->rule_cnt)))
2792 return -EFAULT;
2793 }
2794
2795 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2796 if (ret)
2797 return ret;
2798
2799 if (convert_out) {
2800 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2801 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2802 (const void __user *)rxnfc) ||
3a7da39d
BH
2803 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2804 &rxnfc->fs.ring_cookie,
954b1244
SH
2805 (const void __user *)(&rxnfc->fs.location + 1) -
2806 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2807 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2808 sizeof(rxnfc->rule_cnt)))
2809 return -EFAULT;
2810
2811 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2812 /* As an optimisation, we only copy the actual
2813 * number of rules that the underlying
2814 * function returned. Since Mallory might
2815 * change the rule count in user memory, we
2816 * check that it is less than the rule count
2817 * originally given (as the user buffer size),
2818 * which has been range-checked.
2819 */
2820 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2821 return -EFAULT;
2822 if (actual_rule_cnt < rule_cnt)
2823 rule_cnt = actual_rule_cnt;
2824 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2825 &rxnfc->rule_locs[0],
2826 rule_cnt * sizeof(u32)))
2827 return -EFAULT;
2828 }
2829 }
2830
2831 return 0;
7a229387
AB
2832}
2833
7a50a240
AB
2834static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2835{
2836 void __user *uptr;
2837 compat_uptr_t uptr32;
2838 struct ifreq __user *uifr;
2839
c6d409cf 2840 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2841 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2842 return -EFAULT;
2843
2844 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2845 return -EFAULT;
2846
2847 uptr = compat_ptr(uptr32);
2848
2849 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2850 return -EFAULT;
2851
2852 return dev_ioctl(net, SIOCWANDEV, uifr);
2853}
2854
6b96018b
AB
2855static int bond_ioctl(struct net *net, unsigned int cmd,
2856 struct compat_ifreq __user *ifr32)
7a229387
AB
2857{
2858 struct ifreq kifr;
7a229387
AB
2859 mm_segment_t old_fs;
2860 int err;
7a229387
AB
2861
2862 switch (cmd) {
2863 case SIOCBONDENSLAVE:
2864 case SIOCBONDRELEASE:
2865 case SIOCBONDSETHWADDR:
2866 case SIOCBONDCHANGEACTIVE:
6b96018b 2867 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2868 return -EFAULT;
2869
2870 old_fs = get_fs();
c6d409cf 2871 set_fs(KERNEL_DS);
c3f52ae6 2872 err = dev_ioctl(net, cmd,
2873 (struct ifreq __user __force *) &kifr);
c6d409cf 2874 set_fs(old_fs);
7a229387
AB
2875
2876 return err;
7a229387 2877 default:
07d106d0 2878 return -ENOIOCTLCMD;
ccbd6a5a 2879 }
7a229387
AB
2880}
2881
590d4693
BH
2882/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2883static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2884 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2885{
2886 struct ifreq __user *u_ifreq64;
7a229387
AB
2887 char tmp_buf[IFNAMSIZ];
2888 void __user *data64;
2889 u32 data32;
2890
2891 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2892 IFNAMSIZ))
2893 return -EFAULT;
417c3522 2894 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2895 return -EFAULT;
2896 data64 = compat_ptr(data32);
2897
2898 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2899
7a229387
AB
2900 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2901 IFNAMSIZ))
2902 return -EFAULT;
417c3522 2903 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2904 return -EFAULT;
2905
6b96018b 2906 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2907}
2908
6b96018b
AB
2909static int dev_ifsioc(struct net *net, struct socket *sock,
2910 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2911{
a2116ed2 2912 struct ifreq __user *uifr;
7a229387
AB
2913 int err;
2914
a2116ed2
AB
2915 uifr = compat_alloc_user_space(sizeof(*uifr));
2916 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2917 return -EFAULT;
2918
2919 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2920
7a229387
AB
2921 if (!err) {
2922 switch (cmd) {
2923 case SIOCGIFFLAGS:
2924 case SIOCGIFMETRIC:
2925 case SIOCGIFMTU:
2926 case SIOCGIFMEM:
2927 case SIOCGIFHWADDR:
2928 case SIOCGIFINDEX:
2929 case SIOCGIFADDR:
2930 case SIOCGIFBRDADDR:
2931 case SIOCGIFDSTADDR:
2932 case SIOCGIFNETMASK:
fab2532b 2933 case SIOCGIFPFLAGS:
7a229387 2934 case SIOCGIFTXQLEN:
fab2532b
AB
2935 case SIOCGMIIPHY:
2936 case SIOCGMIIREG:
a2116ed2 2937 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2938 err = -EFAULT;
2939 break;
2940 }
2941 }
2942 return err;
2943}
2944
a2116ed2
AB
2945static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2946 struct compat_ifreq __user *uifr32)
2947{
2948 struct ifreq ifr;
2949 struct compat_ifmap __user *uifmap32;
2950 mm_segment_t old_fs;
2951 int err;
2952
2953 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2954 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2955 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2956 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2957 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2958 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2959 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2960 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2961 if (err)
2962 return -EFAULT;
2963
2964 old_fs = get_fs();
c6d409cf 2965 set_fs(KERNEL_DS);
c3f52ae6 2966 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2967 set_fs(old_fs);
a2116ed2
AB
2968
2969 if (cmd == SIOCGIFMAP && !err) {
2970 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2971 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2972 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2973 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2974 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2975 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2976 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2977 if (err)
2978 err = -EFAULT;
2979 }
2980 return err;
2981}
2982
7a229387 2983struct rtentry32 {
c6d409cf 2984 u32 rt_pad1;
7a229387
AB
2985 struct sockaddr rt_dst; /* target address */
2986 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2987 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2988 unsigned short rt_flags;
2989 short rt_pad2;
2990 u32 rt_pad3;
2991 unsigned char rt_tos;
2992 unsigned char rt_class;
2993 short rt_pad4;
2994 short rt_metric; /* +1 for binary compatibility! */
7a229387 2995 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2996 u32 rt_mtu; /* per route MTU/Window */
2997 u32 rt_window; /* Window clamping */
7a229387
AB
2998 unsigned short rt_irtt; /* Initial RTT */
2999};
3000
3001struct in6_rtmsg32 {
3002 struct in6_addr rtmsg_dst;
3003 struct in6_addr rtmsg_src;
3004 struct in6_addr rtmsg_gateway;
3005 u32 rtmsg_type;
3006 u16 rtmsg_dst_len;
3007 u16 rtmsg_src_len;
3008 u32 rtmsg_metric;
3009 u32 rtmsg_info;
3010 u32 rtmsg_flags;
3011 s32 rtmsg_ifindex;
3012};
3013
6b96018b
AB
3014static int routing_ioctl(struct net *net, struct socket *sock,
3015 unsigned int cmd, void __user *argp)
7a229387
AB
3016{
3017 int ret;
3018 void *r = NULL;
3019 struct in6_rtmsg r6;
3020 struct rtentry r4;
3021 char devname[16];
3022 u32 rtdev;
3023 mm_segment_t old_fs = get_fs();
3024
6b96018b
AB
3025 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3026 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3027 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3028 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3029 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3030 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3031 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3032 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3033 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3034 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3035 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3036
3037 r = (void *) &r6;
3038 } else { /* ipv4 */
6b96018b 3039 struct rtentry32 __user *ur4 = argp;
c6d409cf 3040 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3041 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3042 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3043 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3044 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3045 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3046 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3047 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3048 if (rtdev) {
c6d409cf 3049 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3050 r4.rt_dev = (char __user __force *)devname;
3051 devname[15] = 0;
7a229387
AB
3052 } else
3053 r4.rt_dev = NULL;
3054
3055 r = (void *) &r4;
3056 }
3057
3058 if (ret) {
3059 ret = -EFAULT;
3060 goto out;
3061 }
3062
c6d409cf 3063 set_fs(KERNEL_DS);
6b96018b 3064 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3065 set_fs(old_fs);
7a229387
AB
3066
3067out:
7a229387
AB
3068 return ret;
3069}
3070
3071/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3072 * for some operations; this forces use of the newer bridge-utils that
25985edc 3073 * use compatible ioctls
7a229387 3074 */
6b96018b 3075static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3076{
6b96018b 3077 compat_ulong_t tmp;
7a229387 3078
6b96018b 3079 if (get_user(tmp, argp))
7a229387
AB
3080 return -EFAULT;
3081 if (tmp == BRCTL_GET_VERSION)
3082 return BRCTL_VERSION + 1;
3083 return -EINVAL;
3084}
3085
6b96018b
AB
3086static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3087 unsigned int cmd, unsigned long arg)
3088{
3089 void __user *argp = compat_ptr(arg);
3090 struct sock *sk = sock->sk;
3091 struct net *net = sock_net(sk);
7a229387 3092
6b96018b 3093 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3094 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3095
3096 switch (cmd) {
3097 case SIOCSIFBR:
3098 case SIOCGIFBR:
3099 return old_bridge_ioctl(argp);
3100 case SIOCGIFNAME:
3101 return dev_ifname32(net, argp);
3102 case SIOCGIFCONF:
3103 return dev_ifconf(net, argp);
3104 case SIOCETHTOOL:
3105 return ethtool_ioctl(net, argp);
7a50a240
AB
3106 case SIOCWANDEV:
3107 return compat_siocwandev(net, argp);
a2116ed2
AB
3108 case SIOCGIFMAP:
3109 case SIOCSIFMAP:
3110 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3111 case SIOCBONDENSLAVE:
3112 case SIOCBONDRELEASE:
3113 case SIOCBONDSETHWADDR:
6b96018b
AB
3114 case SIOCBONDCHANGEACTIVE:
3115 return bond_ioctl(net, cmd, argp);
3116 case SIOCADDRT:
3117 case SIOCDELRT:
3118 return routing_ioctl(net, sock, cmd, argp);
3119 case SIOCGSTAMP:
3120 return do_siocgstamp(net, sock, cmd, argp);
3121 case SIOCGSTAMPNS:
3122 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3123 case SIOCBONDSLAVEINFOQUERY:
3124 case SIOCBONDINFOQUERY:
a2116ed2 3125 case SIOCSHWTSTAMP:
fd468c74 3126 case SIOCGHWTSTAMP:
590d4693 3127 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3128
3129 case FIOSETOWN:
3130 case SIOCSPGRP:
3131 case FIOGETOWN:
3132 case SIOCGPGRP:
3133 case SIOCBRADDBR:
3134 case SIOCBRDELBR:
3135 case SIOCGIFVLAN:
3136 case SIOCSIFVLAN:
3137 case SIOCADDDLCI:
3138 case SIOCDELDLCI:
c62cce2c 3139 case SIOCGSKNS:
6b96018b
AB
3140 return sock_ioctl(file, cmd, arg);
3141
3142 case SIOCGIFFLAGS:
3143 case SIOCSIFFLAGS:
3144 case SIOCGIFMETRIC:
3145 case SIOCSIFMETRIC:
3146 case SIOCGIFMTU:
3147 case SIOCSIFMTU:
3148 case SIOCGIFMEM:
3149 case SIOCSIFMEM:
3150 case SIOCGIFHWADDR:
3151 case SIOCSIFHWADDR:
3152 case SIOCADDMULTI:
3153 case SIOCDELMULTI:
3154 case SIOCGIFINDEX:
6b96018b
AB
3155 case SIOCGIFADDR:
3156 case SIOCSIFADDR:
3157 case SIOCSIFHWBROADCAST:
6b96018b 3158 case SIOCDIFADDR:
6b96018b
AB
3159 case SIOCGIFBRDADDR:
3160 case SIOCSIFBRDADDR:
3161 case SIOCGIFDSTADDR:
3162 case SIOCSIFDSTADDR:
3163 case SIOCGIFNETMASK:
3164 case SIOCSIFNETMASK:
3165 case SIOCSIFPFLAGS:
3166 case SIOCGIFPFLAGS:
3167 case SIOCGIFTXQLEN:
3168 case SIOCSIFTXQLEN:
3169 case SIOCBRADDIF:
3170 case SIOCBRDELIF:
9177efd3
AB
3171 case SIOCSIFNAME:
3172 case SIOCGMIIPHY:
3173 case SIOCGMIIREG:
3174 case SIOCSMIIREG:
6b96018b 3175 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3176
6b96018b
AB
3177 case SIOCSARP:
3178 case SIOCGARP:
3179 case SIOCDARP:
6b96018b 3180 case SIOCATMARK:
9177efd3
AB
3181 return sock_do_ioctl(net, sock, cmd, arg);
3182 }
3183
6b96018b
AB
3184 return -ENOIOCTLCMD;
3185}
7a229387 3186
95c96174 3187static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3188 unsigned long arg)
89bbfc95
SP
3189{
3190 struct socket *sock = file->private_data;
3191 int ret = -ENOIOCTLCMD;
87de87d5
DM
3192 struct sock *sk;
3193 struct net *net;
3194
3195 sk = sock->sk;
3196 net = sock_net(sk);
89bbfc95
SP
3197
3198 if (sock->ops->compat_ioctl)
3199 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3200
87de87d5
DM
3201 if (ret == -ENOIOCTLCMD &&
3202 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3203 ret = compat_wext_handle_ioctl(net, cmd, arg);
3204
6b96018b
AB
3205 if (ret == -ENOIOCTLCMD)
3206 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3207
89bbfc95
SP
3208 return ret;
3209}
3210#endif
3211
ac5a488e
SS
3212int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3213{
3214 return sock->ops->bind(sock, addr, addrlen);
3215}
c6d409cf 3216EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3217
3218int kernel_listen(struct socket *sock, int backlog)
3219{
3220 return sock->ops->listen(sock, backlog);
3221}
c6d409cf 3222EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3223
3224int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3225{
3226 struct sock *sk = sock->sk;
3227 int err;
3228
3229 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3230 newsock);
3231 if (err < 0)
3232 goto done;
3233
3234 err = sock->ops->accept(sock, *newsock, flags);
3235 if (err < 0) {
3236 sock_release(*newsock);
fa8705b0 3237 *newsock = NULL;
ac5a488e
SS
3238 goto done;
3239 }
3240
3241 (*newsock)->ops = sock->ops;
1b08534e 3242 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3243
3244done:
3245 return err;
3246}
c6d409cf 3247EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3248
3249int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3250 int flags)
ac5a488e
SS
3251{
3252 return sock->ops->connect(sock, addr, addrlen, flags);
3253}
c6d409cf 3254EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3255
3256int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3257 int *addrlen)
3258{
3259 return sock->ops->getname(sock, addr, addrlen, 0);
3260}
c6d409cf 3261EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3262
3263int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3264 int *addrlen)
3265{
3266 return sock->ops->getname(sock, addr, addrlen, 1);
3267}
c6d409cf 3268EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3269
3270int kernel_getsockopt(struct socket *sock, int level, int optname,
3271 char *optval, int *optlen)
3272{
3273 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3274 char __user *uoptval;
3275 int __user *uoptlen;
ac5a488e
SS
3276 int err;
3277
fb8621bb
NK
3278 uoptval = (char __user __force *) optval;
3279 uoptlen = (int __user __force *) optlen;
3280
ac5a488e
SS
3281 set_fs(KERNEL_DS);
3282 if (level == SOL_SOCKET)
fb8621bb 3283 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3284 else
fb8621bb
NK
3285 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3286 uoptlen);
ac5a488e
SS
3287 set_fs(oldfs);
3288 return err;
3289}
c6d409cf 3290EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3291
3292int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3293 char *optval, unsigned int optlen)
ac5a488e
SS
3294{
3295 mm_segment_t oldfs = get_fs();
fb8621bb 3296 char __user *uoptval;
ac5a488e
SS
3297 int err;
3298
fb8621bb
NK
3299 uoptval = (char __user __force *) optval;
3300
ac5a488e
SS
3301 set_fs(KERNEL_DS);
3302 if (level == SOL_SOCKET)
fb8621bb 3303 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3304 else
fb8621bb 3305 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3306 optlen);
3307 set_fs(oldfs);
3308 return err;
3309}
c6d409cf 3310EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3311
3312int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3313 size_t size, int flags)
3314{
3315 if (sock->ops->sendpage)
3316 return sock->ops->sendpage(sock, page, offset, size, flags);
3317
3318 return sock_no_sendpage(sock, page, offset, size, flags);
3319}
c6d409cf 3320EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3321
3322int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3323{
3324 mm_segment_t oldfs = get_fs();
3325 int err;
3326
3327 set_fs(KERNEL_DS);
3328 err = sock->ops->ioctl(sock, cmd, arg);
3329 set_fs(oldfs);
3330
3331 return err;
3332}
c6d409cf 3333EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3334
91cf45f0
TM
3335int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3336{
3337 return sock->ops->shutdown(sock, how);
3338}
91cf45f0 3339EXPORT_SYMBOL(kernel_sock_shutdown);