]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/socket.c
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
[mirror_ubuntu-artful-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
bba0bd31
AG
323static int sockfs_xattr_get(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, void *value, size_t size)
326{
327 if (value) {
328 if (dentry->d_name.len + 1 > size)
329 return -ERANGE;
330 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
331 }
332 return dentry->d_name.len + 1;
333}
334
335#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
336#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
337#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
338
339static const struct xattr_handler sockfs_xattr_handler = {
340 .name = XATTR_NAME_SOCKPROTONAME,
341 .get = sockfs_xattr_get,
342};
343
4a590153
AG
344static int sockfs_security_xattr_set(const struct xattr_handler *handler,
345 struct dentry *dentry, struct inode *inode,
346 const char *suffix, const void *value,
347 size_t size, int flags)
348{
349 /* Handled by LSM. */
350 return -EAGAIN;
351}
352
353static const struct xattr_handler sockfs_security_xattr_handler = {
354 .prefix = XATTR_SECURITY_PREFIX,
355 .set = sockfs_security_xattr_set,
356};
357
bba0bd31
AG
358static const struct xattr_handler *sockfs_xattr_handlers[] = {
359 &sockfs_xattr_handler,
4a590153 360 &sockfs_security_xattr_handler,
bba0bd31
AG
361 NULL
362};
363
c74a1cbb
AV
364static struct dentry *sockfs_mount(struct file_system_type *fs_type,
365 int flags, const char *dev_name, void *data)
366{
bba0bd31
AG
367 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
368 sockfs_xattr_handlers,
369 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
370}
371
372static struct vfsmount *sock_mnt __read_mostly;
373
374static struct file_system_type sock_fs_type = {
375 .name = "sockfs",
376 .mount = sockfs_mount,
377 .kill_sb = kill_anon_super,
378};
379
1da177e4
LT
380/*
381 * Obtains the first available file descriptor and sets it up for use.
382 *
39d8c1b6
DM
383 * These functions create file structures and maps them to fd space
384 * of the current process. On success it returns file descriptor
1da177e4
LT
385 * and file struct implicitly stored in sock->file.
386 * Note that another thread may close file descriptor before we return
387 * from this function. We use the fact that now we do not refer
388 * to socket after mapping. If one day we will need it, this
389 * function will increment ref. count on file by 1.
390 *
391 * In any case returned fd MAY BE not valid!
392 * This race condition is unavoidable
393 * with shared fd spaces, we cannot solve it inside kernel,
394 * but we take care of internal coherence yet.
395 */
396
aab174f0 397struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 398{
7cbe66b6 399 struct qstr name = { .name = "" };
2c48b9c4 400 struct path path;
7cbe66b6 401 struct file *file;
1da177e4 402
600e1779
MY
403 if (dname) {
404 name.name = dname;
405 name.len = strlen(name.name);
406 } else if (sock->sk) {
407 name.name = sock->sk->sk_prot_creator->name;
408 name.len = strlen(name.name);
409 }
4b936885 410 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
411 if (unlikely(!path.dentry))
412 return ERR_PTR(-ENOMEM);
2c48b9c4 413 path.mnt = mntget(sock_mnt);
39d8c1b6 414
2c48b9c4 415 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 416
2c48b9c4 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 418 &socket_file_ops);
b5ffe634 419 if (IS_ERR(file)) {
cc3808f8 420 /* drop dentry, keep inode */
c5ef6035 421 ihold(d_inode(path.dentry));
2c48b9c4 422 path_put(&path);
39b65252 423 return file;
cc3808f8
AV
424 }
425
426 sock->file = file;
77d27200 427 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 428 file->private_data = sock;
28407630 429 return file;
39d8c1b6 430}
56b31d1c 431EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 432
56b31d1c 433static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
434{
435 struct file *newfile;
28407630
AV
436 int fd = get_unused_fd_flags(flags);
437 if (unlikely(fd < 0))
438 return fd;
39d8c1b6 439
aab174f0 440 newfile = sock_alloc_file(sock, flags, NULL);
28407630 441 if (likely(!IS_ERR(newfile))) {
39d8c1b6 442 fd_install(fd, newfile);
28407630
AV
443 return fd;
444 }
7cbe66b6 445
28407630
AV
446 put_unused_fd(fd);
447 return PTR_ERR(newfile);
1da177e4
LT
448}
449
406a3c63 450struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 451{
6cb153ca
BL
452 if (file->f_op == &socket_file_ops)
453 return file->private_data; /* set in sock_map_fd */
454
23bb80d2
ED
455 *err = -ENOTSOCK;
456 return NULL;
6cb153ca 457}
406a3c63 458EXPORT_SYMBOL(sock_from_file);
6cb153ca 459
1da177e4 460/**
c6d409cf 461 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
462 * @fd: file handle
463 * @err: pointer to an error code return
464 *
465 * The file handle passed in is locked and the socket it is bound
466 * too is returned. If an error occurs the err pointer is overwritten
467 * with a negative errno code and NULL is returned. The function checks
468 * for both invalid handles and passing a handle which is not a socket.
469 *
470 * On a success the socket object pointer is returned.
471 */
472
473struct socket *sockfd_lookup(int fd, int *err)
474{
475 struct file *file;
1da177e4
LT
476 struct socket *sock;
477
89bddce5
SH
478 file = fget(fd);
479 if (!file) {
1da177e4
LT
480 *err = -EBADF;
481 return NULL;
482 }
89bddce5 483
6cb153ca
BL
484 sock = sock_from_file(file, err);
485 if (!sock)
1da177e4 486 fput(file);
6cb153ca
BL
487 return sock;
488}
c6d409cf 489EXPORT_SYMBOL(sockfd_lookup);
1da177e4 490
6cb153ca
BL
491static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
492{
00e188ef 493 struct fd f = fdget(fd);
6cb153ca
BL
494 struct socket *sock;
495
3672558c 496 *err = -EBADF;
00e188ef
AV
497 if (f.file) {
498 sock = sock_from_file(f.file, err);
499 if (likely(sock)) {
500 *fput_needed = f.flags;
6cb153ca 501 return sock;
00e188ef
AV
502 }
503 fdput(f);
1da177e4 504 }
6cb153ca 505 return NULL;
1da177e4
LT
506}
507
600e1779
MY
508static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
509 size_t size)
510{
511 ssize_t len;
512 ssize_t used = 0;
513
c5ef6035 514 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
515 if (len < 0)
516 return len;
517 used += len;
518 if (buffer) {
519 if (size < used)
520 return -ERANGE;
521 buffer += len;
522 }
523
524 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
525 used += len;
526 if (buffer) {
527 if (size < used)
528 return -ERANGE;
529 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
530 buffer += len;
531 }
532
533 return used;
534}
535
86741ec2
LC
536int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
537{
538 int err = simple_setattr(dentry, iattr);
539
540 if (!err) {
541 struct socket *sock = SOCKET_I(d_inode(dentry));
542
543 sock->sk->sk_uid = iattr->ia_uid;
544 }
545
546 return err;
547}
548
600e1779 549static const struct inode_operations sockfs_inode_ops = {
600e1779 550 .listxattr = sockfs_listxattr,
86741ec2 551 .setattr = sockfs_setattr,
600e1779
MY
552};
553
1da177e4
LT
554/**
555 * sock_alloc - allocate a socket
89bddce5 556 *
1da177e4
LT
557 * Allocate a new inode and socket object. The two are bound together
558 * and initialised. The socket is then returned. If we are out of inodes
559 * NULL is returned.
560 */
561
f4a00aac 562struct socket *sock_alloc(void)
1da177e4 563{
89bddce5
SH
564 struct inode *inode;
565 struct socket *sock;
1da177e4 566
a209dfc7 567 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
568 if (!inode)
569 return NULL;
570
571 sock = SOCKET_I(inode);
572
29a020d3 573 kmemcheck_annotate_bitfield(sock, type);
85fe4025 574 inode->i_ino = get_next_ino();
89bddce5 575 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
576 inode->i_uid = current_fsuid();
577 inode->i_gid = current_fsgid();
600e1779 578 inode->i_op = &sockfs_inode_ops;
1da177e4 579
19e8d69c 580 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
581 return sock;
582}
f4a00aac 583EXPORT_SYMBOL(sock_alloc);
1da177e4 584
1da177e4
LT
585/**
586 * sock_release - close a socket
587 * @sock: socket to close
588 *
589 * The socket is released from the protocol stack if it has a release
590 * callback, and the inode is then released if the socket is bound to
89bddce5 591 * an inode not a file.
1da177e4 592 */
89bddce5 593
1da177e4
LT
594void sock_release(struct socket *sock)
595{
596 if (sock->ops) {
597 struct module *owner = sock->ops->owner;
598
599 sock->ops->release(sock);
600 sock->ops = NULL;
601 module_put(owner);
602 }
603
eaefd110 604 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 605 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 606
19e8d69c 607 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
608 if (!sock->file) {
609 iput(SOCK_INODE(sock));
610 return;
611 }
89bddce5 612 sock->file = NULL;
1da177e4 613}
c6d409cf 614EXPORT_SYMBOL(sock_release);
1da177e4 615
c14ac945 616void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 617{
140c55d4
ED
618 u8 flags = *tx_flags;
619
c14ac945 620 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
621 flags |= SKBTX_HW_TSTAMP;
622
c14ac945 623 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
624 flags |= SKBTX_SW_TSTAMP;
625
c14ac945 626 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
627 flags |= SKBTX_SCHED_TSTAMP;
628
140c55d4 629 *tx_flags = flags;
20d49473 630}
67cc0d40 631EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 632
d8725c86 633static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 634{
01e97e65 635 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
636 BUG_ON(ret == -EIOCBQUEUED);
637 return ret;
1da177e4
LT
638}
639
/*
 * Send @msg on @sock after checking with security_socket_sendmsg().
 * A non-zero result from the security hook is returned as-is and the
 * protocol is not called.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
648
649int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
650 struct kvec *vec, size_t num, size_t size)
651{
6aa24814 652 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 653 return sock_sendmsg(sock, msg);
1da177e4 654}
c6d409cf 655EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 656
92f37fd2
ED
657/*
658 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
659 */
660void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
661 struct sk_buff *skb)
662{
20d49473 663 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 664 struct scm_timestamping tss;
20d49473
PO
665 int empty = 1;
666 struct skb_shared_hwtstamps *shhwtstamps =
667 skb_hwtstamps(skb);
668
669 /* Race occurred between timestamp enabling and packet
670 receiving. Fill in the current time for now. */
671 if (need_software_tstamp && skb->tstamp.tv64 == 0)
672 __net_timestamp(skb);
673
674 if (need_software_tstamp) {
675 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
676 struct timeval tv;
677 skb_get_timestamp(skb, &tv);
678 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
679 sizeof(tv), &tv);
680 } else {
f24b9be5
WB
681 struct timespec ts;
682 skb_get_timestampns(skb, &ts);
20d49473 683 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 684 sizeof(ts), &ts);
20d49473
PO
685 }
686 }
687
f24b9be5 688 memset(&tss, 0, sizeof(tss));
c199105d 689 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 690 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 691 empty = 0;
4d276eb6 692 if (shhwtstamps &&
b9f40e21 693 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 694 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 695 empty = 0;
1c885808 696 if (!empty) {
20d49473 697 put_cmsg(msg, SOL_SOCKET,
f24b9be5 698 SCM_TIMESTAMPING, sizeof(tss), &tss);
1c885808
FY
699
700 if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
701 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
702 skb->len, skb->data);
703 }
92f37fd2 704}
7c81fd8b
ACM
705EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
706
6e3e939f
JB
707void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
708 struct sk_buff *skb)
709{
710 int ack;
711
712 if (!sock_flag(sk, SOCK_WIFI_STATUS))
713 return;
714 if (!skb->wifi_acked_valid)
715 return;
716
717 ack = skb->wifi_acked;
718
719 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
720}
721EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
722
11165f14 723static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
724 struct sk_buff *skb)
3b885787 725{
744d5a3e 726 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 727 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 728 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
729}
730
/*
 * Add both the timestamp and the drop-count control messages for @skb
 * to @msg, timestamp first.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 738
1b784140 739static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 740 int flags)
1da177e4 741{
2da62906 742 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
743}
744
/*
 * Receive into @msg from @sock after checking with
 * security_socket_recvmsg(). A non-zero result from the security hook
 * is returned as-is and the protocol is not called.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
EXPORT_SYMBOL(sock_recvmsg);
1da177e4 752
c1249c0a
ML
753/**
754 * kernel_recvmsg - Receive a message from a socket (kernel space)
755 * @sock: The socket to receive the message from
756 * @msg: Received message
757 * @vec: Input s/g array for message data
758 * @num: Size of input s/g array
759 * @size: Number of bytes to read
760 * @flags: Message flags (MSG_DONTWAIT, etc...)
761 *
762 * On return the msg structure contains the scatter/gather array passed in the
763 * vec argument. The array is modified so that it consists of the unfilled
764 * portion of the original array.
765 *
766 * The returned value is the total number of bytes received, or an error.
767 */
89bddce5
SH
768int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
769 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
770{
771 mm_segment_t oldfs = get_fs();
772 int result;
773
6aa24814 774 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 775 set_fs(KERNEL_DS);
2da62906 776 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
777 set_fs(oldfs);
778 return result;
779}
c6d409cf 780EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 781
ce1d4d3e
CH
782static ssize_t sock_sendpage(struct file *file, struct page *page,
783 int offset, size_t size, loff_t *ppos, int more)
1da177e4 784{
1da177e4
LT
785 struct socket *sock;
786 int flags;
787
ce1d4d3e
CH
788 sock = file->private_data;
789
35f9c09f
ED
790 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
791 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
792 flags |= more;
ce1d4d3e 793
e6949583 794 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 795}
1da177e4 796
9c55e01c 797static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 798 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
799 unsigned int flags)
800{
801 struct socket *sock = file->private_data;
802
997b37da
RDC
803 if (unlikely(!sock->ops->splice_read))
804 return -EINVAL;
805
9c55e01c
JA
806 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
807}
808
8ae5e030 809static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 810{
6d652330
AV
811 struct file *file = iocb->ki_filp;
812 struct socket *sock = file->private_data;
0345f931 813 struct msghdr msg = {.msg_iter = *to,
814 .msg_iocb = iocb};
8ae5e030 815 ssize_t res;
ce1d4d3e 816
8ae5e030
AV
817 if (file->f_flags & O_NONBLOCK)
818 msg.msg_flags = MSG_DONTWAIT;
819
820 if (iocb->ki_pos != 0)
1da177e4 821 return -ESPIPE;
027445c3 822
66ee59af 823 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
824 return 0;
825
2da62906 826 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
827 *to = msg.msg_iter;
828 return res;
1da177e4
LT
829}
830
8ae5e030 831static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 832{
6d652330
AV
833 struct file *file = iocb->ki_filp;
834 struct socket *sock = file->private_data;
0345f931 835 struct msghdr msg = {.msg_iter = *from,
836 .msg_iocb = iocb};
8ae5e030 837 ssize_t res;
1da177e4 838
8ae5e030 839 if (iocb->ki_pos != 0)
ce1d4d3e 840 return -ESPIPE;
027445c3 841
8ae5e030
AV
842 if (file->f_flags & O_NONBLOCK)
843 msg.msg_flags = MSG_DONTWAIT;
844
6d652330
AV
845 if (sock->type == SOCK_SEQPACKET)
846 msg.msg_flags |= MSG_EOR;
847
d8725c86 848 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
849 *from = msg.msg_iter;
850 return res;
1da177e4
LT
851}
852
1da177e4
LT
853/*
854 * Atomic setting of ioctl hooks to avoid race
855 * with module unload.
856 */
857
4a3e2f71 858static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 859static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 860
881d966b 861void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 862{
4a3e2f71 863 mutex_lock(&br_ioctl_mutex);
1da177e4 864 br_ioctl_hook = hook;
4a3e2f71 865 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
866}
867EXPORT_SYMBOL(brioctl_set);
868
4a3e2f71 869static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 870static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 871
881d966b 872void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 873{
4a3e2f71 874 mutex_lock(&vlan_ioctl_mutex);
1da177e4 875 vlan_ioctl_hook = hook;
4a3e2f71 876 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
877}
878EXPORT_SYMBOL(vlan_ioctl_set);
879
4a3e2f71 880static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 881static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 882
89bddce5 883void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 884{
4a3e2f71 885 mutex_lock(&dlci_ioctl_mutex);
1da177e4 886 dlci_ioctl_hook = hook;
4a3e2f71 887 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
888}
889EXPORT_SYMBOL(dlci_ioctl_set);
890
6b96018b
AB
891static long sock_do_ioctl(struct net *net, struct socket *sock,
892 unsigned int cmd, unsigned long arg)
893{
894 int err;
895 void __user *argp = (void __user *)arg;
896
897 err = sock->ops->ioctl(sock, cmd, arg);
898
899 /*
900 * If this ioctl is unknown try to hand it down
901 * to the NIC driver.
902 */
903 if (err == -ENOIOCTLCMD)
904 err = dev_ioctl(net, cmd, argp);
905
906 return err;
907}
908
1da177e4
LT
909/*
910 * With an ioctl, arg may well be a user mode pointer, but we don't know
911 * what to do with it - that's up to the protocol still.
912 */
913
c62cce2c
AV
914static struct ns_common *get_net_ns(struct ns_common *ns)
915{
916 return &get_net(container_of(ns, struct net, ns))->ns;
917}
918
1da177e4
LT
919static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
920{
921 struct socket *sock;
881d966b 922 struct sock *sk;
1da177e4
LT
923 void __user *argp = (void __user *)arg;
924 int pid, err;
881d966b 925 struct net *net;
1da177e4 926
b69aee04 927 sock = file->private_data;
881d966b 928 sk = sock->sk;
3b1e0a65 929 net = sock_net(sk);
1da177e4 930 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 931 err = dev_ioctl(net, cmd, argp);
1da177e4 932 } else
3d23e349 933#ifdef CONFIG_WEXT_CORE
1da177e4 934 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 935 err = dev_ioctl(net, cmd, argp);
1da177e4 936 } else
3d23e349 937#endif
89bddce5 938 switch (cmd) {
1da177e4
LT
939 case FIOSETOWN:
940 case SIOCSPGRP:
941 err = -EFAULT;
942 if (get_user(pid, (int __user *)argp))
943 break;
e0b93edd
JL
944 f_setown(sock->file, pid, 1);
945 err = 0;
1da177e4
LT
946 break;
947 case FIOGETOWN:
948 case SIOCGPGRP:
609d7fa9 949 err = put_user(f_getown(sock->file),
89bddce5 950 (int __user *)argp);
1da177e4
LT
951 break;
952 case SIOCGIFBR:
953 case SIOCSIFBR:
954 case SIOCBRADDBR:
955 case SIOCBRDELBR:
956 err = -ENOPKG;
957 if (!br_ioctl_hook)
958 request_module("bridge");
959
4a3e2f71 960 mutex_lock(&br_ioctl_mutex);
89bddce5 961 if (br_ioctl_hook)
881d966b 962 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 963 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
964 break;
965 case SIOCGIFVLAN:
966 case SIOCSIFVLAN:
967 err = -ENOPKG;
968 if (!vlan_ioctl_hook)
969 request_module("8021q");
970
4a3e2f71 971 mutex_lock(&vlan_ioctl_mutex);
1da177e4 972 if (vlan_ioctl_hook)
881d966b 973 err = vlan_ioctl_hook(net, argp);
4a3e2f71 974 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 975 break;
1da177e4
LT
976 case SIOCADDDLCI:
977 case SIOCDELDLCI:
978 err = -ENOPKG;
979 if (!dlci_ioctl_hook)
980 request_module("dlci");
981
7512cbf6
PE
982 mutex_lock(&dlci_ioctl_mutex);
983 if (dlci_ioctl_hook)
1da177e4 984 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 985 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 986 break;
c62cce2c
AV
987 case SIOCGSKNS:
988 err = -EPERM;
989 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
990 break;
991
992 err = open_related_ns(&net->ns, get_net_ns);
993 break;
1da177e4 994 default:
6b96018b 995 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 996 break;
89bddce5 997 }
1da177e4
LT
998 return err;
999}
1000
1001int sock_create_lite(int family, int type, int protocol, struct socket **res)
1002{
1003 int err;
1004 struct socket *sock = NULL;
89bddce5 1005
1da177e4
LT
1006 err = security_socket_create(family, type, protocol, 1);
1007 if (err)
1008 goto out;
1009
1010 sock = sock_alloc();
1011 if (!sock) {
1012 err = -ENOMEM;
1013 goto out;
1014 }
1015
1da177e4 1016 sock->type = type;
7420ed23
VY
1017 err = security_socket_post_create(sock, family, type, protocol, 1);
1018 if (err)
1019 goto out_release;
1020
1da177e4
LT
1021out:
1022 *res = sock;
1023 return err;
7420ed23
VY
1024out_release:
1025 sock_release(sock);
1026 sock = NULL;
1027 goto out;
1da177e4 1028}
c6d409cf 1029EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1030
1031/* No kernel lock held - perfect */
89bddce5 1032static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1033{
cbf55001 1034 unsigned int busy_flag = 0;
1da177e4
LT
1035 struct socket *sock;
1036
1037 /*
89bddce5 1038 * We can't return errors to poll, so it's either yes or no.
1da177e4 1039 */
b69aee04 1040 sock = file->private_data;
2d48d67f 1041
cbf55001 1042 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1043 /* this socket can poll_ll so tell the system call */
cbf55001 1044 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1045
1046 /* once, only if requested by syscall */
cbf55001
ET
1047 if (wait && (wait->_key & POLL_BUSY_LOOP))
1048 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1049 }
1050
cbf55001 1051 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1052}
1053
89bddce5 1054static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1055{
b69aee04 1056 struct socket *sock = file->private_data;
1da177e4
LT
1057
1058 return sock->ops->mmap(file, sock, vma);
1059}
1060
/* release handler for socket files: release the socket behind the inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1066
1067/*
1068 * Update the socket async list
1069 *
1070 * Fasync_list locking strategy.
1071 *
1072 * 1. fasync_list is modified only under process context socket lock
1073 * i.e. under semaphore.
1074 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1075 * or under socket lock
1da177e4
LT
1076 */
1077
1078static int sock_fasync(int fd, struct file *filp, int on)
1079{
989a2979
ED
1080 struct socket *sock = filp->private_data;
1081 struct sock *sk = sock->sk;
eaefd110 1082 struct socket_wq *wq;
1da177e4 1083
989a2979 1084 if (sk == NULL)
1da177e4 1085 return -EINVAL;
1da177e4
LT
1086
1087 lock_sock(sk);
1e1d04e6 1088 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1089 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1090
eaefd110 1091 if (!wq->fasync_list)
989a2979
ED
1092 sock_reset_flag(sk, SOCK_FASYNC);
1093 else
bcdce719 1094 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1095
989a2979 1096 release_sock(sk);
1da177e4
LT
1097 return 0;
1098}
1099
ceb5d58b 1100/* This function may be called only under rcu_lock */
1da177e4 1101
ceb5d58b 1102int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1103{
ceb5d58b 1104 if (!wq || !wq->fasync_list)
1da177e4 1105 return -1;
ceb5d58b 1106
89bddce5 1107 switch (how) {
8d8ad9d7 1108 case SOCK_WAKE_WAITD:
ceb5d58b 1109 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1110 break;
1111 goto call_kill;
8d8ad9d7 1112 case SOCK_WAKE_SPACE:
ceb5d58b 1113 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1114 break;
1115 /* fall through */
8d8ad9d7 1116 case SOCK_WAKE_IO:
89bddce5 1117call_kill:
43815482 1118 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1119 break;
8d8ad9d7 1120 case SOCK_WAKE_URG:
43815482 1121 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1122 }
ceb5d58b 1123
1da177e4
LT
1124 return 0;
1125}
c6d409cf 1126EXPORT_SYMBOL(sock_wake_async);
1da177e4 1127
721db93a 1128int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1129 struct socket **res, int kern)
1da177e4
LT
1130{
1131 int err;
1132 struct socket *sock;
55737fda 1133 const struct net_proto_family *pf;
1da177e4
LT
1134
1135 /*
89bddce5 1136 * Check protocol is in range
1da177e4
LT
1137 */
1138 if (family < 0 || family >= NPROTO)
1139 return -EAFNOSUPPORT;
1140 if (type < 0 || type >= SOCK_MAX)
1141 return -EINVAL;
1142
1143 /* Compatibility.
1144
1145 This uglymoron is moved from INET layer to here to avoid
1146 deadlock in module load.
1147 */
1148 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1149 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1150 current->comm);
1da177e4
LT
1151 family = PF_PACKET;
1152 }
1153
1154 err = security_socket_create(family, type, protocol, kern);
1155 if (err)
1156 return err;
89bddce5 1157
55737fda
SH
1158 /*
1159 * Allocate the socket and allow the family to set things up. if
1160 * the protocol is 0, the family is instructed to select an appropriate
1161 * default.
1162 */
1163 sock = sock_alloc();
1164 if (!sock) {
e87cc472 1165 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1166 return -ENFILE; /* Not exactly a match, but its the
1167 closest posix thing */
1168 }
1169
1170 sock->type = type;
1171
95a5afca 1172#ifdef CONFIG_MODULES
89bddce5
SH
1173 /* Attempt to load a protocol module if the find failed.
1174 *
1175 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1176 * requested real, full-featured networking support upon configuration.
1177 * Otherwise module support will break!
1178 */
190683a9 1179 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1180 request_module("net-pf-%d", family);
1da177e4
LT
1181#endif
1182
55737fda
SH
1183 rcu_read_lock();
1184 pf = rcu_dereference(net_families[family]);
1185 err = -EAFNOSUPPORT;
1186 if (!pf)
1187 goto out_release;
1da177e4
LT
1188
1189 /*
1190 * We will call the ->create function, that possibly is in a loadable
1191 * module, so we have to bump that loadable module refcnt first.
1192 */
55737fda 1193 if (!try_module_get(pf->owner))
1da177e4
LT
1194 goto out_release;
1195
55737fda
SH
1196 /* Now protected by module ref count */
1197 rcu_read_unlock();
1198
3f378b68 1199 err = pf->create(net, sock, protocol, kern);
55737fda 1200 if (err < 0)
1da177e4 1201 goto out_module_put;
a79af59e 1202
1da177e4
LT
1203 /*
1204 * Now to bump the refcnt of the [loadable] module that owns this
1205 * socket at sock_release time we decrement its refcnt.
1206 */
55737fda
SH
1207 if (!try_module_get(sock->ops->owner))
1208 goto out_module_busy;
1209
1da177e4
LT
1210 /*
1211 * Now that we're done with the ->create function, the [loadable]
1212 * module can have its refcnt decremented
1213 */
55737fda 1214 module_put(pf->owner);
7420ed23
VY
1215 err = security_socket_post_create(sock, family, type, protocol, kern);
1216 if (err)
3b185525 1217 goto out_sock_release;
55737fda 1218 *res = sock;
1da177e4 1219
55737fda
SH
1220 return 0;
1221
1222out_module_busy:
1223 err = -EAFNOSUPPORT;
1da177e4 1224out_module_put:
55737fda
SH
1225 sock->ops = NULL;
1226 module_put(pf->owner);
1227out_sock_release:
1da177e4 1228 sock_release(sock);
55737fda
SH
1229 return err;
1230
1231out_release:
1232 rcu_read_unlock();
1233 goto out_sock_release;
1da177e4 1234}
721db93a 1235EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1236
1237int sock_create(int family, int type, int protocol, struct socket **res)
1238{
1b8d7ae4 1239 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1240}
c6d409cf 1241EXPORT_SYMBOL(sock_create);
1da177e4 1242
/* Create a socket in the namespace @net, with kern == 1. */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1248
3e0fa65f 1249SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1250{
1251 int retval;
1252 struct socket *sock;
a677a039
UD
1253 int flags;
1254
e38b36f3
UD
1255 /* Check the SOCK_* constants for consistency. */
1256 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1257 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1258 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1259 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1260
a677a039 1261 flags = type & ~SOCK_TYPE_MASK;
77d27200 1262 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1263 return -EINVAL;
1264 type &= SOCK_TYPE_MASK;
1da177e4 1265
aaca0bdc
UD
1266 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1267 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1268
1da177e4
LT
1269 retval = sock_create(family, type, protocol, &sock);
1270 if (retval < 0)
1271 goto out;
1272
77d27200 1273 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1274 if (retval < 0)
1275 goto out_release;
1276
1277out:
1278 /* It may be already another descriptor 8) Not kernel problem. */
1279 return retval;
1280
1281out_release:
1282 sock_release(sock);
1283 return retval;
1284}
1285
1286/*
1287 * Create a pair of connected sockets.
1288 */
1289
3e0fa65f
HC
1290SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1291 int __user *, usockvec)
1da177e4
LT
1292{
1293 struct socket *sock1, *sock2;
1294 int fd1, fd2, err;
db349509 1295 struct file *newfile1, *newfile2;
a677a039
UD
1296 int flags;
1297
1298 flags = type & ~SOCK_TYPE_MASK;
77d27200 1299 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1300 return -EINVAL;
1301 type &= SOCK_TYPE_MASK;
1da177e4 1302
aaca0bdc
UD
1303 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1304 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1305
1da177e4
LT
1306 /*
1307 * Obtain the first socket and check if the underlying protocol
1308 * supports the socketpair call.
1309 */
1310
1311 err = sock_create(family, type, protocol, &sock1);
1312 if (err < 0)
1313 goto out;
1314
1315 err = sock_create(family, type, protocol, &sock2);
1316 if (err < 0)
1317 goto out_release_1;
1318
1319 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1320 if (err < 0)
1da177e4
LT
1321 goto out_release_both;
1322
28407630 1323 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1324 if (unlikely(fd1 < 0)) {
1325 err = fd1;
db349509 1326 goto out_release_both;
bf3c23d1 1327 }
d73aa286 1328
28407630 1329 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1330 if (unlikely(fd2 < 0)) {
1331 err = fd2;
d73aa286 1332 goto out_put_unused_1;
28407630
AV
1333 }
1334
aab174f0 1335 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1336 if (IS_ERR(newfile1)) {
28407630 1337 err = PTR_ERR(newfile1);
d73aa286 1338 goto out_put_unused_both;
28407630
AV
1339 }
1340
aab174f0 1341 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1342 if (IS_ERR(newfile2)) {
1343 err = PTR_ERR(newfile2);
d73aa286 1344 goto out_fput_1;
db349509
AV
1345 }
1346
d73aa286
YD
1347 err = put_user(fd1, &usockvec[0]);
1348 if (err)
1349 goto out_fput_both;
1350
1351 err = put_user(fd2, &usockvec[1]);
1352 if (err)
1353 goto out_fput_both;
1354
157cf649 1355 audit_fd_pair(fd1, fd2);
d73aa286 1356
db349509
AV
1357 fd_install(fd1, newfile1);
1358 fd_install(fd2, newfile2);
1da177e4
LT
1359 /* fd1 and fd2 may be already another descriptors.
1360 * Not kernel problem.
1361 */
1362
d73aa286 1363 return 0;
1da177e4 1364
d73aa286
YD
1365out_fput_both:
1366 fput(newfile2);
1367 fput(newfile1);
1368 put_unused_fd(fd2);
1369 put_unused_fd(fd1);
1370 goto out;
1371
1372out_fput_1:
1373 fput(newfile1);
1374 put_unused_fd(fd2);
1375 put_unused_fd(fd1);
1376 sock_release(sock2);
1377 goto out;
1da177e4 1378
d73aa286
YD
1379out_put_unused_both:
1380 put_unused_fd(fd2);
1381out_put_unused_1:
1382 put_unused_fd(fd1);
1da177e4 1383out_release_both:
89bddce5 1384 sock_release(sock2);
1da177e4 1385out_release_1:
89bddce5 1386 sock_release(sock1);
1da177e4
LT
1387out:
1388 return err;
1389}
1390
1da177e4
LT
1391/*
1392 * Bind a name to a socket. Nothing much to do here since it's
1393 * the protocol's responsibility to handle the local address.
1394 *
1395 * We move the socket address to kernel space before we call
1396 * the protocol layer (having also checked the address is ok).
1397 */
1398
20f37034 1399SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1400{
1401 struct socket *sock;
230b1839 1402 struct sockaddr_storage address;
6cb153ca 1403 int err, fput_needed;
1da177e4 1404
89bddce5 1405 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1406 if (sock) {
43db362d 1407 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1408 if (err >= 0) {
1409 err = security_socket_bind(sock,
230b1839 1410 (struct sockaddr *)&address,
89bddce5 1411 addrlen);
6cb153ca
BL
1412 if (!err)
1413 err = sock->ops->bind(sock,
89bddce5 1414 (struct sockaddr *)
230b1839 1415 &address, addrlen);
1da177e4 1416 }
6cb153ca 1417 fput_light(sock->file, fput_needed);
89bddce5 1418 }
1da177e4
LT
1419 return err;
1420}
1421
1da177e4
LT
1422/*
1423 * Perform a listen. Basically, we allow the protocol to do anything
1424 * necessary for a listen, and if that works, we mark the socket as
1425 * ready for listening.
1426 */
1427
3e0fa65f 1428SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1429{
1430 struct socket *sock;
6cb153ca 1431 int err, fput_needed;
b8e1f9b5 1432 int somaxconn;
89bddce5
SH
1433
1434 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1435 if (sock) {
8efa6e93 1436 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1437 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1438 backlog = somaxconn;
1da177e4
LT
1439
1440 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1441 if (!err)
1442 err = sock->ops->listen(sock, backlog);
1da177e4 1443
6cb153ca 1444 fput_light(sock->file, fput_needed);
1da177e4
LT
1445 }
1446 return err;
1447}
1448
1da177e4
LT
1449/*
1450 * For accept, we attempt to create a new socket, set up the link
1451 * with the client, wake up the client, then return the new
1452 * connected fd. We collect the address of the connector in kernel
1453 * space and move it to user at the very end. This is unclean because
1454 * we open the socket then return an error.
1455 *
1456 * 1003.1g adds the ability to recvmsg() to query connection pending
1457 * status to recvmsg. We need to add that support in a way that's
1458 * clean when we restructure accept also.
1459 */
1460
20f37034
HC
1461SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1462 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1463{
1464 struct socket *sock, *newsock;
39d8c1b6 1465 struct file *newfile;
6cb153ca 1466 int err, len, newfd, fput_needed;
230b1839 1467 struct sockaddr_storage address;
1da177e4 1468
77d27200 1469 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1470 return -EINVAL;
1471
1472 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1473 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1474
6cb153ca 1475 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1476 if (!sock)
1477 goto out;
1478
1479 err = -ENFILE;
c6d409cf
ED
1480 newsock = sock_alloc();
1481 if (!newsock)
1da177e4
LT
1482 goto out_put;
1483
1484 newsock->type = sock->type;
1485 newsock->ops = sock->ops;
1486
1da177e4
LT
1487 /*
1488 * We don't need try_module_get here, as the listening socket (sock)
1489 * has the protocol module (sock->ops->owner) held.
1490 */
1491 __module_get(newsock->ops->owner);
1492
28407630 1493 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1494 if (unlikely(newfd < 0)) {
1495 err = newfd;
9a1875e6
DM
1496 sock_release(newsock);
1497 goto out_put;
39d8c1b6 1498 }
aab174f0 1499 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1500 if (IS_ERR(newfile)) {
28407630
AV
1501 err = PTR_ERR(newfile);
1502 put_unused_fd(newfd);
1503 sock_release(newsock);
1504 goto out_put;
1505 }
39d8c1b6 1506
a79af59e
FF
1507 err = security_socket_accept(sock, newsock);
1508 if (err)
39d8c1b6 1509 goto out_fd;
a79af59e 1510
1da177e4
LT
1511 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1512 if (err < 0)
39d8c1b6 1513 goto out_fd;
1da177e4
LT
1514
1515 if (upeer_sockaddr) {
230b1839 1516 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1517 &len, 2) < 0) {
1da177e4 1518 err = -ECONNABORTED;
39d8c1b6 1519 goto out_fd;
1da177e4 1520 }
43db362d 1521 err = move_addr_to_user(&address,
230b1839 1522 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1523 if (err < 0)
39d8c1b6 1524 goto out_fd;
1da177e4
LT
1525 }
1526
1527 /* File flags are not inherited via accept() unlike another OSes. */
1528
39d8c1b6
DM
1529 fd_install(newfd, newfile);
1530 err = newfd;
1da177e4 1531
1da177e4 1532out_put:
6cb153ca 1533 fput_light(sock->file, fput_needed);
1da177e4
LT
1534out:
1535 return err;
39d8c1b6 1536out_fd:
9606a216 1537 fput(newfile);
39d8c1b6 1538 put_unused_fd(newfd);
1da177e4
LT
1539 goto out_put;
1540}
1541
20f37034
HC
1542SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1543 int __user *, upeer_addrlen)
aaca0bdc 1544{
de11defe 1545 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1546}
1547
1da177e4
LT
1548/*
1549 * Attempt to connect to a socket with the server address. The address
1550 * is in user space so we verify it is OK and move it to kernel space.
1551 *
1552 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1553 * break bindings
1554 *
1555 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1556 * other SEQPACKET protocols that take time to connect() as it doesn't
1557 * include the -EINPROGRESS status for such sockets.
1558 */
1559
20f37034
HC
1560SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1561 int, addrlen)
1da177e4
LT
1562{
1563 struct socket *sock;
230b1839 1564 struct sockaddr_storage address;
6cb153ca 1565 int err, fput_needed;
1da177e4 1566
6cb153ca 1567 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1568 if (!sock)
1569 goto out;
43db362d 1570 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1571 if (err < 0)
1572 goto out_put;
1573
89bddce5 1574 err =
230b1839 1575 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1576 if (err)
1577 goto out_put;
1578
230b1839 1579 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1580 sock->file->f_flags);
1581out_put:
6cb153ca 1582 fput_light(sock->file, fput_needed);
1da177e4
LT
1583out:
1584 return err;
1585}
1586
1587/*
1588 * Get the local address ('name') of a socket object. Move the obtained
1589 * name to user space.
1590 */
1591
20f37034
HC
1592SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1593 int __user *, usockaddr_len)
1da177e4
LT
1594{
1595 struct socket *sock;
230b1839 1596 struct sockaddr_storage address;
6cb153ca 1597 int len, err, fput_needed;
89bddce5 1598
6cb153ca 1599 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1600 if (!sock)
1601 goto out;
1602
1603 err = security_socket_getsockname(sock);
1604 if (err)
1605 goto out_put;
1606
230b1839 1607 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1608 if (err)
1609 goto out_put;
43db362d 1610 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1611
1612out_put:
6cb153ca 1613 fput_light(sock->file, fput_needed);
1da177e4
LT
1614out:
1615 return err;
1616}
1617
1618/*
1619 * Get the remote address ('name') of a socket object. Move the obtained
1620 * name to user space.
1621 */
1622
20f37034
HC
1623SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1624 int __user *, usockaddr_len)
1da177e4
LT
1625{
1626 struct socket *sock;
230b1839 1627 struct sockaddr_storage address;
6cb153ca 1628 int len, err, fput_needed;
1da177e4 1629
89bddce5
SH
1630 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1631 if (sock != NULL) {
1da177e4
LT
1632 err = security_socket_getpeername(sock);
1633 if (err) {
6cb153ca 1634 fput_light(sock->file, fput_needed);
1da177e4
LT
1635 return err;
1636 }
1637
89bddce5 1638 err =
230b1839 1639 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1640 1);
1da177e4 1641 if (!err)
43db362d 1642 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1643 usockaddr_len);
6cb153ca 1644 fput_light(sock->file, fput_needed);
1da177e4
LT
1645 }
1646 return err;
1647}
1648
1649/*
1650 * Send a datagram to a given address. We move the address into kernel
1651 * space and check the user space data area is readable before invoking
1652 * the protocol.
1653 */
1654
3e0fa65f 1655SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1656 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1657 int, addr_len)
1da177e4
LT
1658{
1659 struct socket *sock;
230b1839 1660 struct sockaddr_storage address;
1da177e4
LT
1661 int err;
1662 struct msghdr msg;
1663 struct iovec iov;
6cb153ca 1664 int fput_needed;
6cb153ca 1665
602bd0e9
AV
1666 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1667 if (unlikely(err))
1668 return err;
de0fa95c
PE
1669 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1670 if (!sock)
4387ff75 1671 goto out;
6cb153ca 1672
89bddce5 1673 msg.msg_name = NULL;
89bddce5
SH
1674 msg.msg_control = NULL;
1675 msg.msg_controllen = 0;
1676 msg.msg_namelen = 0;
6cb153ca 1677 if (addr) {
43db362d 1678 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1679 if (err < 0)
1680 goto out_put;
230b1839 1681 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1682 msg.msg_namelen = addr_len;
1da177e4
LT
1683 }
1684 if (sock->file->f_flags & O_NONBLOCK)
1685 flags |= MSG_DONTWAIT;
1686 msg.msg_flags = flags;
d8725c86 1687 err = sock_sendmsg(sock, &msg);
1da177e4 1688
89bddce5 1689out_put:
de0fa95c 1690 fput_light(sock->file, fput_needed);
4387ff75 1691out:
1da177e4
LT
1692 return err;
1693}
1694
1695/*
89bddce5 1696 * Send a datagram down a socket.
1da177e4
LT
1697 */
1698
3e0fa65f 1699SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1700 unsigned int, flags)
1da177e4
LT
1701{
1702 return sys_sendto(fd, buff, len, flags, NULL, 0);
1703}
1704
1705/*
89bddce5 1706 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1707 * sender. We verify the buffers are writable and if needed move the
1708 * sender address from kernel to user space.
1709 */
1710
3e0fa65f 1711SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1712 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1713 int __user *, addr_len)
1da177e4
LT
1714{
1715 struct socket *sock;
1716 struct iovec iov;
1717 struct msghdr msg;
230b1839 1718 struct sockaddr_storage address;
89bddce5 1719 int err, err2;
6cb153ca
BL
1720 int fput_needed;
1721
602bd0e9
AV
1722 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1723 if (unlikely(err))
1724 return err;
de0fa95c 1725 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1726 if (!sock)
de0fa95c 1727 goto out;
1da177e4 1728
89bddce5
SH
1729 msg.msg_control = NULL;
1730 msg.msg_controllen = 0;
f3d33426
HFS
1731 /* Save some cycles and don't copy the address if not needed */
1732 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1733 /* We assume all kernel code knows the size of sockaddr_storage */
1734 msg.msg_namelen = 0;
130ed5d1 1735 msg.msg_iocb = NULL;
1da177e4
LT
1736 if (sock->file->f_flags & O_NONBLOCK)
1737 flags |= MSG_DONTWAIT;
2da62906 1738 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1739
89bddce5 1740 if (err >= 0 && addr != NULL) {
43db362d 1741 err2 = move_addr_to_user(&address,
230b1839 1742 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1743 if (err2 < 0)
1744 err = err2;
1da177e4 1745 }
de0fa95c
PE
1746
1747 fput_light(sock->file, fput_needed);
4387ff75 1748out:
1da177e4
LT
1749 return err;
1750}
1751
1752/*
89bddce5 1753 * Receive a datagram from a socket.
1da177e4
LT
1754 */
1755
b7c0ddf5
JG
1756SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1757 unsigned int, flags)
1da177e4
LT
1758{
1759 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1760}
1761
1762/*
1763 * Set a socket option. Because we don't know the option lengths we have
1764 * to pass the user mode parameter for the protocols to sort out.
1765 */
1766
20f37034
HC
1767SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1768 char __user *, optval, int, optlen)
1da177e4 1769{
6cb153ca 1770 int err, fput_needed;
1da177e4
LT
1771 struct socket *sock;
1772
1773 if (optlen < 0)
1774 return -EINVAL;
89bddce5
SH
1775
1776 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1777 if (sock != NULL) {
1778 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1779 if (err)
1780 goto out_put;
1da177e4
LT
1781
1782 if (level == SOL_SOCKET)
89bddce5
SH
1783 err =
1784 sock_setsockopt(sock, level, optname, optval,
1785 optlen);
1da177e4 1786 else
89bddce5
SH
1787 err =
1788 sock->ops->setsockopt(sock, level, optname, optval,
1789 optlen);
6cb153ca
BL
1790out_put:
1791 fput_light(sock->file, fput_needed);
1da177e4
LT
1792 }
1793 return err;
1794}
1795
1796/*
1797 * Get a socket option. Because we don't know the option lengths we have
1798 * to pass a user mode parameter for the protocols to sort out.
1799 */
1800
20f37034
HC
1801SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1802 char __user *, optval, int __user *, optlen)
1da177e4 1803{
6cb153ca 1804 int err, fput_needed;
1da177e4
LT
1805 struct socket *sock;
1806
89bddce5
SH
1807 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1808 if (sock != NULL) {
6cb153ca
BL
1809 err = security_socket_getsockopt(sock, level, optname);
1810 if (err)
1811 goto out_put;
1da177e4
LT
1812
1813 if (level == SOL_SOCKET)
89bddce5
SH
1814 err =
1815 sock_getsockopt(sock, level, optname, optval,
1816 optlen);
1da177e4 1817 else
89bddce5
SH
1818 err =
1819 sock->ops->getsockopt(sock, level, optname, optval,
1820 optlen);
6cb153ca
BL
1821out_put:
1822 fput_light(sock->file, fput_needed);
1da177e4
LT
1823 }
1824 return err;
1825}
1826
1da177e4
LT
1827/*
1828 * Shutdown a socket.
1829 */
1830
754fe8d2 1831SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1832{
6cb153ca 1833 int err, fput_needed;
1da177e4
LT
1834 struct socket *sock;
1835
89bddce5
SH
1836 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1837 if (sock != NULL) {
1da177e4 1838 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1839 if (!err)
1840 err = sock->ops->shutdown(sock, how);
1841 fput_light(sock->file, fput_needed);
1da177e4
LT
1842 }
1843 return err;
1844}
1845
89bddce5 1846/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1847 * fields which are the same type (int / unsigned) on our platforms.
1848 */
/*
 * COMPAT_MSG(msg, member): address of 'member' in either the compat or the
 * native message header.  The expansion reads a variable named 'flags' and,
 * when MSG_CMSG_COMPAT is set in it, a pointer named <msg>_compat (the msg
 * argument with "_compat" pasted on), so both must be in scope at the use site.
 */
1849#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
/* Address of the msg_namelen field, compat-aware. */
1850#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
/* Address of the msg_flags field, compat-aware. */
1851#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1852
c71d8ebe
TH
/*
 * Destination address remembered by ___sys_sendmsg() after a successful
 * send, so that a following message to the same address can go through
 * sock_sendmsg_nosec() instead of sock_sendmsg().
 */
1853struct used_address {
/* Copy of the last destination address sent to successfully. */
1854 struct sockaddr_storage name;
/* Length of the address in 'name'; compared against msg_namelen. */
1855 unsigned int name_len;
1856};
1857
da184284
AV
1858static int copy_msghdr_from_user(struct msghdr *kmsg,
1859 struct user_msghdr __user *umsg,
1860 struct sockaddr __user **save_addr,
1861 struct iovec **iov)
1661bf36 1862{
08adb7da
AV
1863 struct sockaddr __user *uaddr;
1864 struct iovec __user *uiov;
c0371da6 1865 size_t nr_segs;
08adb7da
AV
1866 ssize_t err;
1867
1868 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1869 __get_user(uaddr, &umsg->msg_name) ||
1870 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1871 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1872 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1873 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1874 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1875 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1876 return -EFAULT;
dbb490b9 1877
08adb7da 1878 if (!uaddr)
6a2a2b3a
AS
1879 kmsg->msg_namelen = 0;
1880
dbb490b9
ML
1881 if (kmsg->msg_namelen < 0)
1882 return -EINVAL;
1883
1661bf36 1884 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1885 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1886
1887 if (save_addr)
1888 *save_addr = uaddr;
1889
1890 if (uaddr && kmsg->msg_namelen) {
1891 if (!save_addr) {
1892 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1893 kmsg->msg_name);
1894 if (err < 0)
1895 return err;
1896 }
1897 } else {
1898 kmsg->msg_name = NULL;
1899 kmsg->msg_namelen = 0;
1900 }
1901
c0371da6 1902 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1903 return -EMSGSIZE;
1904
0345f931 1905 kmsg->msg_iocb = NULL;
1906
da184284
AV
1907 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1908 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1909}
1910
666547ff 1911static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1912 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1913 struct used_address *used_address,
1914 unsigned int allowed_msghdr_flags)
1da177e4 1915{
89bddce5
SH
1916 struct compat_msghdr __user *msg_compat =
1917 (struct compat_msghdr __user *)msg;
230b1839 1918 struct sockaddr_storage address;
1da177e4 1919 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1920 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 1921 __aligned(sizeof(__kernel_size_t));
89bddce5 1922 /* 20 is size of ipv6_pktinfo */
1da177e4 1923 unsigned char *ctl_buf = ctl;
d8725c86 1924 int ctl_len;
08adb7da 1925 ssize_t err;
89bddce5 1926
08adb7da 1927 msg_sys->msg_name = &address;
1da177e4 1928
08449320 1929 if (MSG_CMSG_COMPAT & flags)
08adb7da 1930 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1931 else
08adb7da 1932 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1933 if (err < 0)
da184284 1934 return err;
1da177e4
LT
1935
1936 err = -ENOBUFS;
1937
228e548e 1938 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1939 goto out_freeiov;
28a94d8f 1940 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1941 ctl_len = msg_sys->msg_controllen;
1da177e4 1942 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1943 err =
228e548e 1944 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1945 sizeof(ctl));
1da177e4
LT
1946 if (err)
1947 goto out_freeiov;
228e548e
AB
1948 ctl_buf = msg_sys->msg_control;
1949 ctl_len = msg_sys->msg_controllen;
1da177e4 1950 } else if (ctl_len) {
89bddce5 1951 if (ctl_len > sizeof(ctl)) {
1da177e4 1952 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1953 if (ctl_buf == NULL)
1da177e4
LT
1954 goto out_freeiov;
1955 }
1956 err = -EFAULT;
1957 /*
228e548e 1958 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1959 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1960 * checking falls down on this.
1961 */
fb8621bb 1962 if (copy_from_user(ctl_buf,
228e548e 1963 (void __user __force *)msg_sys->msg_control,
89bddce5 1964 ctl_len))
1da177e4 1965 goto out_freectl;
228e548e 1966 msg_sys->msg_control = ctl_buf;
1da177e4 1967 }
228e548e 1968 msg_sys->msg_flags = flags;
1da177e4
LT
1969
1970 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1971 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1972 /*
1973 * If this is sendmmsg() and current destination address is same as
1974 * previously succeeded address, omit asking LSM's decision.
1975 * used_address->name_len is initialized to UINT_MAX so that the first
1976 * destination address never matches.
1977 */
bc909d9d
MD
1978 if (used_address && msg_sys->msg_name &&
1979 used_address->name_len == msg_sys->msg_namelen &&
1980 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1981 used_address->name_len)) {
d8725c86 1982 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1983 goto out_freectl;
1984 }
d8725c86 1985 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1986 /*
1987 * If this is sendmmsg() and sending to current destination address was
1988 * successful, remember it.
1989 */
1990 if (used_address && err >= 0) {
1991 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1992 if (msg_sys->msg_name)
1993 memcpy(&used_address->name, msg_sys->msg_name,
1994 used_address->name_len);
c71d8ebe 1995 }
1da177e4
LT
1996
1997out_freectl:
89bddce5 1998 if (ctl_buf != ctl)
1da177e4
LT
1999 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2000out_freeiov:
da184284 2001 kfree(iov);
228e548e
AB
2002 return err;
2003}
2004
2005/*
2006 * BSD sendmsg interface
2007 */
2008
666547ff 2009long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
2010{
2011 int fput_needed, err;
2012 struct msghdr msg_sys;
1be374a0
AL
2013 struct socket *sock;
2014
1be374a0 2015 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2016 if (!sock)
2017 goto out;
2018
28a94d8f 2019 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2020
6cb153ca 2021 fput_light(sock->file, fput_needed);
89bddce5 2022out:
1da177e4
LT
2023 return err;
2024}
2025
666547ff 2026SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
2027{
2028 if (flags & MSG_CMSG_COMPAT)
2029 return -EINVAL;
2030 return __sys_sendmsg(fd, msg, flags);
2031}
2032
228e548e
AB
2033/*
2034 * Linux sendmmsg interface
2035 */
2036
2037int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2038 unsigned int flags)
2039{
2040 int fput_needed, err, datagrams;
2041 struct socket *sock;
2042 struct mmsghdr __user *entry;
2043 struct compat_mmsghdr __user *compat_entry;
2044 struct msghdr msg_sys;
c71d8ebe 2045 struct used_address used_address;
f092276d 2046 unsigned int oflags = flags;
228e548e 2047
98382f41
AB
2048 if (vlen > UIO_MAXIOV)
2049 vlen = UIO_MAXIOV;
228e548e
AB
2050
2051 datagrams = 0;
2052
2053 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2054 if (!sock)
2055 return err;
2056
c71d8ebe 2057 used_address.name_len = UINT_MAX;
228e548e
AB
2058 entry = mmsg;
2059 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2060 err = 0;
f092276d 2061 flags |= MSG_BATCH;
228e548e
AB
2062
2063 while (datagrams < vlen) {
f092276d
TH
2064 if (datagrams == vlen - 1)
2065 flags = oflags;
2066
228e548e 2067 if (MSG_CMSG_COMPAT & flags) {
666547ff 2068 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2069 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2070 if (err < 0)
2071 break;
2072 err = __put_user(err, &compat_entry->msg_len);
2073 ++compat_entry;
2074 } else {
a7526eb5 2075 err = ___sys_sendmsg(sock,
666547ff 2076 (struct user_msghdr __user *)entry,
28a94d8f 2077 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2078 if (err < 0)
2079 break;
2080 err = put_user(err, &entry->msg_len);
2081 ++entry;
2082 }
2083
2084 if (err)
2085 break;
2086 ++datagrams;
3023898b
SHY
2087 if (msg_data_left(&msg_sys))
2088 break;
a78cb84c 2089 cond_resched();
228e548e
AB
2090 }
2091
228e548e
AB
2092 fput_light(sock->file, fput_needed);
2093
728ffb86
AB
2094 /* We only return an error if no datagrams were able to be sent */
2095 if (datagrams != 0)
228e548e
AB
2096 return datagrams;
2097
228e548e
AB
2098 return err;
2099}
2100
2101SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2102 unsigned int, vlen, unsigned int, flags)
2103{
1be374a0
AL
2104 if (flags & MSG_CMSG_COMPAT)
2105 return -EINVAL;
228e548e
AB
2106 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2107}
2108
666547ff 2109static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2110 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2111{
89bddce5
SH
2112 struct compat_msghdr __user *msg_compat =
2113 (struct compat_msghdr __user *)msg;
1da177e4 2114 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2115 struct iovec *iov = iovstack;
1da177e4 2116 unsigned long cmsg_ptr;
2da62906 2117 int len;
08adb7da 2118 ssize_t err;
1da177e4
LT
2119
2120 /* kernel mode address */
230b1839 2121 struct sockaddr_storage addr;
1da177e4
LT
2122
2123 /* user mode address pointers */
2124 struct sockaddr __user *uaddr;
08adb7da 2125 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2126
08adb7da 2127 msg_sys->msg_name = &addr;
1da177e4 2128
f3d33426 2129 if (MSG_CMSG_COMPAT & flags)
08adb7da 2130 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2131 else
08adb7da 2132 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2133 if (err < 0)
da184284 2134 return err;
1da177e4 2135
a2e27255
ACM
2136 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2137 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2138
f3d33426
HFS
2139 /* We assume all kernel code knows the size of sockaddr_storage */
2140 msg_sys->msg_namelen = 0;
2141
1da177e4
LT
2142 if (sock->file->f_flags & O_NONBLOCK)
2143 flags |= MSG_DONTWAIT;
2da62906 2144 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2145 if (err < 0)
2146 goto out_freeiov;
2147 len = err;
2148
2149 if (uaddr != NULL) {
43db362d 2150 err = move_addr_to_user(&addr,
a2e27255 2151 msg_sys->msg_namelen, uaddr,
89bddce5 2152 uaddr_len);
1da177e4
LT
2153 if (err < 0)
2154 goto out_freeiov;
2155 }
a2e27255 2156 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2157 COMPAT_FLAGS(msg));
1da177e4
LT
2158 if (err)
2159 goto out_freeiov;
2160 if (MSG_CMSG_COMPAT & flags)
a2e27255 2161 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2162 &msg_compat->msg_controllen);
2163 else
a2e27255 2164 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2165 &msg->msg_controllen);
2166 if (err)
2167 goto out_freeiov;
2168 err = len;
2169
2170out_freeiov:
da184284 2171 kfree(iov);
a2e27255
ACM
2172 return err;
2173}
2174
2175/*
2176 * BSD recvmsg interface
2177 */
2178
666547ff 2179long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2180{
2181 int fput_needed, err;
2182 struct msghdr msg_sys;
1be374a0
AL
2183 struct socket *sock;
2184
1be374a0 2185 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2186 if (!sock)
2187 goto out;
2188
a7526eb5 2189 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2190
6cb153ca 2191 fput_light(sock->file, fput_needed);
1da177e4
LT
2192out:
2193 return err;
2194}
2195
666547ff 2196SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2197 unsigned int, flags)
2198{
2199 if (flags & MSG_CMSG_COMPAT)
2200 return -EINVAL;
2201 return __sys_recvmsg(fd, msg, flags);
2202}
2203
a2e27255
ACM
2204/*
2205 * Linux recvmmsg interface
2206 */
2207
2208int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2209 unsigned int flags, struct timespec *timeout)
2210{
2211 int fput_needed, err, datagrams;
2212 struct socket *sock;
2213 struct mmsghdr __user *entry;
d7256d0e 2214 struct compat_mmsghdr __user *compat_entry;
a2e27255 2215 struct msghdr msg_sys;
766b9f92
DD
2216 struct timespec64 end_time;
2217 struct timespec64 timeout64;
a2e27255
ACM
2218
2219 if (timeout &&
2220 poll_select_set_timeout(&end_time, timeout->tv_sec,
2221 timeout->tv_nsec))
2222 return -EINVAL;
2223
2224 datagrams = 0;
2225
2226 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2227 if (!sock)
2228 return err;
2229
2230 err = sock_error(sock->sk);
2231 if (err)
2232 goto out_put;
2233
2234 entry = mmsg;
d7256d0e 2235 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2236
2237 while (datagrams < vlen) {
2238 /*
2239 * No need to ask LSM for more than the first datagram.
2240 */
d7256d0e 2241 if (MSG_CMSG_COMPAT & flags) {
666547ff 2242 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2243 &msg_sys, flags & ~MSG_WAITFORONE,
2244 datagrams);
d7256d0e
JMG
2245 if (err < 0)
2246 break;
2247 err = __put_user(err, &compat_entry->msg_len);
2248 ++compat_entry;
2249 } else {
a7526eb5 2250 err = ___sys_recvmsg(sock,
666547ff 2251 (struct user_msghdr __user *)entry,
a7526eb5
AL
2252 &msg_sys, flags & ~MSG_WAITFORONE,
2253 datagrams);
d7256d0e
JMG
2254 if (err < 0)
2255 break;
2256 err = put_user(err, &entry->msg_len);
2257 ++entry;
2258 }
2259
a2e27255
ACM
2260 if (err)
2261 break;
a2e27255
ACM
2262 ++datagrams;
2263
71c5c159
BB
2264 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2265 if (flags & MSG_WAITFORONE)
2266 flags |= MSG_DONTWAIT;
2267
a2e27255 2268 if (timeout) {
766b9f92
DD
2269 ktime_get_ts64(&timeout64);
2270 *timeout = timespec64_to_timespec(
2271 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2272 if (timeout->tv_sec < 0) {
2273 timeout->tv_sec = timeout->tv_nsec = 0;
2274 break;
2275 }
2276
2277 /* Timeout, return less than vlen datagrams */
2278 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2279 break;
2280 }
2281
2282 /* Out of band data, return right away */
2283 if (msg_sys.msg_flags & MSG_OOB)
2284 break;
a78cb84c 2285 cond_resched();
a2e27255
ACM
2286 }
2287
a2e27255 2288 if (err == 0)
34b88a68
ACM
2289 goto out_put;
2290
2291 if (datagrams == 0) {
2292 datagrams = err;
2293 goto out_put;
2294 }
a2e27255 2295
34b88a68
ACM
2296 /*
2297 * We may return less entries than requested (vlen) if the
2298 * sock is non block and there aren't enough datagrams...
2299 */
2300 if (err != -EAGAIN) {
a2e27255 2301 /*
34b88a68
ACM
2302 * ... or if recvmsg returns an error after we
2303 * received some datagrams, where we record the
2304 * error to return on the next call or if the
2305 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2306 */
34b88a68 2307 sock->sk->sk_err = -err;
a2e27255 2308 }
34b88a68
ACM
2309out_put:
2310 fput_light(sock->file, fput_needed);
a2e27255 2311
34b88a68 2312 return datagrams;
a2e27255
ACM
2313}
2314
2315SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2316 unsigned int, vlen, unsigned int, flags,
2317 struct timespec __user *, timeout)
2318{
2319 int datagrams;
2320 struct timespec timeout_sys;
2321
1be374a0
AL
2322 if (flags & MSG_CMSG_COMPAT)
2323 return -EINVAL;
2324
a2e27255
ACM
2325 if (!timeout)
2326 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2327
2328 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2329 return -EFAULT;
2330
2331 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2332
2333 if (datagrams > 0 &&
2334 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2335 datagrams = -EFAULT;
2336
2337 return datagrams;
2338}
2339
2340#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall: nargs[call] is the number of
 * bytes of arguments to fetch from user space for socketcall number
 * 'call' (argument count times sizeof(unsigned long)).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 -  2 */ AL(0), AL(3), AL(3),
	/*  3 -  5 */ AL(3), AL(2), AL(3),
	/*  6 -  8 */ AL(3), AL(3), AL(4),
	/*  9 - 11 */ AL(4), AL(4), AL(6),
	/* 12 - 14 */ AL(6), AL(2), AL(5),
	/* 15 - 17 */ AL(5), AL(3), AL(3),
	/* 18 - 20 */ AL(4), AL(5), AL(4)
};

#undef AL
2351
2352/*
89bddce5 2353 * System call vectors.
1da177e4
LT
2354 *
2355 * Argument checking cleaned up. Saved 20% in size.
2356 * This function doesn't need to set the kernel lock because
89bddce5 2357 * it is set by the callees.
1da177e4
LT
2358 */
2359
3e0fa65f 2360SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2361{
2950fa9d 2362 unsigned long a[AUDITSC_ARGS];
89bddce5 2363 unsigned long a0, a1;
1da177e4 2364 int err;
47379052 2365 unsigned int len;
1da177e4 2366
228e548e 2367 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2368 return -EINVAL;
2369
47379052
AV
2370 len = nargs[call];
2371 if (len > sizeof(a))
2372 return -EINVAL;
2373
1da177e4 2374 /* copy_from_user should be SMP safe. */
47379052 2375 if (copy_from_user(a, args, len))
1da177e4 2376 return -EFAULT;
3ec3b2fb 2377
2950fa9d
CG
2378 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2379 if (err)
2380 return err;
3ec3b2fb 2381
89bddce5
SH
2382 a0 = a[0];
2383 a1 = a[1];
2384
2385 switch (call) {
2386 case SYS_SOCKET:
2387 err = sys_socket(a0, a1, a[2]);
2388 break;
2389 case SYS_BIND:
2390 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2391 break;
2392 case SYS_CONNECT:
2393 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2394 break;
2395 case SYS_LISTEN:
2396 err = sys_listen(a0, a1);
2397 break;
2398 case SYS_ACCEPT:
de11defe
UD
2399 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2400 (int __user *)a[2], 0);
89bddce5
SH
2401 break;
2402 case SYS_GETSOCKNAME:
2403 err =
2404 sys_getsockname(a0, (struct sockaddr __user *)a1,
2405 (int __user *)a[2]);
2406 break;
2407 case SYS_GETPEERNAME:
2408 err =
2409 sys_getpeername(a0, (struct sockaddr __user *)a1,
2410 (int __user *)a[2]);
2411 break;
2412 case SYS_SOCKETPAIR:
2413 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2414 break;
2415 case SYS_SEND:
2416 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2417 break;
2418 case SYS_SENDTO:
2419 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2420 (struct sockaddr __user *)a[4], a[5]);
2421 break;
2422 case SYS_RECV:
2423 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2424 break;
2425 case SYS_RECVFROM:
2426 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2427 (struct sockaddr __user *)a[4],
2428 (int __user *)a[5]);
2429 break;
2430 case SYS_SHUTDOWN:
2431 err = sys_shutdown(a0, a1);
2432 break;
2433 case SYS_SETSOCKOPT:
2434 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2435 break;
2436 case SYS_GETSOCKOPT:
2437 err =
2438 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2439 (int __user *)a[4]);
2440 break;
2441 case SYS_SENDMSG:
666547ff 2442 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2443 break;
228e548e
AB
2444 case SYS_SENDMMSG:
2445 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2446 break;
89bddce5 2447 case SYS_RECVMSG:
666547ff 2448 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2449 break;
a2e27255
ACM
2450 case SYS_RECVMMSG:
2451 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2452 (struct timespec __user *)a[4]);
2453 break;
de11defe
UD
2454 case SYS_ACCEPT4:
2455 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2456 (int __user *)a[2], a[3]);
aaca0bdc 2457 break;
89bddce5
SH
2458 default:
2459 err = -EINVAL;
2460 break;
1da177e4
LT
2461 }
2462 return err;
2463}
2464
89bddce5 2465#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2466
55737fda
SH
2467/**
2468 * sock_register - add a socket protocol handler
2469 * @ops: description of protocol
2470 *
1da177e4
LT
2471 * This function is called by a protocol handler that wants to
2472 * advertise its address family, and have it linked into the
e793c0f7 2473 * socket interface. The value ops->family corresponds to the
55737fda 2474 * socket system call protocol family.
1da177e4 2475 */
f0fd27d4 2476int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2477{
2478 int err;
2479
2480 if (ops->family >= NPROTO) {
3410f22e 2481 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2482 return -ENOBUFS;
2483 }
55737fda
SH
2484
2485 spin_lock(&net_family_lock);
190683a9
ED
2486 if (rcu_dereference_protected(net_families[ops->family],
2487 lockdep_is_held(&net_family_lock)))
55737fda
SH
2488 err = -EEXIST;
2489 else {
cf778b00 2490 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2491 err = 0;
2492 }
55737fda
SH
2493 spin_unlock(&net_family_lock);
2494
3410f22e 2495 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2496 return err;
2497}
c6d409cf 2498EXPORT_SYMBOL(sock_register);
1da177e4 2499
55737fda
SH
2500/**
2501 * sock_unregister - remove a protocol handler
2502 * @family: protocol family to remove
2503 *
1da177e4
LT
2504 * This function is called by a protocol handler that wants to
2505 * remove its address family, and have it unlinked from the
55737fda
SH
2506 * new socket creation.
2507 *
2508 * If protocol handler is a module, then it can use module reference
2509 * counts to protect against new references. If protocol handler is not
2510 * a module then it needs to provide its own protection in
2511 * the ops->create routine.
1da177e4 2512 */
f0fd27d4 2513void sock_unregister(int family)
1da177e4 2514{
f0fd27d4 2515 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2516
55737fda 2517 spin_lock(&net_family_lock);
a9b3cd7f 2518 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2519 spin_unlock(&net_family_lock);
2520
2521 synchronize_rcu();
2522
3410f22e 2523 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2524}
c6d409cf 2525EXPORT_SYMBOL(sock_unregister);
1da177e4 2526
77d76ea3 2527static int __init sock_init(void)
1da177e4 2528{
b3e19d92 2529 int err;
2ca794e5
EB
2530 /*
2531 * Initialize the network sysctl infrastructure.
2532 */
2533 err = net_sysctl_init();
2534 if (err)
2535 goto out;
b3e19d92 2536
1da177e4 2537 /*
89bddce5 2538 * Initialize skbuff SLAB cache
1da177e4
LT
2539 */
2540 skb_init();
1da177e4
LT
2541
2542 /*
89bddce5 2543 * Initialize the protocols module.
1da177e4
LT
2544 */
2545
2546 init_inodecache();
b3e19d92
NP
2547
2548 err = register_filesystem(&sock_fs_type);
2549 if (err)
2550 goto out_fs;
1da177e4 2551 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2552 if (IS_ERR(sock_mnt)) {
2553 err = PTR_ERR(sock_mnt);
2554 goto out_mount;
2555 }
77d76ea3
AK
2556
2557 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2558 */
2559
2560#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2561 err = netfilter_init();
2562 if (err)
2563 goto out;
1da177e4 2564#endif
cbeb321a 2565
408eccce 2566 ptp_classifier_init();
c1f19b51 2567
b3e19d92
NP
2568out:
2569 return err;
2570
2571out_mount:
2572 unregister_filesystem(&sock_fs_type);
2573out_fs:
2574 goto out;
1da177e4
LT
2575}
2576
77d76ea3
AK
2577core_initcall(sock_init); /* early initcall */
2578
1da177e4
LT
2579#ifdef CONFIG_PROC_FS
2580void socket_seq_show(struct seq_file *seq)
2581{
2582 int cpu;
2583 int counter = 0;
2584
6f912042 2585 for_each_possible_cpu(cpu)
89bddce5 2586 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2587
2588 /* It can be negative, by the way. 8) */
2589 if (counter < 0)
2590 counter = 0;
2591
2592 seq_printf(seq, "sockets: used %d\n", counter);
2593}
89bddce5 2594#endif /* CONFIG_PROC_FS */
1da177e4 2595
89bbfc95 2596#ifdef CONFIG_COMPAT
6b96018b 2597static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2598 unsigned int cmd, void __user *up)
7a229387 2599{
7a229387
AB
2600 mm_segment_t old_fs = get_fs();
2601 struct timeval ktv;
2602 int err;
2603
2604 set_fs(KERNEL_DS);
6b96018b 2605 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2606 set_fs(old_fs);
644595f8 2607 if (!err)
ed6fe9d6 2608 err = compat_put_timeval(&ktv, up);
644595f8 2609
7a229387
AB
2610 return err;
2611}
2612
6b96018b 2613static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2614 unsigned int cmd, void __user *up)
7a229387 2615{
7a229387
AB
2616 mm_segment_t old_fs = get_fs();
2617 struct timespec kts;
2618 int err;
2619
2620 set_fs(KERNEL_DS);
6b96018b 2621 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2622 set_fs(old_fs);
644595f8 2623 if (!err)
ed6fe9d6 2624 err = compat_put_timespec(&kts, up);
644595f8 2625
7a229387
AB
2626 return err;
2627}
2628
6b96018b 2629static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2630{
2631 struct ifreq __user *uifr;
2632 int err;
2633
2634 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2635 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2636 return -EFAULT;
2637
6b96018b 2638 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2639 if (err)
2640 return err;
2641
6b96018b 2642 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2643 return -EFAULT;
2644
2645 return 0;
2646}
2647
6b96018b 2648static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2649{
6b96018b 2650 struct compat_ifconf ifc32;
7a229387
AB
2651 struct ifconf ifc;
2652 struct ifconf __user *uifc;
6b96018b 2653 struct compat_ifreq __user *ifr32;
7a229387
AB
2654 struct ifreq __user *ifr;
2655 unsigned int i, j;
2656 int err;
2657
6b96018b 2658 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2659 return -EFAULT;
2660
43da5f2e 2661 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2662 if (ifc32.ifcbuf == 0) {
2663 ifc32.ifc_len = 0;
2664 ifc.ifc_len = 0;
2665 ifc.ifc_req = NULL;
2666 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2667 } else {
c6d409cf
ED
2668 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2669 sizeof(struct ifreq);
7a229387
AB
2670 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2671 ifc.ifc_len = len;
2672 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2673 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2674 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2675 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2676 return -EFAULT;
2677 ifr++;
2678 ifr32++;
2679 }
2680 }
2681 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2682 return -EFAULT;
2683
6b96018b 2684 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2685 if (err)
2686 return err;
2687
2688 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2689 return -EFAULT;
2690
2691 ifr = ifc.ifc_req;
2692 ifr32 = compat_ptr(ifc32.ifcbuf);
2693 for (i = 0, j = 0;
c6d409cf
ED
2694 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2695 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2696 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2697 return -EFAULT;
2698 ifr32++;
2699 ifr++;
2700 }
2701
2702 if (ifc32.ifcbuf == 0) {
2703 /* Translate from 64-bit structure multiple to
2704 * a 32-bit one.
2705 */
2706 i = ifc.ifc_len;
6b96018b 2707 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2708 ifc32.ifc_len = i;
2709 } else {
2710 ifc32.ifc_len = i;
2711 }
6b96018b 2712 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2713 return -EFAULT;
2714
2715 return 0;
2716}
2717
6b96018b 2718static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2719{
3a7da39d
BH
2720 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2721 bool convert_in = false, convert_out = false;
2722 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2723 struct ethtool_rxnfc __user *rxnfc;
7a229387 2724 struct ifreq __user *ifr;
3a7da39d
BH
2725 u32 rule_cnt = 0, actual_rule_cnt;
2726 u32 ethcmd;
7a229387 2727 u32 data;
3a7da39d 2728 int ret;
7a229387 2729
3a7da39d
BH
2730 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2731 return -EFAULT;
7a229387 2732
3a7da39d
BH
2733 compat_rxnfc = compat_ptr(data);
2734
2735 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2736 return -EFAULT;
2737
3a7da39d
BH
2738 /* Most ethtool structures are defined without padding.
2739 * Unfortunately struct ethtool_rxnfc is an exception.
2740 */
2741 switch (ethcmd) {
2742 default:
2743 break;
2744 case ETHTOOL_GRXCLSRLALL:
2745 /* Buffer size is variable */
2746 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2747 return -EFAULT;
2748 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2749 return -ENOMEM;
2750 buf_size += rule_cnt * sizeof(u32);
2751 /* fall through */
2752 case ETHTOOL_GRXRINGS:
2753 case ETHTOOL_GRXCLSRLCNT:
2754 case ETHTOOL_GRXCLSRULE:
55664f32 2755 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2756 convert_out = true;
2757 /* fall through */
2758 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2759 buf_size += sizeof(struct ethtool_rxnfc);
2760 convert_in = true;
2761 break;
2762 }
2763
2764 ifr = compat_alloc_user_space(buf_size);
954b1244 2765 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2766
2767 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2768 return -EFAULT;
2769
3a7da39d
BH
2770 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2771 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2772 return -EFAULT;
2773
3a7da39d 2774 if (convert_in) {
127fe533 2775 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2776 * fs.ring_cookie and at the end of fs, but nowhere else.
2777 */
127fe533
AD
2778 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2779 sizeof(compat_rxnfc->fs.m_ext) !=
2780 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2781 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2782 BUILD_BUG_ON(
2783 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2784 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2785 offsetof(struct ethtool_rxnfc, fs.location) -
2786 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2787
2788 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2789 (void __user *)(&rxnfc->fs.m_ext + 1) -
2790 (void __user *)rxnfc) ||
3a7da39d
BH
2791 copy_in_user(&rxnfc->fs.ring_cookie,
2792 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2793 (void __user *)(&rxnfc->fs.location + 1) -
2794 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2795 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2796 sizeof(rxnfc->rule_cnt)))
2797 return -EFAULT;
2798 }
2799
2800 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2801 if (ret)
2802 return ret;
2803
2804 if (convert_out) {
2805 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2806 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2807 (const void __user *)rxnfc) ||
3a7da39d
BH
2808 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2809 &rxnfc->fs.ring_cookie,
954b1244
SH
2810 (const void __user *)(&rxnfc->fs.location + 1) -
2811 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2812 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2813 sizeof(rxnfc->rule_cnt)))
2814 return -EFAULT;
2815
2816 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2817 /* As an optimisation, we only copy the actual
2818 * number of rules that the underlying
2819 * function returned. Since Mallory might
2820 * change the rule count in user memory, we
2821 * check that it is less than the rule count
2822 * originally given (as the user buffer size),
2823 * which has been range-checked.
2824 */
2825 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2826 return -EFAULT;
2827 if (actual_rule_cnt < rule_cnt)
2828 rule_cnt = actual_rule_cnt;
2829 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2830 &rxnfc->rule_locs[0],
2831 rule_cnt * sizeof(u32)))
2832 return -EFAULT;
2833 }
2834 }
2835
2836 return 0;
7a229387
AB
2837}
2838
7a50a240
AB
2839static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2840{
2841 void __user *uptr;
2842 compat_uptr_t uptr32;
2843 struct ifreq __user *uifr;
2844
c6d409cf 2845 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2846 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2847 return -EFAULT;
2848
2849 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2850 return -EFAULT;
2851
2852 uptr = compat_ptr(uptr32);
2853
2854 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2855 return -EFAULT;
2856
2857 return dev_ioctl(net, SIOCWANDEV, uifr);
2858}
2859
6b96018b
AB
2860static int bond_ioctl(struct net *net, unsigned int cmd,
2861 struct compat_ifreq __user *ifr32)
7a229387
AB
2862{
2863 struct ifreq kifr;
7a229387
AB
2864 mm_segment_t old_fs;
2865 int err;
7a229387
AB
2866
2867 switch (cmd) {
2868 case SIOCBONDENSLAVE:
2869 case SIOCBONDRELEASE:
2870 case SIOCBONDSETHWADDR:
2871 case SIOCBONDCHANGEACTIVE:
6b96018b 2872 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2873 return -EFAULT;
2874
2875 old_fs = get_fs();
c6d409cf 2876 set_fs(KERNEL_DS);
c3f52ae6 2877 err = dev_ioctl(net, cmd,
2878 (struct ifreq __user __force *) &kifr);
c6d409cf 2879 set_fs(old_fs);
7a229387
AB
2880
2881 return err;
7a229387 2882 default:
07d106d0 2883 return -ENOIOCTLCMD;
ccbd6a5a 2884 }
7a229387
AB
2885}
2886
590d4693
BH
2887/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2888static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2889 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2890{
2891 struct ifreq __user *u_ifreq64;
7a229387
AB
2892 char tmp_buf[IFNAMSIZ];
2893 void __user *data64;
2894 u32 data32;
2895
2896 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2897 IFNAMSIZ))
2898 return -EFAULT;
417c3522 2899 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2900 return -EFAULT;
2901 data64 = compat_ptr(data32);
2902
2903 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2904
7a229387
AB
2905 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2906 IFNAMSIZ))
2907 return -EFAULT;
417c3522 2908 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2909 return -EFAULT;
2910
6b96018b 2911 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2912}
2913
6b96018b
AB
2914static int dev_ifsioc(struct net *net, struct socket *sock,
2915 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2916{
a2116ed2 2917 struct ifreq __user *uifr;
7a229387
AB
2918 int err;
2919
a2116ed2
AB
2920 uifr = compat_alloc_user_space(sizeof(*uifr));
2921 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2922 return -EFAULT;
2923
2924 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2925
7a229387
AB
2926 if (!err) {
2927 switch (cmd) {
2928 case SIOCGIFFLAGS:
2929 case SIOCGIFMETRIC:
2930 case SIOCGIFMTU:
2931 case SIOCGIFMEM:
2932 case SIOCGIFHWADDR:
2933 case SIOCGIFINDEX:
2934 case SIOCGIFADDR:
2935 case SIOCGIFBRDADDR:
2936 case SIOCGIFDSTADDR:
2937 case SIOCGIFNETMASK:
fab2532b 2938 case SIOCGIFPFLAGS:
7a229387 2939 case SIOCGIFTXQLEN:
fab2532b
AB
2940 case SIOCGMIIPHY:
2941 case SIOCGMIIREG:
a2116ed2 2942 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2943 err = -EFAULT;
2944 break;
2945 }
2946 }
2947 return err;
2948}
2949
a2116ed2
AB
2950static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2951 struct compat_ifreq __user *uifr32)
2952{
2953 struct ifreq ifr;
2954 struct compat_ifmap __user *uifmap32;
2955 mm_segment_t old_fs;
2956 int err;
2957
2958 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2959 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2960 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2961 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2962 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2963 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2964 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2965 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2966 if (err)
2967 return -EFAULT;
2968
2969 old_fs = get_fs();
c6d409cf 2970 set_fs(KERNEL_DS);
c3f52ae6 2971 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2972 set_fs(old_fs);
a2116ed2
AB
2973
2974 if (cmd == SIOCGIFMAP && !err) {
2975 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2976 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2977 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2978 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2979 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2980 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2981 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2982 if (err)
2983 err = -EFAULT;
2984 }
2985 return err;
2986}
2987
7a229387 2988struct rtentry32 {
c6d409cf 2989 u32 rt_pad1;
7a229387
AB
2990 struct sockaddr rt_dst; /* target address */
2991 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2992 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2993 unsigned short rt_flags;
2994 short rt_pad2;
2995 u32 rt_pad3;
2996 unsigned char rt_tos;
2997 unsigned char rt_class;
2998 short rt_pad4;
2999 short rt_metric; /* +1 for binary compatibility! */
7a229387 3000 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
3001 u32 rt_mtu; /* per route MTU/Window */
3002 u32 rt_window; /* Window clamping */
7a229387
AB
3003 unsigned short rt_irtt; /* Initial RTT */
3004};
3005
3006struct in6_rtmsg32 {
3007 struct in6_addr rtmsg_dst;
3008 struct in6_addr rtmsg_src;
3009 struct in6_addr rtmsg_gateway;
3010 u32 rtmsg_type;
3011 u16 rtmsg_dst_len;
3012 u16 rtmsg_src_len;
3013 u32 rtmsg_metric;
3014 u32 rtmsg_info;
3015 u32 rtmsg_flags;
3016 s32 rtmsg_ifindex;
3017};
3018
6b96018b
AB
3019static int routing_ioctl(struct net *net, struct socket *sock,
3020 unsigned int cmd, void __user *argp)
7a229387
AB
3021{
3022 int ret;
3023 void *r = NULL;
3024 struct in6_rtmsg r6;
3025 struct rtentry r4;
3026 char devname[16];
3027 u32 rtdev;
3028 mm_segment_t old_fs = get_fs();
3029
6b96018b
AB
3030 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3031 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3032 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3033 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3034 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3035 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3036 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3037 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3038 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3039 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3040 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3041
3042 r = (void *) &r6;
3043 } else { /* ipv4 */
6b96018b 3044 struct rtentry32 __user *ur4 = argp;
c6d409cf 3045 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3046 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3047 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3048 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3049 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3050 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3051 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3052 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3053 if (rtdev) {
c6d409cf 3054 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3055 r4.rt_dev = (char __user __force *)devname;
3056 devname[15] = 0;
7a229387
AB
3057 } else
3058 r4.rt_dev = NULL;
3059
3060 r = (void *) &r4;
3061 }
3062
3063 if (ret) {
3064 ret = -EFAULT;
3065 goto out;
3066 }
3067
c6d409cf 3068 set_fs(KERNEL_DS);
6b96018b 3069 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3070 set_fs(old_fs);
7a229387
AB
3071
3072out:
7a229387
AB
3073 return ret;
3074}
3075
3076/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3077 * for some operations; this forces use of the newer bridge-utils that
25985edc 3078 * use compatible ioctls
7a229387 3079 */
6b96018b 3080static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3081{
6b96018b 3082 compat_ulong_t tmp;
7a229387 3083
6b96018b 3084 if (get_user(tmp, argp))
7a229387
AB
3085 return -EFAULT;
3086 if (tmp == BRCTL_GET_VERSION)
3087 return BRCTL_VERSION + 1;
3088 return -EINVAL;
3089}
3090
6b96018b
AB
3091static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3092 unsigned int cmd, unsigned long arg)
3093{
3094 void __user *argp = compat_ptr(arg);
3095 struct sock *sk = sock->sk;
3096 struct net *net = sock_net(sk);
7a229387 3097
6b96018b 3098 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3099 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3100
3101 switch (cmd) {
3102 case SIOCSIFBR:
3103 case SIOCGIFBR:
3104 return old_bridge_ioctl(argp);
3105 case SIOCGIFNAME:
3106 return dev_ifname32(net, argp);
3107 case SIOCGIFCONF:
3108 return dev_ifconf(net, argp);
3109 case SIOCETHTOOL:
3110 return ethtool_ioctl(net, argp);
7a50a240
AB
3111 case SIOCWANDEV:
3112 return compat_siocwandev(net, argp);
a2116ed2
AB
3113 case SIOCGIFMAP:
3114 case SIOCSIFMAP:
3115 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3116 case SIOCBONDENSLAVE:
3117 case SIOCBONDRELEASE:
3118 case SIOCBONDSETHWADDR:
6b96018b
AB
3119 case SIOCBONDCHANGEACTIVE:
3120 return bond_ioctl(net, cmd, argp);
3121 case SIOCADDRT:
3122 case SIOCDELRT:
3123 return routing_ioctl(net, sock, cmd, argp);
3124 case SIOCGSTAMP:
3125 return do_siocgstamp(net, sock, cmd, argp);
3126 case SIOCGSTAMPNS:
3127 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3128 case SIOCBONDSLAVEINFOQUERY:
3129 case SIOCBONDINFOQUERY:
a2116ed2 3130 case SIOCSHWTSTAMP:
fd468c74 3131 case SIOCGHWTSTAMP:
590d4693 3132 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3133
3134 case FIOSETOWN:
3135 case SIOCSPGRP:
3136 case FIOGETOWN:
3137 case SIOCGPGRP:
3138 case SIOCBRADDBR:
3139 case SIOCBRDELBR:
3140 case SIOCGIFVLAN:
3141 case SIOCSIFVLAN:
3142 case SIOCADDDLCI:
3143 case SIOCDELDLCI:
c62cce2c 3144 case SIOCGSKNS:
6b96018b
AB
3145 return sock_ioctl(file, cmd, arg);
3146
3147 case SIOCGIFFLAGS:
3148 case SIOCSIFFLAGS:
3149 case SIOCGIFMETRIC:
3150 case SIOCSIFMETRIC:
3151 case SIOCGIFMTU:
3152 case SIOCSIFMTU:
3153 case SIOCGIFMEM:
3154 case SIOCSIFMEM:
3155 case SIOCGIFHWADDR:
3156 case SIOCSIFHWADDR:
3157 case SIOCADDMULTI:
3158 case SIOCDELMULTI:
3159 case SIOCGIFINDEX:
6b96018b
AB
3160 case SIOCGIFADDR:
3161 case SIOCSIFADDR:
3162 case SIOCSIFHWBROADCAST:
6b96018b 3163 case SIOCDIFADDR:
6b96018b
AB
3164 case SIOCGIFBRDADDR:
3165 case SIOCSIFBRDADDR:
3166 case SIOCGIFDSTADDR:
3167 case SIOCSIFDSTADDR:
3168 case SIOCGIFNETMASK:
3169 case SIOCSIFNETMASK:
3170 case SIOCSIFPFLAGS:
3171 case SIOCGIFPFLAGS:
3172 case SIOCGIFTXQLEN:
3173 case SIOCSIFTXQLEN:
3174 case SIOCBRADDIF:
3175 case SIOCBRDELIF:
9177efd3
AB
3176 case SIOCSIFNAME:
3177 case SIOCGMIIPHY:
3178 case SIOCGMIIREG:
3179 case SIOCSMIIREG:
6b96018b 3180 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3181
6b96018b
AB
3182 case SIOCSARP:
3183 case SIOCGARP:
3184 case SIOCDARP:
6b96018b 3185 case SIOCATMARK:
9177efd3
AB
3186 return sock_do_ioctl(net, sock, cmd, arg);
3187 }
3188
6b96018b
AB
3189 return -ENOIOCTLCMD;
3190}
7a229387 3191
95c96174 3192static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3193 unsigned long arg)
89bbfc95
SP
3194{
3195 struct socket *sock = file->private_data;
3196 int ret = -ENOIOCTLCMD;
87de87d5
DM
3197 struct sock *sk;
3198 struct net *net;
3199
3200 sk = sock->sk;
3201 net = sock_net(sk);
89bbfc95
SP
3202
3203 if (sock->ops->compat_ioctl)
3204 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3205
87de87d5
DM
3206 if (ret == -ENOIOCTLCMD &&
3207 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3208 ret = compat_wext_handle_ioctl(net, cmd, arg);
3209
6b96018b
AB
3210 if (ret == -ENOIOCTLCMD)
3211 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3212
89bbfc95
SP
3213 return ret;
3214}
3215#endif
3216
ac5a488e
SS
3217int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3218{
3219 return sock->ops->bind(sock, addr, addrlen);
3220}
c6d409cf 3221EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3222
3223int kernel_listen(struct socket *sock, int backlog)
3224{
3225 return sock->ops->listen(sock, backlog);
3226}
c6d409cf 3227EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3228
3229int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3230{
3231 struct sock *sk = sock->sk;
3232 int err;
3233
3234 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3235 newsock);
3236 if (err < 0)
3237 goto done;
3238
3239 err = sock->ops->accept(sock, *newsock, flags);
3240 if (err < 0) {
3241 sock_release(*newsock);
fa8705b0 3242 *newsock = NULL;
ac5a488e
SS
3243 goto done;
3244 }
3245
3246 (*newsock)->ops = sock->ops;
1b08534e 3247 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3248
3249done:
3250 return err;
3251}
c6d409cf 3252EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3253
3254int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3255 int flags)
ac5a488e
SS
3256{
3257 return sock->ops->connect(sock, addr, addrlen, flags);
3258}
c6d409cf 3259EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3260
3261int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3262 int *addrlen)
3263{
3264 return sock->ops->getname(sock, addr, addrlen, 0);
3265}
c6d409cf 3266EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3267
3268int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3269 int *addrlen)
3270{
3271 return sock->ops->getname(sock, addr, addrlen, 1);
3272}
c6d409cf 3273EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3274
3275int kernel_getsockopt(struct socket *sock, int level, int optname,
3276 char *optval, int *optlen)
3277{
3278 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3279 char __user *uoptval;
3280 int __user *uoptlen;
ac5a488e
SS
3281 int err;
3282
fb8621bb
NK
3283 uoptval = (char __user __force *) optval;
3284 uoptlen = (int __user __force *) optlen;
3285
ac5a488e
SS
3286 set_fs(KERNEL_DS);
3287 if (level == SOL_SOCKET)
fb8621bb 3288 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3289 else
fb8621bb
NK
3290 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3291 uoptlen);
ac5a488e
SS
3292 set_fs(oldfs);
3293 return err;
3294}
c6d409cf 3295EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3296
3297int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3298 char *optval, unsigned int optlen)
ac5a488e
SS
3299{
3300 mm_segment_t oldfs = get_fs();
fb8621bb 3301 char __user *uoptval;
ac5a488e
SS
3302 int err;
3303
fb8621bb
NK
3304 uoptval = (char __user __force *) optval;
3305
ac5a488e
SS
3306 set_fs(KERNEL_DS);
3307 if (level == SOL_SOCKET)
fb8621bb 3308 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3309 else
fb8621bb 3310 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3311 optlen);
3312 set_fs(oldfs);
3313 return err;
3314}
c6d409cf 3315EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3316
3317int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3318 size_t size, int flags)
3319{
3320 if (sock->ops->sendpage)
3321 return sock->ops->sendpage(sock, page, offset, size, flags);
3322
3323 return sock_no_sendpage(sock, page, offset, size, flags);
3324}
c6d409cf 3325EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3326
3327int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3328{
3329 mm_segment_t oldfs = get_fs();
3330 int err;
3331
3332 set_fs(KERNEL_DS);
3333 err = sock->ops->ioctl(sock, cmd, arg);
3334 set_fs(oldfs);
3335
3336 return err;
3337}
c6d409cf 3338EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3339
91cf45f0
TM
3340int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3341{
3342 return sock->ops->shutdown(sock, how);
3343}
91cf45f0 3344EXPORT_SYMBOL(kernel_sock_shutdown);