]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/socket.c
Merge tag 'powerpc-4.9-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
[mirror_ubuntu-zesty-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
1da177e4
LT
92
93#include <asm/uaccess.h>
94#include <asm/unistd.h>
95
96#include <net/compat.h>
87de87d5 97#include <net/wext.h>
f8451725 98#include <net/cls_cgroup.h>
1da177e4
LT
99
100#include <net/sock.h>
101#include <linux/netfilter.h>
102
6b96018b
AB
103#include <linux/if_tun.h>
104#include <linux/ipv6_route.h>
105#include <linux/route.h>
6b96018b
AB
106#include <linux/sockios.h>
107#include <linux/atalk.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
121static unsigned int sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4
LT
165/*
166 * Statistics counters of the socket lists
167 */
168
c6d409cf 169static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
170
171/*
89bddce5
SH
172 * Support routines.
173 * Move socket addresses back and forth across the kernel/user
174 * divide and look after the messy bits.
1da177e4
LT
175 */
176
1da177e4
LT
177/**
178 * move_addr_to_kernel - copy a socket address into kernel space
179 * @uaddr: Address in user space
180 * @kaddr: Address in kernel space
181 * @ulen: Length in user space
182 *
183 * The address is copied into kernel space. If the provided address is
184 * too long an error code of -EINVAL is returned. If the copy gives
185 * invalid addresses -EFAULT is returned. On a success 0 is returned.
186 */
187
43db362d 188int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 189{
230b1839 190 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 191 return -EINVAL;
89bddce5 192 if (ulen == 0)
1da177e4 193 return 0;
89bddce5 194 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 195 return -EFAULT;
3ec3b2fb 196 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
197}
198
199/**
200 * move_addr_to_user - copy an address to user space
201 * @kaddr: kernel space address
202 * @klen: length of address in kernel
203 * @uaddr: user space address
204 * @ulen: pointer to user length field
205 *
206 * The value pointed to by ulen on entry is the buffer length available.
207 * This is overwritten with the buffer space used. -EINVAL is returned
208 * if an overlong buffer is specified or a negative buffer size. -EFAULT
209 * is returned if either the buffer or the length field are not
210 * accessible.
211 * After copying the data up to the limit the user specifies, the true
212 * length of the data is written over the length limit the user
213 * specified. Zero is returned for a success.
214 */
89bddce5 215
43db362d 216static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 217 void __user *uaddr, int __user *ulen)
1da177e4
LT
218{
219 int err;
220 int len;
221
68c6beb3 222 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
223 err = get_user(len, ulen);
224 if (err)
1da177e4 225 return err;
89bddce5
SH
226 if (len > klen)
227 len = klen;
68c6beb3 228 if (len < 0)
1da177e4 229 return -EINVAL;
89bddce5 230 if (len) {
d6fe3945
SG
231 if (audit_sockaddr(klen, kaddr))
232 return -ENOMEM;
89bddce5 233 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
234 return -EFAULT;
235 }
236 /*
89bddce5
SH
237 * "fromlen shall refer to the value before truncation.."
238 * 1003.1g
1da177e4
LT
239 */
240 return __put_user(klen, ulen);
241}
242
e18b890b 243static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
244
245static struct inode *sock_alloc_inode(struct super_block *sb)
246{
247 struct socket_alloc *ei;
eaefd110 248 struct socket_wq *wq;
89bddce5 249
e94b1766 250 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
251 if (!ei)
252 return NULL;
eaefd110
ED
253 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
254 if (!wq) {
43815482
ED
255 kmem_cache_free(sock_inode_cachep, ei);
256 return NULL;
257 }
eaefd110
ED
258 init_waitqueue_head(&wq->wait);
259 wq->fasync_list = NULL;
574aab1e 260 wq->flags = 0;
eaefd110 261 RCU_INIT_POINTER(ei->socket.wq, wq);
89bddce5 262
1da177e4
LT
263 ei->socket.state = SS_UNCONNECTED;
264 ei->socket.flags = 0;
265 ei->socket.ops = NULL;
266 ei->socket.sk = NULL;
267 ei->socket.file = NULL;
1da177e4
LT
268
269 return &ei->vfs_inode;
270}
271
272static void sock_destroy_inode(struct inode *inode)
273{
43815482 274 struct socket_alloc *ei;
eaefd110 275 struct socket_wq *wq;
43815482
ED
276
277 ei = container_of(inode, struct socket_alloc, vfs_inode);
eaefd110 278 wq = rcu_dereference_protected(ei->socket.wq, 1);
61845220 279 kfree_rcu(wq, rcu);
43815482 280 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
281}
282
51cc5068 283static void init_once(void *foo)
1da177e4 284{
89bddce5 285 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 286
a35afb83 287 inode_init_once(&ei->vfs_inode);
1da177e4 288}
89bddce5 289
1da177e4
LT
290static int init_inodecache(void)
291{
292 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
293 sizeof(struct socket_alloc),
294 0,
295 (SLAB_HWCACHE_ALIGN |
296 SLAB_RECLAIM_ACCOUNT |
5d097056 297 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 298 init_once);
1da177e4
LT
299 if (sock_inode_cachep == NULL)
300 return -ENOMEM;
301 return 0;
302}
303
b87221de 304static const struct super_operations sockfs_ops = {
c6d409cf
ED
305 .alloc_inode = sock_alloc_inode,
306 .destroy_inode = sock_destroy_inode,
307 .statfs = simple_statfs,
1da177e4
LT
308};
309
c23fbb6b
ED
310/*
311 * sockfs_dname() is called from d_path().
312 */
313static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
314{
315 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 316 d_inode(dentry)->i_ino);
c23fbb6b
ED
317}
318
3ba13d17 319static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 320 .d_dname = sockfs_dname,
1da177e4
LT
321};
322
bba0bd31
AG
323static int sockfs_xattr_get(const struct xattr_handler *handler,
324 struct dentry *dentry, struct inode *inode,
325 const char *suffix, void *value, size_t size)
326{
327 if (value) {
328 if (dentry->d_name.len + 1 > size)
329 return -ERANGE;
330 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
331 }
332 return dentry->d_name.len + 1;
333}
334
335#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
336#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
337#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
338
339static const struct xattr_handler sockfs_xattr_handler = {
340 .name = XATTR_NAME_SOCKPROTONAME,
341 .get = sockfs_xattr_get,
342};
343
4a590153
AG
344static int sockfs_security_xattr_set(const struct xattr_handler *handler,
345 struct dentry *dentry, struct inode *inode,
346 const char *suffix, const void *value,
347 size_t size, int flags)
348{
349 /* Handled by LSM. */
350 return -EAGAIN;
351}
352
353static const struct xattr_handler sockfs_security_xattr_handler = {
354 .prefix = XATTR_SECURITY_PREFIX,
355 .set = sockfs_security_xattr_set,
356};
357
bba0bd31
AG
358static const struct xattr_handler *sockfs_xattr_handlers[] = {
359 &sockfs_xattr_handler,
4a590153 360 &sockfs_security_xattr_handler,
bba0bd31
AG
361 NULL
362};
363
c74a1cbb
AV
364static struct dentry *sockfs_mount(struct file_system_type *fs_type,
365 int flags, const char *dev_name, void *data)
366{
bba0bd31
AG
367 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
368 sockfs_xattr_handlers,
369 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
370}
371
372static struct vfsmount *sock_mnt __read_mostly;
373
374static struct file_system_type sock_fs_type = {
375 .name = "sockfs",
376 .mount = sockfs_mount,
377 .kill_sb = kill_anon_super,
378};
379
1da177e4
LT
380/*
381 * Obtains the first available file descriptor and sets it up for use.
382 *
39d8c1b6
DM
383 * These functions create file structures and maps them to fd space
384 * of the current process. On success it returns file descriptor
1da177e4
LT
385 * and file struct implicitly stored in sock->file.
386 * Note that another thread may close file descriptor before we return
387 * from this function. We use the fact that now we do not refer
388 * to socket after mapping. If one day we will need it, this
389 * function will increment ref. count on file by 1.
390 *
391 * In any case returned fd MAY BE not valid!
392 * This race condition is unavoidable
393 * with shared fd spaces, we cannot solve it inside kernel,
394 * but we take care of internal coherence yet.
395 */
396
aab174f0 397struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 398{
7cbe66b6 399 struct qstr name = { .name = "" };
2c48b9c4 400 struct path path;
7cbe66b6 401 struct file *file;
1da177e4 402
600e1779
MY
403 if (dname) {
404 name.name = dname;
405 name.len = strlen(name.name);
406 } else if (sock->sk) {
407 name.name = sock->sk->sk_prot_creator->name;
408 name.len = strlen(name.name);
409 }
4b936885 410 path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
28407630
AV
411 if (unlikely(!path.dentry))
412 return ERR_PTR(-ENOMEM);
2c48b9c4 413 path.mnt = mntget(sock_mnt);
39d8c1b6 414
2c48b9c4 415 d_instantiate(path.dentry, SOCK_INODE(sock));
39d8c1b6 416
2c48b9c4 417 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 418 &socket_file_ops);
b5ffe634 419 if (IS_ERR(file)) {
cc3808f8 420 /* drop dentry, keep inode */
c5ef6035 421 ihold(d_inode(path.dentry));
2c48b9c4 422 path_put(&path);
39b65252 423 return file;
cc3808f8
AV
424 }
425
426 sock->file = file;
77d27200 427 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6 428 file->private_data = sock;
28407630 429 return file;
39d8c1b6 430}
56b31d1c 431EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 432
56b31d1c 433static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
434{
435 struct file *newfile;
28407630
AV
436 int fd = get_unused_fd_flags(flags);
437 if (unlikely(fd < 0))
438 return fd;
39d8c1b6 439
aab174f0 440 newfile = sock_alloc_file(sock, flags, NULL);
28407630 441 if (likely(!IS_ERR(newfile))) {
39d8c1b6 442 fd_install(fd, newfile);
28407630
AV
443 return fd;
444 }
7cbe66b6 445
28407630
AV
446 put_unused_fd(fd);
447 return PTR_ERR(newfile);
1da177e4
LT
448}
449
406a3c63 450struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 451{
6cb153ca
BL
452 if (file->f_op == &socket_file_ops)
453 return file->private_data; /* set in sock_map_fd */
454
23bb80d2
ED
455 *err = -ENOTSOCK;
456 return NULL;
6cb153ca 457}
406a3c63 458EXPORT_SYMBOL(sock_from_file);
6cb153ca 459
1da177e4 460/**
c6d409cf 461 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
462 * @fd: file handle
463 * @err: pointer to an error code return
464 *
465 * The file handle passed in is locked and the socket it is bound
466 * too is returned. If an error occurs the err pointer is overwritten
467 * with a negative errno code and NULL is returned. The function checks
468 * for both invalid handles and passing a handle which is not a socket.
469 *
470 * On a success the socket object pointer is returned.
471 */
472
473struct socket *sockfd_lookup(int fd, int *err)
474{
475 struct file *file;
1da177e4
LT
476 struct socket *sock;
477
89bddce5
SH
478 file = fget(fd);
479 if (!file) {
1da177e4
LT
480 *err = -EBADF;
481 return NULL;
482 }
89bddce5 483
6cb153ca
BL
484 sock = sock_from_file(file, err);
485 if (!sock)
1da177e4 486 fput(file);
6cb153ca
BL
487 return sock;
488}
c6d409cf 489EXPORT_SYMBOL(sockfd_lookup);
1da177e4 490
6cb153ca
BL
491static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
492{
00e188ef 493 struct fd f = fdget(fd);
6cb153ca
BL
494 struct socket *sock;
495
3672558c 496 *err = -EBADF;
00e188ef
AV
497 if (f.file) {
498 sock = sock_from_file(f.file, err);
499 if (likely(sock)) {
500 *fput_needed = f.flags;
6cb153ca 501 return sock;
00e188ef
AV
502 }
503 fdput(f);
1da177e4 504 }
6cb153ca 505 return NULL;
1da177e4
LT
506}
507
600e1779
MY
508static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
509 size_t size)
510{
511 ssize_t len;
512 ssize_t used = 0;
513
c5ef6035 514 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
515 if (len < 0)
516 return len;
517 used += len;
518 if (buffer) {
519 if (size < used)
520 return -ERANGE;
521 buffer += len;
522 }
523
524 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
525 used += len;
526 if (buffer) {
527 if (size < used)
528 return -ERANGE;
529 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
530 buffer += len;
531 }
532
533 return used;
534}
535
536static const struct inode_operations sockfs_inode_ops = {
600e1779
MY
537 .listxattr = sockfs_listxattr,
538};
539
1da177e4
LT
540/**
541 * sock_alloc - allocate a socket
89bddce5 542 *
1da177e4
LT
543 * Allocate a new inode and socket object. The two are bound together
544 * and initialised. The socket is then returned. If we are out of inodes
545 * NULL is returned.
546 */
547
f4a00aac 548struct socket *sock_alloc(void)
1da177e4 549{
89bddce5
SH
550 struct inode *inode;
551 struct socket *sock;
1da177e4 552
a209dfc7 553 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
554 if (!inode)
555 return NULL;
556
557 sock = SOCKET_I(inode);
558
29a020d3 559 kmemcheck_annotate_bitfield(sock, type);
85fe4025 560 inode->i_ino = get_next_ino();
89bddce5 561 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
562 inode->i_uid = current_fsuid();
563 inode->i_gid = current_fsgid();
600e1779 564 inode->i_op = &sockfs_inode_ops;
1da177e4 565
19e8d69c 566 this_cpu_add(sockets_in_use, 1);
1da177e4
LT
567 return sock;
568}
f4a00aac 569EXPORT_SYMBOL(sock_alloc);
1da177e4 570
1da177e4
LT
571/**
572 * sock_release - close a socket
573 * @sock: socket to close
574 *
575 * The socket is released from the protocol stack if it has a release
576 * callback, and the inode is then released if the socket is bound to
89bddce5 577 * an inode not a file.
1da177e4 578 */
89bddce5 579
1da177e4
LT
580void sock_release(struct socket *sock)
581{
582 if (sock->ops) {
583 struct module *owner = sock->ops->owner;
584
585 sock->ops->release(sock);
586 sock->ops = NULL;
587 module_put(owner);
588 }
589
eaefd110 590 if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
3410f22e 591 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 592
19e8d69c 593 this_cpu_sub(sockets_in_use, 1);
1da177e4
LT
594 if (!sock->file) {
595 iput(SOCK_INODE(sock));
596 return;
597 }
89bddce5 598 sock->file = NULL;
1da177e4 599}
c6d409cf 600EXPORT_SYMBOL(sock_release);
1da177e4 601
c14ac945 602void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 603{
140c55d4
ED
604 u8 flags = *tx_flags;
605
c14ac945 606 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
607 flags |= SKBTX_HW_TSTAMP;
608
c14ac945 609 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
610 flags |= SKBTX_SW_TSTAMP;
611
c14ac945 612 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
613 flags |= SKBTX_SCHED_TSTAMP;
614
140c55d4 615 *tx_flags = flags;
20d49473 616}
67cc0d40 617EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 618
d8725c86 619static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 620{
01e97e65 621 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
622 BUG_ON(ret == -EIOCBQUEUED);
623 return ret;
1da177e4
LT
624}
625
/*
 * Send @msg on @sock after the security module has approved it. A non-zero
 * result from the security hook is returned without touching the protocol.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
	int err;

	err = security_socket_sendmsg(sock, msg, msg_data_left(msg));
	if (err)
		return err;

	return sock_sendmsg_nosec(sock, msg);
}
c6d409cf 633EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
634
635int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
636 struct kvec *vec, size_t num, size_t size)
637{
6aa24814 638 iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
d8725c86 639 return sock_sendmsg(sock, msg);
1da177e4 640}
c6d409cf 641EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 642
92f37fd2
ED
643/*
644 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
645 */
646void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
647 struct sk_buff *skb)
648{
20d49473 649 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
f24b9be5 650 struct scm_timestamping tss;
20d49473
PO
651 int empty = 1;
652 struct skb_shared_hwtstamps *shhwtstamps =
653 skb_hwtstamps(skb);
654
655 /* Race occurred between timestamp enabling and packet
656 receiving. Fill in the current time for now. */
657 if (need_software_tstamp && skb->tstamp.tv64 == 0)
658 __net_timestamp(skb);
659
660 if (need_software_tstamp) {
661 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
662 struct timeval tv;
663 skb_get_timestamp(skb, &tv);
664 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
665 sizeof(tv), &tv);
666 } else {
f24b9be5
WB
667 struct timespec ts;
668 skb_get_timestampns(skb, &ts);
20d49473 669 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
f24b9be5 670 sizeof(ts), &ts);
20d49473
PO
671 }
672 }
673
f24b9be5 674 memset(&tss, 0, sizeof(tss));
c199105d 675 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
f24b9be5 676 ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
20d49473 677 empty = 0;
4d276eb6 678 if (shhwtstamps &&
b9f40e21 679 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
f24b9be5 680 ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
4d276eb6 681 empty = 0;
20d49473
PO
682 if (!empty)
683 put_cmsg(msg, SOL_SOCKET,
f24b9be5 684 SCM_TIMESTAMPING, sizeof(tss), &tss);
92f37fd2 685}
7c81fd8b
ACM
686EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
687
6e3e939f
JB
688void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
689 struct sk_buff *skb)
690{
691 int ack;
692
693 if (!sock_flag(sk, SOCK_WIFI_STATUS))
694 return;
695 if (!skb->wifi_acked_valid)
696 return;
697
698 ack = skb->wifi_acked;
699
700 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
701}
702EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
703
11165f14 704static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
705 struct sk_buff *skb)
3b885787 706{
744d5a3e 707 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 708 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 709 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
710}
711
/*
 * Attach both kinds of receive metadata for @skb to @msg: first the
 * timestamp, then the drop count.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);	/* timestamps first ... */
	sock_recv_drops(msg, sk, skb);		/* ... then the drop count */
}
767dd033 718EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 719
1b784140 720static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 721 int flags)
1da177e4 722{
2da62906 723 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
724}
725
/*
 * Receive into @msg from @sock after the security module has approved the
 * operation. A non-zero result from the security hook is returned without
 * calling into the protocol.
 */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
	int err;

	err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
	if (err)
		return err;

	return sock_recvmsg_nosec(sock, msg, flags);
}
c6d409cf 732EXPORT_SYMBOL(sock_recvmsg);
1da177e4 733
c1249c0a
ML
734/**
735 * kernel_recvmsg - Receive a message from a socket (kernel space)
736 * @sock: The socket to receive the message from
737 * @msg: Received message
738 * @vec: Input s/g array for message data
739 * @num: Size of input s/g array
740 * @size: Number of bytes to read
741 * @flags: Message flags (MSG_DONTWAIT, etc...)
742 *
743 * On return the msg structure contains the scatter/gather array passed in the
744 * vec argument. The array is modified so that it consists of the unfilled
745 * portion of the original array.
746 *
747 * The returned value is the total number of bytes received, or an error.
748 */
89bddce5
SH
749int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
750 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
751{
752 mm_segment_t oldfs = get_fs();
753 int result;
754
6aa24814 755 iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
1da177e4 756 set_fs(KERNEL_DS);
2da62906 757 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
758 set_fs(oldfs);
759 return result;
760}
c6d409cf 761EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 762
ce1d4d3e
CH
763static ssize_t sock_sendpage(struct file *file, struct page *page,
764 int offset, size_t size, loff_t *ppos, int more)
1da177e4 765{
1da177e4
LT
766 struct socket *sock;
767 int flags;
768
ce1d4d3e
CH
769 sock = file->private_data;
770
35f9c09f
ED
771 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
772 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
773 flags |= more;
ce1d4d3e 774
e6949583 775 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 776}
1da177e4 777
9c55e01c 778static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 779 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
780 unsigned int flags)
781{
782 struct socket *sock = file->private_data;
783
997b37da
RDC
784 if (unlikely(!sock->ops->splice_read))
785 return -EINVAL;
786
9c55e01c
JA
787 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
788}
789
8ae5e030 790static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 791{
6d652330
AV
792 struct file *file = iocb->ki_filp;
793 struct socket *sock = file->private_data;
0345f931 794 struct msghdr msg = {.msg_iter = *to,
795 .msg_iocb = iocb};
8ae5e030 796 ssize_t res;
ce1d4d3e 797
8ae5e030
AV
798 if (file->f_flags & O_NONBLOCK)
799 msg.msg_flags = MSG_DONTWAIT;
800
801 if (iocb->ki_pos != 0)
1da177e4 802 return -ESPIPE;
027445c3 803
66ee59af 804 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
805 return 0;
806
2da62906 807 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
808 *to = msg.msg_iter;
809 return res;
1da177e4
LT
810}
811
8ae5e030 812static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 813{
6d652330
AV
814 struct file *file = iocb->ki_filp;
815 struct socket *sock = file->private_data;
0345f931 816 struct msghdr msg = {.msg_iter = *from,
817 .msg_iocb = iocb};
8ae5e030 818 ssize_t res;
1da177e4 819
8ae5e030 820 if (iocb->ki_pos != 0)
ce1d4d3e 821 return -ESPIPE;
027445c3 822
8ae5e030
AV
823 if (file->f_flags & O_NONBLOCK)
824 msg.msg_flags = MSG_DONTWAIT;
825
6d652330
AV
826 if (sock->type == SOCK_SEQPACKET)
827 msg.msg_flags |= MSG_EOR;
828
d8725c86 829 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
830 *from = msg.msg_iter;
831 return res;
1da177e4
LT
832}
833
1da177e4
LT
834/*
835 * Atomic setting of ioctl hooks to avoid race
836 * with module unload.
837 */
838
4a3e2f71 839static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 840static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 841
881d966b 842void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 843{
4a3e2f71 844 mutex_lock(&br_ioctl_mutex);
1da177e4 845 br_ioctl_hook = hook;
4a3e2f71 846 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
847}
848EXPORT_SYMBOL(brioctl_set);
849
4a3e2f71 850static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 851static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 852
881d966b 853void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 854{
4a3e2f71 855 mutex_lock(&vlan_ioctl_mutex);
1da177e4 856 vlan_ioctl_hook = hook;
4a3e2f71 857 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
858}
859EXPORT_SYMBOL(vlan_ioctl_set);
860
4a3e2f71 861static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 862static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 863
89bddce5 864void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 865{
4a3e2f71 866 mutex_lock(&dlci_ioctl_mutex);
1da177e4 867 dlci_ioctl_hook = hook;
4a3e2f71 868 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
869}
870EXPORT_SYMBOL(dlci_ioctl_set);
871
6b96018b
AB
872static long sock_do_ioctl(struct net *net, struct socket *sock,
873 unsigned int cmd, unsigned long arg)
874{
875 int err;
876 void __user *argp = (void __user *)arg;
877
878 err = sock->ops->ioctl(sock, cmd, arg);
879
880 /*
881 * If this ioctl is unknown try to hand it down
882 * to the NIC driver.
883 */
884 if (err == -ENOIOCTLCMD)
885 err = dev_ioctl(net, cmd, argp);
886
887 return err;
888}
889
1da177e4
LT
890/*
891 * With an ioctl, arg may well be a user mode pointer, but we don't know
892 * what to do with it - that's up to the protocol still.
893 */
894
895static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
896{
897 struct socket *sock;
881d966b 898 struct sock *sk;
1da177e4
LT
899 void __user *argp = (void __user *)arg;
900 int pid, err;
881d966b 901 struct net *net;
1da177e4 902
b69aee04 903 sock = file->private_data;
881d966b 904 sk = sock->sk;
3b1e0a65 905 net = sock_net(sk);
1da177e4 906 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 907 err = dev_ioctl(net, cmd, argp);
1da177e4 908 } else
3d23e349 909#ifdef CONFIG_WEXT_CORE
1da177e4 910 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 911 err = dev_ioctl(net, cmd, argp);
1da177e4 912 } else
3d23e349 913#endif
89bddce5 914 switch (cmd) {
1da177e4
LT
915 case FIOSETOWN:
916 case SIOCSPGRP:
917 err = -EFAULT;
918 if (get_user(pid, (int __user *)argp))
919 break;
e0b93edd
JL
920 f_setown(sock->file, pid, 1);
921 err = 0;
1da177e4
LT
922 break;
923 case FIOGETOWN:
924 case SIOCGPGRP:
609d7fa9 925 err = put_user(f_getown(sock->file),
89bddce5 926 (int __user *)argp);
1da177e4
LT
927 break;
928 case SIOCGIFBR:
929 case SIOCSIFBR:
930 case SIOCBRADDBR:
931 case SIOCBRDELBR:
932 err = -ENOPKG;
933 if (!br_ioctl_hook)
934 request_module("bridge");
935
4a3e2f71 936 mutex_lock(&br_ioctl_mutex);
89bddce5 937 if (br_ioctl_hook)
881d966b 938 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 939 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
940 break;
941 case SIOCGIFVLAN:
942 case SIOCSIFVLAN:
943 err = -ENOPKG;
944 if (!vlan_ioctl_hook)
945 request_module("8021q");
946
4a3e2f71 947 mutex_lock(&vlan_ioctl_mutex);
1da177e4 948 if (vlan_ioctl_hook)
881d966b 949 err = vlan_ioctl_hook(net, argp);
4a3e2f71 950 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 951 break;
1da177e4
LT
952 case SIOCADDDLCI:
953 case SIOCDELDLCI:
954 err = -ENOPKG;
955 if (!dlci_ioctl_hook)
956 request_module("dlci");
957
7512cbf6
PE
958 mutex_lock(&dlci_ioctl_mutex);
959 if (dlci_ioctl_hook)
1da177e4 960 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 961 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
962 break;
963 default:
6b96018b 964 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 965 break;
89bddce5 966 }
1da177e4
LT
967 return err;
968}
969
970int sock_create_lite(int family, int type, int protocol, struct socket **res)
971{
972 int err;
973 struct socket *sock = NULL;
89bddce5 974
1da177e4
LT
975 err = security_socket_create(family, type, protocol, 1);
976 if (err)
977 goto out;
978
979 sock = sock_alloc();
980 if (!sock) {
981 err = -ENOMEM;
982 goto out;
983 }
984
1da177e4 985 sock->type = type;
7420ed23
VY
986 err = security_socket_post_create(sock, family, type, protocol, 1);
987 if (err)
988 goto out_release;
989
1da177e4
LT
990out:
991 *res = sock;
992 return err;
7420ed23
VY
993out_release:
994 sock_release(sock);
995 sock = NULL;
996 goto out;
1da177e4 997}
c6d409cf 998EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
999
1000/* No kernel lock held - perfect */
89bddce5 1001static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4 1002{
cbf55001 1003 unsigned int busy_flag = 0;
1da177e4
LT
1004 struct socket *sock;
1005
1006 /*
89bddce5 1007 * We can't return errors to poll, so it's either yes or no.
1da177e4 1008 */
b69aee04 1009 sock = file->private_data;
2d48d67f 1010
cbf55001 1011 if (sk_can_busy_loop(sock->sk)) {
2d48d67f 1012 /* this socket can poll_ll so tell the system call */
cbf55001 1013 busy_flag = POLL_BUSY_LOOP;
2d48d67f
ET
1014
1015 /* once, only if requested by syscall */
cbf55001
ET
1016 if (wait && (wait->_key & POLL_BUSY_LOOP))
1017 sk_busy_loop(sock->sk, 1);
2d48d67f
ET
1018 }
1019
cbf55001 1020 return busy_flag | sock->ops->poll(file, sock, wait);
1da177e4
LT
1021}
1022
89bddce5 1023static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1024{
b69aee04 1025 struct socket *sock = file->private_data;
1da177e4
LT
1026
1027 return sock->ops->mmap(file, sock, vma);
1028}
1029
/* ->release() for socket files: release the socket embedded in the inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
1035
1036/*
1037 * Update the socket async list
1038 *
1039 * Fasync_list locking strategy.
1040 *
1041 * 1. fasync_list is modified only under process context socket lock
1042 * i.e. under semaphore.
1043 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1044 * or under socket lock
1da177e4
LT
1045 */
1046
1047static int sock_fasync(int fd, struct file *filp, int on)
1048{
989a2979
ED
1049 struct socket *sock = filp->private_data;
1050 struct sock *sk = sock->sk;
eaefd110 1051 struct socket_wq *wq;
1da177e4 1052
989a2979 1053 if (sk == NULL)
1da177e4 1054 return -EINVAL;
1da177e4
LT
1055
1056 lock_sock(sk);
1e1d04e6 1057 wq = rcu_dereference_protected(sock->wq, lockdep_sock_is_held(sk));
eaefd110 1058 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1059
eaefd110 1060 if (!wq->fasync_list)
989a2979
ED
1061 sock_reset_flag(sk, SOCK_FASYNC);
1062 else
bcdce719 1063 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1064
989a2979 1065 release_sock(sk);
1da177e4
LT
1066 return 0;
1067}
1068
ceb5d58b 1069/* This function may be called only under rcu_lock */
1da177e4 1070
ceb5d58b 1071int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1072{
ceb5d58b 1073 if (!wq || !wq->fasync_list)
1da177e4 1074 return -1;
ceb5d58b 1075
89bddce5 1076 switch (how) {
8d8ad9d7 1077 case SOCK_WAKE_WAITD:
ceb5d58b 1078 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1079 break;
1080 goto call_kill;
8d8ad9d7 1081 case SOCK_WAKE_SPACE:
ceb5d58b 1082 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1083 break;
1084 /* fall through */
8d8ad9d7 1085 case SOCK_WAKE_IO:
89bddce5 1086call_kill:
43815482 1087 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1088 break;
8d8ad9d7 1089 case SOCK_WAKE_URG:
43815482 1090 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1091 }
ceb5d58b 1092
1da177e4
LT
1093 return 0;
1094}
c6d409cf 1095EXPORT_SYMBOL(sock_wake_async);
1da177e4 1096
721db93a 1097int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1098 struct socket **res, int kern)
1da177e4
LT
1099{
1100 int err;
1101 struct socket *sock;
55737fda 1102 const struct net_proto_family *pf;
1da177e4
LT
1103
1104 /*
89bddce5 1105 * Check protocol is in range
1da177e4
LT
1106 */
1107 if (family < 0 || family >= NPROTO)
1108 return -EAFNOSUPPORT;
1109 if (type < 0 || type >= SOCK_MAX)
1110 return -EINVAL;
1111
1112 /* Compatibility.
1113
1114 This uglymoron is moved from INET layer to here to avoid
1115 deadlock in module load.
1116 */
1117 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1118 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119 current->comm);
1da177e4
LT
1120 family = PF_PACKET;
1121 }
1122
1123 err = security_socket_create(family, type, protocol, kern);
1124 if (err)
1125 return err;
89bddce5 1126
55737fda
SH
1127 /*
1128 * Allocate the socket and allow the family to set things up. if
1129 * the protocol is 0, the family is instructed to select an appropriate
1130 * default.
1131 */
1132 sock = sock_alloc();
1133 if (!sock) {
e87cc472 1134 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1135 return -ENFILE; /* Not exactly a match, but its the
1136 closest posix thing */
1137 }
1138
1139 sock->type = type;
1140
95a5afca 1141#ifdef CONFIG_MODULES
89bddce5
SH
1142 /* Attempt to load a protocol module if the find failed.
1143 *
1144 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1145 * requested real, full-featured networking support upon configuration.
1146 * Otherwise module support will break!
1147 */
190683a9 1148 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1149 request_module("net-pf-%d", family);
1da177e4
LT
1150#endif
1151
55737fda
SH
1152 rcu_read_lock();
1153 pf = rcu_dereference(net_families[family]);
1154 err = -EAFNOSUPPORT;
1155 if (!pf)
1156 goto out_release;
1da177e4
LT
1157
1158 /*
1159 * We will call the ->create function, that possibly is in a loadable
1160 * module, so we have to bump that loadable module refcnt first.
1161 */
55737fda 1162 if (!try_module_get(pf->owner))
1da177e4
LT
1163 goto out_release;
1164
55737fda
SH
1165 /* Now protected by module ref count */
1166 rcu_read_unlock();
1167
3f378b68 1168 err = pf->create(net, sock, protocol, kern);
55737fda 1169 if (err < 0)
1da177e4 1170 goto out_module_put;
a79af59e 1171
1da177e4
LT
1172 /*
1173 * Now to bump the refcnt of the [loadable] module that owns this
1174 * socket at sock_release time we decrement its refcnt.
1175 */
55737fda
SH
1176 if (!try_module_get(sock->ops->owner))
1177 goto out_module_busy;
1178
1da177e4
LT
1179 /*
1180 * Now that we're done with the ->create function, the [loadable]
1181 * module can have its refcnt decremented
1182 */
55737fda 1183 module_put(pf->owner);
7420ed23
VY
1184 err = security_socket_post_create(sock, family, type, protocol, kern);
1185 if (err)
3b185525 1186 goto out_sock_release;
55737fda 1187 *res = sock;
1da177e4 1188
55737fda
SH
1189 return 0;
1190
1191out_module_busy:
1192 err = -EAFNOSUPPORT;
1da177e4 1193out_module_put:
55737fda
SH
1194 sock->ops = NULL;
1195 module_put(pf->owner);
1196out_sock_release:
1da177e4 1197 sock_release(sock);
55737fda
SH
1198 return err;
1199
1200out_release:
1201 rcu_read_unlock();
1202 goto out_sock_release;
1da177e4 1203}
721db93a 1204EXPORT_SYMBOL(__sock_create);
1da177e4
LT
1205
1206int sock_create(int family, int type, int protocol, struct socket **res)
1207{
1b8d7ae4 1208 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1209}
c6d409cf 1210EXPORT_SYMBOL(sock_create);
1da177e4 1211
/*
 * Create a socket in namespace @net, passing kern == 1 to
 * __sock_create().
 */
int sock_create_kern(struct net *net, int family, int type, int protocol,
		     struct socket **res)
{
	const int kern = 1;

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
1da177e4 1217
3e0fa65f 1218SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1219{
1220 int retval;
1221 struct socket *sock;
a677a039
UD
1222 int flags;
1223
e38b36f3
UD
1224 /* Check the SOCK_* constants for consistency. */
1225 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1226 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1227 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1228 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1229
a677a039 1230 flags = type & ~SOCK_TYPE_MASK;
77d27200 1231 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1232 return -EINVAL;
1233 type &= SOCK_TYPE_MASK;
1da177e4 1234
aaca0bdc
UD
1235 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1236 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1237
1da177e4
LT
1238 retval = sock_create(family, type, protocol, &sock);
1239 if (retval < 0)
1240 goto out;
1241
77d27200 1242 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1243 if (retval < 0)
1244 goto out_release;
1245
1246out:
1247 /* It may be already another descriptor 8) Not kernel problem. */
1248 return retval;
1249
1250out_release:
1251 sock_release(sock);
1252 return retval;
1253}
1254
1255/*
1256 * Create a pair of connected sockets.
1257 */
1258
3e0fa65f
HC
1259SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1260 int __user *, usockvec)
1da177e4
LT
1261{
1262 struct socket *sock1, *sock2;
1263 int fd1, fd2, err;
db349509 1264 struct file *newfile1, *newfile2;
a677a039
UD
1265 int flags;
1266
1267 flags = type & ~SOCK_TYPE_MASK;
77d27200 1268 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1269 return -EINVAL;
1270 type &= SOCK_TYPE_MASK;
1da177e4 1271
aaca0bdc
UD
1272 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1273 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1274
1da177e4
LT
1275 /*
1276 * Obtain the first socket and check if the underlying protocol
1277 * supports the socketpair call.
1278 */
1279
1280 err = sock_create(family, type, protocol, &sock1);
1281 if (err < 0)
1282 goto out;
1283
1284 err = sock_create(family, type, protocol, &sock2);
1285 if (err < 0)
1286 goto out_release_1;
1287
1288 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1289 if (err < 0)
1da177e4
LT
1290 goto out_release_both;
1291
28407630 1292 fd1 = get_unused_fd_flags(flags);
bf3c23d1
DM
1293 if (unlikely(fd1 < 0)) {
1294 err = fd1;
db349509 1295 goto out_release_both;
bf3c23d1 1296 }
d73aa286 1297
28407630 1298 fd2 = get_unused_fd_flags(flags);
198de4d7
AV
1299 if (unlikely(fd2 < 0)) {
1300 err = fd2;
d73aa286 1301 goto out_put_unused_1;
28407630
AV
1302 }
1303
aab174f0 1304 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1305 if (IS_ERR(newfile1)) {
28407630 1306 err = PTR_ERR(newfile1);
d73aa286 1307 goto out_put_unused_both;
28407630
AV
1308 }
1309
aab174f0 1310 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1311 if (IS_ERR(newfile2)) {
1312 err = PTR_ERR(newfile2);
d73aa286 1313 goto out_fput_1;
db349509
AV
1314 }
1315
d73aa286
YD
1316 err = put_user(fd1, &usockvec[0]);
1317 if (err)
1318 goto out_fput_both;
1319
1320 err = put_user(fd2, &usockvec[1]);
1321 if (err)
1322 goto out_fput_both;
1323
157cf649 1324 audit_fd_pair(fd1, fd2);
d73aa286 1325
db349509
AV
1326 fd_install(fd1, newfile1);
1327 fd_install(fd2, newfile2);
1da177e4
LT
1328 /* fd1 and fd2 may be already another descriptors.
1329 * Not kernel problem.
1330 */
1331
d73aa286 1332 return 0;
1da177e4 1333
d73aa286
YD
1334out_fput_both:
1335 fput(newfile2);
1336 fput(newfile1);
1337 put_unused_fd(fd2);
1338 put_unused_fd(fd1);
1339 goto out;
1340
1341out_fput_1:
1342 fput(newfile1);
1343 put_unused_fd(fd2);
1344 put_unused_fd(fd1);
1345 sock_release(sock2);
1346 goto out;
1da177e4 1347
d73aa286
YD
1348out_put_unused_both:
1349 put_unused_fd(fd2);
1350out_put_unused_1:
1351 put_unused_fd(fd1);
1da177e4 1352out_release_both:
89bddce5 1353 sock_release(sock2);
1da177e4 1354out_release_1:
89bddce5 1355 sock_release(sock1);
1da177e4
LT
1356out:
1357 return err;
1358}
1359
1da177e4
LT
1360/*
1361 * Bind a name to a socket. Nothing much to do here since it's
1362 * the protocol's responsibility to handle the local address.
1363 *
1364 * We move the socket address to kernel space before we call
1365 * the protocol layer (having also checked the address is ok).
1366 */
1367
20f37034 1368SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1369{
1370 struct socket *sock;
230b1839 1371 struct sockaddr_storage address;
6cb153ca 1372 int err, fput_needed;
1da177e4 1373
89bddce5 1374 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1375 if (sock) {
43db362d 1376 err = move_addr_to_kernel(umyaddr, addrlen, &address);
89bddce5
SH
1377 if (err >= 0) {
1378 err = security_socket_bind(sock,
230b1839 1379 (struct sockaddr *)&address,
89bddce5 1380 addrlen);
6cb153ca
BL
1381 if (!err)
1382 err = sock->ops->bind(sock,
89bddce5 1383 (struct sockaddr *)
230b1839 1384 &address, addrlen);
1da177e4 1385 }
6cb153ca 1386 fput_light(sock->file, fput_needed);
89bddce5 1387 }
1da177e4
LT
1388 return err;
1389}
1390
1da177e4
LT
1391/*
1392 * Perform a listen. Basically, we allow the protocol to do anything
1393 * necessary for a listen, and if that works, we mark the socket as
1394 * ready for listening.
1395 */
1396
3e0fa65f 1397SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1398{
1399 struct socket *sock;
6cb153ca 1400 int err, fput_needed;
b8e1f9b5 1401 int somaxconn;
89bddce5
SH
1402
1403 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1404 if (sock) {
8efa6e93 1405 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1406 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1407 backlog = somaxconn;
1da177e4
LT
1408
1409 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1410 if (!err)
1411 err = sock->ops->listen(sock, backlog);
1da177e4 1412
6cb153ca 1413 fput_light(sock->file, fput_needed);
1da177e4
LT
1414 }
1415 return err;
1416}
1417
1da177e4
LT
1418/*
1419 * For accept, we attempt to create a new socket, set up the link
1420 * with the client, wake up the client, then return the new
1421 * connected fd. We collect the address of the connector in kernel
1422 * space and move it to user at the very end. This is unclean because
1423 * we open the socket then return an error.
1424 *
1425 * 1003.1g adds the ability to recvmsg() to query connection pending
1426 * status to recvmsg. We need to add that support in a way thats
1427 * clean when we restucture accept also.
1428 */
1429
20f37034
HC
1430SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1431 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1432{
1433 struct socket *sock, *newsock;
39d8c1b6 1434 struct file *newfile;
6cb153ca 1435 int err, len, newfd, fput_needed;
230b1839 1436 struct sockaddr_storage address;
1da177e4 1437
77d27200 1438 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1439 return -EINVAL;
1440
1441 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1442 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1443
6cb153ca 1444 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1445 if (!sock)
1446 goto out;
1447
1448 err = -ENFILE;
c6d409cf
ED
1449 newsock = sock_alloc();
1450 if (!newsock)
1da177e4
LT
1451 goto out_put;
1452
1453 newsock->type = sock->type;
1454 newsock->ops = sock->ops;
1455
1da177e4
LT
1456 /*
1457 * We don't need try_module_get here, as the listening socket (sock)
1458 * has the protocol module (sock->ops->owner) held.
1459 */
1460 __module_get(newsock->ops->owner);
1461
28407630 1462 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1463 if (unlikely(newfd < 0)) {
1464 err = newfd;
9a1875e6
DM
1465 sock_release(newsock);
1466 goto out_put;
39d8c1b6 1467 }
aab174f0 1468 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1469 if (IS_ERR(newfile)) {
28407630
AV
1470 err = PTR_ERR(newfile);
1471 put_unused_fd(newfd);
1472 sock_release(newsock);
1473 goto out_put;
1474 }
39d8c1b6 1475
a79af59e
FF
1476 err = security_socket_accept(sock, newsock);
1477 if (err)
39d8c1b6 1478 goto out_fd;
a79af59e 1479
1da177e4
LT
1480 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1481 if (err < 0)
39d8c1b6 1482 goto out_fd;
1da177e4
LT
1483
1484 if (upeer_sockaddr) {
230b1839 1485 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1486 &len, 2) < 0) {
1da177e4 1487 err = -ECONNABORTED;
39d8c1b6 1488 goto out_fd;
1da177e4 1489 }
43db362d 1490 err = move_addr_to_user(&address,
230b1839 1491 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1492 if (err < 0)
39d8c1b6 1493 goto out_fd;
1da177e4
LT
1494 }
1495
1496 /* File flags are not inherited via accept() unlike another OSes. */
1497
39d8c1b6
DM
1498 fd_install(newfd, newfile);
1499 err = newfd;
1da177e4 1500
1da177e4 1501out_put:
6cb153ca 1502 fput_light(sock->file, fput_needed);
1da177e4
LT
1503out:
1504 return err;
39d8c1b6 1505out_fd:
9606a216 1506 fput(newfile);
39d8c1b6 1507 put_unused_fd(newfd);
1da177e4
LT
1508 goto out_put;
1509}
1510
20f37034
HC
1511SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1512 int __user *, upeer_addrlen)
aaca0bdc 1513{
de11defe 1514 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1515}
1516
1da177e4
LT
1517/*
1518 * Attempt to connect to a socket with the server address. The address
1519 * is in user space so we verify it is OK and move it to kernel space.
1520 *
1521 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1522 * break bindings
1523 *
1524 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1525 * other SEQPACKET protocols that take time to connect() as it doesn't
1526 * include the -EINPROGRESS status for such sockets.
1527 */
1528
20f37034
HC
1529SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1530 int, addrlen)
1da177e4
LT
1531{
1532 struct socket *sock;
230b1839 1533 struct sockaddr_storage address;
6cb153ca 1534 int err, fput_needed;
1da177e4 1535
6cb153ca 1536 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1537 if (!sock)
1538 goto out;
43db362d 1539 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1540 if (err < 0)
1541 goto out_put;
1542
89bddce5 1543 err =
230b1839 1544 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1545 if (err)
1546 goto out_put;
1547
230b1839 1548 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1549 sock->file->f_flags);
1550out_put:
6cb153ca 1551 fput_light(sock->file, fput_needed);
1da177e4
LT
1552out:
1553 return err;
1554}
1555
1556/*
1557 * Get the local address ('name') of a socket object. Move the obtained
1558 * name to user space.
1559 */
1560
20f37034
HC
1561SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1562 int __user *, usockaddr_len)
1da177e4
LT
1563{
1564 struct socket *sock;
230b1839 1565 struct sockaddr_storage address;
6cb153ca 1566 int len, err, fput_needed;
89bddce5 1567
6cb153ca 1568 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1569 if (!sock)
1570 goto out;
1571
1572 err = security_socket_getsockname(sock);
1573 if (err)
1574 goto out_put;
1575
230b1839 1576 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1577 if (err)
1578 goto out_put;
43db362d 1579 err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1580
1581out_put:
6cb153ca 1582 fput_light(sock->file, fput_needed);
1da177e4
LT
1583out:
1584 return err;
1585}
1586
1587/*
1588 * Get the remote address ('name') of a socket object. Move the obtained
1589 * name to user space.
1590 */
1591
20f37034
HC
1592SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1593 int __user *, usockaddr_len)
1da177e4
LT
1594{
1595 struct socket *sock;
230b1839 1596 struct sockaddr_storage address;
6cb153ca 1597 int len, err, fput_needed;
1da177e4 1598
89bddce5
SH
1599 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1600 if (sock != NULL) {
1da177e4
LT
1601 err = security_socket_getpeername(sock);
1602 if (err) {
6cb153ca 1603 fput_light(sock->file, fput_needed);
1da177e4
LT
1604 return err;
1605 }
1606
89bddce5 1607 err =
230b1839 1608 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1609 1);
1da177e4 1610 if (!err)
43db362d 1611 err = move_addr_to_user(&address, len, usockaddr,
89bddce5 1612 usockaddr_len);
6cb153ca 1613 fput_light(sock->file, fput_needed);
1da177e4
LT
1614 }
1615 return err;
1616}
1617
1618/*
1619 * Send a datagram to a given address. We move the address into kernel
1620 * space and check the user space data area is readable before invoking
1621 * the protocol.
1622 */
1623
3e0fa65f 1624SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
95c96174 1625 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1626 int, addr_len)
1da177e4
LT
1627{
1628 struct socket *sock;
230b1839 1629 struct sockaddr_storage address;
1da177e4
LT
1630 int err;
1631 struct msghdr msg;
1632 struct iovec iov;
6cb153ca 1633 int fput_needed;
6cb153ca 1634
602bd0e9
AV
1635 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1636 if (unlikely(err))
1637 return err;
de0fa95c
PE
1638 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1639 if (!sock)
4387ff75 1640 goto out;
6cb153ca 1641
89bddce5 1642 msg.msg_name = NULL;
89bddce5
SH
1643 msg.msg_control = NULL;
1644 msg.msg_controllen = 0;
1645 msg.msg_namelen = 0;
6cb153ca 1646 if (addr) {
43db362d 1647 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1648 if (err < 0)
1649 goto out_put;
230b1839 1650 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1651 msg.msg_namelen = addr_len;
1da177e4
LT
1652 }
1653 if (sock->file->f_flags & O_NONBLOCK)
1654 flags |= MSG_DONTWAIT;
1655 msg.msg_flags = flags;
d8725c86 1656 err = sock_sendmsg(sock, &msg);
1da177e4 1657
89bddce5 1658out_put:
de0fa95c 1659 fput_light(sock->file, fput_needed);
4387ff75 1660out:
1da177e4
LT
1661 return err;
1662}
1663
1664/*
89bddce5 1665 * Send a datagram down a socket.
1da177e4
LT
1666 */
1667
3e0fa65f 1668SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1669 unsigned int, flags)
1da177e4
LT
1670{
1671 return sys_sendto(fd, buff, len, flags, NULL, 0);
1672}
1673
1674/*
89bddce5 1675 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1676 * sender. We verify the buffers are writable and if needed move the
1677 * sender address from kernel to user space.
1678 */
1679
3e0fa65f 1680SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
95c96174 1681 unsigned int, flags, struct sockaddr __user *, addr,
3e0fa65f 1682 int __user *, addr_len)
1da177e4
LT
1683{
1684 struct socket *sock;
1685 struct iovec iov;
1686 struct msghdr msg;
230b1839 1687 struct sockaddr_storage address;
89bddce5 1688 int err, err2;
6cb153ca
BL
1689 int fput_needed;
1690
602bd0e9
AV
1691 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1692 if (unlikely(err))
1693 return err;
de0fa95c 1694 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1695 if (!sock)
de0fa95c 1696 goto out;
1da177e4 1697
89bddce5
SH
1698 msg.msg_control = NULL;
1699 msg.msg_controllen = 0;
f3d33426
HFS
1700 /* Save some cycles and don't copy the address if not needed */
1701 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1702 /* We assume all kernel code knows the size of sockaddr_storage */
1703 msg.msg_namelen = 0;
130ed5d1 1704 msg.msg_iocb = NULL;
1da177e4
LT
1705 if (sock->file->f_flags & O_NONBLOCK)
1706 flags |= MSG_DONTWAIT;
2da62906 1707 err = sock_recvmsg(sock, &msg, flags);
1da177e4 1708
89bddce5 1709 if (err >= 0 && addr != NULL) {
43db362d 1710 err2 = move_addr_to_user(&address,
230b1839 1711 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1712 if (err2 < 0)
1713 err = err2;
1da177e4 1714 }
de0fa95c
PE
1715
1716 fput_light(sock->file, fput_needed);
4387ff75 1717out:
1da177e4
LT
1718 return err;
1719}
1720
1721/*
89bddce5 1722 * Receive a datagram from a socket.
1da177e4
LT
1723 */
1724
b7c0ddf5
JG
1725SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1726 unsigned int, flags)
1da177e4
LT
1727{
1728 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1729}
1730
1731/*
1732 * Set a socket option. Because we don't know the option lengths we have
1733 * to pass the user mode parameter for the protocols to sort out.
1734 */
1735
20f37034
HC
1736SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1737 char __user *, optval, int, optlen)
1da177e4 1738{
6cb153ca 1739 int err, fput_needed;
1da177e4
LT
1740 struct socket *sock;
1741
1742 if (optlen < 0)
1743 return -EINVAL;
89bddce5
SH
1744
1745 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1746 if (sock != NULL) {
1747 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1748 if (err)
1749 goto out_put;
1da177e4
LT
1750
1751 if (level == SOL_SOCKET)
89bddce5
SH
1752 err =
1753 sock_setsockopt(sock, level, optname, optval,
1754 optlen);
1da177e4 1755 else
89bddce5
SH
1756 err =
1757 sock->ops->setsockopt(sock, level, optname, optval,
1758 optlen);
6cb153ca
BL
1759out_put:
1760 fput_light(sock->file, fput_needed);
1da177e4
LT
1761 }
1762 return err;
1763}
1764
1765/*
1766 * Get a socket option. Because we don't know the option lengths we have
1767 * to pass a user mode parameter for the protocols to sort out.
1768 */
1769
20f37034
HC
1770SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1771 char __user *, optval, int __user *, optlen)
1da177e4 1772{
6cb153ca 1773 int err, fput_needed;
1da177e4
LT
1774 struct socket *sock;
1775
89bddce5
SH
1776 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1777 if (sock != NULL) {
6cb153ca
BL
1778 err = security_socket_getsockopt(sock, level, optname);
1779 if (err)
1780 goto out_put;
1da177e4
LT
1781
1782 if (level == SOL_SOCKET)
89bddce5
SH
1783 err =
1784 sock_getsockopt(sock, level, optname, optval,
1785 optlen);
1da177e4 1786 else
89bddce5
SH
1787 err =
1788 sock->ops->getsockopt(sock, level, optname, optval,
1789 optlen);
6cb153ca
BL
1790out_put:
1791 fput_light(sock->file, fput_needed);
1da177e4
LT
1792 }
1793 return err;
1794}
1795
1da177e4
LT
1796/*
1797 * Shutdown a socket.
1798 */
1799
754fe8d2 1800SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1801{
6cb153ca 1802 int err, fput_needed;
1da177e4
LT
1803 struct socket *sock;
1804
89bddce5
SH
1805 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1806 if (sock != NULL) {
1da177e4 1807 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1808 if (!err)
1809 err = sock->ops->shutdown(sock, how);
1810 fput_light(sock->file, fput_needed);
1da177e4
LT
1811 }
1812 return err;
1813}
1814
/*
 * Helper macros yielding the address of a msghdr field from either the
 * native or the compat (32-bit) layout.  They work for fields which are
 * the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and expect three names there:
 * `flags`, `<msg>` and `<msg>_compat`.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1821
c71d8ebe
TH
1822struct used_address {
1823 struct sockaddr_storage name;
1824 unsigned int name_len;
1825};
1826
da184284
AV
1827static int copy_msghdr_from_user(struct msghdr *kmsg,
1828 struct user_msghdr __user *umsg,
1829 struct sockaddr __user **save_addr,
1830 struct iovec **iov)
1661bf36 1831{
08adb7da
AV
1832 struct sockaddr __user *uaddr;
1833 struct iovec __user *uiov;
c0371da6 1834 size_t nr_segs;
08adb7da
AV
1835 ssize_t err;
1836
1837 if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
1838 __get_user(uaddr, &umsg->msg_name) ||
1839 __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
1840 __get_user(uiov, &umsg->msg_iov) ||
c0371da6 1841 __get_user(nr_segs, &umsg->msg_iovlen) ||
08adb7da
AV
1842 __get_user(kmsg->msg_control, &umsg->msg_control) ||
1843 __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
1844 __get_user(kmsg->msg_flags, &umsg->msg_flags))
1661bf36 1845 return -EFAULT;
dbb490b9 1846
08adb7da 1847 if (!uaddr)
6a2a2b3a
AS
1848 kmsg->msg_namelen = 0;
1849
dbb490b9
ML
1850 if (kmsg->msg_namelen < 0)
1851 return -EINVAL;
1852
1661bf36 1853 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 1854 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
1855
1856 if (save_addr)
1857 *save_addr = uaddr;
1858
1859 if (uaddr && kmsg->msg_namelen) {
1860 if (!save_addr) {
1861 err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
1862 kmsg->msg_name);
1863 if (err < 0)
1864 return err;
1865 }
1866 } else {
1867 kmsg->msg_name = NULL;
1868 kmsg->msg_namelen = 0;
1869 }
1870
c0371da6 1871 if (nr_segs > UIO_MAXIOV)
08adb7da
AV
1872 return -EMSGSIZE;
1873
0345f931 1874 kmsg->msg_iocb = NULL;
1875
da184284
AV
1876 return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
1877 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
1878}
1879
666547ff 1880static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 1881 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
1882 struct used_address *used_address,
1883 unsigned int allowed_msghdr_flags)
1da177e4 1884{
89bddce5
SH
1885 struct compat_msghdr __user *msg_compat =
1886 (struct compat_msghdr __user *)msg;
230b1839 1887 struct sockaddr_storage address;
1da177e4 1888 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1889 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1890 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1891 /* 20 is size of ipv6_pktinfo */
1da177e4 1892 unsigned char *ctl_buf = ctl;
d8725c86 1893 int ctl_len;
08adb7da 1894 ssize_t err;
89bddce5 1895
08adb7da 1896 msg_sys->msg_name = &address;
1da177e4 1897
08449320 1898 if (MSG_CMSG_COMPAT & flags)
08adb7da 1899 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 1900 else
08adb7da 1901 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 1902 if (err < 0)
da184284 1903 return err;
1da177e4
LT
1904
1905 err = -ENOBUFS;
1906
228e548e 1907 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 1908 goto out_freeiov;
28a94d8f 1909 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 1910 ctl_len = msg_sys->msg_controllen;
1da177e4 1911 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 1912 err =
228e548e 1913 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 1914 sizeof(ctl));
1da177e4
LT
1915 if (err)
1916 goto out_freeiov;
228e548e
AB
1917 ctl_buf = msg_sys->msg_control;
1918 ctl_len = msg_sys->msg_controllen;
1da177e4 1919 } else if (ctl_len) {
89bddce5 1920 if (ctl_len > sizeof(ctl)) {
1da177e4 1921 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1922 if (ctl_buf == NULL)
1da177e4
LT
1923 goto out_freeiov;
1924 }
1925 err = -EFAULT;
1926 /*
228e548e 1927 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
1928 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1929 * checking falls down on this.
1930 */
fb8621bb 1931 if (copy_from_user(ctl_buf,
228e548e 1932 (void __user __force *)msg_sys->msg_control,
89bddce5 1933 ctl_len))
1da177e4 1934 goto out_freectl;
228e548e 1935 msg_sys->msg_control = ctl_buf;
1da177e4 1936 }
228e548e 1937 msg_sys->msg_flags = flags;
1da177e4
LT
1938
1939 if (sock->file->f_flags & O_NONBLOCK)
228e548e 1940 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
1941 /*
1942 * If this is sendmmsg() and current destination address is same as
1943 * previously succeeded address, omit asking LSM's decision.
1944 * used_address->name_len is initialized to UINT_MAX so that the first
1945 * destination address never matches.
1946 */
bc909d9d
MD
1947 if (used_address && msg_sys->msg_name &&
1948 used_address->name_len == msg_sys->msg_namelen &&
1949 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 1950 used_address->name_len)) {
d8725c86 1951 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
1952 goto out_freectl;
1953 }
d8725c86 1954 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
1955 /*
1956 * If this is sendmmsg() and sending to current destination address was
1957 * successful, remember it.
1958 */
1959 if (used_address && err >= 0) {
1960 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
1961 if (msg_sys->msg_name)
1962 memcpy(&used_address->name, msg_sys->msg_name,
1963 used_address->name_len);
c71d8ebe 1964 }
1da177e4
LT
1965
1966out_freectl:
89bddce5 1967 if (ctl_buf != ctl)
1da177e4
LT
1968 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1969out_freeiov:
da184284 1970 kfree(iov);
228e548e
AB
1971 return err;
1972}
1973
1974/*
1975 * BSD sendmsg interface
1976 */
1977
666547ff 1978long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
228e548e
AB
1979{
1980 int fput_needed, err;
1981 struct msghdr msg_sys;
1be374a0
AL
1982 struct socket *sock;
1983
1be374a0 1984 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
1985 if (!sock)
1986 goto out;
1987
28a94d8f 1988 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 1989
6cb153ca 1990 fput_light(sock->file, fput_needed);
89bddce5 1991out:
1da177e4
LT
1992 return err;
1993}
1994
666547ff 1995SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5
AL
1996{
1997 if (flags & MSG_CMSG_COMPAT)
1998 return -EINVAL;
1999 return __sys_sendmsg(fd, msg, flags);
2000}
2001
228e548e
AB
2002/*
2003 * Linux sendmmsg interface
2004 */
2005
2006int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2007 unsigned int flags)
2008{
2009 int fput_needed, err, datagrams;
2010 struct socket *sock;
2011 struct mmsghdr __user *entry;
2012 struct compat_mmsghdr __user *compat_entry;
2013 struct msghdr msg_sys;
c71d8ebe 2014 struct used_address used_address;
f092276d 2015 unsigned int oflags = flags;
228e548e 2016
98382f41
AB
2017 if (vlen > UIO_MAXIOV)
2018 vlen = UIO_MAXIOV;
228e548e
AB
2019
2020 datagrams = 0;
2021
2022 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2023 if (!sock)
2024 return err;
2025
c71d8ebe 2026 used_address.name_len = UINT_MAX;
228e548e
AB
2027 entry = mmsg;
2028 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2029 err = 0;
f092276d 2030 flags |= MSG_BATCH;
228e548e
AB
2031
2032 while (datagrams < vlen) {
f092276d
TH
2033 if (datagrams == vlen - 1)
2034 flags = oflags;
2035
228e548e 2036 if (MSG_CMSG_COMPAT & flags) {
666547ff 2037 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2038 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2039 if (err < 0)
2040 break;
2041 err = __put_user(err, &compat_entry->msg_len);
2042 ++compat_entry;
2043 } else {
a7526eb5 2044 err = ___sys_sendmsg(sock,
666547ff 2045 (struct user_msghdr __user *)entry,
28a94d8f 2046 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2047 if (err < 0)
2048 break;
2049 err = put_user(err, &entry->msg_len);
2050 ++entry;
2051 }
2052
2053 if (err)
2054 break;
2055 ++datagrams;
3023898b
SHY
2056 if (msg_data_left(&msg_sys))
2057 break;
a78cb84c 2058 cond_resched();
228e548e
AB
2059 }
2060
228e548e
AB
2061 fput_light(sock->file, fput_needed);
2062
728ffb86
AB
2063 /* We only return an error if no datagrams were able to be sent */
2064 if (datagrams != 0)
228e548e
AB
2065 return datagrams;
2066
228e548e
AB
2067 return err;
2068}
2069
2070SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2071 unsigned int, vlen, unsigned int, flags)
2072{
1be374a0
AL
2073 if (flags & MSG_CMSG_COMPAT)
2074 return -EINVAL;
228e548e
AB
2075 return __sys_sendmmsg(fd, mmsg, vlen, flags);
2076}
2077
666547ff 2078static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2079 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2080{
89bddce5
SH
2081 struct compat_msghdr __user *msg_compat =
2082 (struct compat_msghdr __user *)msg;
1da177e4 2083 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2084 struct iovec *iov = iovstack;
1da177e4 2085 unsigned long cmsg_ptr;
2da62906 2086 int len;
08adb7da 2087 ssize_t err;
1da177e4
LT
2088
2089 /* kernel mode address */
230b1839 2090 struct sockaddr_storage addr;
1da177e4
LT
2091
2092 /* user mode address pointers */
2093 struct sockaddr __user *uaddr;
08adb7da 2094 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2095
08adb7da 2096 msg_sys->msg_name = &addr;
1da177e4 2097
f3d33426 2098 if (MSG_CMSG_COMPAT & flags)
08adb7da 2099 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2100 else
08adb7da 2101 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2102 if (err < 0)
da184284 2103 return err;
1da177e4 2104
a2e27255
ACM
2105 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2106 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2107
f3d33426
HFS
2108 /* We assume all kernel code knows the size of sockaddr_storage */
2109 msg_sys->msg_namelen = 0;
2110
1da177e4
LT
2111 if (sock->file->f_flags & O_NONBLOCK)
2112 flags |= MSG_DONTWAIT;
2da62906 2113 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2114 if (err < 0)
2115 goto out_freeiov;
2116 len = err;
2117
2118 if (uaddr != NULL) {
43db362d 2119 err = move_addr_to_user(&addr,
a2e27255 2120 msg_sys->msg_namelen, uaddr,
89bddce5 2121 uaddr_len);
1da177e4
LT
2122 if (err < 0)
2123 goto out_freeiov;
2124 }
a2e27255 2125 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2126 COMPAT_FLAGS(msg));
1da177e4
LT
2127 if (err)
2128 goto out_freeiov;
2129 if (MSG_CMSG_COMPAT & flags)
a2e27255 2130 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2131 &msg_compat->msg_controllen);
2132 else
a2e27255 2133 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2134 &msg->msg_controllen);
2135 if (err)
2136 goto out_freeiov;
2137 err = len;
2138
2139out_freeiov:
da184284 2140 kfree(iov);
a2e27255
ACM
2141 return err;
2142}
2143
2144/*
2145 * BSD recvmsg interface
2146 */
2147
666547ff 2148long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
a2e27255
ACM
2149{
2150 int fput_needed, err;
2151 struct msghdr msg_sys;
1be374a0
AL
2152 struct socket *sock;
2153
1be374a0 2154 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2155 if (!sock)
2156 goto out;
2157
a7526eb5 2158 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2159
6cb153ca 2160 fput_light(sock->file, fput_needed);
1da177e4
LT
2161out:
2162 return err;
2163}
2164
666547ff 2165SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2166 unsigned int, flags)
2167{
2168 if (flags & MSG_CMSG_COMPAT)
2169 return -EINVAL;
2170 return __sys_recvmsg(fd, msg, flags);
2171}
2172
a2e27255
ACM
2173/*
2174 * Linux recvmmsg interface
2175 */
2176
2177int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2178 unsigned int flags, struct timespec *timeout)
2179{
2180 int fput_needed, err, datagrams;
2181 struct socket *sock;
2182 struct mmsghdr __user *entry;
d7256d0e 2183 struct compat_mmsghdr __user *compat_entry;
a2e27255 2184 struct msghdr msg_sys;
766b9f92
DD
2185 struct timespec64 end_time;
2186 struct timespec64 timeout64;
a2e27255
ACM
2187
2188 if (timeout &&
2189 poll_select_set_timeout(&end_time, timeout->tv_sec,
2190 timeout->tv_nsec))
2191 return -EINVAL;
2192
2193 datagrams = 0;
2194
2195 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2196 if (!sock)
2197 return err;
2198
2199 err = sock_error(sock->sk);
2200 if (err)
2201 goto out_put;
2202
2203 entry = mmsg;
d7256d0e 2204 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2205
2206 while (datagrams < vlen) {
2207 /*
2208 * No need to ask LSM for more than the first datagram.
2209 */
d7256d0e 2210 if (MSG_CMSG_COMPAT & flags) {
666547ff 2211 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2212 &msg_sys, flags & ~MSG_WAITFORONE,
2213 datagrams);
d7256d0e
JMG
2214 if (err < 0)
2215 break;
2216 err = __put_user(err, &compat_entry->msg_len);
2217 ++compat_entry;
2218 } else {
a7526eb5 2219 err = ___sys_recvmsg(sock,
666547ff 2220 (struct user_msghdr __user *)entry,
a7526eb5
AL
2221 &msg_sys, flags & ~MSG_WAITFORONE,
2222 datagrams);
d7256d0e
JMG
2223 if (err < 0)
2224 break;
2225 err = put_user(err, &entry->msg_len);
2226 ++entry;
2227 }
2228
a2e27255
ACM
2229 if (err)
2230 break;
a2e27255
ACM
2231 ++datagrams;
2232
71c5c159
BB
2233 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2234 if (flags & MSG_WAITFORONE)
2235 flags |= MSG_DONTWAIT;
2236
a2e27255 2237 if (timeout) {
766b9f92
DD
2238 ktime_get_ts64(&timeout64);
2239 *timeout = timespec64_to_timespec(
2240 timespec64_sub(end_time, timeout64));
a2e27255
ACM
2241 if (timeout->tv_sec < 0) {
2242 timeout->tv_sec = timeout->tv_nsec = 0;
2243 break;
2244 }
2245
2246 /* Timeout, return less than vlen datagrams */
2247 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2248 break;
2249 }
2250
2251 /* Out of band data, return right away */
2252 if (msg_sys.msg_flags & MSG_OOB)
2253 break;
a78cb84c 2254 cond_resched();
a2e27255
ACM
2255 }
2256
a2e27255 2257 if (err == 0)
34b88a68
ACM
2258 goto out_put;
2259
2260 if (datagrams == 0) {
2261 datagrams = err;
2262 goto out_put;
2263 }
a2e27255 2264
34b88a68
ACM
2265 /*
2266 * We may return less entries than requested (vlen) if the
2267 * sock is non block and there aren't enough datagrams...
2268 */
2269 if (err != -EAGAIN) {
a2e27255 2270 /*
34b88a68
ACM
2271 * ... or if recvmsg returns an error after we
2272 * received some datagrams, where we record the
2273 * error to return on the next call or if the
2274 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2275 */
34b88a68 2276 sock->sk->sk_err = -err;
a2e27255 2277 }
34b88a68
ACM
2278out_put:
2279 fput_light(sock->file, fput_needed);
a2e27255 2280
34b88a68 2281 return datagrams;
a2e27255
ACM
2282}
2283
2284SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2285 unsigned int, vlen, unsigned int, flags,
2286 struct timespec __user *, timeout)
2287{
2288 int datagrams;
2289 struct timespec timeout_sys;
2290
1be374a0
AL
2291 if (flags & MSG_CMSG_COMPAT)
2292 return -EINVAL;
2293
a2e27255
ACM
2294 if (!timeout)
2295 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2296
2297 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2298 return -EFAULT;
2299
2300 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2301
2302 if (datagrams > 0 &&
2303 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2304 datagrams = -EFAULT;
2305
2306 return datagrams;
2307}
2308
2309#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Size in bytes of the argument block of every sys_socketcall call; the
 * array is indexed by call number.
 */
#define ARG_BYTES(cnt) ((cnt) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	/*  0 -  4 */ ARG_BYTES(0), ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(2),
	/*  5 -  9 */ ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(4), ARG_BYTES(4),
	/* 10 - 14 */ ARG_BYTES(4), ARG_BYTES(6), ARG_BYTES(6), ARG_BYTES(2), ARG_BYTES(5),
	/* 15 - 19 */ ARG_BYTES(5), ARG_BYTES(3), ARG_BYTES(3), ARG_BYTES(4), ARG_BYTES(5),
	/* 20      */ ARG_BYTES(4)
};

#undef ARG_BYTES
2320
2321/*
89bddce5 2322 * System call vectors.
1da177e4
LT
2323 *
2324 * Argument checking cleaned up. Saved 20% in size.
2325 * This function doesn't need to set the kernel lock because
89bddce5 2326 * it is set by the callees.
1da177e4
LT
2327 */
2328
3e0fa65f 2329SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2330{
2950fa9d 2331 unsigned long a[AUDITSC_ARGS];
89bddce5 2332 unsigned long a0, a1;
1da177e4 2333 int err;
47379052 2334 unsigned int len;
1da177e4 2335
228e548e 2336 if (call < 1 || call > SYS_SENDMMSG)
1da177e4
LT
2337 return -EINVAL;
2338
47379052
AV
2339 len = nargs[call];
2340 if (len > sizeof(a))
2341 return -EINVAL;
2342
1da177e4 2343 /* copy_from_user should be SMP safe. */
47379052 2344 if (copy_from_user(a, args, len))
1da177e4 2345 return -EFAULT;
3ec3b2fb 2346
2950fa9d
CG
2347 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2348 if (err)
2349 return err;
3ec3b2fb 2350
89bddce5
SH
2351 a0 = a[0];
2352 a1 = a[1];
2353
2354 switch (call) {
2355 case SYS_SOCKET:
2356 err = sys_socket(a0, a1, a[2]);
2357 break;
2358 case SYS_BIND:
2359 err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
2360 break;
2361 case SYS_CONNECT:
2362 err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
2363 break;
2364 case SYS_LISTEN:
2365 err = sys_listen(a0, a1);
2366 break;
2367 case SYS_ACCEPT:
de11defe
UD
2368 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2369 (int __user *)a[2], 0);
89bddce5
SH
2370 break;
2371 case SYS_GETSOCKNAME:
2372 err =
2373 sys_getsockname(a0, (struct sockaddr __user *)a1,
2374 (int __user *)a[2]);
2375 break;
2376 case SYS_GETPEERNAME:
2377 err =
2378 sys_getpeername(a0, (struct sockaddr __user *)a1,
2379 (int __user *)a[2]);
2380 break;
2381 case SYS_SOCKETPAIR:
2382 err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
2383 break;
2384 case SYS_SEND:
2385 err = sys_send(a0, (void __user *)a1, a[2], a[3]);
2386 break;
2387 case SYS_SENDTO:
2388 err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
2389 (struct sockaddr __user *)a[4], a[5]);
2390 break;
2391 case SYS_RECV:
2392 err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
2393 break;
2394 case SYS_RECVFROM:
2395 err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2396 (struct sockaddr __user *)a[4],
2397 (int __user *)a[5]);
2398 break;
2399 case SYS_SHUTDOWN:
2400 err = sys_shutdown(a0, a1);
2401 break;
2402 case SYS_SETSOCKOPT:
2403 err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
2404 break;
2405 case SYS_GETSOCKOPT:
2406 err =
2407 sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2408 (int __user *)a[4]);
2409 break;
2410 case SYS_SENDMSG:
666547ff 2411 err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2412 break;
228e548e
AB
2413 case SYS_SENDMMSG:
2414 err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2415 break;
89bddce5 2416 case SYS_RECVMSG:
666547ff 2417 err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
89bddce5 2418 break;
a2e27255
ACM
2419 case SYS_RECVMMSG:
2420 err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2421 (struct timespec __user *)a[4]);
2422 break;
de11defe
UD
2423 case SYS_ACCEPT4:
2424 err = sys_accept4(a0, (struct sockaddr __user *)a1,
2425 (int __user *)a[2], a[3]);
aaca0bdc 2426 break;
89bddce5
SH
2427 default:
2428 err = -EINVAL;
2429 break;
1da177e4
LT
2430 }
2431 return err;
2432}
2433
89bddce5 2434#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2435
55737fda
SH
2436/**
2437 * sock_register - add a socket protocol handler
2438 * @ops: description of protocol
2439 *
1da177e4
LT
2440 * This function is called by a protocol handler that wants to
2441 * advertise its address family, and have it linked into the
e793c0f7 2442 * socket interface. The value ops->family corresponds to the
55737fda 2443 * socket system call protocol family.
1da177e4 2444 */
f0fd27d4 2445int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2446{
2447 int err;
2448
2449 if (ops->family >= NPROTO) {
3410f22e 2450 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2451 return -ENOBUFS;
2452 }
55737fda
SH
2453
2454 spin_lock(&net_family_lock);
190683a9
ED
2455 if (rcu_dereference_protected(net_families[ops->family],
2456 lockdep_is_held(&net_family_lock)))
55737fda
SH
2457 err = -EEXIST;
2458 else {
cf778b00 2459 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2460 err = 0;
2461 }
55737fda
SH
2462 spin_unlock(&net_family_lock);
2463
3410f22e 2464 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2465 return err;
2466}
c6d409cf 2467EXPORT_SYMBOL(sock_register);
1da177e4 2468
55737fda
SH
2469/**
2470 * sock_unregister - remove a protocol handler
2471 * @family: protocol family to remove
2472 *
1da177e4
LT
2473 * This function is called by a protocol handler that wants to
2474 * remove its address family, and have it unlinked from the
55737fda
SH
2475 * new socket creation.
2476 *
2477 * If protocol handler is a module, then it can use module reference
2478 * counts to protect against new references. If protocol handler is not
2479 * a module then it needs to provide its own protection in
2480 * the ops->create routine.
1da177e4 2481 */
f0fd27d4 2482void sock_unregister(int family)
1da177e4 2483{
f0fd27d4 2484 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2485
55737fda 2486 spin_lock(&net_family_lock);
a9b3cd7f 2487 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2488 spin_unlock(&net_family_lock);
2489
2490 synchronize_rcu();
2491
3410f22e 2492 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2493}
c6d409cf 2494EXPORT_SYMBOL(sock_unregister);
1da177e4 2495
77d76ea3 2496static int __init sock_init(void)
1da177e4 2497{
b3e19d92 2498 int err;
2ca794e5
EB
2499 /*
2500 * Initialize the network sysctl infrastructure.
2501 */
2502 err = net_sysctl_init();
2503 if (err)
2504 goto out;
b3e19d92 2505
1da177e4 2506 /*
89bddce5 2507 * Initialize skbuff SLAB cache
1da177e4
LT
2508 */
2509 skb_init();
1da177e4
LT
2510
2511 /*
89bddce5 2512 * Initialize the protocols module.
1da177e4
LT
2513 */
2514
2515 init_inodecache();
b3e19d92
NP
2516
2517 err = register_filesystem(&sock_fs_type);
2518 if (err)
2519 goto out_fs;
1da177e4 2520 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2521 if (IS_ERR(sock_mnt)) {
2522 err = PTR_ERR(sock_mnt);
2523 goto out_mount;
2524 }
77d76ea3
AK
2525
2526 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2527 */
2528
2529#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2530 err = netfilter_init();
2531 if (err)
2532 goto out;
1da177e4 2533#endif
cbeb321a 2534
408eccce 2535 ptp_classifier_init();
c1f19b51 2536
b3e19d92
NP
2537out:
2538 return err;
2539
2540out_mount:
2541 unregister_filesystem(&sock_fs_type);
2542out_fs:
2543 goto out;
1da177e4
LT
2544}
2545
77d76ea3
AK
2546core_initcall(sock_init); /* early initcall */
2547
1da177e4
LT
2548#ifdef CONFIG_PROC_FS
2549void socket_seq_show(struct seq_file *seq)
2550{
2551 int cpu;
2552 int counter = 0;
2553
6f912042 2554 for_each_possible_cpu(cpu)
89bddce5 2555 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2556
2557 /* It can be negative, by the way. 8) */
2558 if (counter < 0)
2559 counter = 0;
2560
2561 seq_printf(seq, "sockets: used %d\n", counter);
2562}
89bddce5 2563#endif /* CONFIG_PROC_FS */
1da177e4 2564
89bbfc95 2565#ifdef CONFIG_COMPAT
6b96018b 2566static int do_siocgstamp(struct net *net, struct socket *sock,
644595f8 2567 unsigned int cmd, void __user *up)
7a229387 2568{
7a229387
AB
2569 mm_segment_t old_fs = get_fs();
2570 struct timeval ktv;
2571 int err;
2572
2573 set_fs(KERNEL_DS);
6b96018b 2574 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387 2575 set_fs(old_fs);
644595f8 2576 if (!err)
ed6fe9d6 2577 err = compat_put_timeval(&ktv, up);
644595f8 2578
7a229387
AB
2579 return err;
2580}
2581
6b96018b 2582static int do_siocgstampns(struct net *net, struct socket *sock,
644595f8 2583 unsigned int cmd, void __user *up)
7a229387 2584{
7a229387
AB
2585 mm_segment_t old_fs = get_fs();
2586 struct timespec kts;
2587 int err;
2588
2589 set_fs(KERNEL_DS);
6b96018b 2590 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387 2591 set_fs(old_fs);
644595f8 2592 if (!err)
ed6fe9d6 2593 err = compat_put_timespec(&kts, up);
644595f8 2594
7a229387
AB
2595 return err;
2596}
2597
6b96018b 2598static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2599{
2600 struct ifreq __user *uifr;
2601 int err;
2602
2603 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2604 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2605 return -EFAULT;
2606
6b96018b 2607 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2608 if (err)
2609 return err;
2610
6b96018b 2611 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2612 return -EFAULT;
2613
2614 return 0;
2615}
2616
6b96018b 2617static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2618{
6b96018b 2619 struct compat_ifconf ifc32;
7a229387
AB
2620 struct ifconf ifc;
2621 struct ifconf __user *uifc;
6b96018b 2622 struct compat_ifreq __user *ifr32;
7a229387
AB
2623 struct ifreq __user *ifr;
2624 unsigned int i, j;
2625 int err;
2626
6b96018b 2627 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2628 return -EFAULT;
2629
43da5f2e 2630 memset(&ifc, 0, sizeof(ifc));
7a229387
AB
2631 if (ifc32.ifcbuf == 0) {
2632 ifc32.ifc_len = 0;
2633 ifc.ifc_len = 0;
2634 ifc.ifc_req = NULL;
2635 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2636 } else {
c6d409cf
ED
2637 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2638 sizeof(struct ifreq);
7a229387
AB
2639 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2640 ifc.ifc_len = len;
2641 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2642 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2643 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2644 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2645 return -EFAULT;
2646 ifr++;
2647 ifr32++;
2648 }
2649 }
2650 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2651 return -EFAULT;
2652
6b96018b 2653 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2654 if (err)
2655 return err;
2656
2657 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2658 return -EFAULT;
2659
2660 ifr = ifc.ifc_req;
2661 ifr32 = compat_ptr(ifc32.ifcbuf);
2662 for (i = 0, j = 0;
c6d409cf
ED
2663 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2664 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2665 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2666 return -EFAULT;
2667 ifr32++;
2668 ifr++;
2669 }
2670
2671 if (ifc32.ifcbuf == 0) {
2672 /* Translate from 64-bit structure multiple to
2673 * a 32-bit one.
2674 */
2675 i = ifc.ifc_len;
6b96018b 2676 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2677 ifc32.ifc_len = i;
2678 } else {
2679 ifc32.ifc_len = i;
2680 }
6b96018b 2681 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2682 return -EFAULT;
2683
2684 return 0;
2685}
2686
/*
 * Compat handler for SIOCETHTOOL.
 *
 * Builds a native struct ifreq in compat user space and runs
 * dev_ioctl(SIOCETHTOOL) on it.  For the ethtool commands listed in the
 * switch below, a native struct ethtool_rxnfc is placed behind the ifreq,
 * filled from the caller's compat structure before the call (convert_in)
 * and copied back afterwards (convert_out).  For every other command the
 * caller's data pointer is passed through unchanged.
 *
 * Returns 0 on success, -EFAULT when a user-space access fails, -ENOMEM
 * when the requested rule count is too large, or the dev_ioctl() error.
 */
6b96018b 2687static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2688{
3a7da39d
BH
2689 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2690 bool convert_in = false, convert_out = false;
/* Space for the native ifreq, rounded up to a multiple of 8. */
2691 size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
2692 struct ethtool_rxnfc __user *rxnfc;
7a229387 2693 struct ifreq __user *ifr;
3a7da39d
BH
2694 u32 rule_cnt = 0, actual_rule_cnt;
2695 u32 ethcmd;
7a229387 2696 u32 data;
3a7da39d 2697 int ret;
7a229387 2698
/* Fetch the caller's 32-bit data pointer ... */
3a7da39d
BH
2699 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2700 return -EFAULT;
7a229387 2701
3a7da39d
BH
2702 compat_rxnfc = compat_ptr(data);
2703
/* ... and the command word found at that address. */
2704 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2705 return -EFAULT;
2706
3a7da39d
BH
2707 /* Most ethtool structures are defined without padding.
2708 * Unfortunately struct ethtool_rxnfc is an exception.
2709 */
/*
 * The cases fall through on purpose: GRXCLSRLALL adds room for the rule
 * table, the next group requests copy-back, and every listed command
 * reserves a native ethtool_rxnfc and requests copy-in.
 */
2710 switch (ethcmd) {
2711 default:
2712 break;
2713 case ETHTOOL_GRXCLSRLALL:
2714 /* Buffer size is variable */
2715 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2716 return -EFAULT;
2717 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2718 return -ENOMEM;
2719 buf_size += rule_cnt * sizeof(u32);
2720 /* fall through */
2721 case ETHTOOL_GRXRINGS:
2722 case ETHTOOL_GRXCLSRLCNT:
2723 case ETHTOOL_GRXCLSRULE:
55664f32 2724 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
2725 convert_out = true;
2726 /* fall through */
2727 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
2728 buf_size += sizeof(struct ethtool_rxnfc);
2729 convert_in = true;
2730 break;
2731 }
2732
2733 ifr = compat_alloc_user_space(buf_size);
/* The native rxnfc starts right after the 8-byte aligned ifreq. */
954b1244 2734 rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
3a7da39d
BH
2735
2736 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
2737 return -EFAULT;
2738
/* Point the native ifreq at the converted copy, or at the caller's data. */
3a7da39d
BH
2739 if (put_user(convert_in ? rxnfc : compat_ptr(data),
2740 &ifr->ifr_ifru.ifru_data))
7a229387
AB
2741 return -EFAULT;
2742
3a7da39d 2743 if (convert_in) {
127fe533 2744 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
2745 * fs.ring_cookie and at the end of fs, but nowhere else.
2746 */
127fe533
AD
2747 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2748 sizeof(compat_rxnfc->fs.m_ext) !=
2749 offsetof(struct ethtool_rxnfc, fs.m_ext) +
2750 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
2751 BUILD_BUG_ON(
2752 offsetof(struct compat_ethtool_rxnfc, fs.location) -
2753 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
2754 offsetof(struct ethtool_rxnfc, fs.location) -
2755 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
2756
/*
 * Three copies: start of the structure through fs.m_ext, then
 * fs.ring_cookie through fs.location, then rule_cnt.
 */
2757 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
2758 (void __user *)(&rxnfc->fs.m_ext + 1) -
2759 (void __user *)rxnfc) ||
3a7da39d
BH
2760 copy_in_user(&rxnfc->fs.ring_cookie,
2761 &compat_rxnfc->fs.ring_cookie,
954b1244
SH
2762 (void __user *)(&rxnfc->fs.location + 1) -
2763 (void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2764 copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
2765 sizeof(rxnfc->rule_cnt)))
2766 return -EFAULT;
2767 }
2768
2769 ret = dev_ioctl(net, SIOCETHTOOL, ifr);
2770 if (ret)
2771 return ret;
2772
2773 if (convert_out) {
/* Same three ranges as on the way in, in the opposite direction. */
2774 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
2775 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2776 (const void __user *)rxnfc) ||
3a7da39d
BH
2777 copy_in_user(&compat_rxnfc->fs.ring_cookie,
2778 &rxnfc->fs.ring_cookie,
954b1244
SH
2779 (const void __user *)(&rxnfc->fs.location + 1) -
2780 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
2781 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
2782 sizeof(rxnfc->rule_cnt)))
2783 return -EFAULT;
2784
2785 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
2786 /* As an optimisation, we only copy the actual
2787 * number of rules that the underlying
2788 * function returned. Since Mallory might
2789 * change the rule count in user memory, we
2790 * check that it is less than the rule count
2791 * originally given (as the user buffer size),
2792 * which has been range-checked.
2793 */
2794 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
2795 return -EFAULT;
2796 if (actual_rule_cnt < rule_cnt)
2797 rule_cnt = actual_rule_cnt;
2798 if (copy_in_user(&compat_rxnfc->rule_locs[0],
2799 &rxnfc->rule_locs[0],
2800 rule_cnt * sizeof(u32)))
2801 return -EFAULT;
2802 }
2803 }
2804
2805 return 0;
7a229387
AB
2806}
2807
7a50a240
AB
2808static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2809{
2810 void __user *uptr;
2811 compat_uptr_t uptr32;
2812 struct ifreq __user *uifr;
2813
c6d409cf 2814 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2815 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2816 return -EFAULT;
2817
2818 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2819 return -EFAULT;
2820
2821 uptr = compat_ptr(uptr32);
2822
2823 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2824 return -EFAULT;
2825
2826 return dev_ioctl(net, SIOCWANDEV, uifr);
2827}
2828
6b96018b
AB
2829static int bond_ioctl(struct net *net, unsigned int cmd,
2830 struct compat_ifreq __user *ifr32)
7a229387
AB
2831{
2832 struct ifreq kifr;
7a229387
AB
2833 mm_segment_t old_fs;
2834 int err;
7a229387
AB
2835
2836 switch (cmd) {
2837 case SIOCBONDENSLAVE:
2838 case SIOCBONDRELEASE:
2839 case SIOCBONDSETHWADDR:
2840 case SIOCBONDCHANGEACTIVE:
6b96018b 2841 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2842 return -EFAULT;
2843
2844 old_fs = get_fs();
c6d409cf 2845 set_fs(KERNEL_DS);
c3f52ae6 2846 err = dev_ioctl(net, cmd,
2847 (struct ifreq __user __force *) &kifr);
c6d409cf 2848 set_fs(old_fs);
7a229387
AB
2849
2850 return err;
7a229387 2851 default:
07d106d0 2852 return -ENOIOCTLCMD;
ccbd6a5a 2853 }
7a229387
AB
2854}
2855
590d4693
BH
2856/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2857static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 2858 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2859{
2860 struct ifreq __user *u_ifreq64;
7a229387
AB
2861 char tmp_buf[IFNAMSIZ];
2862 void __user *data64;
2863 u32 data32;
2864
2865 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2866 IFNAMSIZ))
2867 return -EFAULT;
417c3522 2868 if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
7a229387
AB
2869 return -EFAULT;
2870 data64 = compat_ptr(data32);
2871
2872 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2873
7a229387
AB
2874 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2875 IFNAMSIZ))
2876 return -EFAULT;
417c3522 2877 if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
7a229387
AB
2878 return -EFAULT;
2879
6b96018b 2880 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2881}
2882
6b96018b
AB
2883static int dev_ifsioc(struct net *net, struct socket *sock,
2884 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2885{
a2116ed2 2886 struct ifreq __user *uifr;
7a229387
AB
2887 int err;
2888
a2116ed2
AB
2889 uifr = compat_alloc_user_space(sizeof(*uifr));
2890 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2891 return -EFAULT;
2892
2893 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2894
7a229387
AB
2895 if (!err) {
2896 switch (cmd) {
2897 case SIOCGIFFLAGS:
2898 case SIOCGIFMETRIC:
2899 case SIOCGIFMTU:
2900 case SIOCGIFMEM:
2901 case SIOCGIFHWADDR:
2902 case SIOCGIFINDEX:
2903 case SIOCGIFADDR:
2904 case SIOCGIFBRDADDR:
2905 case SIOCGIFDSTADDR:
2906 case SIOCGIFNETMASK:
fab2532b 2907 case SIOCGIFPFLAGS:
7a229387 2908 case SIOCGIFTXQLEN:
fab2532b
AB
2909 case SIOCGMIIPHY:
2910 case SIOCGMIIREG:
a2116ed2 2911 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2912 err = -EFAULT;
2913 break;
2914 }
2915 }
2916 return err;
2917}
2918
a2116ed2
AB
2919static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2920 struct compat_ifreq __user *uifr32)
2921{
2922 struct ifreq ifr;
2923 struct compat_ifmap __user *uifmap32;
2924 mm_segment_t old_fs;
2925 int err;
2926
2927 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2928 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
2929 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2930 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2931 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2932 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
2933 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
2934 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2935 if (err)
2936 return -EFAULT;
2937
2938 old_fs = get_fs();
c6d409cf 2939 set_fs(KERNEL_DS);
c3f52ae6 2940 err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
c6d409cf 2941 set_fs(old_fs);
a2116ed2
AB
2942
2943 if (cmd == SIOCGIFMAP && !err) {
2944 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
2945 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2946 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2947 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2948 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
2949 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
2950 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
2951 if (err)
2952 err = -EFAULT;
2953 }
2954 return err;
2955}
2956
/*
 * Routing entry layout that routing_ioctl() reads from its user argument on
 * the ipv4 path and converts field by field into a struct rtentry.  The
 * pointer-carrying field (rt_dev) is held as a u32 here.
 */
7a229387 2957struct rtentry32 {
c6d409cf 2958 u32 rt_pad1;
7a229387
AB
2959 struct sockaddr rt_dst; /* target address */
2960 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2961 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2962 unsigned short rt_flags;
2963 short rt_pad2;
2964 u32 rt_pad3;
2965 unsigned char rt_tos;
2966 unsigned char rt_class;
2967 short rt_pad4;
2968 short rt_metric; /* +1 for binary compatibility! */
7a229387 2969 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2970 u32 rt_mtu; /* per route MTU/Window */
2971 u32 rt_window; /* Window clamping */
7a229387
AB
2972 unsigned short rt_irtt; /* Initial RTT */
2973};
2974
2975struct in6_rtmsg32 {
2976 struct in6_addr rtmsg_dst;
2977 struct in6_addr rtmsg_src;
2978 struct in6_addr rtmsg_gateway;
2979 u32 rtmsg_type;
2980 u16 rtmsg_dst_len;
2981 u16 rtmsg_src_len;
2982 u32 rtmsg_metric;
2983 u32 rtmsg_info;
2984 u32 rtmsg_flags;
2985 s32 rtmsg_ifindex;
2986};
2987
6b96018b
AB
2988static int routing_ioctl(struct net *net, struct socket *sock,
2989 unsigned int cmd, void __user *argp)
7a229387
AB
2990{
2991 int ret;
2992 void *r = NULL;
2993 struct in6_rtmsg r6;
2994 struct rtentry r4;
2995 char devname[16];
2996 u32 rtdev;
2997 mm_segment_t old_fs = get_fs();
2998
6b96018b
AB
2999 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3000 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3001 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3002 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3003 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3004 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3005 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3006 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3007 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3008 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3009 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3010
3011 r = (void *) &r6;
3012 } else { /* ipv4 */
6b96018b 3013 struct rtentry32 __user *ur4 = argp;
c6d409cf 3014 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3015 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3016 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3017 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3018 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3019 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3020 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3021 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3022 if (rtdev) {
c6d409cf 3023 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3024 r4.rt_dev = (char __user __force *)devname;
3025 devname[15] = 0;
7a229387
AB
3026 } else
3027 r4.rt_dev = NULL;
3028
3029 r = (void *) &r4;
3030 }
3031
3032 if (ret) {
3033 ret = -EFAULT;
3034 goto out;
3035 }
3036
c6d409cf 3037 set_fs(KERNEL_DS);
6b96018b 3038 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3039 set_fs(old_fs);
7a229387
AB
3040
3041out:
7a229387
AB
3042 return ret;
3043}
3044
3045/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3046 * for some operations; this forces use of the newer bridge-utils that
25985edc 3047 * use compatible ioctls
7a229387 3048 */
6b96018b 3049static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3050{
6b96018b 3051 compat_ulong_t tmp;
7a229387 3052
6b96018b 3053 if (get_user(tmp, argp))
7a229387
AB
3054 return -EFAULT;
3055 if (tmp == BRCTL_GET_VERSION)
3056 return BRCTL_VERSION + 1;
3057 return -EINVAL;
3058}
3059
6b96018b
AB
3060static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3061 unsigned int cmd, unsigned long arg)
3062{
3063 void __user *argp = compat_ptr(arg);
3064 struct sock *sk = sock->sk;
3065 struct net *net = sock_net(sk);
7a229387 3066
6b96018b 3067 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3068 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3069
3070 switch (cmd) {
3071 case SIOCSIFBR:
3072 case SIOCGIFBR:
3073 return old_bridge_ioctl(argp);
3074 case SIOCGIFNAME:
3075 return dev_ifname32(net, argp);
3076 case SIOCGIFCONF:
3077 return dev_ifconf(net, argp);
3078 case SIOCETHTOOL:
3079 return ethtool_ioctl(net, argp);
7a50a240
AB
3080 case SIOCWANDEV:
3081 return compat_siocwandev(net, argp);
a2116ed2
AB
3082 case SIOCGIFMAP:
3083 case SIOCSIFMAP:
3084 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3085 case SIOCBONDENSLAVE:
3086 case SIOCBONDRELEASE:
3087 case SIOCBONDSETHWADDR:
6b96018b
AB
3088 case SIOCBONDCHANGEACTIVE:
3089 return bond_ioctl(net, cmd, argp);
3090 case SIOCADDRT:
3091 case SIOCDELRT:
3092 return routing_ioctl(net, sock, cmd, argp);
3093 case SIOCGSTAMP:
3094 return do_siocgstamp(net, sock, cmd, argp);
3095 case SIOCGSTAMPNS:
3096 return do_siocgstampns(net, sock, cmd, argp);
590d4693
BH
3097 case SIOCBONDSLAVEINFOQUERY:
3098 case SIOCBONDINFOQUERY:
a2116ed2 3099 case SIOCSHWTSTAMP:
fd468c74 3100 case SIOCGHWTSTAMP:
590d4693 3101 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3102
3103 case FIOSETOWN:
3104 case SIOCSPGRP:
3105 case FIOGETOWN:
3106 case SIOCGPGRP:
3107 case SIOCBRADDBR:
3108 case SIOCBRDELBR:
3109 case SIOCGIFVLAN:
3110 case SIOCSIFVLAN:
3111 case SIOCADDDLCI:
3112 case SIOCDELDLCI:
3113 return sock_ioctl(file, cmd, arg);
3114
3115 case SIOCGIFFLAGS:
3116 case SIOCSIFFLAGS:
3117 case SIOCGIFMETRIC:
3118 case SIOCSIFMETRIC:
3119 case SIOCGIFMTU:
3120 case SIOCSIFMTU:
3121 case SIOCGIFMEM:
3122 case SIOCSIFMEM:
3123 case SIOCGIFHWADDR:
3124 case SIOCSIFHWADDR:
3125 case SIOCADDMULTI:
3126 case SIOCDELMULTI:
3127 case SIOCGIFINDEX:
6b96018b
AB
3128 case SIOCGIFADDR:
3129 case SIOCSIFADDR:
3130 case SIOCSIFHWBROADCAST:
6b96018b 3131 case SIOCDIFADDR:
6b96018b
AB
3132 case SIOCGIFBRDADDR:
3133 case SIOCSIFBRDADDR:
3134 case SIOCGIFDSTADDR:
3135 case SIOCSIFDSTADDR:
3136 case SIOCGIFNETMASK:
3137 case SIOCSIFNETMASK:
3138 case SIOCSIFPFLAGS:
3139 case SIOCGIFPFLAGS:
3140 case SIOCGIFTXQLEN:
3141 case SIOCSIFTXQLEN:
3142 case SIOCBRADDIF:
3143 case SIOCBRDELIF:
9177efd3
AB
3144 case SIOCSIFNAME:
3145 case SIOCGMIIPHY:
3146 case SIOCGMIIREG:
3147 case SIOCSMIIREG:
6b96018b 3148 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 3149
6b96018b
AB
3150 case SIOCSARP:
3151 case SIOCGARP:
3152 case SIOCDARP:
6b96018b 3153 case SIOCATMARK:
9177efd3
AB
3154 return sock_do_ioctl(net, sock, cmd, arg);
3155 }
3156
6b96018b
AB
3157 return -ENOIOCTLCMD;
3158}
7a229387 3159
95c96174 3160static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3161 unsigned long arg)
89bbfc95
SP
3162{
3163 struct socket *sock = file->private_data;
3164 int ret = -ENOIOCTLCMD;
87de87d5
DM
3165 struct sock *sk;
3166 struct net *net;
3167
3168 sk = sock->sk;
3169 net = sock_net(sk);
89bbfc95
SP
3170
3171 if (sock->ops->compat_ioctl)
3172 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3173
87de87d5
DM
3174 if (ret == -ENOIOCTLCMD &&
3175 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3176 ret = compat_wext_handle_ioctl(net, cmd, arg);
3177
6b96018b
AB
3178 if (ret == -ENOIOCTLCMD)
3179 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3180
89bbfc95
SP
3181 return ret;
3182}
3183#endif
3184
ac5a488e
SS
3185int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3186{
3187 return sock->ops->bind(sock, addr, addrlen);
3188}
c6d409cf 3189EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3190
3191int kernel_listen(struct socket *sock, int backlog)
3192{
3193 return sock->ops->listen(sock, backlog);
3194}
c6d409cf 3195EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3196
3197int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3198{
3199 struct sock *sk = sock->sk;
3200 int err;
3201
3202 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3203 newsock);
3204 if (err < 0)
3205 goto done;
3206
3207 err = sock->ops->accept(sock, *newsock, flags);
3208 if (err < 0) {
3209 sock_release(*newsock);
fa8705b0 3210 *newsock = NULL;
ac5a488e
SS
3211 goto done;
3212 }
3213
3214 (*newsock)->ops = sock->ops;
1b08534e 3215 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3216
3217done:
3218 return err;
3219}
c6d409cf 3220EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3221
3222int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3223 int flags)
ac5a488e
SS
3224{
3225 return sock->ops->connect(sock, addr, addrlen, flags);
3226}
c6d409cf 3227EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3228
3229int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3230 int *addrlen)
3231{
3232 return sock->ops->getname(sock, addr, addrlen, 0);
3233}
c6d409cf 3234EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3235
3236int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3237 int *addrlen)
3238{
3239 return sock->ops->getname(sock, addr, addrlen, 1);
3240}
c6d409cf 3241EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3242
3243int kernel_getsockopt(struct socket *sock, int level, int optname,
3244 char *optval, int *optlen)
3245{
3246 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3247 char __user *uoptval;
3248 int __user *uoptlen;
ac5a488e
SS
3249 int err;
3250
fb8621bb
NK
3251 uoptval = (char __user __force *) optval;
3252 uoptlen = (int __user __force *) optlen;
3253
ac5a488e
SS
3254 set_fs(KERNEL_DS);
3255 if (level == SOL_SOCKET)
fb8621bb 3256 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3257 else
fb8621bb
NK
3258 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3259 uoptlen);
ac5a488e
SS
3260 set_fs(oldfs);
3261 return err;
3262}
c6d409cf 3263EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3264
3265int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3266 char *optval, unsigned int optlen)
ac5a488e
SS
3267{
3268 mm_segment_t oldfs = get_fs();
fb8621bb 3269 char __user *uoptval;
ac5a488e
SS
3270 int err;
3271
fb8621bb
NK
3272 uoptval = (char __user __force *) optval;
3273
ac5a488e
SS
3274 set_fs(KERNEL_DS);
3275 if (level == SOL_SOCKET)
fb8621bb 3276 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3277 else
fb8621bb 3278 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3279 optlen);
3280 set_fs(oldfs);
3281 return err;
3282}
c6d409cf 3283EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3284
3285int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3286 size_t size, int flags)
3287{
3288 if (sock->ops->sendpage)
3289 return sock->ops->sendpage(sock, page, offset, size, flags);
3290
3291 return sock_no_sendpage(sock, page, offset, size, flags);
3292}
c6d409cf 3293EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3294
3295int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3296{
3297 mm_segment_t oldfs = get_fs();
3298 int err;
3299
3300 set_fs(KERNEL_DS);
3301 err = sock->ops->ioctl(sock, cmd, arg);
3302 set_fs(oldfs);
3303
3304 return err;
3305}
c6d409cf 3306EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3307
91cf45f0
TM
3308int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3309{
3310 return sock->ops->shutdown(sock, how);
3311}
91cf45f0 3312EXPORT_SYMBOL(kernel_sock_shutdown);