]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/socket.c
net: use indirect calls helpers at early demux stage
[mirror_ubuntu-jammy-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4 72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
408eccce 75#include <linux/ptp_classify.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
600e1779 91#include <linux/xattr.h>
c8e8cd57 92#include <linux/nospec.h>
1da177e4 93
7c0f6ba6 94#include <linux/uaccess.h>
1da177e4
LT
95#include <asm/unistd.h>
96
97#include <net/compat.h>
87de87d5 98#include <net/wext.h>
f8451725 99#include <net/cls_cgroup.h>
1da177e4
LT
100
101#include <net/sock.h>
102#include <linux/netfilter.h>
103
6b96018b
AB
104#include <linux/if_tun.h>
105#include <linux/ipv6_route.h>
106#include <linux/route.h>
6b96018b 107#include <linux/sockios.h>
076bb0c8 108#include <net/busy_poll.h>
f24b9be5 109#include <linux/errqueue.h>
06021292 110
e0d1095a 111#ifdef CONFIG_NET_RX_BUSY_POLL
64b0dc51
ET
112unsigned int sysctl_net_busy_read __read_mostly;
113unsigned int sysctl_net_busy_poll __read_mostly;
06021292 114#endif
6b96018b 115
8ae5e030
AV
116static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
117static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
89bddce5 118static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
119
120static int sock_close(struct inode *inode, struct file *file);
a11e1d43
LT
121static __poll_t sock_poll(struct file *file,
122 struct poll_table_struct *wait);
89bddce5 123static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
124#ifdef CONFIG_COMPAT
125static long compat_sock_ioctl(struct file *file,
89bddce5 126 unsigned int cmd, unsigned long arg);
89bbfc95 127#endif
1da177e4 128static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
129static ssize_t sock_sendpage(struct file *file, struct page *page,
130 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 131static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 132 struct pipe_inode_info *pipe, size_t len,
9c55e01c 133 unsigned int flags);
1da177e4 134
1da177e4
LT
135/*
136 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
137 * in the operation structures but are done directly via the socketcall() multiplexor.
138 */
139
da7071d7 140static const struct file_operations socket_file_ops = {
1da177e4
LT
141 .owner = THIS_MODULE,
142 .llseek = no_llseek,
8ae5e030
AV
143 .read_iter = sock_read_iter,
144 .write_iter = sock_write_iter,
1da177e4
LT
145 .poll = sock_poll,
146 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
147#ifdef CONFIG_COMPAT
148 .compat_ioctl = compat_sock_ioctl,
149#endif
1da177e4 150 .mmap = sock_mmap,
1da177e4
LT
151 .release = sock_close,
152 .fasync = sock_fasync,
5274f052
JA
153 .sendpage = sock_sendpage,
154 .splice_write = generic_splice_sendpage,
9c55e01c 155 .splice_read = sock_splice_read,
1da177e4
LT
156};
157
158/*
159 * The protocol list. Each protocol is registered in here.
160 */
161
1da177e4 162static DEFINE_SPINLOCK(net_family_lock);
190683a9 163static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1da177e4 164
1da177e4 165/*
89bddce5
SH
166 * Support routines.
167 * Move socket addresses back and forth across the kernel/user
168 * divide and look after the messy bits.
1da177e4
LT
169 */
170
1da177e4
LT
171/**
172 * move_addr_to_kernel - copy a socket address into kernel space
173 * @uaddr: Address in user space
174 * @kaddr: Address in kernel space
175 * @ulen: Length in user space
176 *
177 * The address is copied into kernel space. If the provided address is
178 * too long an error code of -EINVAL is returned. If the copy gives
179 * invalid addresses -EFAULT is returned. On a success 0 is returned.
180 */
181
43db362d 182int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1da177e4 183{
230b1839 184 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 185 return -EINVAL;
89bddce5 186 if (ulen == 0)
1da177e4 187 return 0;
89bddce5 188 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 189 return -EFAULT;
3ec3b2fb 190 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
191}
192
193/**
194 * move_addr_to_user - copy an address to user space
195 * @kaddr: kernel space address
196 * @klen: length of address in kernel
197 * @uaddr: user space address
198 * @ulen: pointer to user length field
199 *
200 * The value pointed to by ulen on entry is the buffer length available.
201 * This is overwritten with the buffer space used. -EINVAL is returned
202 * if an overlong buffer is specified or a negative buffer size. -EFAULT
203 * is returned if either the buffer or the length field are not
204 * accessible.
205 * After copying the data up to the limit the user specifies, the true
206 * length of the data is written over the length limit the user
207 * specified. Zero is returned for a success.
208 */
89bddce5 209
43db362d 210static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
11165f14 211 void __user *uaddr, int __user *ulen)
1da177e4
LT
212{
213 int err;
214 int len;
215
68c6beb3 216 BUG_ON(klen > sizeof(struct sockaddr_storage));
89bddce5
SH
217 err = get_user(len, ulen);
218 if (err)
1da177e4 219 return err;
89bddce5
SH
220 if (len > klen)
221 len = klen;
68c6beb3 222 if (len < 0)
1da177e4 223 return -EINVAL;
89bddce5 224 if (len) {
d6fe3945
SG
225 if (audit_sockaddr(klen, kaddr))
226 return -ENOMEM;
89bddce5 227 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
228 return -EFAULT;
229 }
230 /*
89bddce5
SH
231 * "fromlen shall refer to the value before truncation.."
232 * 1003.1g
1da177e4
LT
233 */
234 return __put_user(klen, ulen);
235}
236
08009a76 237static struct kmem_cache *sock_inode_cachep __ro_after_init;
1da177e4
LT
238
239static struct inode *sock_alloc_inode(struct super_block *sb)
240{
241 struct socket_alloc *ei;
eaefd110 242 struct socket_wq *wq;
89bddce5 243
e94b1766 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
245 if (!ei)
246 return NULL;
eaefd110
ED
247 wq = kmalloc(sizeof(*wq), GFP_KERNEL);
248 if (!wq) {
43815482
ED
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
eaefd110
ED
252 init_waitqueue_head(&wq->wait);
253 wq->fasync_list = NULL;
574aab1e 254 wq->flags = 0;
e6476c21 255 ei->socket.wq = wq;
89bddce5 256
1da177e4
LT
257 ei->socket.state = SS_UNCONNECTED;
258 ei->socket.flags = 0;
259 ei->socket.ops = NULL;
260 ei->socket.sk = NULL;
261 ei->socket.file = NULL;
1da177e4
LT
262
263 return &ei->vfs_inode;
264}
265
266static void sock_destroy_inode(struct inode *inode)
267{
43815482
ED
268 struct socket_alloc *ei;
269
270 ei = container_of(inode, struct socket_alloc, vfs_inode);
e6476c21 271 kfree_rcu(ei->socket.wq, rcu);
43815482 272 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
273}
274
51cc5068 275static void init_once(void *foo)
1da177e4 276{
89bddce5 277 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 278
a35afb83 279 inode_init_once(&ei->vfs_inode);
1da177e4 280}
89bddce5 281
1e911632 282static void init_inodecache(void)
1da177e4
LT
283{
284 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
285 sizeof(struct socket_alloc),
286 0,
287 (SLAB_HWCACHE_ALIGN |
288 SLAB_RECLAIM_ACCOUNT |
5d097056 289 SLAB_MEM_SPREAD | SLAB_ACCOUNT),
20c2df83 290 init_once);
1e911632 291 BUG_ON(sock_inode_cachep == NULL);
1da177e4
LT
292}
293
b87221de 294static const struct super_operations sockfs_ops = {
c6d409cf
ED
295 .alloc_inode = sock_alloc_inode,
296 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs,
1da177e4
LT
298};
299
c23fbb6b
ED
300/*
301 * sockfs_dname() is called from d_path().
302 */
303static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
304{
305 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
c5ef6035 306 d_inode(dentry)->i_ino);
c23fbb6b
ED
307}
308
3ba13d17 309static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 310 .d_dname = sockfs_dname,
1da177e4
LT
311};
312
bba0bd31
AG
313static int sockfs_xattr_get(const struct xattr_handler *handler,
314 struct dentry *dentry, struct inode *inode,
315 const char *suffix, void *value, size_t size)
316{
317 if (value) {
318 if (dentry->d_name.len + 1 > size)
319 return -ERANGE;
320 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
321 }
322 return dentry->d_name.len + 1;
323}
324
325#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
326#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
327#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
328
329static const struct xattr_handler sockfs_xattr_handler = {
330 .name = XATTR_NAME_SOCKPROTONAME,
331 .get = sockfs_xattr_get,
332};
333
4a590153
AG
334static int sockfs_security_xattr_set(const struct xattr_handler *handler,
335 struct dentry *dentry, struct inode *inode,
336 const char *suffix, const void *value,
337 size_t size, int flags)
338{
339 /* Handled by LSM. */
340 return -EAGAIN;
341}
342
343static const struct xattr_handler sockfs_security_xattr_handler = {
344 .prefix = XATTR_SECURITY_PREFIX,
345 .set = sockfs_security_xattr_set,
346};
347
bba0bd31
AG
348static const struct xattr_handler *sockfs_xattr_handlers[] = {
349 &sockfs_xattr_handler,
4a590153 350 &sockfs_security_xattr_handler,
bba0bd31
AG
351 NULL
352};
353
c74a1cbb
AV
354static struct dentry *sockfs_mount(struct file_system_type *fs_type,
355 int flags, const char *dev_name, void *data)
356{
bba0bd31
AG
357 return mount_pseudo_xattr(fs_type, "socket:", &sockfs_ops,
358 sockfs_xattr_handlers,
359 &sockfs_dentry_operations, SOCKFS_MAGIC);
c74a1cbb
AV
360}
361
362static struct vfsmount *sock_mnt __read_mostly;
363
364static struct file_system_type sock_fs_type = {
365 .name = "sockfs",
366 .mount = sockfs_mount,
367 .kill_sb = kill_anon_super,
368};
369
1da177e4
LT
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success, the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we no longer
 *	refer to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
386
8a3c245c
PT
387/**
388 * sock_alloc_file - Bind a &socket to a &file
389 * @sock: socket
390 * @flags: file status flags
391 * @dname: protocol name
392 *
393 * Returns the &file bound with @sock, implicitly storing it
394 * in sock->file. If dname is %NULL, sets to "".
395 * On failure the return is a ERR pointer (see linux/err.h).
396 * This function uses GFP_KERNEL internally.
397 */
398
aab174f0 399struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
1da177e4 400{
7cbe66b6 401 struct file *file;
1da177e4 402
d93aa9d8
AV
403 if (!dname)
404 dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
39d8c1b6 405
d93aa9d8
AV
406 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
407 O_RDWR | (flags & O_NONBLOCK),
408 &socket_file_ops);
b5ffe634 409 if (IS_ERR(file)) {
8e1611e2 410 sock_release(sock);
39b65252 411 return file;
cc3808f8
AV
412 }
413
414 sock->file = file;
39d8c1b6 415 file->private_data = sock;
28407630 416 return file;
39d8c1b6 417}
56b31d1c 418EXPORT_SYMBOL(sock_alloc_file);
39d8c1b6 419
56b31d1c 420static int sock_map_fd(struct socket *sock, int flags)
39d8c1b6
DM
421{
422 struct file *newfile;
28407630 423 int fd = get_unused_fd_flags(flags);
ce4bb04c
AV
424 if (unlikely(fd < 0)) {
425 sock_release(sock);
28407630 426 return fd;
ce4bb04c 427 }
39d8c1b6 428
aab174f0 429 newfile = sock_alloc_file(sock, flags, NULL);
28407630 430 if (likely(!IS_ERR(newfile))) {
39d8c1b6 431 fd_install(fd, newfile);
28407630
AV
432 return fd;
433 }
7cbe66b6 434
28407630
AV
435 put_unused_fd(fd);
436 return PTR_ERR(newfile);
1da177e4
LT
437}
438
8a3c245c
PT
439/**
440 * sock_from_file - Return the &socket bounded to @file.
441 * @file: file
442 * @err: pointer to an error code return
443 *
444 * On failure returns %NULL and assigns -ENOTSOCK to @err.
445 */
446
406a3c63 447struct socket *sock_from_file(struct file *file, int *err)
6cb153ca 448{
6cb153ca
BL
449 if (file->f_op == &socket_file_ops)
450 return file->private_data; /* set in sock_map_fd */
451
23bb80d2
ED
452 *err = -ENOTSOCK;
453 return NULL;
6cb153ca 454}
406a3c63 455EXPORT_SYMBOL(sock_from_file);
6cb153ca 456
1da177e4 457/**
c6d409cf 458 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
459 * @fd: file handle
460 * @err: pointer to an error code return
461 *
462 * The file handle passed in is locked and the socket it is bound
241c4667 463 * to is returned. If an error occurs the err pointer is overwritten
1da177e4
LT
464 * with a negative errno code and NULL is returned. The function checks
465 * for both invalid handles and passing a handle which is not a socket.
466 *
467 * On a success the socket object pointer is returned.
468 */
469
470struct socket *sockfd_lookup(int fd, int *err)
471{
472 struct file *file;
1da177e4
LT
473 struct socket *sock;
474
89bddce5
SH
475 file = fget(fd);
476 if (!file) {
1da177e4
LT
477 *err = -EBADF;
478 return NULL;
479 }
89bddce5 480
6cb153ca
BL
481 sock = sock_from_file(file, err);
482 if (!sock)
1da177e4 483 fput(file);
6cb153ca
BL
484 return sock;
485}
c6d409cf 486EXPORT_SYMBOL(sockfd_lookup);
1da177e4 487
6cb153ca
BL
488static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
489{
00e188ef 490 struct fd f = fdget(fd);
6cb153ca
BL
491 struct socket *sock;
492
3672558c 493 *err = -EBADF;
00e188ef
AV
494 if (f.file) {
495 sock = sock_from_file(f.file, err);
496 if (likely(sock)) {
497 *fput_needed = f.flags;
6cb153ca 498 return sock;
00e188ef
AV
499 }
500 fdput(f);
1da177e4 501 }
6cb153ca 502 return NULL;
1da177e4
LT
503}
504
600e1779
MY
505static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
506 size_t size)
507{
508 ssize_t len;
509 ssize_t used = 0;
510
c5ef6035 511 len = security_inode_listsecurity(d_inode(dentry), buffer, size);
600e1779
MY
512 if (len < 0)
513 return len;
514 used += len;
515 if (buffer) {
516 if (size < used)
517 return -ERANGE;
518 buffer += len;
519 }
520
521 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
522 used += len;
523 if (buffer) {
524 if (size < used)
525 return -ERANGE;
526 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
527 buffer += len;
528 }
529
530 return used;
531}
532
dc647ec8 533static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
86741ec2
LC
534{
535 int err = simple_setattr(dentry, iattr);
536
e1a3a60a 537 if (!err && (iattr->ia_valid & ATTR_UID)) {
86741ec2
LC
538 struct socket *sock = SOCKET_I(d_inode(dentry));
539
6d8c50dc
CW
540 if (sock->sk)
541 sock->sk->sk_uid = iattr->ia_uid;
542 else
543 err = -ENOENT;
86741ec2
LC
544 }
545
546 return err;
547}
548
600e1779 549static const struct inode_operations sockfs_inode_ops = {
600e1779 550 .listxattr = sockfs_listxattr,
86741ec2 551 .setattr = sockfs_setattr,
600e1779
MY
552};
553
1da177e4 554/**
8a3c245c 555 * sock_alloc - allocate a socket
89bddce5 556 *
1da177e4
LT
557 * Allocate a new inode and socket object. The two are bound together
558 * and initialised. The socket is then returned. If we are out of inodes
8a3c245c 559 * NULL is returned. This functions uses GFP_KERNEL internally.
1da177e4
LT
560 */
561
f4a00aac 562struct socket *sock_alloc(void)
1da177e4 563{
89bddce5
SH
564 struct inode *inode;
565 struct socket *sock;
1da177e4 566
a209dfc7 567 inode = new_inode_pseudo(sock_mnt->mnt_sb);
1da177e4
LT
568 if (!inode)
569 return NULL;
570
571 sock = SOCKET_I(inode);
572
85fe4025 573 inode->i_ino = get_next_ino();
89bddce5 574 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
575 inode->i_uid = current_fsuid();
576 inode->i_gid = current_fsgid();
600e1779 577 inode->i_op = &sockfs_inode_ops;
1da177e4 578
1da177e4
LT
579 return sock;
580}
f4a00aac 581EXPORT_SYMBOL(sock_alloc);
1da177e4 582
1da177e4 583/**
8a3c245c 584 * sock_release - close a socket
1da177e4
LT
585 * @sock: socket to close
586 *
587 * The socket is released from the protocol stack if it has a release
588 * callback, and the inode is then released if the socket is bound to
89bddce5 589 * an inode not a file.
1da177e4 590 */
89bddce5 591
6d8c50dc 592static void __sock_release(struct socket *sock, struct inode *inode)
1da177e4
LT
593{
594 if (sock->ops) {
595 struct module *owner = sock->ops->owner;
596
6d8c50dc
CW
597 if (inode)
598 inode_lock(inode);
1da177e4 599 sock->ops->release(sock);
ff7b11aa 600 sock->sk = NULL;
6d8c50dc
CW
601 if (inode)
602 inode_unlock(inode);
1da177e4
LT
603 sock->ops = NULL;
604 module_put(owner);
605 }
606
e6476c21 607 if (sock->wq->fasync_list)
3410f22e 608 pr_err("%s: fasync list not empty!\n", __func__);
1da177e4 609
1da177e4
LT
610 if (!sock->file) {
611 iput(SOCK_INODE(sock));
612 return;
613 }
89bddce5 614 sock->file = NULL;
1da177e4 615}
6d8c50dc
CW
616
617void sock_release(struct socket *sock)
618{
619 __sock_release(sock, NULL);
620}
c6d409cf 621EXPORT_SYMBOL(sock_release);
1da177e4 622
c14ac945 623void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
20d49473 624{
140c55d4
ED
625 u8 flags = *tx_flags;
626
c14ac945 627 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
140c55d4
ED
628 flags |= SKBTX_HW_TSTAMP;
629
c14ac945 630 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
140c55d4
ED
631 flags |= SKBTX_SW_TSTAMP;
632
c14ac945 633 if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
140c55d4
ED
634 flags |= SKBTX_SCHED_TSTAMP;
635
140c55d4 636 *tx_flags = flags;
20d49473 637}
67cc0d40 638EXPORT_SYMBOL(__sock_tx_timestamp);
20d49473 639
8a3c245c
PT
640/**
641 * sock_sendmsg - send a message through @sock
642 * @sock: socket
643 * @msg: message to send
644 *
645 * Sends @msg through @sock, passing through LSM.
646 * Returns the number of bytes sent, or an error code.
647 */
648
d8725c86 649static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
1da177e4 650{
01e97e65 651 int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
d8725c86
AV
652 BUG_ON(ret == -EIOCBQUEUED);
653 return ret;
1da177e4
LT
654}
655
d8725c86 656int sock_sendmsg(struct socket *sock, struct msghdr *msg)
228e548e 657{
d8725c86 658 int err = security_socket_sendmsg(sock, msg,
01e97e65 659 msg_data_left(msg));
228e548e 660
d8725c86 661 return err ?: sock_sendmsg_nosec(sock, msg);
0cf00c6f 662}
c6d409cf 663EXPORT_SYMBOL(sock_sendmsg);
1da177e4 664
8a3c245c
PT
665/**
666 * kernel_sendmsg - send a message through @sock (kernel-space)
667 * @sock: socket
668 * @msg: message header
669 * @vec: kernel vec
670 * @num: vec array length
671 * @size: total message data size
672 *
673 * Builds the message data with @vec and sends it through @sock.
674 * Returns the number of bytes sent, or an error code.
675 */
676
1da177e4
LT
677int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
678 struct kvec *vec, size_t num, size_t size)
679{
aa563d7b 680 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
d8725c86 681 return sock_sendmsg(sock, msg);
1da177e4 682}
c6d409cf 683EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 684
8a3c245c
PT
685/**
686 * kernel_sendmsg_locked - send a message through @sock (kernel-space)
687 * @sk: sock
688 * @msg: message header
689 * @vec: output s/g array
690 * @num: output s/g array length
691 * @size: total message data size
692 *
693 * Builds the message data with @vec and sends it through @sock.
694 * Returns the number of bytes sent, or an error code.
695 * Caller must hold @sk.
696 */
697
306b13eb
TH
698int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
699 struct kvec *vec, size_t num, size_t size)
700{
701 struct socket *sock = sk->sk_socket;
702
703 if (!sock->ops->sendmsg_locked)
db5980d8 704 return sock_no_sendmsg_locked(sk, msg, size);
306b13eb 705
aa563d7b 706 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
306b13eb
TH
707
708 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
709}
710EXPORT_SYMBOL(kernel_sendmsg_locked);
711
8605330a
SHY
712static bool skb_is_err_queue(const struct sk_buff *skb)
713{
714 /* pkt_type of skbs enqueued on the error queue are set to
715 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
716 * in recvmsg, since skbs received on a local socket will never
717 * have a pkt_type of PACKET_OUTGOING.
718 */
719 return skb->pkt_type == PACKET_OUTGOING;
720}
721
b50a5c70
ML
722/* On transmit, software and hardware timestamps are returned independently.
723 * As the two skb clones share the hardware timestamp, which may be updated
724 * before the software timestamp is received, a hardware TX timestamp may be
725 * returned only if there is no software TX timestamp. Ignore false software
726 * timestamps, which may be made in the __sock_recv_timestamp() call when the
7f1bc6e9 727 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
b50a5c70
ML
728 * hardware timestamp.
729 */
730static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
731{
732 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
733}
734
aad9c8c4
ML
735static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
736{
737 struct scm_ts_pktinfo ts_pktinfo;
738 struct net_device *orig_dev;
739
740 if (!skb_mac_header_was_set(skb))
741 return;
742
743 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
744
745 rcu_read_lock();
746 orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
747 if (orig_dev)
748 ts_pktinfo.if_index = orig_dev->ifindex;
749 rcu_read_unlock();
750
751 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
752 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
753 sizeof(ts_pktinfo), &ts_pktinfo);
754}
755
92f37fd2
ED
756/*
757 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
758 */
759void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
760 struct sk_buff *skb)
761{
20d49473 762 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
887feae3 763 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
9718475e
DD
764 struct scm_timestamping_internal tss;
765
b50a5c70 766 int empty = 1, false_tstamp = 0;
20d49473
PO
767 struct skb_shared_hwtstamps *shhwtstamps =
768 skb_hwtstamps(skb);
769
770 /* Race occurred between timestamp enabling and packet
771 receiving. Fill in the current time for now. */
b50a5c70 772 if (need_software_tstamp && skb->tstamp == 0) {
20d49473 773 __net_timestamp(skb);
b50a5c70
ML
774 false_tstamp = 1;
775 }
20d49473
PO
776
777 if (need_software_tstamp) {
778 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
887feae3
DD
779 if (new_tstamp) {
780 struct __kernel_sock_timeval tv;
781
782 skb_get_new_timestamp(skb, &tv);
783 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
784 sizeof(tv), &tv);
785 } else {
786 struct __kernel_old_timeval tv;
787
788 skb_get_timestamp(skb, &tv);
789 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
790 sizeof(tv), &tv);
791 }
20d49473 792 } else {
887feae3
DD
793 if (new_tstamp) {
794 struct __kernel_timespec ts;
795
796 skb_get_new_timestampns(skb, &ts);
797 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
798 sizeof(ts), &ts);
799 } else {
800 struct timespec ts;
801
802 skb_get_timestampns(skb, &ts);
803 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
804 sizeof(ts), &ts);
805 }
20d49473
PO
806 }
807 }
808
f24b9be5 809 memset(&tss, 0, sizeof(tss));
c199105d 810 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
9718475e 811 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
20d49473 812 empty = 0;
4d276eb6 813 if (shhwtstamps &&
b9f40e21 814 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
b50a5c70 815 !skb_is_swtx_tstamp(skb, false_tstamp) &&
9718475e 816 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
4d276eb6 817 empty = 0;
aad9c8c4
ML
818 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
819 !skb_is_err_queue(skb))
820 put_ts_pktinfo(msg, skb);
821 }
1c885808 822 if (!empty) {
9718475e
DD
823 if (sock_flag(sk, SOCK_TSTAMP_NEW))
824 put_cmsg_scm_timestamping64(msg, &tss);
825 else
826 put_cmsg_scm_timestamping(msg, &tss);
1c885808 827
8605330a 828 if (skb_is_err_queue(skb) && skb->len &&
4ef1b286 829 SKB_EXT_ERR(skb)->opt_stats)
1c885808
FY
830 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
831 skb->len, skb->data);
832 }
92f37fd2 833}
7c81fd8b
ACM
834EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
835
6e3e939f
JB
836void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
837 struct sk_buff *skb)
838{
839 int ack;
840
841 if (!sock_flag(sk, SOCK_WIFI_STATUS))
842 return;
843 if (!skb->wifi_acked_valid)
844 return;
845
846 ack = skb->wifi_acked;
847
848 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
849}
850EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
851
11165f14 852static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
853 struct sk_buff *skb)
3b885787 854{
744d5a3e 855 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
3b885787 856 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
744d5a3e 857 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
3b885787
NH
858}
859
/*
 * Attach both the timestamp and the receive-queue drop count control
 * messages for @skb to @msg, timestamps first.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 867
8a3c245c
PT
868/**
869 * sock_recvmsg - receive a message from @sock
870 * @sock: socket
871 * @msg: message to receive
872 * @flags: message flags
873 *
874 * Receives @msg from @sock, passing through LSM. Returns the total number
875 * of bytes received, or an error.
876 */
877
1b784140 878static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
2da62906 879 int flags)
1da177e4 880{
2da62906 881 return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
1da177e4
LT
882}
883
2da62906 884int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
a2e27255 885{
2da62906 886 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
a2e27255 887
2da62906 888 return err ?: sock_recvmsg_nosec(sock, msg, flags);
1da177e4 889}
c6d409cf 890EXPORT_SYMBOL(sock_recvmsg);
1da177e4 891
c1249c0a 892/**
8a3c245c
PT
893 * kernel_recvmsg - Receive a message from a socket (kernel space)
894 * @sock: The socket to receive the message from
895 * @msg: Received message
896 * @vec: Input s/g array for message data
897 * @num: Size of input s/g array
898 * @size: Number of bytes to read
899 * @flags: Message flags (MSG_DONTWAIT, etc...)
c1249c0a 900 *
8a3c245c
PT
901 * On return the msg structure contains the scatter/gather array passed in the
902 * vec argument. The array is modified so that it consists of the unfilled
903 * portion of the original array.
c1249c0a 904 *
8a3c245c 905 * The returned value is the total number of bytes received, or an error.
c1249c0a 906 */
8a3c245c 907
89bddce5
SH
908int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
909 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
910{
911 mm_segment_t oldfs = get_fs();
912 int result;
913
aa563d7b 914 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
1da177e4 915 set_fs(KERNEL_DS);
2da62906 916 result = sock_recvmsg(sock, msg, flags);
1da177e4
LT
917 set_fs(oldfs);
918 return result;
919}
c6d409cf 920EXPORT_SYMBOL(kernel_recvmsg);
1da177e4 921
ce1d4d3e
CH
922static ssize_t sock_sendpage(struct file *file, struct page *page,
923 int offset, size_t size, loff_t *ppos, int more)
1da177e4 924{
1da177e4
LT
925 struct socket *sock;
926 int flags;
927
ce1d4d3e
CH
928 sock = file->private_data;
929
35f9c09f
ED
930 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
931 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
932 flags |= more;
ce1d4d3e 933
e6949583 934 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 935}
1da177e4 936
9c55e01c 937static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 938 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
939 unsigned int flags)
940{
941 struct socket *sock = file->private_data;
942
997b37da 943 if (unlikely(!sock->ops->splice_read))
95506588 944 return generic_file_splice_read(file, ppos, pipe, len, flags);
997b37da 945
9c55e01c
JA
946 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
947}
948
8ae5e030 949static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
ce1d4d3e 950{
6d652330
AV
951 struct file *file = iocb->ki_filp;
952 struct socket *sock = file->private_data;
0345f931 953 struct msghdr msg = {.msg_iter = *to,
954 .msg_iocb = iocb};
8ae5e030 955 ssize_t res;
ce1d4d3e 956
8ae5e030
AV
957 if (file->f_flags & O_NONBLOCK)
958 msg.msg_flags = MSG_DONTWAIT;
959
960 if (iocb->ki_pos != 0)
1da177e4 961 return -ESPIPE;
027445c3 962
66ee59af 963 if (!iov_iter_count(to)) /* Match SYS5 behaviour */
1da177e4
LT
964 return 0;
965
2da62906 966 res = sock_recvmsg(sock, &msg, msg.msg_flags);
8ae5e030
AV
967 *to = msg.msg_iter;
968 return res;
1da177e4
LT
969}
970
8ae5e030 971static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
ce1d4d3e 972{
6d652330
AV
973 struct file *file = iocb->ki_filp;
974 struct socket *sock = file->private_data;
0345f931 975 struct msghdr msg = {.msg_iter = *from,
976 .msg_iocb = iocb};
8ae5e030 977 ssize_t res;
1da177e4 978
8ae5e030 979 if (iocb->ki_pos != 0)
ce1d4d3e 980 return -ESPIPE;
027445c3 981
8ae5e030
AV
982 if (file->f_flags & O_NONBLOCK)
983 msg.msg_flags = MSG_DONTWAIT;
984
6d652330
AV
985 if (sock->type == SOCK_SEQPACKET)
986 msg.msg_flags |= MSG_EOR;
987
d8725c86 988 res = sock_sendmsg(sock, &msg);
8ae5e030
AV
989 *from = msg.msg_iter;
990 return res;
1da177e4
LT
991}
992
1da177e4
LT
993/*
994 * Atomic setting of ioctl hooks to avoid race
995 * with module unload.
996 */
997
/* Held while br_ioctl_hook is replaced and while sock_ioctl() calls it. */
4a3e2f71 998static DEFINE_MUTEX(br_ioctl_mutex);
/* Handler for the bridge ioctls dispatched by sock_ioctl(); NULL when unset. */
c6d409cf 999static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 1000
/* Install @hook as the bridge ioctl handler (NULL removes it). */
881d966b 1001void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 1002{
4a3e2f71 1003 mutex_lock(&br_ioctl_mutex);
1da177e4 1004 br_ioctl_hook = hook;
4a3e2f71 1005 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1006}
1007EXPORT_SYMBOL(brioctl_set);
1008
/* Held while vlan_ioctl_hook is replaced and while sock_ioctl() calls it. */
4a3e2f71 1009static DEFINE_MUTEX(vlan_ioctl_mutex);
/* Handler for SIOCGIFVLAN/SIOCSIFVLAN dispatched by sock_ioctl(); NULL when unset. */
881d966b 1010static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 1011
/* Install @hook as the VLAN ioctl handler (NULL removes it). */
881d966b 1012void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 1013{
4a3e2f71 1014 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1015 vlan_ioctl_hook = hook;
4a3e2f71 1016 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
1017}
1018EXPORT_SYMBOL(vlan_ioctl_set);
1019
/* Held while dlci_ioctl_hook is replaced and while sock_ioctl() calls it. */
4a3e2f71 1020static DEFINE_MUTEX(dlci_ioctl_mutex);
/* Handler for SIOCADDDLCI/SIOCDELDLCI dispatched by sock_ioctl(); NULL when unset. */
89bddce5 1021static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 1022
/* Install @hook as the DLCI ioctl handler (NULL removes it). */
89bddce5 1023void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 1024{
4a3e2f71 1025 mutex_lock(&dlci_ioctl_mutex);
1da177e4 1026 dlci_ioctl_hook = hook;
4a3e2f71 1027 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1028}
1029EXPORT_SYMBOL(dlci_ioctl_set);
1030
6b96018b 1031static long sock_do_ioctl(struct net *net, struct socket *sock,
63ff03ab 1032 unsigned int cmd, unsigned long arg)
6b96018b
AB
1033{
1034 int err;
1035 void __user *argp = (void __user *)arg;
1036
1037 err = sock->ops->ioctl(sock, cmd, arg);
1038
1039 /*
1040 * If this ioctl is unknown try to hand it down
1041 * to the NIC driver.
1042 */
36fd633e
AV
1043 if (err != -ENOIOCTLCMD)
1044 return err;
6b96018b 1045
36fd633e
AV
1046 if (cmd == SIOCGIFCONF) {
1047 struct ifconf ifc;
1048 if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
1049 return -EFAULT;
1050 rtnl_lock();
1051 err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
1052 rtnl_unlock();
1053 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
1054 err = -EFAULT;
44c02a2c
AV
1055 } else {
1056 struct ifreq ifr;
1057 bool need_copyout;
63ff03ab 1058 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
44c02a2c
AV
1059 return -EFAULT;
1060 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1061 if (!err && need_copyout)
63ff03ab 1062 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
44c02a2c 1063 return -EFAULT;
36fd633e 1064 }
6b96018b
AB
1065 return err;
1066}
1067
1da177e4
LT
1068/*
1069 * With an ioctl, arg may well be a user mode pointer, but we don't know
1070 * what to do with it - that's up to the protocol still.
1071 */
1072
8a3c245c
PT
1073/**
1074 * get_net_ns - increment the refcount of the network namespace
1075 * @ns: common namespace (net)
1076 *
1077 * Returns the net's common namespace.
1078 */
1079
d8d211a2 1080struct ns_common *get_net_ns(struct ns_common *ns)
c62cce2c
AV
1081{
/*
 * @ns is the ns member embedded in a struct net: recover the enclosing
 * net with container_of(), pass it through get_net(), and return the
 * address of the ns member of the net that get_net() hands back.
 */
1082 return &get_net(container_of(ns, struct net, ns))->ns;
1083}
d8d211a2 1084EXPORT_SYMBOL_GPL(get_net_ns);
c62cce2c 1085
1da177e4
LT
1086static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1087{
1088 struct socket *sock;
881d966b 1089 struct sock *sk;
1da177e4
LT
1090 void __user *argp = (void __user *)arg;
1091 int pid, err;
881d966b 1092 struct net *net;
1da177e4 1093
b69aee04 1094 sock = file->private_data;
881d966b 1095 sk = sock->sk;
3b1e0a65 1096 net = sock_net(sk);
44c02a2c
AV
1097 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
1098 struct ifreq ifr;
1099 bool need_copyout;
1100 if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
1101 return -EFAULT;
1102 err = dev_ioctl(net, cmd, &ifr, &need_copyout);
1103 if (!err && need_copyout)
1104 if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
1105 return -EFAULT;
1da177e4 1106 } else
3d23e349 1107#ifdef CONFIG_WEXT_CORE
1da177e4 1108 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
b1b0c245 1109 err = wext_handle_ioctl(net, cmd, argp);
1da177e4 1110 } else
3d23e349 1111#endif
89bddce5 1112 switch (cmd) {
1da177e4
LT
1113 case FIOSETOWN:
1114 case SIOCSPGRP:
1115 err = -EFAULT;
1116 if (get_user(pid, (int __user *)argp))
1117 break;
393cc3f5 1118 err = f_setown(sock->file, pid, 1);
1da177e4
LT
1119 break;
1120 case FIOGETOWN:
1121 case SIOCGPGRP:
609d7fa9 1122 err = put_user(f_getown(sock->file),
89bddce5 1123 (int __user *)argp);
1da177e4
LT
1124 break;
1125 case SIOCGIFBR:
1126 case SIOCSIFBR:
1127 case SIOCBRADDBR:
1128 case SIOCBRDELBR:
1129 err = -ENOPKG;
1130 if (!br_ioctl_hook)
1131 request_module("bridge");
1132
4a3e2f71 1133 mutex_lock(&br_ioctl_mutex);
89bddce5 1134 if (br_ioctl_hook)
881d966b 1135 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 1136 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
1137 break;
1138 case SIOCGIFVLAN:
1139 case SIOCSIFVLAN:
1140 err = -ENOPKG;
1141 if (!vlan_ioctl_hook)
1142 request_module("8021q");
1143
4a3e2f71 1144 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1145 if (vlan_ioctl_hook)
881d966b 1146 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1147 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1148 break;
1da177e4
LT
1149 case SIOCADDDLCI:
1150 case SIOCDELDLCI:
1151 err = -ENOPKG;
1152 if (!dlci_ioctl_hook)
1153 request_module("dlci");
1154
7512cbf6
PE
1155 mutex_lock(&dlci_ioctl_mutex);
1156 if (dlci_ioctl_hook)
1da177e4 1157 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1158 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 1159 break;
c62cce2c
AV
1160 case SIOCGSKNS:
1161 err = -EPERM;
1162 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1163 break;
1164
1165 err = open_related_ns(&net->ns, get_net_ns);
1166 break;
0768e170
AB
1167 case SIOCGSTAMP_OLD:
1168 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
1169 if (!sock->ops->gettstamp) {
1170 err = -ENOIOCTLCMD;
1171 break;
1172 }
1173 err = sock->ops->gettstamp(sock, argp,
0768e170
AB
1174 cmd == SIOCGSTAMP_OLD,
1175 !IS_ENABLED(CONFIG_64BIT));
60747828 1176 break;
0768e170
AB
1177 case SIOCGSTAMP_NEW:
1178 case SIOCGSTAMPNS_NEW:
1179 if (!sock->ops->gettstamp) {
1180 err = -ENOIOCTLCMD;
1181 break;
1182 }
1183 err = sock->ops->gettstamp(sock, argp,
1184 cmd == SIOCGSTAMP_NEW,
1185 false);
c7cbdbf2 1186 break;
1da177e4 1187 default:
63ff03ab 1188 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1189 break;
89bddce5 1190 }
1da177e4
LT
1191 return err;
1192}
1193
8a3c245c
PT
1194/**
1195 * sock_create_lite - creates a socket
1196 * @family: protocol family (AF_INET, ...)
1197 * @type: communication type (SOCK_STREAM, ...)
1198 * @protocol: protocol (0, ...)
1199 * @res: new socket
1200 *
1201 * Creates a new socket and assigns it to @res, passing through LSM.
1202 * The new socket initialization is not complete, see kernel_accept().
1203 * Returns 0 or an error. On failure @res is set to %NULL.
1204 * This function internally uses GFP_KERNEL.
1205 */
1206
1da177e4
LT
1207int sock_create_lite(int family, int type, int protocol, struct socket **res)
1208{
1209 int err;
1210 struct socket *sock = NULL;
89bddce5 1211
1da177e4
LT
1212 err = security_socket_create(family, type, protocol, 1);
1213 if (err)
1214 goto out;
1215
1216 sock = sock_alloc();
1217 if (!sock) {
1218 err = -ENOMEM;
1219 goto out;
1220 }
1221
1da177e4 1222 sock->type = type;
7420ed23
VY
1223 err = security_socket_post_create(sock, family, type, protocol, 1);
1224 if (err)
1225 goto out_release;
1226
1da177e4
LT
1227out:
1228 *res = sock;
1229 return err;
7420ed23
VY
1230out_release:
1231 sock_release(sock);
1232 sock = NULL;
1233 goto out;
1da177e4 1234}
c6d409cf 1235EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1236
1237/* No kernel lock held - perfect */
ade994f4 1238static __poll_t sock_poll(struct file *file, poll_table *wait)
1da177e4 1239{
3cafb376 1240 struct socket *sock = file->private_data;
a331de3b 1241 __poll_t events = poll_requested_events(wait), flag = 0;
2d48d67f 1242
e88958e6
CH
1243 if (!sock->ops->poll)
1244 return 0;
f641f13b 1245
a331de3b
CH
1246 if (sk_can_busy_loop(sock->sk)) {
1247 /* poll once if requested by the syscall */
1248 if (events & POLL_BUSY_LOOP)
1249 sk_busy_loop(sock->sk, 1);
1250
1251 /* if this socket can poll_ll, tell the system call */
1252 flag = POLL_BUSY_LOOP;
1253 }
1254
1255 return sock->ops->poll(file, sock, wait) | flag;
1da177e4
LT
1256}
1257
/* mmap file operation for sockets: forwards to the protocol's mmap method. */
89bddce5 1258static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1259{
b69aee04 1260 struct socket *sock = file->private_data;
1da177e4
LT
1261
/* ->mmap is called unconditionally, with no NULL check on the method. */
1262 return sock->ops->mmap(file, sock, vma);
1263}
1264
/*
 * Releases the socket embedded in @inode via __sock_release().
 * @filp is not used; the function always returns 0.
 */
20380731 1265static int sock_close(struct inode *inode, struct file *filp)
1da177e4 1266{
6d8c50dc 1267 __sock_release(SOCKET_I(inode), inode);
1da177e4
LT
1268 return 0;
1269}
1270
1271/*
1272 * Update the socket async list
1273 *
1274 * Fasync_list locking strategy.
1275 *
1276 * 1. fasync_list is modified only under process context socket lock
1277 * i.e. under semaphore.
1278 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1279 * or under socket lock
1da177e4
LT
1280 */
1281
1282static int sock_fasync(int fd, struct file *filp, int on)
1283{
989a2979
ED
1284 struct socket *sock = filp->private_data;
1285 struct sock *sk = sock->sk;
eaefd110 1286 struct socket_wq *wq;
1da177e4 1287
989a2979 1288 if (sk == NULL)
1da177e4 1289 return -EINVAL;
1da177e4
LT
1290
1291 lock_sock(sk);
e6476c21 1292 wq = sock->wq;
eaefd110 1293 fasync_helper(fd, filp, on, &wq->fasync_list);
1da177e4 1294
eaefd110 1295 if (!wq->fasync_list)
989a2979
ED
1296 sock_reset_flag(sk, SOCK_FASYNC);
1297 else
bcdce719 1298 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1299
989a2979 1300 release_sock(sk);
1da177e4
LT
1301 return 0;
1302}
1303
ceb5d58b 1304/* This function may be called only under rcu_lock */
1da177e4 1305
ceb5d58b 1306int sock_wake_async(struct socket_wq *wq, int how, int band)
1da177e4 1307{
ceb5d58b 1308 if (!wq || !wq->fasync_list)
1da177e4 1309 return -1;
ceb5d58b 1310
89bddce5 1311 switch (how) {
8d8ad9d7 1312 case SOCK_WAKE_WAITD:
ceb5d58b 1313 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
1da177e4
LT
1314 break;
1315 goto call_kill;
8d8ad9d7 1316 case SOCK_WAKE_SPACE:
ceb5d58b 1317 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
1da177e4
LT
1318 break;
1319 /* fall through */
8d8ad9d7 1320 case SOCK_WAKE_IO:
89bddce5 1321call_kill:
43815482 1322 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1323 break;
8d8ad9d7 1324 case SOCK_WAKE_URG:
43815482 1325 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1326 }
ceb5d58b 1327
1da177e4
LT
1328 return 0;
1329}
c6d409cf 1330EXPORT_SYMBOL(sock_wake_async);
1da177e4 1331
8a3c245c
PT
1332/**
1333 * __sock_create - creates a socket
1334 * @net: net namespace
1335 * @family: protocol family (AF_INET, ...)
1336 * @type: communication type (SOCK_STREAM, ...)
1337 * @protocol: protocol (0, ...)
1338 * @res: new socket
1339 * @kern: boolean for kernel space sockets
1340 *
1341 * Creates a new socket and assigns it to @res, passing through LSM.
1342 * Returns 0 or an error. On failure @res is left unwritten. @kern must
1343 * be set to true if the socket resides in kernel space.
1344 * This function internally uses GFP_KERNEL.
1345 */
1346
721db93a 1347int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1348 struct socket **res, int kern)
1da177e4
LT
1349{
1350 int err;
1351 struct socket *sock;
55737fda 1352 const struct net_proto_family *pf;
1da177e4
LT
1353
1354 /*
89bddce5 1355 * Check protocol is in range
1da177e4
LT
1356 */
1357 if (family < 0 || family >= NPROTO)
1358 return -EAFNOSUPPORT;
1359 if (type < 0 || type >= SOCK_MAX)
1360 return -EINVAL;
1361
1362 /* Compatibility.
1363
1364 This uglymoron is moved from INET layer to here to avoid
1365 deadlock in module load.
1366 */
1367 if (family == PF_INET && type == SOCK_PACKET) {
f3c98690 1368 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1369 current->comm);
1da177e4
LT
1370 family = PF_PACKET;
1371 }
1372
1373 err = security_socket_create(family, type, protocol, kern);
1374 if (err)
1375 return err;
89bddce5 1376
55737fda
SH
1377 /*
1378 * Allocate the socket and allow the family to set things up. if
1379 * the protocol is 0, the family is instructed to select an appropriate
1380 * default.
1381 */
1382 sock = sock_alloc();
1383 if (!sock) {
e87cc472 1384 net_warn_ratelimited("socket: no more sockets\n");
55737fda
SH
1385 return -ENFILE; /* Not exactly a match, but its the
1386 closest posix thing */
1387 }
1388
1389 sock->type = type;
1390
95a5afca 1391#ifdef CONFIG_MODULES
89bddce5
SH
1392 /* Attempt to load a protocol module if the find failed.
1393 *
1394 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1395 * requested real, full-featured networking support upon configuration.
1396 * Otherwise module support will break!
1397 */
190683a9 1398 if (rcu_access_pointer(net_families[family]) == NULL)
89bddce5 1399 request_module("net-pf-%d", family);
1da177e4
LT
1400#endif
1401
55737fda
SH
1402 rcu_read_lock();
1403 pf = rcu_dereference(net_families[family]);
1404 err = -EAFNOSUPPORT;
1405 if (!pf)
1406 goto out_release;
1da177e4
LT
1407
1408 /*
1409 * We will call the ->create function, that possibly is in a loadable
1410 * module, so we have to bump that loadable module refcnt first.
1411 */
55737fda 1412 if (!try_module_get(pf->owner))
1da177e4
LT
1413 goto out_release;
1414
55737fda
SH
1415 /* Now protected by module ref count */
1416 rcu_read_unlock();
1417
3f378b68 1418 err = pf->create(net, sock, protocol, kern);
55737fda 1419 if (err < 0)
1da177e4 1420 goto out_module_put;
a79af59e 1421
1da177e4
LT
1422 /*
1423 * Now to bump the refcnt of the [loadable] module that owns this
1424 * socket at sock_release time we decrement its refcnt.
1425 */
55737fda
SH
1426 if (!try_module_get(sock->ops->owner))
1427 goto out_module_busy;
1428
1da177e4
LT
1429 /*
1430 * Now that we're done with the ->create function, the [loadable]
1431 * module can have its refcnt decremented
1432 */
55737fda 1433 module_put(pf->owner);
7420ed23
VY
1434 err = security_socket_post_create(sock, family, type, protocol, kern);
1435 if (err)
3b185525 1436 goto out_sock_release;
55737fda 1437 *res = sock;
1da177e4 1438
55737fda
SH
1439 return 0;
1440
1441out_module_busy:
1442 err = -EAFNOSUPPORT;
1da177e4 1443out_module_put:
55737fda
SH
1444 sock->ops = NULL;
1445 module_put(pf->owner);
1446out_sock_release:
1da177e4 1447 sock_release(sock);
55737fda
SH
1448 return err;
1449
1450out_release:
1451 rcu_read_unlock();
1452 goto out_sock_release;
1da177e4 1453}
721db93a 1454EXPORT_SYMBOL(__sock_create);
1da177e4 1455
8a3c245c
PT
1456/**
1457 * sock_create - creates a socket
1458 * @family: protocol family (AF_INET, ...)
1459 * @type: communication type (SOCK_STREAM, ...)
1460 * @protocol: protocol (0, ...)
1461 * @res: new socket
1462 *
1463 * A wrapper around __sock_create().
1464 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1465 */
1466
1da177e4
LT
1467int sock_create(int family, int type, int protocol, struct socket **res)
1468{
/* Uses the calling task's network namespace and passes kern = 0. */
1b8d7ae4 1469 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1470}
c6d409cf 1471EXPORT_SYMBOL(sock_create);
1da177e4 1472
8a3c245c
PT
1473/**
1474 * sock_create_kern - creates a socket (kernel space)
1475 * @net: net namespace
1476 * @family: protocol family (AF_INET, ...)
1477 * @type: communication type (SOCK_STREAM, ...)
1478 * @protocol: protocol (0, ...)
1479 * @res: new socket
1480 *
1481 * A wrapper around __sock_create().
1482 * Returns 0 or an error. This function internally uses GFP_KERNEL.
1483 */
1484
eeb1bd5c 1485int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
1da177e4 1486{
/* Same as sock_create() but with a caller-chosen namespace and kern = 1. */
eeb1bd5c 1487 return __sock_create(net, family, type, protocol, res, 1);
1da177e4 1488}
c6d409cf 1489EXPORT_SYMBOL(sock_create_kern);
1da177e4 1490
9d6a15c3 1491int __sys_socket(int family, int type, int protocol)
1da177e4
LT
1492{
1493 int retval;
1494 struct socket *sock;
a677a039
UD
1495 int flags;
1496
e38b36f3
UD
1497 /* Check the SOCK_* constants for consistency. */
1498 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1499 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1500 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1501 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1502
a677a039 1503 flags = type & ~SOCK_TYPE_MASK;
77d27200 1504 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1505 return -EINVAL;
1506 type &= SOCK_TYPE_MASK;
1da177e4 1507
aaca0bdc
UD
1508 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1509 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1510
1da177e4
LT
1511 retval = sock_create(family, type, protocol, &sock);
1512 if (retval < 0)
8e1611e2 1513 return retval;
1da177e4 1514
8e1611e2 1515 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1516}
1517
9d6a15c3
DB
/* socket syscall entry point: forwards its arguments unchanged to __sys_socket(). */
1518SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1519{
1520 return __sys_socket(family, type, protocol);
1521}
1522
1da177e4
LT
1523/*
1524 * Create a pair of connected sockets.
1525 */
1526
6debc8d8 1527int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1da177e4
LT
1528{
1529 struct socket *sock1, *sock2;
1530 int fd1, fd2, err;
db349509 1531 struct file *newfile1, *newfile2;
a677a039
UD
1532 int flags;
1533
1534 flags = type & ~SOCK_TYPE_MASK;
77d27200 1535 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1536 return -EINVAL;
1537 type &= SOCK_TYPE_MASK;
1da177e4 1538
aaca0bdc
UD
1539 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1540 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1541
016a266b
AV
1542 /*
1543 * reserve descriptors and make sure we won't fail
1544 * to return them to userland.
1545 */
1546 fd1 = get_unused_fd_flags(flags);
1547 if (unlikely(fd1 < 0))
1548 return fd1;
1549
1550 fd2 = get_unused_fd_flags(flags);
1551 if (unlikely(fd2 < 0)) {
1552 put_unused_fd(fd1);
1553 return fd2;
1554 }
1555
1556 err = put_user(fd1, &usockvec[0]);
1557 if (err)
1558 goto out;
1559
1560 err = put_user(fd2, &usockvec[1]);
1561 if (err)
1562 goto out;
1563
1da177e4
LT
1564 /*
1565 * Obtain the first socket and check if the underlying protocol
1566 * supports the socketpair call.
1567 */
1568
1569 err = sock_create(family, type, protocol, &sock1);
016a266b 1570 if (unlikely(err < 0))
1da177e4
LT
1571 goto out;
1572
1573 err = sock_create(family, type, protocol, &sock2);
016a266b
AV
1574 if (unlikely(err < 0)) {
1575 sock_release(sock1);
1576 goto out;
bf3c23d1 1577 }
d73aa286 1578
d47cd945
DH
1579 err = security_socket_socketpair(sock1, sock2);
1580 if (unlikely(err)) {
1581 sock_release(sock2);
1582 sock_release(sock1);
1583 goto out;
1584 }
1585
016a266b
AV
1586 err = sock1->ops->socketpair(sock1, sock2);
1587 if (unlikely(err < 0)) {
1588 sock_release(sock2);
1589 sock_release(sock1);
1590 goto out;
28407630
AV
1591 }
1592
aab174f0 1593 newfile1 = sock_alloc_file(sock1, flags, NULL);
b5ffe634 1594 if (IS_ERR(newfile1)) {
28407630 1595 err = PTR_ERR(newfile1);
016a266b
AV
1596 sock_release(sock2);
1597 goto out;
28407630
AV
1598 }
1599
aab174f0 1600 newfile2 = sock_alloc_file(sock2, flags, NULL);
28407630
AV
1601 if (IS_ERR(newfile2)) {
1602 err = PTR_ERR(newfile2);
016a266b
AV
1603 fput(newfile1);
1604 goto out;
db349509
AV
1605 }
1606
157cf649 1607 audit_fd_pair(fd1, fd2);
d73aa286 1608
db349509
AV
1609 fd_install(fd1, newfile1);
1610 fd_install(fd2, newfile2);
d73aa286 1611 return 0;
1da177e4 1612
016a266b 1613out:
d73aa286 1614 put_unused_fd(fd2);
d73aa286 1615 put_unused_fd(fd1);
1da177e4
LT
1616 return err;
1617}
1618
6debc8d8
DB
/* socketpair syscall entry point: forwards its arguments unchanged to __sys_socketpair(). */
1619SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1620 int __user *, usockvec)
1621{
1622 return __sys_socketpair(family, type, protocol, usockvec);
1623}
1624
1da177e4
LT
1625/*
1626 * Bind a name to a socket. Nothing much to do here since it's
1627 * the protocol's responsibility to handle the local address.
1628 *
1629 * We move the socket address to kernel space before we call
1630 * the protocol layer (having also checked the address is ok).
1631 */
1632
a87d35d8 1633int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1da177e4
LT
1634{
1635 struct socket *sock;
230b1839 1636 struct sockaddr_storage address;
6cb153ca 1637 int err, fput_needed;
1da177e4 1638
89bddce5 1639 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1640 if (sock) {
43db362d 1641 err = move_addr_to_kernel(umyaddr, addrlen, &address);
068b88cc 1642 if (!err) {
89bddce5 1643 err = security_socket_bind(sock,
230b1839 1644 (struct sockaddr *)&address,
89bddce5 1645 addrlen);
6cb153ca
BL
1646 if (!err)
1647 err = sock->ops->bind(sock,
89bddce5 1648 (struct sockaddr *)
230b1839 1649 &address, addrlen);
1da177e4 1650 }
6cb153ca 1651 fput_light(sock->file, fput_needed);
89bddce5 1652 }
1da177e4
LT
1653 return err;
1654}
1655
a87d35d8
DB
/* bind syscall entry point: forwards its arguments unchanged to __sys_bind(). */
1656SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1657{
1658 return __sys_bind(fd, umyaddr, addrlen);
1659}
1660
1da177e4
LT
1661/*
1662 * Perform a listen. Basically, we allow the protocol to do anything
1663 * necessary for a listen, and if that works, we mark the socket as
1664 * ready for listening.
1665 */
1666
25e290ee 1667int __sys_listen(int fd, int backlog)
1da177e4
LT
1668{
1669 struct socket *sock;
6cb153ca 1670 int err, fput_needed;
b8e1f9b5 1671 int somaxconn;
89bddce5
SH
1672
1673 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1674 if (sock) {
8efa6e93 1675 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
95c96174 1676 if ((unsigned int)backlog > somaxconn)
b8e1f9b5 1677 backlog = somaxconn;
1da177e4
LT
1678
1679 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1680 if (!err)
1681 err = sock->ops->listen(sock, backlog);
1da177e4 1682
6cb153ca 1683 fput_light(sock->file, fput_needed);
1da177e4
LT
1684 }
1685 return err;
1686}
1687
25e290ee
DB
/* listen syscall entry point: forwards its arguments unchanged to __sys_listen(). */
1688SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1689{
1690 return __sys_listen(fd, backlog);
1691}
1692
1da177e4
LT
1693/*
1694 * For accept, we attempt to create a new socket, set up the link
1695 * with the client, wake up the client, then return the new
1696 * connected fd. We collect the address of the connector in kernel
1697 * space and move it to user at the very end. This is unclean because
1698 * we open the socket then return an error.
1699 *
1700 * 1003.1g adds the ability to recvmsg() to query connection pending
1701 * status to recvmsg. We need to add that support in a way that's
b903036a 1702 * clean when we restructure accept also.
1da177e4
LT
1703 */
1704
4541e805
DB
1705int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1706 int __user *upeer_addrlen, int flags)
1da177e4
LT
1707{
1708 struct socket *sock, *newsock;
39d8c1b6 1709 struct file *newfile;
6cb153ca 1710 int err, len, newfd, fput_needed;
230b1839 1711 struct sockaddr_storage address;
1da177e4 1712
77d27200 1713 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1714 return -EINVAL;
1715
1716 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1717 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1718
6cb153ca 1719 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1720 if (!sock)
1721 goto out;
1722
1723 err = -ENFILE;
c6d409cf
ED
1724 newsock = sock_alloc();
1725 if (!newsock)
1da177e4
LT
1726 goto out_put;
1727
1728 newsock->type = sock->type;
1729 newsock->ops = sock->ops;
1730
1da177e4
LT
1731 /*
1732 * We don't need try_module_get here, as the listening socket (sock)
1733 * has the protocol module (sock->ops->owner) held.
1734 */
1735 __module_get(newsock->ops->owner);
1736
28407630 1737 newfd = get_unused_fd_flags(flags);
39d8c1b6
DM
1738 if (unlikely(newfd < 0)) {
1739 err = newfd;
9a1875e6
DM
1740 sock_release(newsock);
1741 goto out_put;
39d8c1b6 1742 }
aab174f0 1743 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
b5ffe634 1744 if (IS_ERR(newfile)) {
28407630
AV
1745 err = PTR_ERR(newfile);
1746 put_unused_fd(newfd);
28407630
AV
1747 goto out_put;
1748 }
39d8c1b6 1749
a79af59e
FF
1750 err = security_socket_accept(sock, newsock);
1751 if (err)
39d8c1b6 1752 goto out_fd;
a79af59e 1753
cdfbabfb 1754 err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
1da177e4 1755 if (err < 0)
39d8c1b6 1756 goto out_fd;
1da177e4
LT
1757
1758 if (upeer_sockaddr) {
9b2c45d4
DV
1759 len = newsock->ops->getname(newsock,
1760 (struct sockaddr *)&address, 2);
1761 if (len < 0) {
1da177e4 1762 err = -ECONNABORTED;
39d8c1b6 1763 goto out_fd;
1da177e4 1764 }
43db362d 1765 err = move_addr_to_user(&address,
230b1839 1766 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1767 if (err < 0)
39d8c1b6 1768 goto out_fd;
1da177e4
LT
1769 }
1770
1771 /* File flags are not inherited via accept() unlike another OSes. */
1772
39d8c1b6
DM
1773 fd_install(newfd, newfile);
1774 err = newfd;
1da177e4 1775
1da177e4 1776out_put:
6cb153ca 1777 fput_light(sock->file, fput_needed);
1da177e4
LT
1778out:
1779 return err;
39d8c1b6 1780out_fd:
9606a216 1781 fput(newfile);
39d8c1b6 1782 put_unused_fd(newfd);
1da177e4
LT
1783 goto out_put;
1784}
1785
4541e805
DB
/* accept4 syscall entry point: forwards its arguments unchanged to __sys_accept4(). */
1786SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1787 int __user *, upeer_addrlen, int, flags)
1788{
1789 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
1790}
1791
20f37034
HC
/* accept syscall entry point: __sys_accept4() with flags fixed at 0. */
1792SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1793 int __user *, upeer_addrlen)
aaca0bdc 1794{
4541e805 1795 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1796}
1797
1da177e4
LT
1798/*
1799 * Attempt to connect to a socket with the server address. The address
1800 * is in user space so we verify it is OK and move it to kernel space.
1801 *
1802 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1803 * break bindings
1804 *
1805 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1806 * other SEQPACKET protocols that take time to connect() as it doesn't
1807 * include the -EINPROGRESS status for such sockets.
1808 */
1809
1387c2c2 1810int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1da177e4
LT
1811{
1812 struct socket *sock;
230b1839 1813 struct sockaddr_storage address;
6cb153ca 1814 int err, fput_needed;
1da177e4 1815
6cb153ca 1816 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1817 if (!sock)
1818 goto out;
43db362d 1819 err = move_addr_to_kernel(uservaddr, addrlen, &address);
1da177e4
LT
1820 if (err < 0)
1821 goto out_put;
1822
89bddce5 1823 err =
230b1839 1824 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1825 if (err)
1826 goto out_put;
1827
230b1839 1828 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1829 sock->file->f_flags);
1830out_put:
6cb153ca 1831 fput_light(sock->file, fput_needed);
1da177e4
LT
1832out:
1833 return err;
1834}
1835
1387c2c2
DB
/* connect syscall entry point: forwards its arguments unchanged to __sys_connect(). */
1836SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1837 int, addrlen)
1838{
1839 return __sys_connect(fd, uservaddr, addrlen);
1840}
1841
1da177e4
LT
1842/*
1843 * Get the local address ('name') of a socket object. Move the obtained
1844 * name to user space.
1845 */
1846
8882a107
DB
1847int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1848 int __user *usockaddr_len)
1da177e4
LT
1849{
1850 struct socket *sock;
230b1839 1851 struct sockaddr_storage address;
9b2c45d4 1852 int err, fput_needed;
89bddce5 1853
6cb153ca 1854 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1855 if (!sock)
1856 goto out;
1857
1858 err = security_socket_getsockname(sock);
1859 if (err)
1860 goto out_put;
1861
9b2c45d4
DV
1862 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
1863 if (err < 0)
1da177e4 1864 goto out_put;
9b2c45d4
DV
1865 /* "err" is actually length in this case */
1866 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
1da177e4
LT
1867
1868out_put:
6cb153ca 1869 fput_light(sock->file, fput_needed);
1da177e4
LT
1870out:
1871 return err;
1872}
1873
8882a107
DB
/* getsockname syscall entry point: forwards its arguments unchanged to __sys_getsockname(). */
1874SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1875 int __user *, usockaddr_len)
1876{
1877 return __sys_getsockname(fd, usockaddr, usockaddr_len);
1878}
1879
1da177e4
LT
1880/*
1881 * Get the remote address ('name') of a socket object. Move the obtained
1882 * name to user space.
1883 */
1884
b21c8f83
DB
1885int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1886 int __user *usockaddr_len)
1da177e4
LT
1887{
1888 struct socket *sock;
230b1839 1889 struct sockaddr_storage address;
9b2c45d4 1890 int err, fput_needed;
1da177e4 1891
89bddce5
SH
1892 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1893 if (sock != NULL) {
1da177e4
LT
1894 err = security_socket_getpeername(sock);
1895 if (err) {
6cb153ca 1896 fput_light(sock->file, fput_needed);
1da177e4
LT
1897 return err;
1898 }
1899
9b2c45d4
DV
1900 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
1901 if (err >= 0)
1902 /* "err" is actually length in this case */
1903 err = move_addr_to_user(&address, err, usockaddr,
89bddce5 1904 usockaddr_len);
6cb153ca 1905 fput_light(sock->file, fput_needed);
1da177e4
LT
1906 }
1907 return err;
1908}
1909
b21c8f83
DB
/* getpeername syscall entry point: forwards its arguments unchanged to __sys_getpeername(). */
1910SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1911 int __user *, usockaddr_len)
1912{
1913 return __sys_getpeername(fd, usockaddr, usockaddr_len);
1914}
1915
1da177e4
LT
1916/*
1917 * Send a datagram to a given address. We move the address into kernel
1918 * space and check the user space data area is readable before invoking
1919 * the protocol.
1920 */
211b634b
DB
1921int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1922 struct sockaddr __user *addr, int addr_len)
1da177e4
LT
1923{
1924 struct socket *sock;
230b1839 1925 struct sockaddr_storage address;
1da177e4
LT
1926 int err;
1927 struct msghdr msg;
1928 struct iovec iov;
6cb153ca 1929 int fput_needed;
6cb153ca 1930
602bd0e9
AV
1931 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1932 if (unlikely(err))
1933 return err;
de0fa95c
PE
1934 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1935 if (!sock)
4387ff75 1936 goto out;
6cb153ca 1937
89bddce5 1938 msg.msg_name = NULL;
89bddce5
SH
1939 msg.msg_control = NULL;
1940 msg.msg_controllen = 0;
1941 msg.msg_namelen = 0;
6cb153ca 1942 if (addr) {
43db362d 1943 err = move_addr_to_kernel(addr, addr_len, &address);
1da177e4
LT
1944 if (err < 0)
1945 goto out_put;
230b1839 1946 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1947 msg.msg_namelen = addr_len;
1da177e4
LT
1948 }
1949 if (sock->file->f_flags & O_NONBLOCK)
1950 flags |= MSG_DONTWAIT;
1951 msg.msg_flags = flags;
d8725c86 1952 err = sock_sendmsg(sock, &msg);
1da177e4 1953
89bddce5 1954out_put:
de0fa95c 1955 fput_light(sock->file, fput_needed);
4387ff75 1956out:
1da177e4
LT
1957 return err;
1958}
1959
211b634b
DB
1960SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1961 unsigned int, flags, struct sockaddr __user *, addr,
1962 int, addr_len)
1963{
1964 return __sys_sendto(fd, buff, len, flags, addr, addr_len);
1965}
1966
1da177e4 1967/*
89bddce5 1968 * Send a datagram down a socket.
1da177e4
LT
1969 */
1970
3e0fa65f 1971SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
95c96174 1972 unsigned int, flags)
1da177e4 1973{
211b634b 1974 return __sys_sendto(fd, buff, len, flags, NULL, 0);
1da177e4
LT
1975}
1976
1977/*
89bddce5 1978 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1979 * sender. We verify the buffers are writable and if needed move the
1980 * sender address from kernel to user space.
1981 */
7a09e1eb
DB
1982int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
1983 struct sockaddr __user *addr, int __user *addr_len)
1da177e4
LT
1984{
1985 struct socket *sock;
1986 struct iovec iov;
1987 struct msghdr msg;
230b1839 1988 struct sockaddr_storage address;
89bddce5 1989 int err, err2;
6cb153ca
BL
1990 int fput_needed;
1991
602bd0e9
AV
1992 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
1993 if (unlikely(err))
1994 return err;
de0fa95c 1995 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1996 if (!sock)
de0fa95c 1997 goto out;
1da177e4 1998
89bddce5
SH
1999 msg.msg_control = NULL;
2000 msg.msg_controllen = 0;
f3d33426
HFS
2001 /* Save some cycles and don't copy the address if not needed */
2002 msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2003 /* We assume all kernel code knows the size of sockaddr_storage */
2004 msg.msg_namelen = 0;
130ed5d1 2005 msg.msg_iocb = NULL;
9f138fa6 2006 msg.msg_flags = 0;
1da177e4
LT
2007 if (sock->file->f_flags & O_NONBLOCK)
2008 flags |= MSG_DONTWAIT;
2da62906 2009 err = sock_recvmsg(sock, &msg, flags);
1da177e4 2010
89bddce5 2011 if (err >= 0 && addr != NULL) {
43db362d 2012 err2 = move_addr_to_user(&address,
230b1839 2013 msg.msg_namelen, addr, addr_len);
89bddce5
SH
2014 if (err2 < 0)
2015 err = err2;
1da177e4 2016 }
de0fa95c
PE
2017
2018 fput_light(sock->file, fput_needed);
4387ff75 2019out:
1da177e4
LT
2020 return err;
2021}
2022
7a09e1eb
DB
2023SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
2024 unsigned int, flags, struct sockaddr __user *, addr,
2025 int __user *, addr_len)
2026{
2027 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
2028}
2029
1da177e4 2030/*
89bddce5 2031 * Receive a datagram from a socket.
1da177e4
LT
2032 */
2033
b7c0ddf5
JG
2034SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2035 unsigned int, flags)
1da177e4 2036{
7a09e1eb 2037 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1da177e4
LT
2038}
2039
2040/*
2041 * Set a socket option. Because we don't know the option lengths we have
2042 * to pass the user mode parameter for the protocols to sort out.
2043 */
2044
cc36dca0
DB
2045static int __sys_setsockopt(int fd, int level, int optname,
2046 char __user *optval, int optlen)
1da177e4 2047{
6cb153ca 2048 int err, fput_needed;
1da177e4
LT
2049 struct socket *sock;
2050
2051 if (optlen < 0)
2052 return -EINVAL;
89bddce5
SH
2053
2054 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2055 if (sock != NULL) {
2056 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
2057 if (err)
2058 goto out_put;
1da177e4
LT
2059
2060 if (level == SOL_SOCKET)
89bddce5
SH
2061 err =
2062 sock_setsockopt(sock, level, optname, optval,
2063 optlen);
1da177e4 2064 else
89bddce5
SH
2065 err =
2066 sock->ops->setsockopt(sock, level, optname, optval,
2067 optlen);
6cb153ca
BL
2068out_put:
2069 fput_light(sock->file, fput_needed);
1da177e4
LT
2070 }
2071 return err;
2072}
2073
cc36dca0
DB
2074SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2075 char __user *, optval, int, optlen)
2076{
2077 return __sys_setsockopt(fd, level, optname, optval, optlen);
2078}
2079
1da177e4
LT
2080/*
2081 * Get a socket option. Because we don't know the option lengths we have
2082 * to pass a user mode parameter for the protocols to sort out.
2083 */
2084
13a2d70e
DB
2085static int __sys_getsockopt(int fd, int level, int optname,
2086 char __user *optval, int __user *optlen)
1da177e4 2087{
6cb153ca 2088 int err, fput_needed;
1da177e4
LT
2089 struct socket *sock;
2090
89bddce5
SH
2091 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2092 if (sock != NULL) {
6cb153ca
BL
2093 err = security_socket_getsockopt(sock, level, optname);
2094 if (err)
2095 goto out_put;
1da177e4
LT
2096
2097 if (level == SOL_SOCKET)
89bddce5
SH
2098 err =
2099 sock_getsockopt(sock, level, optname, optval,
2100 optlen);
1da177e4 2101 else
89bddce5
SH
2102 err =
2103 sock->ops->getsockopt(sock, level, optname, optval,
2104 optlen);
6cb153ca
BL
2105out_put:
2106 fput_light(sock->file, fput_needed);
1da177e4
LT
2107 }
2108 return err;
2109}
2110
13a2d70e
DB
2111SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
2112 char __user *, optval, int __user *, optlen)
2113{
2114 return __sys_getsockopt(fd, level, optname, optval, optlen);
2115}
2116
1da177e4
LT
2117/*
2118 * Shutdown a socket.
2119 */
2120
005a1aea 2121int __sys_shutdown(int fd, int how)
1da177e4 2122{
6cb153ca 2123 int err, fput_needed;
1da177e4
LT
2124 struct socket *sock;
2125
89bddce5
SH
2126 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2127 if (sock != NULL) {
1da177e4 2128 err = security_socket_shutdown(sock, how);
6cb153ca
BL
2129 if (!err)
2130 err = sock->ops->shutdown(sock, how);
2131 fput_light(sock->file, fput_needed);
1da177e4
LT
2132 }
2133 return err;
2134}
2135
005a1aea
DB
2136SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2137{
2138 return __sys_shutdown(fd, how);
2139}
2140
/* Helpers that yield the address of a msghdr field which has the same type
 * (int / unsigned) in the native and the compat layout.
 *
 * They are not self-contained: the expansion reads a variable named "flags"
 * and, when MSG_CMSG_COMPAT is set in it, a variable named "<msg>_compat".
 * Both must be in scope wherever the macros are used.
 */
#define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
2147
c71d8ebe
TH
2148struct used_address {
2149 struct sockaddr_storage name;
2150 unsigned int name_len;
2151};
2152
da184284
AV
2153static int copy_msghdr_from_user(struct msghdr *kmsg,
2154 struct user_msghdr __user *umsg,
2155 struct sockaddr __user **save_addr,
2156 struct iovec **iov)
1661bf36 2157{
ffb07550 2158 struct user_msghdr msg;
08adb7da
AV
2159 ssize_t err;
2160
ffb07550 2161 if (copy_from_user(&msg, umsg, sizeof(*umsg)))
1661bf36 2162 return -EFAULT;
dbb490b9 2163
864d9664 2164 kmsg->msg_control = (void __force *)msg.msg_control;
ffb07550
AV
2165 kmsg->msg_controllen = msg.msg_controllen;
2166 kmsg->msg_flags = msg.msg_flags;
2167
2168 kmsg->msg_namelen = msg.msg_namelen;
2169 if (!msg.msg_name)
6a2a2b3a
AS
2170 kmsg->msg_namelen = 0;
2171
dbb490b9
ML
2172 if (kmsg->msg_namelen < 0)
2173 return -EINVAL;
2174
1661bf36 2175 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
db31c55a 2176 kmsg->msg_namelen = sizeof(struct sockaddr_storage);
08adb7da
AV
2177
2178 if (save_addr)
ffb07550 2179 *save_addr = msg.msg_name;
08adb7da 2180
ffb07550 2181 if (msg.msg_name && kmsg->msg_namelen) {
08adb7da 2182 if (!save_addr) {
864d9664
PA
2183 err = move_addr_to_kernel(msg.msg_name,
2184 kmsg->msg_namelen,
08adb7da
AV
2185 kmsg->msg_name);
2186 if (err < 0)
2187 return err;
2188 }
2189 } else {
2190 kmsg->msg_name = NULL;
2191 kmsg->msg_namelen = 0;
2192 }
2193
ffb07550 2194 if (msg.msg_iovlen > UIO_MAXIOV)
08adb7da
AV
2195 return -EMSGSIZE;
2196
0345f931 2197 kmsg->msg_iocb = NULL;
2198
ffb07550
AV
2199 return import_iovec(save_addr ? READ : WRITE,
2200 msg.msg_iov, msg.msg_iovlen,
da184284 2201 UIO_FASTIOV, iov, &kmsg->msg_iter);
1661bf36
DC
2202}
2203
666547ff 2204static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2205 struct msghdr *msg_sys, unsigned int flags,
28a94d8f
TH
2206 struct used_address *used_address,
2207 unsigned int allowed_msghdr_flags)
1da177e4 2208{
89bddce5
SH
2209 struct compat_msghdr __user *msg_compat =
2210 (struct compat_msghdr __user *)msg;
230b1839 2211 struct sockaddr_storage address;
1da177e4 2212 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 2213 unsigned char ctl[sizeof(struct cmsghdr) + 20]
846cc123 2214 __aligned(sizeof(__kernel_size_t));
89bddce5 2215 /* 20 is size of ipv6_pktinfo */
1da177e4 2216 unsigned char *ctl_buf = ctl;
d8725c86 2217 int ctl_len;
08adb7da 2218 ssize_t err;
89bddce5 2219
08adb7da 2220 msg_sys->msg_name = &address;
1da177e4 2221
08449320 2222 if (MSG_CMSG_COMPAT & flags)
08adb7da 2223 err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
08449320 2224 else
08adb7da 2225 err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
89bddce5 2226 if (err < 0)
da184284 2227 return err;
1da177e4
LT
2228
2229 err = -ENOBUFS;
2230
228e548e 2231 if (msg_sys->msg_controllen > INT_MAX)
1da177e4 2232 goto out_freeiov;
28a94d8f 2233 flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
228e548e 2234 ctl_len = msg_sys->msg_controllen;
1da177e4 2235 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5 2236 err =
228e548e 2237 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
89bddce5 2238 sizeof(ctl));
1da177e4
LT
2239 if (err)
2240 goto out_freeiov;
228e548e
AB
2241 ctl_buf = msg_sys->msg_control;
2242 ctl_len = msg_sys->msg_controllen;
1da177e4 2243 } else if (ctl_len) {
ac4340fc
DM
2244 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2245 CMSG_ALIGN(sizeof(struct cmsghdr)));
89bddce5 2246 if (ctl_len > sizeof(ctl)) {
1da177e4 2247 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 2248 if (ctl_buf == NULL)
1da177e4
LT
2249 goto out_freeiov;
2250 }
2251 err = -EFAULT;
2252 /*
228e548e 2253 * Careful! Before this, msg_sys->msg_control contains a user pointer.
1da177e4
LT
2254 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
2255 * checking falls down on this.
2256 */
fb8621bb 2257 if (copy_from_user(ctl_buf,
228e548e 2258 (void __user __force *)msg_sys->msg_control,
89bddce5 2259 ctl_len))
1da177e4 2260 goto out_freectl;
228e548e 2261 msg_sys->msg_control = ctl_buf;
1da177e4 2262 }
228e548e 2263 msg_sys->msg_flags = flags;
1da177e4
LT
2264
2265 if (sock->file->f_flags & O_NONBLOCK)
228e548e 2266 msg_sys->msg_flags |= MSG_DONTWAIT;
c71d8ebe
TH
2267 /*
2268 * If this is sendmmsg() and current destination address is same as
2269 * previously succeeded address, omit asking LSM's decision.
2270 * used_address->name_len is initialized to UINT_MAX so that the first
2271 * destination address never matches.
2272 */
bc909d9d
MD
2273 if (used_address && msg_sys->msg_name &&
2274 used_address->name_len == msg_sys->msg_namelen &&
2275 !memcmp(&used_address->name, msg_sys->msg_name,
c71d8ebe 2276 used_address->name_len)) {
d8725c86 2277 err = sock_sendmsg_nosec(sock, msg_sys);
c71d8ebe
TH
2278 goto out_freectl;
2279 }
d8725c86 2280 err = sock_sendmsg(sock, msg_sys);
c71d8ebe
TH
2281 /*
2282 * If this is sendmmsg() and sending to current destination address was
2283 * successful, remember it.
2284 */
2285 if (used_address && err >= 0) {
2286 used_address->name_len = msg_sys->msg_namelen;
bc909d9d
MD
2287 if (msg_sys->msg_name)
2288 memcpy(&used_address->name, msg_sys->msg_name,
2289 used_address->name_len);
c71d8ebe 2290 }
1da177e4
LT
2291
2292out_freectl:
89bddce5 2293 if (ctl_buf != ctl)
1da177e4
LT
2294 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
2295out_freeiov:
da184284 2296 kfree(iov);
228e548e
AB
2297 return err;
2298}
2299
2300/*
2301 * BSD sendmsg interface
2302 */
2303
e1834a32
DB
2304long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2305 bool forbid_cmsg_compat)
228e548e
AB
2306{
2307 int fput_needed, err;
2308 struct msghdr msg_sys;
1be374a0
AL
2309 struct socket *sock;
2310
e1834a32
DB
2311 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2312 return -EINVAL;
2313
1be374a0 2314 sock = sockfd_lookup_light(fd, &err, &fput_needed);
228e548e
AB
2315 if (!sock)
2316 goto out;
2317
28a94d8f 2318 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
228e548e 2319
6cb153ca 2320 fput_light(sock->file, fput_needed);
89bddce5 2321out:
1da177e4
LT
2322 return err;
2323}
2324
666547ff 2325SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
a7526eb5 2326{
e1834a32 2327 return __sys_sendmsg(fd, msg, flags, true);
a7526eb5
AL
2328}
2329
228e548e
AB
2330/*
2331 * Linux sendmmsg interface
2332 */
2333
2334int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
e1834a32 2335 unsigned int flags, bool forbid_cmsg_compat)
228e548e
AB
2336{
2337 int fput_needed, err, datagrams;
2338 struct socket *sock;
2339 struct mmsghdr __user *entry;
2340 struct compat_mmsghdr __user *compat_entry;
2341 struct msghdr msg_sys;
c71d8ebe 2342 struct used_address used_address;
f092276d 2343 unsigned int oflags = flags;
228e548e 2344
e1834a32
DB
2345 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2346 return -EINVAL;
2347
98382f41
AB
2348 if (vlen > UIO_MAXIOV)
2349 vlen = UIO_MAXIOV;
228e548e
AB
2350
2351 datagrams = 0;
2352
2353 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2354 if (!sock)
2355 return err;
2356
c71d8ebe 2357 used_address.name_len = UINT_MAX;
228e548e
AB
2358 entry = mmsg;
2359 compat_entry = (struct compat_mmsghdr __user *)mmsg;
728ffb86 2360 err = 0;
f092276d 2361 flags |= MSG_BATCH;
228e548e
AB
2362
2363 while (datagrams < vlen) {
f092276d
TH
2364 if (datagrams == vlen - 1)
2365 flags = oflags;
2366
228e548e 2367 if (MSG_CMSG_COMPAT & flags) {
666547ff 2368 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
28a94d8f 2369 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2370 if (err < 0)
2371 break;
2372 err = __put_user(err, &compat_entry->msg_len);
2373 ++compat_entry;
2374 } else {
a7526eb5 2375 err = ___sys_sendmsg(sock,
666547ff 2376 (struct user_msghdr __user *)entry,
28a94d8f 2377 &msg_sys, flags, &used_address, MSG_EOR);
228e548e
AB
2378 if (err < 0)
2379 break;
2380 err = put_user(err, &entry->msg_len);
2381 ++entry;
2382 }
2383
2384 if (err)
2385 break;
2386 ++datagrams;
3023898b
SHY
2387 if (msg_data_left(&msg_sys))
2388 break;
a78cb84c 2389 cond_resched();
228e548e
AB
2390 }
2391
228e548e
AB
2392 fput_light(sock->file, fput_needed);
2393
728ffb86
AB
2394 /* We only return an error if no datagrams were able to be sent */
2395 if (datagrams != 0)
228e548e
AB
2396 return datagrams;
2397
228e548e
AB
2398 return err;
2399}
2400
2401SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2402 unsigned int, vlen, unsigned int, flags)
2403{
e1834a32 2404 return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
228e548e
AB
2405}
2406
666547ff 2407static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
95c96174 2408 struct msghdr *msg_sys, unsigned int flags, int nosec)
1da177e4 2409{
89bddce5
SH
2410 struct compat_msghdr __user *msg_compat =
2411 (struct compat_msghdr __user *)msg;
1da177e4 2412 struct iovec iovstack[UIO_FASTIOV];
89bddce5 2413 struct iovec *iov = iovstack;
1da177e4 2414 unsigned long cmsg_ptr;
2da62906 2415 int len;
08adb7da 2416 ssize_t err;
1da177e4
LT
2417
2418 /* kernel mode address */
230b1839 2419 struct sockaddr_storage addr;
1da177e4
LT
2420
2421 /* user mode address pointers */
2422 struct sockaddr __user *uaddr;
08adb7da 2423 int __user *uaddr_len = COMPAT_NAMELEN(msg);
89bddce5 2424
08adb7da 2425 msg_sys->msg_name = &addr;
1da177e4 2426
f3d33426 2427 if (MSG_CMSG_COMPAT & flags)
08adb7da 2428 err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
f3d33426 2429 else
08adb7da 2430 err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
1da177e4 2431 if (err < 0)
da184284 2432 return err;
1da177e4 2433
a2e27255
ACM
2434 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2435 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2436
f3d33426
HFS
2437 /* We assume all kernel code knows the size of sockaddr_storage */
2438 msg_sys->msg_namelen = 0;
2439
1da177e4
LT
2440 if (sock->file->f_flags & O_NONBLOCK)
2441 flags |= MSG_DONTWAIT;
2da62906 2442 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, flags);
1da177e4
LT
2443 if (err < 0)
2444 goto out_freeiov;
2445 len = err;
2446
2447 if (uaddr != NULL) {
43db362d 2448 err = move_addr_to_user(&addr,
a2e27255 2449 msg_sys->msg_namelen, uaddr,
89bddce5 2450 uaddr_len);
1da177e4
LT
2451 if (err < 0)
2452 goto out_freeiov;
2453 }
a2e27255 2454 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2455 COMPAT_FLAGS(msg));
1da177e4
LT
2456 if (err)
2457 goto out_freeiov;
2458 if (MSG_CMSG_COMPAT & flags)
a2e27255 2459 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2460 &msg_compat->msg_controllen);
2461 else
a2e27255 2462 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2463 &msg->msg_controllen);
2464 if (err)
2465 goto out_freeiov;
2466 err = len;
2467
2468out_freeiov:
da184284 2469 kfree(iov);
a2e27255
ACM
2470 return err;
2471}
2472
2473/*
2474 * BSD recvmsg interface
2475 */
2476
e1834a32
DB
2477long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2478 bool forbid_cmsg_compat)
a2e27255
ACM
2479{
2480 int fput_needed, err;
2481 struct msghdr msg_sys;
1be374a0
AL
2482 struct socket *sock;
2483
e1834a32
DB
2484 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2485 return -EINVAL;
2486
1be374a0 2487 sock = sockfd_lookup_light(fd, &err, &fput_needed);
a2e27255
ACM
2488 if (!sock)
2489 goto out;
2490
a7526eb5 2491 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
a2e27255 2492
6cb153ca 2493 fput_light(sock->file, fput_needed);
1da177e4
LT
2494out:
2495 return err;
2496}
2497
666547ff 2498SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
a7526eb5
AL
2499 unsigned int, flags)
2500{
e1834a32 2501 return __sys_recvmsg(fd, msg, flags, true);
a7526eb5
AL
2502}
2503
a2e27255
ACM
2504/*
2505 * Linux recvmmsg interface
2506 */
2507
e11d4284
AB
2508static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2509 unsigned int vlen, unsigned int flags,
2510 struct timespec64 *timeout)
a2e27255
ACM
2511{
2512 int fput_needed, err, datagrams;
2513 struct socket *sock;
2514 struct mmsghdr __user *entry;
d7256d0e 2515 struct compat_mmsghdr __user *compat_entry;
a2e27255 2516 struct msghdr msg_sys;
766b9f92
DD
2517 struct timespec64 end_time;
2518 struct timespec64 timeout64;
a2e27255
ACM
2519
2520 if (timeout &&
2521 poll_select_set_timeout(&end_time, timeout->tv_sec,
2522 timeout->tv_nsec))
2523 return -EINVAL;
2524
2525 datagrams = 0;
2526
2527 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2528 if (!sock)
2529 return err;
2530
7797dc41
SHY
2531 if (likely(!(flags & MSG_ERRQUEUE))) {
2532 err = sock_error(sock->sk);
2533 if (err) {
2534 datagrams = err;
2535 goto out_put;
2536 }
e623a9e9 2537 }
a2e27255
ACM
2538
2539 entry = mmsg;
d7256d0e 2540 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2541
2542 while (datagrams < vlen) {
2543 /*
2544 * No need to ask LSM for more than the first datagram.
2545 */
d7256d0e 2546 if (MSG_CMSG_COMPAT & flags) {
666547ff 2547 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
a7526eb5
AL
2548 &msg_sys, flags & ~MSG_WAITFORONE,
2549 datagrams);
d7256d0e
JMG
2550 if (err < 0)
2551 break;
2552 err = __put_user(err, &compat_entry->msg_len);
2553 ++compat_entry;
2554 } else {
a7526eb5 2555 err = ___sys_recvmsg(sock,
666547ff 2556 (struct user_msghdr __user *)entry,
a7526eb5
AL
2557 &msg_sys, flags & ~MSG_WAITFORONE,
2558 datagrams);
d7256d0e
JMG
2559 if (err < 0)
2560 break;
2561 err = put_user(err, &entry->msg_len);
2562 ++entry;
2563 }
2564
a2e27255
ACM
2565 if (err)
2566 break;
a2e27255
ACM
2567 ++datagrams;
2568
71c5c159
BB
2569 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2570 if (flags & MSG_WAITFORONE)
2571 flags |= MSG_DONTWAIT;
2572
a2e27255 2573 if (timeout) {
766b9f92 2574 ktime_get_ts64(&timeout64);
c2e6c856 2575 *timeout = timespec64_sub(end_time, timeout64);
a2e27255
ACM
2576 if (timeout->tv_sec < 0) {
2577 timeout->tv_sec = timeout->tv_nsec = 0;
2578 break;
2579 }
2580
2581 /* Timeout, return less than vlen datagrams */
2582 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2583 break;
2584 }
2585
2586 /* Out of band data, return right away */
2587 if (msg_sys.msg_flags & MSG_OOB)
2588 break;
a78cb84c 2589 cond_resched();
a2e27255
ACM
2590 }
2591
a2e27255 2592 if (err == 0)
34b88a68
ACM
2593 goto out_put;
2594
2595 if (datagrams == 0) {
2596 datagrams = err;
2597 goto out_put;
2598 }
a2e27255 2599
34b88a68
ACM
2600 /*
2601 * We may return less entries than requested (vlen) if the
2602 * sock is non block and there aren't enough datagrams...
2603 */
2604 if (err != -EAGAIN) {
a2e27255 2605 /*
34b88a68
ACM
2606 * ... or if recvmsg returns an error after we
2607 * received some datagrams, where we record the
2608 * error to return on the next call or if the
2609 * app asks about it using getsockopt(SO_ERROR).
a2e27255 2610 */
34b88a68 2611 sock->sk->sk_err = -err;
a2e27255 2612 }
34b88a68
ACM
2613out_put:
2614 fput_light(sock->file, fput_needed);
a2e27255 2615
34b88a68 2616 return datagrams;
a2e27255
ACM
2617}
2618
e11d4284
AB
2619int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2620 unsigned int vlen, unsigned int flags,
2621 struct __kernel_timespec __user *timeout,
2622 struct old_timespec32 __user *timeout32)
a2e27255
ACM
2623{
2624 int datagrams;
c2e6c856 2625 struct timespec64 timeout_sys;
a2e27255 2626
e11d4284
AB
2627 if (timeout && get_timespec64(&timeout_sys, timeout))
2628 return -EFAULT;
a2e27255 2629
e11d4284 2630 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2631 return -EFAULT;
2632
e11d4284
AB
2633 if (!timeout && !timeout32)
2634 return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2635
2636 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
a2e27255 2637
e11d4284
AB
2638 if (datagrams <= 0)
2639 return datagrams;
2640
2641 if (timeout && put_timespec64(&timeout_sys, timeout))
2642 datagrams = -EFAULT;
2643
2644 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
a2e27255
ACM
2645 datagrams = -EFAULT;
2646
2647 return datagrams;
2648}
2649
1255e269
DB
2650SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2651 unsigned int, vlen, unsigned int, flags,
c2e6c856 2652 struct __kernel_timespec __user *, timeout)
1255e269 2653{
e11d4284
AB
2654 if (flags & MSG_CMSG_COMPAT)
2655 return -EINVAL;
2656
2657 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
2658}
2659
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 *	recvmmsg(2) entry point taking an old 32-bit timespec.
 *	MSG_CMSG_COMPAT is rejected with -EINVAL.
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	if ((flags & MSG_CMSG_COMPAT) != 0)
		return -EINVAL;

	return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
1255e269 2671
a2e27255 2672#ifdef __ARCH_WANT_SYS_SOCKETCALL
1da177e4
LT
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by the call
 * number. Entry 0 is a placeholder: call numbers start at 1.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),
	AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4),
	AL(6), AL(6), AL(2), AL(5), AL(5),
	AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
2683
2684/*
89bddce5 2685 * System call vectors.
1da177e4
LT
2686 *
2687 * Argument checking cleaned up. Saved 20% in size.
2688 * This function doesn't need to set the kernel lock because
89bddce5 2689 * it is set by the callees.
1da177e4
LT
2690 */
2691
3e0fa65f 2692SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
1da177e4 2693{
2950fa9d 2694 unsigned long a[AUDITSC_ARGS];
89bddce5 2695 unsigned long a0, a1;
1da177e4 2696 int err;
47379052 2697 unsigned int len;
1da177e4 2698
228e548e 2699 if (call < 1 || call > SYS_SENDMMSG)
1da177e4 2700 return -EINVAL;
c8e8cd57 2701 call = array_index_nospec(call, SYS_SENDMMSG + 1);
1da177e4 2702
47379052
AV
2703 len = nargs[call];
2704 if (len > sizeof(a))
2705 return -EINVAL;
2706
1da177e4 2707 /* copy_from_user should be SMP safe. */
47379052 2708 if (copy_from_user(a, args, len))
1da177e4 2709 return -EFAULT;
3ec3b2fb 2710
2950fa9d
CG
2711 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
2712 if (err)
2713 return err;
3ec3b2fb 2714
89bddce5
SH
2715 a0 = a[0];
2716 a1 = a[1];
2717
2718 switch (call) {
2719 case SYS_SOCKET:
9d6a15c3 2720 err = __sys_socket(a0, a1, a[2]);
89bddce5
SH
2721 break;
2722 case SYS_BIND:
a87d35d8 2723 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2724 break;
2725 case SYS_CONNECT:
1387c2c2 2726 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
89bddce5
SH
2727 break;
2728 case SYS_LISTEN:
25e290ee 2729 err = __sys_listen(a0, a1);
89bddce5
SH
2730 break;
2731 case SYS_ACCEPT:
4541e805
DB
2732 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2733 (int __user *)a[2], 0);
89bddce5
SH
2734 break;
2735 case SYS_GETSOCKNAME:
2736 err =
8882a107
DB
2737 __sys_getsockname(a0, (struct sockaddr __user *)a1,
2738 (int __user *)a[2]);
89bddce5
SH
2739 break;
2740 case SYS_GETPEERNAME:
2741 err =
b21c8f83
DB
2742 __sys_getpeername(a0, (struct sockaddr __user *)a1,
2743 (int __user *)a[2]);
89bddce5
SH
2744 break;
2745 case SYS_SOCKETPAIR:
6debc8d8 2746 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
89bddce5
SH
2747 break;
2748 case SYS_SEND:
f3bf896b
DB
2749 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2750 NULL, 0);
89bddce5
SH
2751 break;
2752 case SYS_SENDTO:
211b634b
DB
2753 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2754 (struct sockaddr __user *)a[4], a[5]);
89bddce5
SH
2755 break;
2756 case SYS_RECV:
d27e9afc
DB
2757 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2758 NULL, NULL);
89bddce5
SH
2759 break;
2760 case SYS_RECVFROM:
7a09e1eb
DB
2761 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2762 (struct sockaddr __user *)a[4],
2763 (int __user *)a[5]);
89bddce5
SH
2764 break;
2765 case SYS_SHUTDOWN:
005a1aea 2766 err = __sys_shutdown(a0, a1);
89bddce5
SH
2767 break;
2768 case SYS_SETSOCKOPT:
cc36dca0
DB
2769 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2770 a[4]);
89bddce5
SH
2771 break;
2772 case SYS_GETSOCKOPT:
2773 err =
13a2d70e
DB
2774 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
2775 (int __user *)a[4]);
89bddce5
SH
2776 break;
2777 case SYS_SENDMSG:
e1834a32
DB
2778 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2779 a[2], true);
89bddce5 2780 break;
228e548e 2781 case SYS_SENDMMSG:
e1834a32
DB
2782 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2783 a[3], true);
228e548e 2784 break;
89bddce5 2785 case SYS_RECVMSG:
e1834a32
DB
2786 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2787 a[2], true);
89bddce5 2788 break;
a2e27255 2789 case SYS_RECVMMSG:
e11d4284
AB
2790 if (IS_ENABLED(CONFIG_64BIT) || !IS_ENABLED(CONFIG_64BIT_TIME))
2791 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2792 a[2], a[3],
2793 (struct __kernel_timespec __user *)a[4],
2794 NULL);
2795 else
2796 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2797 a[2], a[3], NULL,
2798 (struct old_timespec32 __user *)a[4]);
a2e27255 2799 break;
de11defe 2800 case SYS_ACCEPT4:
4541e805
DB
2801 err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2802 (int __user *)a[2], a[3]);
aaca0bdc 2803 break;
89bddce5
SH
2804 default:
2805 err = -EINVAL;
2806 break;
1da177e4
LT
2807 }
2808 return err;
2809}
2810
89bddce5 2811#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2812
55737fda
SH
2813/**
2814 * sock_register - add a socket protocol handler
2815 * @ops: description of protocol
2816 *
1da177e4
LT
2817 * This function is called by a protocol handler that wants to
2818 * advertise its address family, and have it linked into the
e793c0f7 2819 * socket interface. The value ops->family corresponds to the
55737fda 2820 * socket system call protocol family.
1da177e4 2821 */
f0fd27d4 2822int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2823{
2824 int err;
2825
2826 if (ops->family >= NPROTO) {
3410f22e 2827 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1da177e4
LT
2828 return -ENOBUFS;
2829 }
55737fda
SH
2830
2831 spin_lock(&net_family_lock);
190683a9
ED
2832 if (rcu_dereference_protected(net_families[ops->family],
2833 lockdep_is_held(&net_family_lock)))
55737fda
SH
2834 err = -EEXIST;
2835 else {
cf778b00 2836 rcu_assign_pointer(net_families[ops->family], ops);
1da177e4
LT
2837 err = 0;
2838 }
55737fda
SH
2839 spin_unlock(&net_family_lock);
2840
3410f22e 2841 pr_info("NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2842 return err;
2843}
c6d409cf 2844EXPORT_SYMBOL(sock_register);
1da177e4 2845
55737fda
SH
2846/**
2847 * sock_unregister - remove a protocol handler
2848 * @family: protocol family to remove
2849 *
1da177e4
LT
2850 * This function is called by a protocol handler that wants to
2851 * remove its address family, and have it unlinked from the
55737fda
SH
2852 * new socket creation.
2853 *
2854 * If protocol handler is a module, then it can use module reference
2855 * counts to protect against new references. If protocol handler is not
2856 * a module then it needs to provide its own protection in
2857 * the ops->create routine.
1da177e4 2858 */
f0fd27d4 2859void sock_unregister(int family)
1da177e4 2860{
f0fd27d4 2861 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2862
55737fda 2863 spin_lock(&net_family_lock);
a9b3cd7f 2864 RCU_INIT_POINTER(net_families[family], NULL);
55737fda
SH
2865 spin_unlock(&net_family_lock);
2866
2867 synchronize_rcu();
2868
3410f22e 2869 pr_info("NET: Unregistered protocol family %d\n", family);
1da177e4 2870}
c6d409cf 2871EXPORT_SYMBOL(sock_unregister);
1da177e4 2872
bf2ae2e4
XL
2873bool sock_is_registered(int family)
2874{
66b51b0a 2875 return family < NPROTO && rcu_access_pointer(net_families[family]);
bf2ae2e4
XL
2876}
2877
77d76ea3 2878static int __init sock_init(void)
1da177e4 2879{
b3e19d92 2880 int err;
2ca794e5
EB
2881 /*
2882 * Initialize the network sysctl infrastructure.
2883 */
2884 err = net_sysctl_init();
2885 if (err)
2886 goto out;
b3e19d92 2887
1da177e4 2888 /*
89bddce5 2889 * Initialize skbuff SLAB cache
1da177e4
LT
2890 */
2891 skb_init();
1da177e4
LT
2892
2893 /*
89bddce5 2894 * Initialize the protocols module.
1da177e4
LT
2895 */
2896
2897 init_inodecache();
b3e19d92
NP
2898
2899 err = register_filesystem(&sock_fs_type);
2900 if (err)
2901 goto out_fs;
1da177e4 2902 sock_mnt = kern_mount(&sock_fs_type);
b3e19d92
NP
2903 if (IS_ERR(sock_mnt)) {
2904 err = PTR_ERR(sock_mnt);
2905 goto out_mount;
2906 }
77d76ea3
AK
2907
2908 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2909 */
2910
2911#ifdef CONFIG_NETFILTER
6d11cfdb
PNA
2912 err = netfilter_init();
2913 if (err)
2914 goto out;
1da177e4 2915#endif
cbeb321a 2916
408eccce 2917 ptp_classifier_init();
c1f19b51 2918
b3e19d92
NP
2919out:
2920 return err;
2921
2922out_mount:
2923 unregister_filesystem(&sock_fs_type);
2924out_fs:
2925 goto out;
1da177e4
LT
2926}
2927
77d76ea3
AK
2928core_initcall(sock_init); /* early initcall */
2929
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the "sockets: used N" line into @seq. The count comes from
 * sock_inuse_get(), called with the seq_file's private pointer.
 */
void socket_seq_show(struct seq_file *seq)
{
	int in_use = sock_inuse_get(seq->private);

	seq_printf(seq, "sockets: used %d\n", in_use);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2937
89bbfc95 2938#ifdef CONFIG_COMPAT
36fd633e 2939static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2940{
6b96018b 2941 struct compat_ifconf ifc32;
7a229387 2942 struct ifconf ifc;
7a229387
AB
2943 int err;
2944
6b96018b 2945 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2946 return -EFAULT;
2947
36fd633e
AV
2948 ifc.ifc_len = ifc32.ifc_len;
2949 ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
7a229387 2950
36fd633e
AV
2951 rtnl_lock();
2952 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
2953 rtnl_unlock();
7a229387
AB
2954 if (err)
2955 return err;
2956
36fd633e 2957 ifc32.ifc_len = ifc.ifc_len;
6b96018b 2958 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2959 return -EFAULT;
2960
2961 return 0;
2962}
2963
6b96018b 2964static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387 2965{
3a7da39d
BH
2966 struct compat_ethtool_rxnfc __user *compat_rxnfc;
2967 bool convert_in = false, convert_out = false;
44c02a2c
AV
2968 size_t buf_size = 0;
2969 struct ethtool_rxnfc __user *rxnfc = NULL;
2970 struct ifreq ifr;
3a7da39d
BH
2971 u32 rule_cnt = 0, actual_rule_cnt;
2972 u32 ethcmd;
7a229387 2973 u32 data;
3a7da39d 2974 int ret;
7a229387 2975
3a7da39d
BH
2976 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2977 return -EFAULT;
7a229387 2978
3a7da39d
BH
2979 compat_rxnfc = compat_ptr(data);
2980
2981 if (get_user(ethcmd, &compat_rxnfc->cmd))
7a229387
AB
2982 return -EFAULT;
2983
3a7da39d
BH
2984 /* Most ethtool structures are defined without padding.
2985 * Unfortunately struct ethtool_rxnfc is an exception.
2986 */
2987 switch (ethcmd) {
2988 default:
2989 break;
2990 case ETHTOOL_GRXCLSRLALL:
2991 /* Buffer size is variable */
2992 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
2993 return -EFAULT;
2994 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
2995 return -ENOMEM;
2996 buf_size += rule_cnt * sizeof(u32);
2997 /* fall through */
2998 case ETHTOOL_GRXRINGS:
2999 case ETHTOOL_GRXCLSRLCNT:
3000 case ETHTOOL_GRXCLSRULE:
55664f32 3001 case ETHTOOL_SRXCLSRLINS:
3a7da39d
BH
3002 convert_out = true;
3003 /* fall through */
3004 case ETHTOOL_SRXCLSRLDEL:
3a7da39d
BH
3005 buf_size += sizeof(struct ethtool_rxnfc);
3006 convert_in = true;
44c02a2c 3007 rxnfc = compat_alloc_user_space(buf_size);
3a7da39d
BH
3008 break;
3009 }
3010
44c02a2c 3011 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
7a229387
AB
3012 return -EFAULT;
3013
44c02a2c 3014 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
7a229387 3015
3a7da39d 3016 if (convert_in) {
127fe533 3017 /* We expect there to be holes between fs.m_ext and
3a7da39d
BH
3018 * fs.ring_cookie and at the end of fs, but nowhere else.
3019 */
127fe533
AD
3020 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
3021 sizeof(compat_rxnfc->fs.m_ext) !=
3022 offsetof(struct ethtool_rxnfc, fs.m_ext) +
3023 sizeof(rxnfc->fs.m_ext));
3a7da39d
BH
3024 BUILD_BUG_ON(
3025 offsetof(struct compat_ethtool_rxnfc, fs.location) -
3026 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
3027 offsetof(struct ethtool_rxnfc, fs.location) -
3028 offsetof(struct ethtool_rxnfc, fs.ring_cookie));
3029
3030 if (copy_in_user(rxnfc, compat_rxnfc,
954b1244
SH
3031 (void __user *)(&rxnfc->fs.m_ext + 1) -
3032 (void __user *)rxnfc) ||
3a7da39d
BH
3033 copy_in_user(&rxnfc->fs.ring_cookie,
3034 &compat_rxnfc->fs.ring_cookie,
954b1244 3035 (void __user *)(&rxnfc->fs.location + 1) -
b6168562
WW
3036 (void __user *)&rxnfc->fs.ring_cookie))
3037 return -EFAULT;
3038 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3039 if (put_user(rule_cnt, &rxnfc->rule_cnt))
3040 return -EFAULT;
3041 } else if (copy_in_user(&rxnfc->rule_cnt,
3042 &compat_rxnfc->rule_cnt,
3043 sizeof(rxnfc->rule_cnt)))
3a7da39d
BH
3044 return -EFAULT;
3045 }
3046
44c02a2c 3047 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
3a7da39d
BH
3048 if (ret)
3049 return ret;
3050
3051 if (convert_out) {
3052 if (copy_in_user(compat_rxnfc, rxnfc,
954b1244
SH
3053 (const void __user *)(&rxnfc->fs.m_ext + 1) -
3054 (const void __user *)rxnfc) ||
3a7da39d
BH
3055 copy_in_user(&compat_rxnfc->fs.ring_cookie,
3056 &rxnfc->fs.ring_cookie,
954b1244
SH
3057 (const void __user *)(&rxnfc->fs.location + 1) -
3058 (const void __user *)&rxnfc->fs.ring_cookie) ||
3a7da39d
BH
3059 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
3060 sizeof(rxnfc->rule_cnt)))
3061 return -EFAULT;
3062
3063 if (ethcmd == ETHTOOL_GRXCLSRLALL) {
3064 /* As an optimisation, we only copy the actual
3065 * number of rules that the underlying
3066 * function returned. Since Mallory might
3067 * change the rule count in user memory, we
3068 * check that it is less than the rule count
3069 * originally given (as the user buffer size),
3070 * which has been range-checked.
3071 */
3072 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
3073 return -EFAULT;
3074 if (actual_rule_cnt < rule_cnt)
3075 rule_cnt = actual_rule_cnt;
3076 if (copy_in_user(&compat_rxnfc->rule_locs[0],
3077 &rxnfc->rule_locs[0],
3078 rule_cnt * sizeof(u32)))
3079 return -EFAULT;
3080 }
3081 }
3082
3083 return 0;
7a229387
AB
3084}
3085
7a50a240
AB
3086static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
3087{
7a50a240 3088 compat_uptr_t uptr32;
44c02a2c
AV
3089 struct ifreq ifr;
3090 void __user *saved;
3091 int err;
7a50a240 3092
44c02a2c 3093 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
7a50a240
AB
3094 return -EFAULT;
3095
3096 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
3097 return -EFAULT;
3098
44c02a2c
AV
3099 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
3100 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
7a229387 3101
44c02a2c
AV
3102 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
3103 if (!err) {
3104 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3105 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
3106 err = -EFAULT;
ccbd6a5a 3107 }
44c02a2c 3108 return err;
7a229387
AB
3109}
3110
590d4693
BH
3111/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3112static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
6b96018b 3113 struct compat_ifreq __user *u_ifreq32)
7a229387 3114{
44c02a2c 3115 struct ifreq ifreq;
7a229387
AB
3116 u32 data32;
3117
44c02a2c 3118 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
7a229387 3119 return -EFAULT;
44c02a2c 3120 if (get_user(data32, &u_ifreq32->ifr_data))
7a229387 3121 return -EFAULT;
44c02a2c 3122 ifreq.ifr_data = compat_ptr(data32);
7a229387 3123
44c02a2c 3124 return dev_ioctl(net, cmd, &ifreq, NULL);
7a229387
AB
3125}
3126
37ac39bd
JB
3127static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
3128 unsigned int cmd,
3129 struct compat_ifreq __user *uifr32)
3130{
3131 struct ifreq __user *uifr;
3132 int err;
3133
3134 /* Handle the fact that while struct ifreq has the same *layout* on
3135 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3136 * which are handled elsewhere, it still has different *size* due to
3137 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3138 * resulting in struct ifreq being 32 and 40 bytes respectively).
3139 * As a result, if the struct happens to be at the end of a page and
3140 * the next page isn't readable/writable, we get a fault. To prevent
3141 * that, copy back and forth to the full size.
3142 */
3143
3144 uifr = compat_alloc_user_space(sizeof(*uifr));
3145 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
3146 return -EFAULT;
3147
3148 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
3149
3150 if (!err) {
3151 switch (cmd) {
3152 case SIOCGIFFLAGS:
3153 case SIOCGIFMETRIC:
3154 case SIOCGIFMTU:
3155 case SIOCGIFMEM:
3156 case SIOCGIFHWADDR:
3157 case SIOCGIFINDEX:
3158 case SIOCGIFADDR:
3159 case SIOCGIFBRDADDR:
3160 case SIOCGIFDSTADDR:
3161 case SIOCGIFNETMASK:
3162 case SIOCGIFPFLAGS:
3163 case SIOCGIFTXQLEN:
3164 case SIOCGMIIPHY:
3165 case SIOCGMIIREG:
c6c9fee3 3166 case SIOCGIFNAME:
37ac39bd
JB
3167 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
3168 err = -EFAULT;
3169 break;
3170 }
3171 }
3172 return err;
3173}
3174
a2116ed2
AB
3175static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
3176 struct compat_ifreq __user *uifr32)
3177{
3178 struct ifreq ifr;
3179 struct compat_ifmap __user *uifmap32;
a2116ed2
AB
3180 int err;
3181
3182 uifmap32 = &uifr32->ifr_ifru.ifru_map;
3183 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
3ddc5b46
MD
3184 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3185 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3186 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3187 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
3188 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
3189 err |= get_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3190 if (err)
3191 return -EFAULT;
3192
44c02a2c 3193 err = dev_ioctl(net, cmd, &ifr, NULL);
a2116ed2
AB
3194
3195 if (cmd == SIOCGIFMAP && !err) {
3196 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
3ddc5b46
MD
3197 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
3198 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
3199 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
3200 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
3201 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
3202 err |= put_user(ifr.ifr_map.port, &uifmap32->port);
a2116ed2
AB
3203 if (err)
3204 err = -EFAULT;
3205 }
3206 return err;
3207}
3208
/*
 * Compat user-space layout that routing_ioctl() reads field by field into
 * a native struct rtentry.  The field order and sizes define the user
 * ABI and must not be changed.
 */
struct rtentry32 {
	u32		rt_pad1;
	struct sockaddr rt_dst;         /* target address               */
	struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
	struct sockaddr rt_genmask;     /* target network mask (IP)     */
	unsigned short	rt_flags;
	short		rt_pad2;
	u32		rt_pad3;
	unsigned char	rt_tos;
	unsigned char	rt_class;
	short		rt_pad4;
	short		rt_metric;      /* +1 for binary compatibility! */
	/* 32-bit user pointer; routing_ioctl() widens it with compat_ptr() */
	/* char * */ u32 rt_dev;        /* forcing the device at add    */
	u32		rt_mtu;         /* per route MTU/Window         */
	u32		rt_window;      /* Window clamping              */
	unsigned short  rt_irtt;        /* Initial RTT                  */
};
3226
/*
 * Compat user-space layout that routing_ioctl() reads into a native
 * struct in6_rtmsg for AF_INET6 sockets.  The three leading addresses are
 * copied as one contiguous 3 * sizeof(struct in6_addr) region, so they
 * must stay adjacent and in this order.
 */
struct in6_rtmsg32 {
	struct in6_addr		rtmsg_dst;
	struct in6_addr		rtmsg_src;
	struct in6_addr		rtmsg_gateway;
	u32			rtmsg_type;
	u16			rtmsg_dst_len;
	u16			rtmsg_src_len;
	u32			rtmsg_metric;
	u32			rtmsg_info;
	u32			rtmsg_flags;
	s32			rtmsg_ifindex;
};
3239
6b96018b
AB
3240static int routing_ioctl(struct net *net, struct socket *sock,
3241 unsigned int cmd, void __user *argp)
7a229387
AB
3242{
3243 int ret;
3244 void *r = NULL;
3245 struct in6_rtmsg r6;
3246 struct rtentry r4;
3247 char devname[16];
3248 u32 rtdev;
3249 mm_segment_t old_fs = get_fs();
3250
6b96018b
AB
3251 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
3252 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 3253 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 3254 3 * sizeof(struct in6_addr));
3ddc5b46
MD
3255 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
3256 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
3257 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
3258 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
3259 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
3260 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
3261 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
3262
3263 r = (void *) &r6;
3264 } else { /* ipv4 */
6b96018b 3265 struct rtentry32 __user *ur4 = argp;
c6d409cf 3266 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 3267 3 * sizeof(struct sockaddr));
3ddc5b46
MD
3268 ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
3269 ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
3270 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
3271 ret |= get_user(r4.rt_window, &(ur4->rt_window));
3272 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
3273 ret |= get_user(rtdev, &(ur4->rt_dev));
7a229387 3274 if (rtdev) {
c6d409cf 3275 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
c3f52ae6 3276 r4.rt_dev = (char __user __force *)devname;
3277 devname[15] = 0;
7a229387
AB
3278 } else
3279 r4.rt_dev = NULL;
3280
3281 r = (void *) &r4;
3282 }
3283
3284 if (ret) {
3285 ret = -EFAULT;
3286 goto out;
3287 }
3288
c6d409cf 3289 set_fs(KERNEL_DS);
63ff03ab 3290 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 3291 set_fs(old_fs);
7a229387
AB
3292
3293out:
7a229387
AB
3294 return ret;
3295}
3296
3297/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
3298 * for some operations; this forces use of the newer bridge-utils that
25985edc 3299 * use compatible ioctls
7a229387 3300 */
6b96018b 3301static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 3302{
6b96018b 3303 compat_ulong_t tmp;
7a229387 3304
6b96018b 3305 if (get_user(tmp, argp))
7a229387
AB
3306 return -EFAULT;
3307 if (tmp == BRCTL_GET_VERSION)
3308 return BRCTL_VERSION + 1;
3309 return -EINVAL;
3310}
3311
6b96018b
AB
3312static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
3313 unsigned int cmd, unsigned long arg)
3314{
3315 void __user *argp = compat_ptr(arg);
3316 struct sock *sk = sock->sk;
3317 struct net *net = sock_net(sk);
7a229387 3318
6b96018b 3319 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
590d4693 3320 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3321
3322 switch (cmd) {
3323 case SIOCSIFBR:
3324 case SIOCGIFBR:
3325 return old_bridge_ioctl(argp);
6b96018b 3326 case SIOCGIFCONF:
36fd633e 3327 return compat_dev_ifconf(net, argp);
6b96018b
AB
3328 case SIOCETHTOOL:
3329 return ethtool_ioctl(net, argp);
7a50a240
AB
3330 case SIOCWANDEV:
3331 return compat_siocwandev(net, argp);
a2116ed2
AB
3332 case SIOCGIFMAP:
3333 case SIOCSIFMAP:
3334 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
3335 case SIOCADDRT:
3336 case SIOCDELRT:
3337 return routing_ioctl(net, sock, cmd, argp);
0768e170
AB
3338 case SIOCGSTAMP_OLD:
3339 case SIOCGSTAMPNS_OLD:
c7cbdbf2
AB
3340 if (!sock->ops->gettstamp)
3341 return -ENOIOCTLCMD;
0768e170 3342 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
c7cbdbf2
AB
3343 !COMPAT_USE_64BIT_TIME);
3344
590d4693
BH
3345 case SIOCBONDSLAVEINFOQUERY:
3346 case SIOCBONDINFOQUERY:
a2116ed2 3347 case SIOCSHWTSTAMP:
fd468c74 3348 case SIOCGHWTSTAMP:
590d4693 3349 return compat_ifr_data_ioctl(net, cmd, argp);
6b96018b
AB
3350
3351 case FIOSETOWN:
3352 case SIOCSPGRP:
3353 case FIOGETOWN:
3354 case SIOCGPGRP:
3355 case SIOCBRADDBR:
3356 case SIOCBRDELBR:
3357 case SIOCGIFVLAN:
3358 case SIOCSIFVLAN:
3359 case SIOCADDDLCI:
3360 case SIOCDELDLCI:
c62cce2c 3361 case SIOCGSKNS:
0768e170
AB
3362 case SIOCGSTAMP_NEW:
3363 case SIOCGSTAMPNS_NEW:
6b96018b
AB
3364 return sock_ioctl(file, cmd, arg);
3365
3366 case SIOCGIFFLAGS:
3367 case SIOCSIFFLAGS:
3368 case SIOCGIFMETRIC:
3369 case SIOCSIFMETRIC:
3370 case SIOCGIFMTU:
3371 case SIOCSIFMTU:
3372 case SIOCGIFMEM:
3373 case SIOCSIFMEM:
3374 case SIOCGIFHWADDR:
3375 case SIOCSIFHWADDR:
3376 case SIOCADDMULTI:
3377 case SIOCDELMULTI:
3378 case SIOCGIFINDEX:
6b96018b
AB
3379 case SIOCGIFADDR:
3380 case SIOCSIFADDR:
3381 case SIOCSIFHWBROADCAST:
6b96018b 3382 case SIOCDIFADDR:
6b96018b
AB
3383 case SIOCGIFBRDADDR:
3384 case SIOCSIFBRDADDR:
3385 case SIOCGIFDSTADDR:
3386 case SIOCSIFDSTADDR:
3387 case SIOCGIFNETMASK:
3388 case SIOCSIFNETMASK:
3389 case SIOCSIFPFLAGS:
3390 case SIOCGIFPFLAGS:
3391 case SIOCGIFTXQLEN:
3392 case SIOCSIFTXQLEN:
3393 case SIOCBRADDIF:
3394 case SIOCBRDELIF:
c6c9fee3 3395 case SIOCGIFNAME:
9177efd3
AB
3396 case SIOCSIFNAME:
3397 case SIOCGMIIPHY:
3398 case SIOCGMIIREG:
3399 case SIOCSMIIREG:
f92d4fc9
AV
3400 case SIOCBONDENSLAVE:
3401 case SIOCBONDRELEASE:
3402 case SIOCBONDSETHWADDR:
3403 case SIOCBONDCHANGEACTIVE:
37ac39bd
JB
3404 return compat_ifreq_ioctl(net, sock, cmd, argp);
3405
6b96018b
AB
3406 case SIOCSARP:
3407 case SIOCGARP:
3408 case SIOCDARP:
6b96018b 3409 case SIOCATMARK:
63ff03ab 3410 return sock_do_ioctl(net, sock, cmd, arg);
9177efd3
AB
3411 }
3412
6b96018b
AB
3413 return -ENOIOCTLCMD;
3414}
7a229387 3415
95c96174 3416static long compat_sock_ioctl(struct file *file, unsigned int cmd,
89bddce5 3417 unsigned long arg)
89bbfc95
SP
3418{
3419 struct socket *sock = file->private_data;
3420 int ret = -ENOIOCTLCMD;
87de87d5
DM
3421 struct sock *sk;
3422 struct net *net;
3423
3424 sk = sock->sk;
3425 net = sock_net(sk);
89bbfc95
SP
3426
3427 if (sock->ops->compat_ioctl)
3428 ret = sock->ops->compat_ioctl(sock, cmd, arg);
3429
87de87d5
DM
3430 if (ret == -ENOIOCTLCMD &&
3431 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
3432 ret = compat_wext_handle_ioctl(net, cmd, arg);
3433
6b96018b
AB
3434 if (ret == -ENOIOCTLCMD)
3435 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
3436
89bbfc95
SP
3437 return ret;
3438}
3439#endif
3440
8a3c245c
PT
3441/**
3442 * kernel_bind - bind an address to a socket (kernel space)
3443 * @sock: socket
3444 * @addr: address
3445 * @addrlen: length of address
3446 *
3447 * Returns 0 or an error.
3448 */
3449
ac5a488e
SS
3450int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3451{
3452 return sock->ops->bind(sock, addr, addrlen);
3453}
c6d409cf 3454EXPORT_SYMBOL(kernel_bind);
ac5a488e 3455
8a3c245c
PT
3456/**
3457 * kernel_listen - move socket to listening state (kernel space)
3458 * @sock: socket
3459 * @backlog: pending connections queue size
3460 *
3461 * Returns 0 or an error.
3462 */
3463
ac5a488e
SS
3464int kernel_listen(struct socket *sock, int backlog)
3465{
3466 return sock->ops->listen(sock, backlog);
3467}
c6d409cf 3468EXPORT_SYMBOL(kernel_listen);
ac5a488e 3469
8a3c245c
PT
3470/**
3471 * kernel_accept - accept a connection (kernel space)
3472 * @sock: listening socket
3473 * @newsock: new connected socket
3474 * @flags: flags
3475 *
3476 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
3477 * If it fails, @newsock is guaranteed to be %NULL.
3478 * Returns 0 or an error.
3479 */
3480
ac5a488e
SS
3481int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3482{
3483 struct sock *sk = sock->sk;
3484 int err;
3485
3486 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3487 newsock);
3488 if (err < 0)
3489 goto done;
3490
cdfbabfb 3491 err = sock->ops->accept(sock, *newsock, flags, true);
ac5a488e
SS
3492 if (err < 0) {
3493 sock_release(*newsock);
fa8705b0 3494 *newsock = NULL;
ac5a488e
SS
3495 goto done;
3496 }
3497
3498 (*newsock)->ops = sock->ops;
1b08534e 3499 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3500
3501done:
3502 return err;
3503}
c6d409cf 3504EXPORT_SYMBOL(kernel_accept);
ac5a488e 3505
8a3c245c
PT
3506/**
3507 * kernel_connect - connect a socket (kernel space)
3508 * @sock: socket
3509 * @addr: address
3510 * @addrlen: address length
3511 * @flags: flags (O_NONBLOCK, ...)
3512 *
3513 * For datagram sockets, @addr is the addres to which datagrams are sent
3514 * by default, and the only address from which datagrams are received.
3515 * For stream sockets, attempts to connect to @addr.
3516 * Returns 0 or an error code.
3517 */
3518
ac5a488e 3519int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3520 int flags)
ac5a488e
SS
3521{
3522 return sock->ops->connect(sock, addr, addrlen, flags);
3523}
c6d409cf 3524EXPORT_SYMBOL(kernel_connect);
ac5a488e 3525
8a3c245c
PT
3526/**
3527 * kernel_getsockname - get the address which the socket is bound (kernel space)
3528 * @sock: socket
3529 * @addr: address holder
3530 *
3531 * Fills the @addr pointer with the address which the socket is bound.
3532 * Returns 0 or an error code.
3533 */
3534
9b2c45d4 3535int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
ac5a488e 3536{
9b2c45d4 3537 return sock->ops->getname(sock, addr, 0);
ac5a488e 3538}
c6d409cf 3539EXPORT_SYMBOL(kernel_getsockname);
ac5a488e 3540
8a3c245c
PT
3541/**
3542 * kernel_peername - get the address which the socket is connected (kernel space)
3543 * @sock: socket
3544 * @addr: address holder
3545 *
3546 * Fills the @addr pointer with the address which the socket is connected.
3547 * Returns 0 or an error code.
3548 */
3549
9b2c45d4 3550int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
ac5a488e 3551{
9b2c45d4 3552 return sock->ops->getname(sock, addr, 1);
ac5a488e 3553}
c6d409cf 3554EXPORT_SYMBOL(kernel_getpeername);
ac5a488e 3555
8a3c245c
PT
3556/**
3557 * kernel_getsockopt - get a socket option (kernel space)
3558 * @sock: socket
3559 * @level: API level (SOL_SOCKET, ...)
3560 * @optname: option tag
3561 * @optval: option value
3562 * @optlen: option length
3563 *
3564 * Assigns the option length to @optlen.
3565 * Returns 0 or an error.
3566 */
3567
ac5a488e
SS
3568int kernel_getsockopt(struct socket *sock, int level, int optname,
3569 char *optval, int *optlen)
3570{
3571 mm_segment_t oldfs = get_fs();
fb8621bb
NK
3572 char __user *uoptval;
3573 int __user *uoptlen;
ac5a488e
SS
3574 int err;
3575
fb8621bb
NK
3576 uoptval = (char __user __force *) optval;
3577 uoptlen = (int __user __force *) optlen;
3578
ac5a488e
SS
3579 set_fs(KERNEL_DS);
3580 if (level == SOL_SOCKET)
fb8621bb 3581 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
ac5a488e 3582 else
fb8621bb
NK
3583 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3584 uoptlen);
ac5a488e
SS
3585 set_fs(oldfs);
3586 return err;
3587}
c6d409cf 3588EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e 3589
8a3c245c
PT
3590/**
3591 * kernel_setsockopt - set a socket option (kernel space)
3592 * @sock: socket
3593 * @level: API level (SOL_SOCKET, ...)
3594 * @optname: option tag
3595 * @optval: option value
3596 * @optlen: option length
3597 *
3598 * Returns 0 or an error.
3599 */
3600
ac5a488e 3601int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3602 char *optval, unsigned int optlen)
ac5a488e
SS
3603{
3604 mm_segment_t oldfs = get_fs();
fb8621bb 3605 char __user *uoptval;
ac5a488e
SS
3606 int err;
3607
fb8621bb
NK
3608 uoptval = (char __user __force *) optval;
3609
ac5a488e
SS
3610 set_fs(KERNEL_DS);
3611 if (level == SOL_SOCKET)
fb8621bb 3612 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
ac5a488e 3613 else
fb8621bb 3614 err = sock->ops->setsockopt(sock, level, optname, uoptval,
ac5a488e
SS
3615 optlen);
3616 set_fs(oldfs);
3617 return err;
3618}
c6d409cf 3619EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e 3620
8a3c245c
PT
3621/**
3622 * kernel_sendpage - send a &page through a socket (kernel space)
3623 * @sock: socket
3624 * @page: page
3625 * @offset: page offset
3626 * @size: total size in bytes
3627 * @flags: flags (MSG_DONTWAIT, ...)
3628 *
3629 * Returns the total amount sent in bytes or an error.
3630 */
3631
ac5a488e
SS
3632int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3633 size_t size, int flags)
3634{
3635 if (sock->ops->sendpage)
3636 return sock->ops->sendpage(sock, page, offset, size, flags);
3637
3638 return sock_no_sendpage(sock, page, offset, size, flags);
3639}
c6d409cf 3640EXPORT_SYMBOL(kernel_sendpage);
ac5a488e 3641
8a3c245c
PT
3642/**
3643 * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
3644 * @sk: sock
3645 * @page: page
3646 * @offset: page offset
3647 * @size: total size in bytes
3648 * @flags: flags (MSG_DONTWAIT, ...)
3649 *
3650 * Returns the total amount sent in bytes or an error.
3651 * Caller must hold @sk.
3652 */
3653
306b13eb
TH
3654int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3655 size_t size, int flags)
3656{
3657 struct socket *sock = sk->sk_socket;
3658
3659 if (sock->ops->sendpage_locked)
3660 return sock->ops->sendpage_locked(sk, page, offset, size,
3661 flags);
3662
3663 return sock_no_sendpage_locked(sk, page, offset, size, flags);
3664}
3665EXPORT_SYMBOL(kernel_sendpage_locked);
3666
8a3c245c
PT
3667/**
3668 * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
3669 * @sock: socket
3670 * @how: connection part
3671 *
3672 * Returns 0 or an error.
3673 */
3674
91cf45f0
TM
3675int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3676{
3677 return sock->ops->shutdown(sock, how);
3678}
91cf45f0 3679EXPORT_SYMBOL(kernel_sock_shutdown);
113c3075 3680
8a3c245c
PT
3681/**
3682 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
3683 * @sk: socket
3684 *
3685 * This routine returns the IP overhead imposed by a socket i.e.
3686 * the length of the underlying IP header, depending on whether
3687 * this is an IPv4 or IPv6 socket and the length from IP options turned
3688 * on at the socket. Assumes that the caller has a lock on the socket.
113c3075 3689 */
8a3c245c 3690
113c3075
P
3691u32 kernel_sock_ip_overhead(struct sock *sk)
3692{
3693 struct inet_sock *inet;
3694 struct ip_options_rcu *opt;
3695 u32 overhead = 0;
113c3075
P
3696#if IS_ENABLED(CONFIG_IPV6)
3697 struct ipv6_pinfo *np;
3698 struct ipv6_txoptions *optv6 = NULL;
3699#endif /* IS_ENABLED(CONFIG_IPV6) */
3700
3701 if (!sk)
3702 return overhead;
3703
113c3075
P
3704 switch (sk->sk_family) {
3705 case AF_INET:
3706 inet = inet_sk(sk);
3707 overhead += sizeof(struct iphdr);
3708 opt = rcu_dereference_protected(inet->inet_opt,
614d79c0 3709 sock_owned_by_user(sk));
113c3075
P
3710 if (opt)
3711 overhead += opt->opt.optlen;
3712 return overhead;
3713#if IS_ENABLED(CONFIG_IPV6)
3714 case AF_INET6:
3715 np = inet6_sk(sk);
3716 overhead += sizeof(struct ipv6hdr);
3717 if (np)
3718 optv6 = rcu_dereference_protected(np->opt,
614d79c0 3719 sock_owned_by_user(sk));
113c3075
P
3720 if (optv6)
3721 overhead += (optv6->opt_flen + optv6->opt_nflen);
3722 return overhead;
3723#endif /* IS_ENABLED(CONFIG_IPV6) */
3724 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3725 return overhead;
3726 }
3727}
3728EXPORT_SYMBOL(kernel_sock_ip_overhead);