]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/socket.c
[NET]: Implement network device movement between namespaces
[mirror_ubuntu-bionic-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1b8d7ae4 87#include <linux/nsproxy.h>
1da177e4
LT
88
89#include <asm/uaccess.h>
90#include <asm/unistd.h>
91
92#include <net/compat.h>
93
94#include <net/sock.h>
95#include <linux/netfilter.h>
96
97static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
98static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
99 unsigned long nr_segs, loff_t pos);
100static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
101 unsigned long nr_segs, loff_t pos);
89bddce5 102static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
103
104static int sock_close(struct inode *inode, struct file *file);
105static unsigned int sock_poll(struct file *file,
106 struct poll_table_struct *wait);
89bddce5 107static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
108#ifdef CONFIG_COMPAT
109static long compat_sock_ioctl(struct file *file,
89bddce5 110 unsigned int cmd, unsigned long arg);
89bbfc95 111#endif
1da177e4 112static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
113static ssize_t sock_sendpage(struct file *file, struct page *page,
114 int offset, size_t size, loff_t *ppos, int more);
115
1da177e4
LT
116/*
117 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
118 * in the operation structures but are done directly via the socketcall() multiplexor.
119 */
120
da7071d7 121static const struct file_operations socket_file_ops = {
1da177e4
LT
122 .owner = THIS_MODULE,
123 .llseek = no_llseek,
124 .aio_read = sock_aio_read,
125 .aio_write = sock_aio_write,
126 .poll = sock_poll,
127 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
128#ifdef CONFIG_COMPAT
129 .compat_ioctl = compat_sock_ioctl,
130#endif
1da177e4
LT
131 .mmap = sock_mmap,
132 .open = sock_no_open, /* special open code to disallow open via /proc */
133 .release = sock_close,
134 .fasync = sock_fasync,
5274f052
JA
135 .sendpage = sock_sendpage,
136 .splice_write = generic_splice_sendpage,
1da177e4
LT
137};
138
139/*
140 * The protocol list. Each protocol is registered in here.
141 */
142
1da177e4 143static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 144static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 145
1da177e4
LT
146/*
147 * Statistics counters of the socket lists
148 */
149
150static DEFINE_PER_CPU(int, sockets_in_use) = 0;
151
152/*
89bddce5
SH
153 * Support routines.
154 * Move socket addresses back and forth across the kernel/user
155 * divide and look after the messy bits.
1da177e4
LT
156 */
157
89bddce5 158#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
1da177e4
LT
159 16 for IP, 16 for IPX,
160 24 for IPv6,
89bddce5 161 about 80 for AX.25
1da177e4
LT
162 must be at least one bigger than
163 the AF_UNIX size (see net/unix/af_unix.c
89bddce5 164 :unix_mkname()).
1da177e4 165 */
89bddce5 166
1da177e4
LT
167/**
168 * move_addr_to_kernel - copy a socket address into kernel space
169 * @uaddr: Address in user space
170 * @kaddr: Address in kernel space
171 * @ulen: Length in user space
172 *
173 * The address is copied into kernel space. If the provided address is
174 * too long an error code of -EINVAL is returned. If the copy gives
175 * invalid addresses -EFAULT is returned. On a success 0 is returned.
176 */
177
178int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
179{
89bddce5 180 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 181 return -EINVAL;
89bddce5 182 if (ulen == 0)
1da177e4 183 return 0;
89bddce5 184 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 185 return -EFAULT;
3ec3b2fb 186 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
187}
188
189/**
190 * move_addr_to_user - copy an address to user space
191 * @kaddr: kernel space address
192 * @klen: length of address in kernel
193 * @uaddr: user space address
194 * @ulen: pointer to user length field
195 *
196 * The value pointed to by ulen on entry is the buffer length available.
197 * This is overwritten with the buffer space used. -EINVAL is returned
198 * if an overlong buffer is specified or a negative buffer size. -EFAULT
199 * is returned if either the buffer or the length field are not
200 * accessible.
201 * After copying the data up to the limit the user specifies, the true
202 * length of the data is written over the length limit the user
203 * specified. Zero is returned for a success.
204 */
89bddce5
SH
205
206int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
207 int __user *ulen)
1da177e4
LT
208{
209 int err;
210 int len;
211
89bddce5
SH
212 err = get_user(len, ulen);
213 if (err)
1da177e4 214 return err;
89bddce5
SH
215 if (len > klen)
216 len = klen;
217 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 218 return -EINVAL;
89bddce5 219 if (len) {
d6fe3945
SG
220 if (audit_sockaddr(klen, kaddr))
221 return -ENOMEM;
89bddce5 222 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
223 return -EFAULT;
224 }
225 /*
89bddce5
SH
226 * "fromlen shall refer to the value before truncation.."
227 * 1003.1g
1da177e4
LT
228 */
229 return __put_user(klen, ulen);
230}
231
232#define SOCKFS_MAGIC 0x534F434B
233
e18b890b 234static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
235
236static struct inode *sock_alloc_inode(struct super_block *sb)
237{
238 struct socket_alloc *ei;
89bddce5 239
e94b1766 240 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
241 if (!ei)
242 return NULL;
243 init_waitqueue_head(&ei->socket.wait);
89bddce5 244
1da177e4
LT
245 ei->socket.fasync_list = NULL;
246 ei->socket.state = SS_UNCONNECTED;
247 ei->socket.flags = 0;
248 ei->socket.ops = NULL;
249 ei->socket.sk = NULL;
250 ei->socket.file = NULL;
1da177e4
LT
251
252 return &ei->vfs_inode;
253}
254
255static void sock_destroy_inode(struct inode *inode)
256{
257 kmem_cache_free(sock_inode_cachep,
258 container_of(inode, struct socket_alloc, vfs_inode));
259}
260
e18b890b 261static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 262{
89bddce5 263 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 264
a35afb83 265 inode_init_once(&ei->vfs_inode);
1da177e4 266}
89bddce5 267
1da177e4
LT
268static int init_inodecache(void)
269{
270 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
271 sizeof(struct socket_alloc),
272 0,
273 (SLAB_HWCACHE_ALIGN |
274 SLAB_RECLAIM_ACCOUNT |
275 SLAB_MEM_SPREAD),
20c2df83 276 init_once);
1da177e4
LT
277 if (sock_inode_cachep == NULL)
278 return -ENOMEM;
279 return 0;
280}
281
282static struct super_operations sockfs_ops = {
283 .alloc_inode = sock_alloc_inode,
284 .destroy_inode =sock_destroy_inode,
285 .statfs = simple_statfs,
286};
287
454e2398 288static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
289 int flags, const char *dev_name, void *data,
290 struct vfsmount *mnt)
1da177e4 291{
454e2398
DH
292 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
293 mnt);
1da177e4
LT
294}
295
ba89966c 296static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
297
298static struct file_system_type sock_fs_type = {
299 .name = "sockfs",
300 .get_sb = sockfs_get_sb,
301 .kill_sb = kill_anon_super,
302};
89bddce5 303
1da177e4
LT
304static int sockfs_delete_dentry(struct dentry *dentry)
305{
304e61e6
ED
306 /*
307 * At creation time, we pretended this dentry was hashed
308 * (by clearing DCACHE_UNHASHED bit in d_flags)
309 * At delete time, we restore the truth : not hashed.
310 * (so that dput() can proceed correctly)
311 */
312 dentry->d_flags |= DCACHE_UNHASHED;
313 return 0;
1da177e4 314}
c23fbb6b
ED
315
316/*
317 * sockfs_dname() is called from d_path().
318 */
319static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
320{
321 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
322 dentry->d_inode->i_ino);
323}
324
1da177e4 325static struct dentry_operations sockfs_dentry_operations = {
89bddce5 326 .d_delete = sockfs_delete_dentry,
c23fbb6b 327 .d_dname = sockfs_dname,
1da177e4
LT
328};
329
330/*
331 * Obtains the first available file descriptor and sets it up for use.
332 *
39d8c1b6
DM
333 * These functions create file structures and maps them to fd space
334 * of the current process. On success it returns file descriptor
1da177e4
LT
335 * and file struct implicitly stored in sock->file.
336 * Note that another thread may close file descriptor before we return
337 * from this function. We use the fact that now we do not refer
338 * to socket after mapping. If one day we will need it, this
339 * function will increment ref. count on file by 1.
340 *
341 * In any case returned fd MAY BE not valid!
342 * This race condition is unavoidable
343 * with shared fd spaces, we cannot solve it inside kernel,
344 * but we take care of internal coherence yet.
345 */
346
39d8c1b6 347static int sock_alloc_fd(struct file **filep)
1da177e4
LT
348{
349 int fd;
1da177e4
LT
350
351 fd = get_unused_fd();
39d8c1b6 352 if (likely(fd >= 0)) {
1da177e4
LT
353 struct file *file = get_empty_filp();
354
39d8c1b6
DM
355 *filep = file;
356 if (unlikely(!file)) {
1da177e4 357 put_unused_fd(fd);
39d8c1b6 358 return -ENFILE;
1da177e4 359 }
39d8c1b6
DM
360 } else
361 *filep = NULL;
362 return fd;
363}
1da177e4 364
39d8c1b6
DM
365static int sock_attach_fd(struct socket *sock, struct file *file)
366{
c23fbb6b 367 struct qstr name = { .name = "" };
39d8c1b6 368
c23fbb6b 369 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
3126a42c 370 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
371 return -ENOMEM;
372
3126a42c 373 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
374 /*
375 * We dont want to push this dentry into global dentry hash table.
376 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
377 * This permits a working /proc/$pid/fd/XXX on sockets
378 */
3126a42c
JS
379 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
380 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
381 file->f_path.mnt = mntget(sock_mnt);
382 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
383
384 sock->file = file;
385 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
386 file->f_mode = FMODE_READ | FMODE_WRITE;
387 file->f_flags = O_RDWR;
388 file->f_pos = 0;
389 file->private_data = sock;
1da177e4 390
39d8c1b6
DM
391 return 0;
392}
393
/*
 * Allocate a file descriptor for @sock, attach the socket to a new file
 * and install that file in the descriptor table. Returns the descriptor
 * on success or a negative errno from sock_alloc_fd()/sock_attach_fd().
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd = sock_alloc_fd(&newfile);

	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		/* Undo both halves of sock_alloc_fd(). */
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
411
6cb153ca
BL
412static struct socket *sock_from_file(struct file *file, int *err)
413{
6cb153ca
BL
414 if (file->f_op == &socket_file_ops)
415 return file->private_data; /* set in sock_map_fd */
416
23bb80d2
ED
417 *err = -ENOTSOCK;
418 return NULL;
6cb153ca
BL
419}
420
1da177e4
LT
421/**
422 * sockfd_lookup - Go from a file number to its socket slot
423 * @fd: file handle
424 * @err: pointer to an error code return
425 *
426 * The file handle passed in is locked and the socket it is bound
427 * too is returned. If an error occurs the err pointer is overwritten
428 * with a negative errno code and NULL is returned. The function checks
429 * for both invalid handles and passing a handle which is not a socket.
430 *
431 * On a success the socket object pointer is returned.
432 */
433
434struct socket *sockfd_lookup(int fd, int *err)
435{
436 struct file *file;
1da177e4
LT
437 struct socket *sock;
438
89bddce5
SH
439 file = fget(fd);
440 if (!file) {
1da177e4
LT
441 *err = -EBADF;
442 return NULL;
443 }
89bddce5 444
6cb153ca
BL
445 sock = sock_from_file(file, err);
446 if (!sock)
1da177e4 447 fput(file);
6cb153ca
BL
448 return sock;
449}
1da177e4 450
6cb153ca
BL
451static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
452{
453 struct file *file;
454 struct socket *sock;
455
3672558c 456 *err = -EBADF;
6cb153ca
BL
457 file = fget_light(fd, fput_needed);
458 if (file) {
459 sock = sock_from_file(file, err);
460 if (sock)
461 return sock;
462 fput_light(file, *fput_needed);
1da177e4 463 }
6cb153ca 464 return NULL;
1da177e4
LT
465}
466
467/**
468 * sock_alloc - allocate a socket
89bddce5 469 *
1da177e4
LT
470 * Allocate a new inode and socket object. The two are bound together
471 * and initialised. The socket is then returned. If we are out of inodes
472 * NULL is returned.
473 */
474
475static struct socket *sock_alloc(void)
476{
89bddce5
SH
477 struct inode *inode;
478 struct socket *sock;
1da177e4
LT
479
480 inode = new_inode(sock_mnt->mnt_sb);
481 if (!inode)
482 return NULL;
483
484 sock = SOCKET_I(inode);
485
89bddce5 486 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
487 inode->i_uid = current->fsuid;
488 inode->i_gid = current->fsgid;
489
490 get_cpu_var(sockets_in_use)++;
491 put_cpu_var(sockets_in_use);
492 return sock;
493}
494
495/*
496 * In theory you can't get an open on this inode, but /proc provides
497 * a back door. Remember to keep it shut otherwise you'll let the
498 * creepy crawlies in.
499 */
89bddce5 500
1da177e4
LT
501static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
502{
503 return -ENXIO;
504}
505
4b6f5d20 506const struct file_operations bad_sock_fops = {
1da177e4
LT
507 .owner = THIS_MODULE,
508 .open = sock_no_open,
509};
510
511/**
512 * sock_release - close a socket
513 * @sock: socket to close
514 *
515 * The socket is released from the protocol stack if it has a release
516 * callback, and the inode is then released if the socket is bound to
89bddce5 517 * an inode not a file.
1da177e4 518 */
89bddce5 519
1da177e4
LT
520void sock_release(struct socket *sock)
521{
522 if (sock->ops) {
523 struct module *owner = sock->ops->owner;
524
525 sock->ops->release(sock);
526 sock->ops = NULL;
527 module_put(owner);
528 }
529
530 if (sock->fasync_list)
531 printk(KERN_ERR "sock_release: fasync list not empty!\n");
532
533 get_cpu_var(sockets_in_use)--;
534 put_cpu_var(sockets_in_use);
535 if (!sock->file) {
536 iput(SOCK_INODE(sock));
537 return;
538 }
89bddce5 539 sock->file = NULL;
1da177e4
LT
540}
541
89bddce5 542static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
543 struct msghdr *msg, size_t size)
544{
545 struct sock_iocb *si = kiocb_to_siocb(iocb);
546 int err;
547
548 si->sock = sock;
549 si->scm = NULL;
550 si->msg = msg;
551 si->size = size;
552
553 err = security_socket_sendmsg(sock, msg, size);
554 if (err)
555 return err;
556
557 return sock->ops->sendmsg(iocb, sock, msg, size);
558}
559
560int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
561{
562 struct kiocb iocb;
563 struct sock_iocb siocb;
564 int ret;
565
566 init_sync_kiocb(&iocb, NULL);
567 iocb.private = &siocb;
568 ret = __sock_sendmsg(&iocb, sock, msg, size);
569 if (-EIOCBQUEUED == ret)
570 ret = wait_on_sync_kiocb(&iocb);
571 return ret;
572}
573
574int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
575 struct kvec *vec, size_t num, size_t size)
576{
577 mm_segment_t oldfs = get_fs();
578 int result;
579
580 set_fs(KERNEL_DS);
581 /*
582 * the following is safe, since for compiler definitions of kvec and
583 * iovec are identical, yielding the same in-core layout and alignment
584 */
89bddce5 585 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
586 msg->msg_iovlen = num;
587 result = sock_sendmsg(sock, msg, size);
588 set_fs(oldfs);
589 return result;
590}
591
92f37fd2
ED
592/*
593 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
594 */
595void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
596 struct sk_buff *skb)
597{
598 ktime_t kt = skb->tstamp;
599
600 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
601 struct timeval tv;
602 /* Race occurred between timestamp enabling and packet
603 receiving. Fill in the current time for now. */
604 if (kt.tv64 == 0)
605 kt = ktime_get_real();
606 skb->tstamp = kt;
607 tv = ktime_to_timeval(kt);
608 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
609 } else {
610 struct timespec ts;
611 /* Race occurred between timestamp enabling and packet
612 receiving. Fill in the current time for now. */
613 if (kt.tv64 == 0)
614 kt = ktime_get_real();
615 skb->tstamp = kt;
616 ts = ktime_to_timespec(kt);
617 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
618 }
619}
620
7c81fd8b
ACM
621EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
622
89bddce5 623static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
624 struct msghdr *msg, size_t size, int flags)
625{
626 int err;
627 struct sock_iocb *si = kiocb_to_siocb(iocb);
628
629 si->sock = sock;
630 si->scm = NULL;
631 si->msg = msg;
632 si->size = size;
633 si->flags = flags;
634
635 err = security_socket_recvmsg(sock, msg, size, flags);
636 if (err)
637 return err;
638
639 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
640}
641
89bddce5 642int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
643 size_t size, int flags)
644{
645 struct kiocb iocb;
646 struct sock_iocb siocb;
647 int ret;
648
89bddce5 649 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
650 iocb.private = &siocb;
651 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
652 if (-EIOCBQUEUED == ret)
653 ret = wait_on_sync_kiocb(&iocb);
654 return ret;
655}
656
89bddce5
SH
657int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
658 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
659{
660 mm_segment_t oldfs = get_fs();
661 int result;
662
663 set_fs(KERNEL_DS);
664 /*
665 * the following is safe, since for compiler definitions of kvec and
666 * iovec are identical, yielding the same in-core layout and alignment
667 */
89bddce5 668 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
669 result = sock_recvmsg(sock, msg, size, flags);
670 set_fs(oldfs);
671 return result;
672}
673
674static void sock_aio_dtor(struct kiocb *iocb)
675{
676 kfree(iocb->private);
677}
678
ce1d4d3e
CH
679static ssize_t sock_sendpage(struct file *file, struct page *page,
680 int offset, size_t size, loff_t *ppos, int more)
1da177e4 681{
1da177e4
LT
682 struct socket *sock;
683 int flags;
684
ce1d4d3e
CH
685 sock = file->private_data;
686
687 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
688 if (more)
689 flags |= MSG_MORE;
690
691 return sock->ops->sendpage(sock, page, offset, size, flags);
692}
1da177e4 693
ce1d4d3e 694static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 695 struct sock_iocb *siocb)
ce1d4d3e
CH
696{
697 if (!is_sync_kiocb(iocb)) {
698 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
699 if (!siocb)
700 return NULL;
1da177e4
LT
701 iocb->ki_dtor = sock_aio_dtor;
702 }
1da177e4 703
ce1d4d3e 704 siocb->kiocb = iocb;
ce1d4d3e
CH
705 iocb->private = siocb;
706 return siocb;
1da177e4
LT
707}
708
ce1d4d3e 709static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
710 struct file *file, const struct iovec *iov,
711 unsigned long nr_segs)
ce1d4d3e
CH
712{
713 struct socket *sock = file->private_data;
714 size_t size = 0;
715 int i;
1da177e4 716
89bddce5
SH
717 for (i = 0; i < nr_segs; i++)
718 size += iov[i].iov_len;
1da177e4 719
ce1d4d3e
CH
720 msg->msg_name = NULL;
721 msg->msg_namelen = 0;
722 msg->msg_control = NULL;
723 msg->msg_controllen = 0;
89bddce5 724 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
725 msg->msg_iovlen = nr_segs;
726 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
727
728 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
729}
730
027445c3
BP
731static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
732 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
733{
734 struct sock_iocb siocb, *x;
735
1da177e4
LT
736 if (pos != 0)
737 return -ESPIPE;
027445c3
BP
738
739 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
740 return 0;
741
027445c3
BP
742
743 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
744 if (!x)
745 return -ENOMEM;
027445c3 746 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
747}
748
ce1d4d3e 749static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
750 struct file *file, const struct iovec *iov,
751 unsigned long nr_segs)
1da177e4 752{
ce1d4d3e
CH
753 struct socket *sock = file->private_data;
754 size_t size = 0;
755 int i;
1da177e4 756
89bddce5
SH
757 for (i = 0; i < nr_segs; i++)
758 size += iov[i].iov_len;
1da177e4 759
ce1d4d3e
CH
760 msg->msg_name = NULL;
761 msg->msg_namelen = 0;
762 msg->msg_control = NULL;
763 msg->msg_controllen = 0;
89bddce5 764 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
765 msg->msg_iovlen = nr_segs;
766 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
767 if (sock->type == SOCK_SEQPACKET)
768 msg->msg_flags |= MSG_EOR;
1da177e4 769
ce1d4d3e 770 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
771}
772
027445c3
BP
773static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
774 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
775{
776 struct sock_iocb siocb, *x;
1da177e4 777
ce1d4d3e
CH
778 if (pos != 0)
779 return -ESPIPE;
027445c3 780
027445c3 781 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
782 if (!x)
783 return -ENOMEM;
1da177e4 784
027445c3 785 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
786}
787
1da177e4
LT
788/*
789 * Atomic setting of ioctl hooks to avoid race
790 * with module unload.
791 */
792
4a3e2f71 793static DEFINE_MUTEX(br_ioctl_mutex);
881d966b 794static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
1da177e4 795
881d966b 796void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 797{
4a3e2f71 798 mutex_lock(&br_ioctl_mutex);
1da177e4 799 br_ioctl_hook = hook;
4a3e2f71 800 mutex_unlock(&br_ioctl_mutex);
1da177e4 801}
89bddce5 802
1da177e4
LT
803EXPORT_SYMBOL(brioctl_set);
804
4a3e2f71 805static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 806static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 807
881d966b 808void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 809{
4a3e2f71 810 mutex_lock(&vlan_ioctl_mutex);
1da177e4 811 vlan_ioctl_hook = hook;
4a3e2f71 812 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 813}
89bddce5 814
1da177e4
LT
815EXPORT_SYMBOL(vlan_ioctl_set);
816
4a3e2f71 817static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 818static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 819
89bddce5 820void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 821{
4a3e2f71 822 mutex_lock(&dlci_ioctl_mutex);
1da177e4 823 dlci_ioctl_hook = hook;
4a3e2f71 824 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 825}
89bddce5 826
1da177e4
LT
827EXPORT_SYMBOL(dlci_ioctl_set);
828
829/*
830 * With an ioctl, arg may well be a user mode pointer, but we don't know
831 * what to do with it - that's up to the protocol still.
832 */
833
834static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
835{
836 struct socket *sock;
881d966b 837 struct sock *sk;
1da177e4
LT
838 void __user *argp = (void __user *)arg;
839 int pid, err;
881d966b 840 struct net *net;
1da177e4 841
b69aee04 842 sock = file->private_data;
881d966b
EB
843 sk = sock->sk;
844 net = sk->sk_net;
1da177e4 845 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 846 err = dev_ioctl(net, cmd, argp);
1da177e4 847 } else
d86b5e0e 848#ifdef CONFIG_WIRELESS_EXT
1da177e4 849 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 850 err = dev_ioctl(net, cmd, argp);
1da177e4 851 } else
89bddce5
SH
852#endif /* CONFIG_WIRELESS_EXT */
853 switch (cmd) {
1da177e4
LT
854 case FIOSETOWN:
855 case SIOCSPGRP:
856 err = -EFAULT;
857 if (get_user(pid, (int __user *)argp))
858 break;
859 err = f_setown(sock->file, pid, 1);
860 break;
861 case FIOGETOWN:
862 case SIOCGPGRP:
609d7fa9 863 err = put_user(f_getown(sock->file),
89bddce5 864 (int __user *)argp);
1da177e4
LT
865 break;
866 case SIOCGIFBR:
867 case SIOCSIFBR:
868 case SIOCBRADDBR:
869 case SIOCBRDELBR:
870 err = -ENOPKG;
871 if (!br_ioctl_hook)
872 request_module("bridge");
873
4a3e2f71 874 mutex_lock(&br_ioctl_mutex);
89bddce5 875 if (br_ioctl_hook)
881d966b 876 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 877 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
878 break;
879 case SIOCGIFVLAN:
880 case SIOCSIFVLAN:
881 err = -ENOPKG;
882 if (!vlan_ioctl_hook)
883 request_module("8021q");
884
4a3e2f71 885 mutex_lock(&vlan_ioctl_mutex);
1da177e4 886 if (vlan_ioctl_hook)
881d966b 887 err = vlan_ioctl_hook(net, argp);
4a3e2f71 888 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 889 break;
1da177e4
LT
890 case SIOCADDDLCI:
891 case SIOCDELDLCI:
892 err = -ENOPKG;
893 if (!dlci_ioctl_hook)
894 request_module("dlci");
895
896 if (dlci_ioctl_hook) {
4a3e2f71 897 mutex_lock(&dlci_ioctl_mutex);
1da177e4 898 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 899 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
900 }
901 break;
902 default:
903 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
904
905 /*
906 * If this ioctl is unknown try to hand it down
907 * to the NIC driver.
908 */
909 if (err == -ENOIOCTLCMD)
881d966b 910 err = dev_ioctl(net, cmd, argp);
1da177e4 911 break;
89bddce5 912 }
1da177e4
LT
913 return err;
914}
915
916int sock_create_lite(int family, int type, int protocol, struct socket **res)
917{
918 int err;
919 struct socket *sock = NULL;
89bddce5 920
1da177e4
LT
921 err = security_socket_create(family, type, protocol, 1);
922 if (err)
923 goto out;
924
925 sock = sock_alloc();
926 if (!sock) {
927 err = -ENOMEM;
928 goto out;
929 }
930
1da177e4 931 sock->type = type;
7420ed23
VY
932 err = security_socket_post_create(sock, family, type, protocol, 1);
933 if (err)
934 goto out_release;
935
1da177e4
LT
936out:
937 *res = sock;
938 return err;
7420ed23
VY
939out_release:
940 sock_release(sock);
941 sock = NULL;
942 goto out;
1da177e4
LT
943}
944
945/* No kernel lock held - perfect */
89bddce5 946static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
947{
948 struct socket *sock;
949
950 /*
89bddce5 951 * We can't return errors to poll, so it's either yes or no.
1da177e4 952 */
b69aee04 953 sock = file->private_data;
1da177e4
LT
954 return sock->ops->poll(file, sock, wait);
955}
956
89bddce5 957static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 958{
b69aee04 959 struct socket *sock = file->private_data;
1da177e4
LT
960
961 return sock->ops->mmap(file, sock, vma);
962}
963
20380731 964static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
965{
966 /*
89bddce5
SH
967 * It was possible the inode is NULL we were
968 * closing an unfinished socket.
1da177e4
LT
969 */
970
89bddce5 971 if (!inode) {
1da177e4
LT
972 printk(KERN_DEBUG "sock_close: NULL inode\n");
973 return 0;
974 }
975 sock_fasync(-1, filp, 0);
976 sock_release(SOCKET_I(inode));
977 return 0;
978}
979
980/*
981 * Update the socket async list
982 *
983 * Fasync_list locking strategy.
984 *
985 * 1. fasync_list is modified only under process context socket lock
986 * i.e. under semaphore.
987 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
988 * or under socket lock.
989 * 3. fasync_list can be used from softirq context, so that
990 * modification under socket lock have to be enhanced with
991 * write_lock_bh(&sk->sk_callback_lock).
992 * --ANK (990710)
993 */
994
995static int sock_fasync(int fd, struct file *filp, int on)
996{
89bddce5 997 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
998 struct socket *sock;
999 struct sock *sk;
1000
89bddce5 1001 if (on) {
8b3a7005 1002 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1003 if (fna == NULL)
1da177e4
LT
1004 return -ENOMEM;
1005 }
1006
b69aee04 1007 sock = filp->private_data;
1da177e4 1008
89bddce5
SH
1009 sk = sock->sk;
1010 if (sk == NULL) {
1da177e4
LT
1011 kfree(fna);
1012 return -EINVAL;
1013 }
1014
1015 lock_sock(sk);
1016
89bddce5 1017 prev = &(sock->fasync_list);
1da177e4 1018
89bddce5
SH
1019 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1020 if (fa->fa_file == filp)
1da177e4
LT
1021 break;
1022
89bddce5
SH
1023 if (on) {
1024 if (fa != NULL) {
1da177e4 1025 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1026 fa->fa_fd = fd;
1da177e4
LT
1027 write_unlock_bh(&sk->sk_callback_lock);
1028
1029 kfree(fna);
1030 goto out;
1031 }
89bddce5
SH
1032 fna->fa_file = filp;
1033 fna->fa_fd = fd;
1034 fna->magic = FASYNC_MAGIC;
1035 fna->fa_next = sock->fasync_list;
1da177e4 1036 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1037 sock->fasync_list = fna;
1da177e4 1038 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1039 } else {
1040 if (fa != NULL) {
1da177e4 1041 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1042 *prev = fa->fa_next;
1da177e4
LT
1043 write_unlock_bh(&sk->sk_callback_lock);
1044 kfree(fa);
1045 }
1046 }
1047
1048out:
1049 release_sock(sock->sk);
1050 return 0;
1051}
1052
1053/* This function may be called only under socket lock or callback_lock */
1054
1055int sock_wake_async(struct socket *sock, int how, int band)
1056{
1057 if (!sock || !sock->fasync_list)
1058 return -1;
89bddce5 1059 switch (how) {
1da177e4 1060 case 1:
89bddce5 1061
1da177e4
LT
1062 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1063 break;
1064 goto call_kill;
1065 case 2:
1066 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1067 break;
1068 /* fall through */
1069 case 0:
89bddce5 1070call_kill:
1da177e4
LT
1071 __kill_fasync(sock->fasync_list, SIGIO, band);
1072 break;
1073 case 3:
1074 __kill_fasync(sock->fasync_list, SIGURG, band);
1075 }
1076 return 0;
1077}
1078
1b8d7ae4 1079static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1080 struct socket **res, int kern)
1da177e4
LT
1081{
1082 int err;
1083 struct socket *sock;
55737fda 1084 const struct net_proto_family *pf;
1da177e4
LT
1085
1086 /*
89bddce5 1087 * Check protocol is in range
1da177e4
LT
1088 */
1089 if (family < 0 || family >= NPROTO)
1090 return -EAFNOSUPPORT;
1091 if (type < 0 || type >= SOCK_MAX)
1092 return -EINVAL;
1093
1094 /* Compatibility.
1095
1096 This uglymoron is moved from INET layer to here to avoid
1097 deadlock in module load.
1098 */
1099 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1100 static int warned;
1da177e4
LT
1101 if (!warned) {
1102 warned = 1;
89bddce5
SH
1103 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1104 current->comm);
1da177e4
LT
1105 }
1106 family = PF_PACKET;
1107 }
1108
1109 err = security_socket_create(family, type, protocol, kern);
1110 if (err)
1111 return err;
89bddce5 1112
55737fda
SH
1113 /*
1114 * Allocate the socket and allow the family to set things up. if
1115 * the protocol is 0, the family is instructed to select an appropriate
1116 * default.
1117 */
1118 sock = sock_alloc();
1119 if (!sock) {
1120 if (net_ratelimit())
1121 printk(KERN_WARNING "socket: no more sockets\n");
1122 return -ENFILE; /* Not exactly a match, but its the
1123 closest posix thing */
1124 }
1125
1126 sock->type = type;
1127
1da177e4 1128#if defined(CONFIG_KMOD)
89bddce5
SH
1129 /* Attempt to load a protocol module if the find failed.
1130 *
1131 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1132 * requested real, full-featured networking support upon configuration.
1133 * Otherwise module support will break!
1134 */
55737fda 1135 if (net_families[family] == NULL)
89bddce5 1136 request_module("net-pf-%d", family);
1da177e4
LT
1137#endif
1138
55737fda
SH
1139 rcu_read_lock();
1140 pf = rcu_dereference(net_families[family]);
1141 err = -EAFNOSUPPORT;
1142 if (!pf)
1143 goto out_release;
1da177e4
LT
1144
1145 /*
1146 * We will call the ->create function, that possibly is in a loadable
1147 * module, so we have to bump that loadable module refcnt first.
1148 */
55737fda 1149 if (!try_module_get(pf->owner))
1da177e4
LT
1150 goto out_release;
1151
55737fda
SH
1152 /* Now protected by module ref count */
1153 rcu_read_unlock();
1154
1b8d7ae4 1155 err = pf->create(net, sock, protocol);
55737fda 1156 if (err < 0)
1da177e4 1157 goto out_module_put;
a79af59e 1158
1da177e4
LT
1159 /*
1160 * Now to bump the refcnt of the [loadable] module that owns this
1161 * socket at sock_release time we decrement its refcnt.
1162 */
55737fda
SH
1163 if (!try_module_get(sock->ops->owner))
1164 goto out_module_busy;
1165
1da177e4
LT
1166 /*
1167 * Now that we're done with the ->create function, the [loadable]
1168 * module can have its refcnt decremented
1169 */
55737fda 1170 module_put(pf->owner);
7420ed23
VY
1171 err = security_socket_post_create(sock, family, type, protocol, kern);
1172 if (err)
3b185525 1173 goto out_sock_release;
55737fda 1174 *res = sock;
1da177e4 1175
55737fda
SH
1176 return 0;
1177
1178out_module_busy:
1179 err = -EAFNOSUPPORT;
1da177e4 1180out_module_put:
55737fda
SH
1181 sock->ops = NULL;
1182 module_put(pf->owner);
1183out_sock_release:
1da177e4 1184 sock_release(sock);
55737fda
SH
1185 return err;
1186
1187out_release:
1188 rcu_read_unlock();
1189 goto out_sock_release;
1da177e4
LT
1190}
1191
1192int sock_create(int family, int type, int protocol, struct socket **res)
1193{
1b8d7ae4 1194 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4
LT
1195}
1196
1197int sock_create_kern(int family, int type, int protocol, struct socket **res)
1198{
1b8d7ae4 1199 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4
LT
1200}
1201
1202asmlinkage long sys_socket(int family, int type, int protocol)
1203{
1204 int retval;
1205 struct socket *sock;
1206
1207 retval = sock_create(family, type, protocol, &sock);
1208 if (retval < 0)
1209 goto out;
1210
1211 retval = sock_map_fd(sock);
1212 if (retval < 0)
1213 goto out_release;
1214
1215out:
1216 /* It may be already another descriptor 8) Not kernel problem. */
1217 return retval;
1218
1219out_release:
1220 sock_release(sock);
1221 return retval;
1222}
1223
1224/*
1225 * Create a pair of connected sockets.
1226 */
1227
89bddce5
SH
1228asmlinkage long sys_socketpair(int family, int type, int protocol,
1229 int __user *usockvec)
1da177e4
LT
1230{
1231 struct socket *sock1, *sock2;
1232 int fd1, fd2, err;
db349509 1233 struct file *newfile1, *newfile2;
1da177e4
LT
1234
1235 /*
1236 * Obtain the first socket and check if the underlying protocol
1237 * supports the socketpair call.
1238 */
1239
1240 err = sock_create(family, type, protocol, &sock1);
1241 if (err < 0)
1242 goto out;
1243
1244 err = sock_create(family, type, protocol, &sock2);
1245 if (err < 0)
1246 goto out_release_1;
1247
1248 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1249 if (err < 0)
1da177e4
LT
1250 goto out_release_both;
1251
db349509
AV
1252 fd1 = sock_alloc_fd(&newfile1);
1253 if (unlikely(fd1 < 0))
1254 goto out_release_both;
1da177e4 1255
db349509
AV
1256 fd2 = sock_alloc_fd(&newfile2);
1257 if (unlikely(fd2 < 0)) {
1258 put_filp(newfile1);
1259 put_unused_fd(fd1);
1da177e4 1260 goto out_release_both;
db349509 1261 }
1da177e4 1262
db349509
AV
1263 err = sock_attach_fd(sock1, newfile1);
1264 if (unlikely(err < 0)) {
1265 goto out_fd2;
1266 }
1267
1268 err = sock_attach_fd(sock2, newfile2);
1269 if (unlikely(err < 0)) {
1270 fput(newfile1);
1271 goto out_fd1;
1272 }
1273
1274 err = audit_fd_pair(fd1, fd2);
1275 if (err < 0) {
1276 fput(newfile1);
1277 fput(newfile2);
1278 goto out_fd;
1279 }
1da177e4 1280
db349509
AV
1281 fd_install(fd1, newfile1);
1282 fd_install(fd2, newfile2);
1da177e4
LT
1283 /* fd1 and fd2 may be already another descriptors.
1284 * Not kernel problem.
1285 */
1286
89bddce5 1287 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1288 if (!err)
1289 err = put_user(fd2, &usockvec[1]);
1290 if (!err)
1291 return 0;
1292
1293 sys_close(fd2);
1294 sys_close(fd1);
1295 return err;
1296
1da177e4 1297out_release_both:
89bddce5 1298 sock_release(sock2);
1da177e4 1299out_release_1:
89bddce5 1300 sock_release(sock1);
1da177e4
LT
1301out:
1302 return err;
db349509
AV
1303
1304out_fd2:
1305 put_filp(newfile1);
1306 sock_release(sock1);
1307out_fd1:
1308 put_filp(newfile2);
1309 sock_release(sock2);
1310out_fd:
1311 put_unused_fd(fd1);
1312 put_unused_fd(fd2);
1313 goto out;
1da177e4
LT
1314}
1315
1da177e4
LT
1316/*
1317 * Bind a name to a socket. Nothing much to do here since it's
1318 * the protocol's responsibility to handle the local address.
1319 *
1320 * We move the socket address to kernel space before we call
1321 * the protocol layer (having also checked the address is ok).
1322 */
1323
1324asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1325{
1326 struct socket *sock;
1327 char address[MAX_SOCK_ADDR];
6cb153ca 1328 int err, fput_needed;
1da177e4 1329
89bddce5 1330 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1331 if (sock) {
89bddce5
SH
1332 err = move_addr_to_kernel(umyaddr, addrlen, address);
1333 if (err >= 0) {
1334 err = security_socket_bind(sock,
1335 (struct sockaddr *)address,
1336 addrlen);
6cb153ca
BL
1337 if (!err)
1338 err = sock->ops->bind(sock,
89bddce5
SH
1339 (struct sockaddr *)
1340 address, addrlen);
1da177e4 1341 }
6cb153ca 1342 fput_light(sock->file, fput_needed);
89bddce5 1343 }
1da177e4
LT
1344 return err;
1345}
1346
1da177e4
LT
1347/*
1348 * Perform a listen. Basically, we allow the protocol to do anything
1349 * necessary for a listen, and if that works, we mark the socket as
1350 * ready for listening.
1351 */
1352
7a42c217 1353int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1354
1355asmlinkage long sys_listen(int fd, int backlog)
1356{
1357 struct socket *sock;
6cb153ca 1358 int err, fput_needed;
89bddce5
SH
1359
1360 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1361 if (sock) {
1362 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1363 backlog = sysctl_somaxconn;
1364
1365 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1366 if (!err)
1367 err = sock->ops->listen(sock, backlog);
1da177e4 1368
6cb153ca 1369 fput_light(sock->file, fput_needed);
1da177e4
LT
1370 }
1371 return err;
1372}
1373
1da177e4
LT
1374/*
1375 * For accept, we attempt to create a new socket, set up the link
1376 * with the client, wake up the client, then return the new
1377 * connected fd. We collect the address of the connector in kernel
1378 * space and move it to user at the very end. This is unclean because
1379 * we open the socket then return an error.
1380 *
1381 * 1003.1g adds the ability to recvmsg() to query connection pending
1382 * status to recvmsg. We need to add that support in a way thats
1383 * clean when we restucture accept also.
1384 */
1385
89bddce5
SH
1386asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1387 int __user *upeer_addrlen)
1da177e4
LT
1388{
1389 struct socket *sock, *newsock;
39d8c1b6 1390 struct file *newfile;
6cb153ca 1391 int err, len, newfd, fput_needed;
1da177e4
LT
1392 char address[MAX_SOCK_ADDR];
1393
6cb153ca 1394 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1395 if (!sock)
1396 goto out;
1397
1398 err = -ENFILE;
89bddce5 1399 if (!(newsock = sock_alloc()))
1da177e4
LT
1400 goto out_put;
1401
1402 newsock->type = sock->type;
1403 newsock->ops = sock->ops;
1404
1da177e4
LT
1405 /*
1406 * We don't need try_module_get here, as the listening socket (sock)
1407 * has the protocol module (sock->ops->owner) held.
1408 */
1409 __module_get(newsock->ops->owner);
1410
39d8c1b6
DM
1411 newfd = sock_alloc_fd(&newfile);
1412 if (unlikely(newfd < 0)) {
1413 err = newfd;
9a1875e6
DM
1414 sock_release(newsock);
1415 goto out_put;
39d8c1b6
DM
1416 }
1417
1418 err = sock_attach_fd(newsock, newfile);
1419 if (err < 0)
79f4f642 1420 goto out_fd_simple;
39d8c1b6 1421
a79af59e
FF
1422 err = security_socket_accept(sock, newsock);
1423 if (err)
39d8c1b6 1424 goto out_fd;
a79af59e 1425
1da177e4
LT
1426 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1427 if (err < 0)
39d8c1b6 1428 goto out_fd;
1da177e4
LT
1429
1430 if (upeer_sockaddr) {
89bddce5
SH
1431 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1432 &len, 2) < 0) {
1da177e4 1433 err = -ECONNABORTED;
39d8c1b6 1434 goto out_fd;
1da177e4 1435 }
89bddce5
SH
1436 err = move_addr_to_user(address, len, upeer_sockaddr,
1437 upeer_addrlen);
1da177e4 1438 if (err < 0)
39d8c1b6 1439 goto out_fd;
1da177e4
LT
1440 }
1441
1442 /* File flags are not inherited via accept() unlike another OSes. */
1443
39d8c1b6
DM
1444 fd_install(newfd, newfile);
1445 err = newfd;
1da177e4
LT
1446
1447 security_socket_post_accept(sock, newsock);
1448
1449out_put:
6cb153ca 1450 fput_light(sock->file, fput_needed);
1da177e4
LT
1451out:
1452 return err;
79f4f642
AD
1453out_fd_simple:
1454 sock_release(newsock);
1455 put_filp(newfile);
1456 put_unused_fd(newfd);
1457 goto out_put;
39d8c1b6 1458out_fd:
9606a216 1459 fput(newfile);
39d8c1b6 1460 put_unused_fd(newfd);
1da177e4
LT
1461 goto out_put;
1462}
1463
1da177e4
LT
1464/*
1465 * Attempt to connect to a socket with the server address. The address
1466 * is in user space so we verify it is OK and move it to kernel space.
1467 *
1468 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1469 * break bindings
1470 *
1471 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1472 * other SEQPACKET protocols that take time to connect() as it doesn't
1473 * include the -EINPROGRESS status for such sockets.
1474 */
1475
89bddce5
SH
1476asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1477 int addrlen)
1da177e4
LT
1478{
1479 struct socket *sock;
1480 char address[MAX_SOCK_ADDR];
6cb153ca 1481 int err, fput_needed;
1da177e4 1482
6cb153ca 1483 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1484 if (!sock)
1485 goto out;
1486 err = move_addr_to_kernel(uservaddr, addrlen, address);
1487 if (err < 0)
1488 goto out_put;
1489
89bddce5
SH
1490 err =
1491 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1492 if (err)
1493 goto out_put;
1494
89bddce5 1495 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1496 sock->file->f_flags);
1497out_put:
6cb153ca 1498 fput_light(sock->file, fput_needed);
1da177e4
LT
1499out:
1500 return err;
1501}
1502
1503/*
1504 * Get the local address ('name') of a socket object. Move the obtained
1505 * name to user space.
1506 */
1507
89bddce5
SH
1508asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1509 int __user *usockaddr_len)
1da177e4
LT
1510{
1511 struct socket *sock;
1512 char address[MAX_SOCK_ADDR];
6cb153ca 1513 int len, err, fput_needed;
89bddce5 1514
6cb153ca 1515 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1516 if (!sock)
1517 goto out;
1518
1519 err = security_socket_getsockname(sock);
1520 if (err)
1521 goto out_put;
1522
1523 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1524 if (err)
1525 goto out_put;
1526 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1527
1528out_put:
6cb153ca 1529 fput_light(sock->file, fput_needed);
1da177e4
LT
1530out:
1531 return err;
1532}
1533
1534/*
1535 * Get the remote address ('name') of a socket object. Move the obtained
1536 * name to user space.
1537 */
1538
89bddce5
SH
1539asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1540 int __user *usockaddr_len)
1da177e4
LT
1541{
1542 struct socket *sock;
1543 char address[MAX_SOCK_ADDR];
6cb153ca 1544 int len, err, fput_needed;
1da177e4 1545
89bddce5
SH
1546 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1547 if (sock != NULL) {
1da177e4
LT
1548 err = security_socket_getpeername(sock);
1549 if (err) {
6cb153ca 1550 fput_light(sock->file, fput_needed);
1da177e4
LT
1551 return err;
1552 }
1553
89bddce5
SH
1554 err =
1555 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1556 1);
1da177e4 1557 if (!err)
89bddce5
SH
1558 err = move_addr_to_user(address, len, usockaddr,
1559 usockaddr_len);
6cb153ca 1560 fput_light(sock->file, fput_needed);
1da177e4
LT
1561 }
1562 return err;
1563}
1564
1565/*
1566 * Send a datagram to a given address. We move the address into kernel
1567 * space and check the user space data area is readable before invoking
1568 * the protocol.
1569 */
1570
89bddce5
SH
1571asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1572 unsigned flags, struct sockaddr __user *addr,
1573 int addr_len)
1da177e4
LT
1574{
1575 struct socket *sock;
1576 char address[MAX_SOCK_ADDR];
1577 int err;
1578 struct msghdr msg;
1579 struct iovec iov;
6cb153ca
BL
1580 int fput_needed;
1581 struct file *sock_file;
1582
1583 sock_file = fget_light(fd, &fput_needed);
4387ff75 1584 err = -EBADF;
6cb153ca 1585 if (!sock_file)
4387ff75 1586 goto out;
6cb153ca
BL
1587
1588 sock = sock_from_file(sock_file, &err);
1da177e4 1589 if (!sock)
6cb153ca 1590 goto out_put;
89bddce5
SH
1591 iov.iov_base = buff;
1592 iov.iov_len = len;
1593 msg.msg_name = NULL;
1594 msg.msg_iov = &iov;
1595 msg.msg_iovlen = 1;
1596 msg.msg_control = NULL;
1597 msg.msg_controllen = 0;
1598 msg.msg_namelen = 0;
6cb153ca 1599 if (addr) {
1da177e4
LT
1600 err = move_addr_to_kernel(addr, addr_len, address);
1601 if (err < 0)
1602 goto out_put;
89bddce5
SH
1603 msg.msg_name = address;
1604 msg.msg_namelen = addr_len;
1da177e4
LT
1605 }
1606 if (sock->file->f_flags & O_NONBLOCK)
1607 flags |= MSG_DONTWAIT;
1608 msg.msg_flags = flags;
1609 err = sock_sendmsg(sock, &msg, len);
1610
89bddce5 1611out_put:
6cb153ca 1612 fput_light(sock_file, fput_needed);
4387ff75 1613out:
1da177e4
LT
1614 return err;
1615}
1616
1617/*
89bddce5 1618 * Send a datagram down a socket.
1da177e4
LT
1619 */
1620
89bddce5 1621asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1622{
1623 return sys_sendto(fd, buff, len, flags, NULL, 0);
1624}
1625
1626/*
89bddce5 1627 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1628 * sender. We verify the buffers are writable and if needed move the
1629 * sender address from kernel to user space.
1630 */
1631
89bddce5
SH
1632asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1633 unsigned flags, struct sockaddr __user *addr,
1634 int __user *addr_len)
1da177e4
LT
1635{
1636 struct socket *sock;
1637 struct iovec iov;
1638 struct msghdr msg;
1639 char address[MAX_SOCK_ADDR];
89bddce5 1640 int err, err2;
6cb153ca
BL
1641 struct file *sock_file;
1642 int fput_needed;
1643
1644 sock_file = fget_light(fd, &fput_needed);
4387ff75 1645 err = -EBADF;
6cb153ca 1646 if (!sock_file)
4387ff75 1647 goto out;
1da177e4 1648
6cb153ca 1649 sock = sock_from_file(sock_file, &err);
1da177e4 1650 if (!sock)
4387ff75 1651 goto out_put;
1da177e4 1652
89bddce5
SH
1653 msg.msg_control = NULL;
1654 msg.msg_controllen = 0;
1655 msg.msg_iovlen = 1;
1656 msg.msg_iov = &iov;
1657 iov.iov_len = size;
1658 iov.iov_base = ubuf;
1659 msg.msg_name = address;
1660 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1661 if (sock->file->f_flags & O_NONBLOCK)
1662 flags |= MSG_DONTWAIT;
89bddce5 1663 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1664
89bddce5
SH
1665 if (err >= 0 && addr != NULL) {
1666 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1667 if (err2 < 0)
1668 err = err2;
1da177e4 1669 }
4387ff75 1670out_put:
6cb153ca 1671 fput_light(sock_file, fput_needed);
4387ff75 1672out:
1da177e4
LT
1673 return err;
1674}
1675
1676/*
89bddce5 1677 * Receive a datagram from a socket.
1da177e4
LT
1678 */
1679
89bddce5
SH
1680asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1681 unsigned flags)
1da177e4
LT
1682{
1683 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1684}
1685
1686/*
1687 * Set a socket option. Because we don't know the option lengths we have
1688 * to pass the user mode parameter for the protocols to sort out.
1689 */
1690
89bddce5
SH
1691asmlinkage long sys_setsockopt(int fd, int level, int optname,
1692 char __user *optval, int optlen)
1da177e4 1693{
6cb153ca 1694 int err, fput_needed;
1da177e4
LT
1695 struct socket *sock;
1696
1697 if (optlen < 0)
1698 return -EINVAL;
89bddce5
SH
1699
1700 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1701 if (sock != NULL) {
1702 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1703 if (err)
1704 goto out_put;
1da177e4
LT
1705
1706 if (level == SOL_SOCKET)
89bddce5
SH
1707 err =
1708 sock_setsockopt(sock, level, optname, optval,
1709 optlen);
1da177e4 1710 else
89bddce5
SH
1711 err =
1712 sock->ops->setsockopt(sock, level, optname, optval,
1713 optlen);
6cb153ca
BL
1714out_put:
1715 fput_light(sock->file, fput_needed);
1da177e4
LT
1716 }
1717 return err;
1718}
1719
1720/*
1721 * Get a socket option. Because we don't know the option lengths we have
1722 * to pass a user mode parameter for the protocols to sort out.
1723 */
1724
89bddce5
SH
1725asmlinkage long sys_getsockopt(int fd, int level, int optname,
1726 char __user *optval, int __user *optlen)
1da177e4 1727{
6cb153ca 1728 int err, fput_needed;
1da177e4
LT
1729 struct socket *sock;
1730
89bddce5
SH
1731 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1732 if (sock != NULL) {
6cb153ca
BL
1733 err = security_socket_getsockopt(sock, level, optname);
1734 if (err)
1735 goto out_put;
1da177e4
LT
1736
1737 if (level == SOL_SOCKET)
89bddce5
SH
1738 err =
1739 sock_getsockopt(sock, level, optname, optval,
1740 optlen);
1da177e4 1741 else
89bddce5
SH
1742 err =
1743 sock->ops->getsockopt(sock, level, optname, optval,
1744 optlen);
6cb153ca
BL
1745out_put:
1746 fput_light(sock->file, fput_needed);
1da177e4
LT
1747 }
1748 return err;
1749}
1750
1da177e4
LT
1751/*
1752 * Shutdown a socket.
1753 */
1754
1755asmlinkage long sys_shutdown(int fd, int how)
1756{
6cb153ca 1757 int err, fput_needed;
1da177e4
LT
1758 struct socket *sock;
1759
89bddce5
SH
1760 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1761 if (sock != NULL) {
1da177e4 1762 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1763 if (!err)
1764 err = sock->ops->shutdown(sock, how);
1765 fput_light(sock->file, fput_needed);
1da177e4
LT
1766 }
1767 return err;
1768}
1769
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and rely on three names being visible
 * there: `flags`, the native header pointer `msg`, and its compat view
 * `msg##_compat` (i.e. `msg_compat`).
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1776
1da177e4
LT
1777/*
1778 * BSD sendmsg interface
1779 */
1780
1781asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1782{
89bddce5
SH
1783 struct compat_msghdr __user *msg_compat =
1784 (struct compat_msghdr __user *)msg;
1da177e4
LT
1785 struct socket *sock;
1786 char address[MAX_SOCK_ADDR];
1787 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1788 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1789 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1790 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1791 unsigned char *ctl_buf = ctl;
1792 struct msghdr msg_sys;
1793 int err, ctl_len, iov_size, total_len;
6cb153ca 1794 int fput_needed;
89bddce5 1795
1da177e4
LT
1796 err = -EFAULT;
1797 if (MSG_CMSG_COMPAT & flags) {
1798 if (get_compat_msghdr(&msg_sys, msg_compat))
1799 return -EFAULT;
89bddce5
SH
1800 }
1801 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1802 return -EFAULT;
1803
6cb153ca 1804 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1805 if (!sock)
1da177e4
LT
1806 goto out;
1807
1808 /* do not move before msg_sys is valid */
1809 err = -EMSGSIZE;
1810 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1811 goto out_put;
1812
89bddce5 1813 /* Check whether to allocate the iovec area */
1da177e4
LT
1814 err = -ENOMEM;
1815 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1816 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1817 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1818 if (!iov)
1819 goto out_put;
1820 }
1821
1822 /* This will also move the address data into kernel space */
1823 if (MSG_CMSG_COMPAT & flags) {
1824 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1825 } else
1826 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1827 if (err < 0)
1da177e4
LT
1828 goto out_freeiov;
1829 total_len = err;
1830
1831 err = -ENOBUFS;
1832
1833 if (msg_sys.msg_controllen > INT_MAX)
1834 goto out_freeiov;
89bddce5 1835 ctl_len = msg_sys.msg_controllen;
1da177e4 1836 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1837 err =
1838 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1839 sizeof(ctl));
1da177e4
LT
1840 if (err)
1841 goto out_freeiov;
1842 ctl_buf = msg_sys.msg_control;
8920e8f9 1843 ctl_len = msg_sys.msg_controllen;
1da177e4 1844 } else if (ctl_len) {
89bddce5 1845 if (ctl_len > sizeof(ctl)) {
1da177e4 1846 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1847 if (ctl_buf == NULL)
1da177e4
LT
1848 goto out_freeiov;
1849 }
1850 err = -EFAULT;
1851 /*
1852 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1853 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1854 * checking falls down on this.
1855 */
89bddce5
SH
1856 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1857 ctl_len))
1da177e4
LT
1858 goto out_freectl;
1859 msg_sys.msg_control = ctl_buf;
1860 }
1861 msg_sys.msg_flags = flags;
1862
1863 if (sock->file->f_flags & O_NONBLOCK)
1864 msg_sys.msg_flags |= MSG_DONTWAIT;
1865 err = sock_sendmsg(sock, &msg_sys, total_len);
1866
1867out_freectl:
89bddce5 1868 if (ctl_buf != ctl)
1da177e4
LT
1869 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1870out_freeiov:
1871 if (iov != iovstack)
1872 sock_kfree_s(sock->sk, iov, iov_size);
1873out_put:
6cb153ca 1874 fput_light(sock->file, fput_needed);
89bddce5 1875out:
1da177e4
LT
1876 return err;
1877}
1878
1879/*
1880 * BSD recvmsg interface
1881 */
1882
89bddce5
SH
1883asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1884 unsigned int flags)
1da177e4 1885{
89bddce5
SH
1886 struct compat_msghdr __user *msg_compat =
1887 (struct compat_msghdr __user *)msg;
1da177e4
LT
1888 struct socket *sock;
1889 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1890 struct iovec *iov = iovstack;
1da177e4
LT
1891 struct msghdr msg_sys;
1892 unsigned long cmsg_ptr;
1893 int err, iov_size, total_len, len;
6cb153ca 1894 int fput_needed;
1da177e4
LT
1895
1896 /* kernel mode address */
1897 char addr[MAX_SOCK_ADDR];
1898
1899 /* user mode address pointers */
1900 struct sockaddr __user *uaddr;
1901 int __user *uaddr_len;
89bddce5 1902
1da177e4
LT
1903 if (MSG_CMSG_COMPAT & flags) {
1904 if (get_compat_msghdr(&msg_sys, msg_compat))
1905 return -EFAULT;
89bddce5
SH
1906 }
1907 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1908 return -EFAULT;
1da177e4 1909
6cb153ca 1910 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1911 if (!sock)
1912 goto out;
1913
1914 err = -EMSGSIZE;
1915 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1916 goto out_put;
89bddce5
SH
1917
1918 /* Check whether to allocate the iovec area */
1da177e4
LT
1919 err = -ENOMEM;
1920 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1921 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1922 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1923 if (!iov)
1924 goto out_put;
1925 }
1926
1927 /*
89bddce5
SH
1928 * Save the user-mode address (verify_iovec will change the
1929 * kernel msghdr to use the kernel address space)
1da177e4 1930 */
89bddce5
SH
1931
1932 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1933 uaddr_len = COMPAT_NAMELEN(msg);
1934 if (MSG_CMSG_COMPAT & flags) {
1935 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1936 } else
1937 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1938 if (err < 0)
1939 goto out_freeiov;
89bddce5 1940 total_len = err;
1da177e4
LT
1941
1942 cmsg_ptr = (unsigned long)msg_sys.msg_control;
4a19542e 1943 msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 1944
1da177e4
LT
1945 if (sock->file->f_flags & O_NONBLOCK)
1946 flags |= MSG_DONTWAIT;
1947 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1948 if (err < 0)
1949 goto out_freeiov;
1950 len = err;
1951
1952 if (uaddr != NULL) {
89bddce5
SH
1953 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1954 uaddr_len);
1da177e4
LT
1955 if (err < 0)
1956 goto out_freeiov;
1957 }
37f7f421
DM
1958 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1959 COMPAT_FLAGS(msg));
1da177e4
LT
1960 if (err)
1961 goto out_freeiov;
1962 if (MSG_CMSG_COMPAT & flags)
89bddce5 1963 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1964 &msg_compat->msg_controllen);
1965 else
89bddce5 1966 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1967 &msg->msg_controllen);
1968 if (err)
1969 goto out_freeiov;
1970 err = len;
1971
1972out_freeiov:
1973 if (iov != iovstack)
1974 sock_kfree_s(sock->sk, iov, iov_size);
1975out_put:
6cb153ca 1976 fput_light(sock->file, fput_needed);
1da177e4
LT
1977out:
1978 return err;
1979}
1980
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/* Argument list sizes for sys_socketcall, in bytes, indexed by call number */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	@call selects the socket operation (1 .. SYS_RECVMSG); @args points
 *	to the user-space array holding that operation's arguments, of which
 *	nargs[call] bytes are copied in.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2092
55737fda
SH
2093/**
2094 * sock_register - add a socket protocol handler
2095 * @ops: description of protocol
2096 *
1da177e4
LT
2097 * This function is called by a protocol handler that wants to
2098 * advertise its address family, and have it linked into the
55737fda
SH
2099 * socket interface. The value ops->family coresponds to the
2100 * socket system call protocol family.
1da177e4 2101 */
f0fd27d4 2102int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2103{
2104 int err;
2105
2106 if (ops->family >= NPROTO) {
89bddce5
SH
2107 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2108 NPROTO);
1da177e4
LT
2109 return -ENOBUFS;
2110 }
55737fda
SH
2111
2112 spin_lock(&net_family_lock);
2113 if (net_families[ops->family])
2114 err = -EEXIST;
2115 else {
89bddce5 2116 net_families[ops->family] = ops;
1da177e4
LT
2117 err = 0;
2118 }
55737fda
SH
2119 spin_unlock(&net_family_lock);
2120
89bddce5 2121 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2122 return err;
2123}
2124
55737fda
SH
2125/**
2126 * sock_unregister - remove a protocol handler
2127 * @family: protocol family to remove
2128 *
1da177e4
LT
2129 * This function is called by a protocol handler that wants to
2130 * remove its address family, and have it unlinked from the
55737fda
SH
2131 * new socket creation.
2132 *
2133 * If protocol handler is a module, then it can use module reference
2134 * counts to protect against new references. If protocol handler is not
2135 * a module then it needs to provide its own protection in
2136 * the ops->create routine.
1da177e4 2137 */
f0fd27d4 2138void sock_unregister(int family)
1da177e4 2139{
f0fd27d4 2140 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2141
55737fda 2142 spin_lock(&net_family_lock);
89bddce5 2143 net_families[family] = NULL;
55737fda
SH
2144 spin_unlock(&net_family_lock);
2145
2146 synchronize_rcu();
2147
89bddce5 2148 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2149}
2150
77d76ea3 2151static int __init sock_init(void)
1da177e4
LT
2152{
2153 /*
89bddce5 2154 * Initialize sock SLAB cache.
1da177e4 2155 */
89bddce5 2156
1da177e4
LT
2157 sk_init();
2158
1da177e4 2159 /*
89bddce5 2160 * Initialize skbuff SLAB cache
1da177e4
LT
2161 */
2162 skb_init();
1da177e4
LT
2163
2164 /*
89bddce5 2165 * Initialize the protocols module.
1da177e4
LT
2166 */
2167
2168 init_inodecache();
2169 register_filesystem(&sock_fs_type);
2170 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2171
2172 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2173 */
2174
2175#ifdef CONFIG_NETFILTER
2176 netfilter_init();
2177#endif
cbeb321a
DM
2178
2179 return 0;
1da177e4
LT
2180}
2181
77d76ea3
AK
2182core_initcall(sock_init); /* early initcall */
2183
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 *	Print the "sockets: used N" line to @seq, where N is the sum of
 *	the per-CPU sockets_in_use counters over every possible CPU.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can come out negative; report that as zero. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2200
89bbfc95
SP
#ifdef CONFIG_COMPAT
/*
 *	Forward a compat ioctl to the socket's compat_ioctl operation.
 *	Returns -ENOIOCTLCMD when the socket's ops provide no such
 *	operation, otherwise whatever the operation returns.
 */
static long compat_sock_ioctl(struct file *file, unsigned cmd,
			      unsigned long arg)
{
	struct socket *sock = file->private_data;
	int ret;

	if (!sock->ops->compat_ioctl)
		return -ENOIOCTLCMD;

	ret = sock->ops->compat_ioctl(sock, cmd, arg);
	return ret;
}
#endif
2214
ac5a488e
SS
2215int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2216{
2217 return sock->ops->bind(sock, addr, addrlen);
2218}
2219
2220int kernel_listen(struct socket *sock, int backlog)
2221{
2222 return sock->ops->listen(sock, backlog);
2223}
2224
2225int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2226{
2227 struct sock *sk = sock->sk;
2228 int err;
2229
2230 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2231 newsock);
2232 if (err < 0)
2233 goto done;
2234
2235 err = sock->ops->accept(sock, *newsock, flags);
2236 if (err < 0) {
2237 sock_release(*newsock);
2238 goto done;
2239 }
2240
2241 (*newsock)->ops = sock->ops;
2242
2243done:
2244 return err;
2245}
2246
2247int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2248 int flags)
ac5a488e
SS
2249{
2250 return sock->ops->connect(sock, addr, addrlen, flags);
2251}
2252
2253int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2254 int *addrlen)
2255{
2256 return sock->ops->getname(sock, addr, addrlen, 0);
2257}
2258
2259int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2260 int *addrlen)
2261{
2262 return sock->ops->getname(sock, addr, addrlen, 1);
2263}
2264
2265int kernel_getsockopt(struct socket *sock, int level, int optname,
2266 char *optval, int *optlen)
2267{
2268 mm_segment_t oldfs = get_fs();
2269 int err;
2270
2271 set_fs(KERNEL_DS);
2272 if (level == SOL_SOCKET)
2273 err = sock_getsockopt(sock, level, optname, optval, optlen);
2274 else
2275 err = sock->ops->getsockopt(sock, level, optname, optval,
2276 optlen);
2277 set_fs(oldfs);
2278 return err;
2279}
2280
2281int kernel_setsockopt(struct socket *sock, int level, int optname,
2282 char *optval, int optlen)
2283{
2284 mm_segment_t oldfs = get_fs();
2285 int err;
2286
2287 set_fs(KERNEL_DS);
2288 if (level == SOL_SOCKET)
2289 err = sock_setsockopt(sock, level, optname, optval, optlen);
2290 else
2291 err = sock->ops->setsockopt(sock, level, optname, optval,
2292 optlen);
2293 set_fs(oldfs);
2294 return err;
2295}
2296
2297int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2298 size_t size, int flags)
2299{
2300 if (sock->ops->sendpage)
2301 return sock->ops->sendpage(sock, page, offset, size, flags);
2302
2303 return sock_no_sendpage(sock, page, offset, size, flags);
2304}
2305
2306int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2307{
2308 mm_segment_t oldfs = get_fs();
2309 int err;
2310
2311 set_fs(KERNEL_DS);
2312 err = sock->ops->ioctl(sock, cmd, arg);
2313 set_fs(oldfs);
2314
2315 return err;
2316}
2317
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Core socket interface */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() helpers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);