]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/socket.c
SLUB: Do our own flags based on PG_active and PG_error
[mirror_ubuntu-zesty-kernel.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
55737fda 66#include <linux/rcupdate.h>
1da177e4
LT
67#include <linux/netdevice.h>
68#include <linux/proc_fs.h>
69#include <linux/seq_file.h>
4a3e2f71 70#include <linux/mutex.h>
1da177e4
LT
71#include <linux/wanrouter.h>
72#include <linux/if_bridge.h>
20380731
ACM
73#include <linux/if_frad.h>
74#include <linux/if_vlan.h>
1da177e4
LT
75#include <linux/init.h>
76#include <linux/poll.h>
77#include <linux/cache.h>
78#include <linux/module.h>
79#include <linux/highmem.h>
1da177e4
LT
80#include <linux/mount.h>
81#include <linux/security.h>
82#include <linux/syscalls.h>
83#include <linux/compat.h>
84#include <linux/kmod.h>
3ec3b2fb 85#include <linux/audit.h>
d86b5e0e 86#include <linux/wireless.h>
1da177e4
LT
87
88#include <asm/uaccess.h>
89#include <asm/unistd.h>
90
91#include <net/compat.h>
92
93#include <net/sock.h>
94#include <linux/netfilter.h>
95
96static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
97static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
98 unsigned long nr_segs, loff_t pos);
99static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
100 unsigned long nr_segs, loff_t pos);
89bddce5 101static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
102
103static int sock_close(struct inode *inode, struct file *file);
104static unsigned int sock_poll(struct file *file,
105 struct poll_table_struct *wait);
89bddce5 106static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
107#ifdef CONFIG_COMPAT
108static long compat_sock_ioctl(struct file *file,
89bddce5 109 unsigned int cmd, unsigned long arg);
89bbfc95 110#endif
1da177e4 111static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
112static ssize_t sock_sendpage(struct file *file, struct page *page,
113 int offset, size_t size, loff_t *ppos, int more);
114
1da177e4
LT
115/*
116 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
117 * in the operation structures but are done directly via the socketcall() multiplexor.
118 */
119
da7071d7 120static const struct file_operations socket_file_ops = {
1da177e4
LT
121 .owner = THIS_MODULE,
122 .llseek = no_llseek,
123 .aio_read = sock_aio_read,
124 .aio_write = sock_aio_write,
125 .poll = sock_poll,
126 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
127#ifdef CONFIG_COMPAT
128 .compat_ioctl = compat_sock_ioctl,
129#endif
1da177e4
LT
130 .mmap = sock_mmap,
131 .open = sock_no_open, /* special open code to disallow open via /proc */
132 .release = sock_close,
133 .fasync = sock_fasync,
5274f052
JA
134 .sendpage = sock_sendpage,
135 .splice_write = generic_splice_sendpage,
1da177e4
LT
136};
137
138/*
139 * The protocol list. Each protocol is registered in here.
140 */
141
1da177e4 142static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 143static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 144
1da177e4
LT
145/*
146 * Statistics counters of the socket lists
147 */
148
149static DEFINE_PER_CPU(int, sockets_in_use) = 0;
150
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */

/*
 * Largest socket address accepted from or returned to user space:
 *	108 for Unix domain,
 *	16 for IP, 16 for IPX,
 *	24 for IPv6,
 *	about 80 for AX.25.
 * It must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 165
1da177e4
LT
166/**
167 * move_addr_to_kernel - copy a socket address into kernel space
168 * @uaddr: Address in user space
169 * @kaddr: Address in kernel space
170 * @ulen: Length in user space
171 *
172 * The address is copied into kernel space. If the provided address is
173 * too long an error code of -EINVAL is returned. If the copy gives
174 * invalid addresses -EFAULT is returned. On a success 0 is returned.
175 */
176
177int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
178{
89bddce5 179 if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1da177e4 180 return -EINVAL;
89bddce5 181 if (ulen == 0)
1da177e4 182 return 0;
89bddce5 183 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 184 return -EFAULT;
3ec3b2fb 185 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
186}
187
188/**
189 * move_addr_to_user - copy an address to user space
190 * @kaddr: kernel space address
191 * @klen: length of address in kernel
192 * @uaddr: user space address
193 * @ulen: pointer to user length field
194 *
195 * The value pointed to by ulen on entry is the buffer length available.
196 * This is overwritten with the buffer space used. -EINVAL is returned
197 * if an overlong buffer is specified or a negative buffer size. -EFAULT
198 * is returned if either the buffer or the length field are not
199 * accessible.
200 * After copying the data up to the limit the user specifies, the true
201 * length of the data is written over the length limit the user
202 * specified. Zero is returned for a success.
203 */
89bddce5
SH
204
205int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
206 int __user *ulen)
1da177e4
LT
207{
208 int err;
209 int len;
210
89bddce5
SH
211 err = get_user(len, ulen);
212 if (err)
1da177e4 213 return err;
89bddce5
SH
214 if (len > klen)
215 len = klen;
216 if (len < 0 || len > MAX_SOCK_ADDR)
1da177e4 217 return -EINVAL;
89bddce5 218 if (len) {
d6fe3945
SG
219 if (audit_sockaddr(klen, kaddr))
220 return -ENOMEM;
89bddce5 221 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
222 return -EFAULT;
223 }
224 /*
89bddce5
SH
225 * "fromlen shall refer to the value before truncation.."
226 * 1003.1g
1da177e4
LT
227 */
228 return __put_user(klen, ulen);
229}
230
231#define SOCKFS_MAGIC 0x534F434B
232
e18b890b 233static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
234
235static struct inode *sock_alloc_inode(struct super_block *sb)
236{
237 struct socket_alloc *ei;
89bddce5 238
e94b1766 239 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
240 if (!ei)
241 return NULL;
242 init_waitqueue_head(&ei->socket.wait);
89bddce5 243
1da177e4
LT
244 ei->socket.fasync_list = NULL;
245 ei->socket.state = SS_UNCONNECTED;
246 ei->socket.flags = 0;
247 ei->socket.ops = NULL;
248 ei->socket.sk = NULL;
249 ei->socket.file = NULL;
1da177e4
LT
250
251 return &ei->vfs_inode;
252}
253
254static void sock_destroy_inode(struct inode *inode)
255{
256 kmem_cache_free(sock_inode_cachep,
257 container_of(inode, struct socket_alloc, vfs_inode));
258}
259
e18b890b 260static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
1da177e4 261{
89bddce5 262 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 263
50953fe9 264 if (flags & SLAB_CTOR_CONSTRUCTOR)
1da177e4
LT
265 inode_init_once(&ei->vfs_inode);
266}
89bddce5 267
1da177e4
LT
268static int init_inodecache(void)
269{
270 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
271 sizeof(struct socket_alloc),
272 0,
273 (SLAB_HWCACHE_ALIGN |
274 SLAB_RECLAIM_ACCOUNT |
275 SLAB_MEM_SPREAD),
276 init_once,
277 NULL);
1da177e4
LT
278 if (sock_inode_cachep == NULL)
279 return -ENOMEM;
280 return 0;
281}
282
283static struct super_operations sockfs_ops = {
284 .alloc_inode = sock_alloc_inode,
285 .destroy_inode =sock_destroy_inode,
286 .statfs = simple_statfs,
287};
288
454e2398 289static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
290 int flags, const char *dev_name, void *data,
291 struct vfsmount *mnt)
1da177e4 292{
454e2398
DH
293 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
294 mnt);
1da177e4
LT
295}
296
ba89966c 297static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
298
299static struct file_system_type sock_fs_type = {
300 .name = "sockfs",
301 .get_sb = sockfs_get_sb,
302 .kill_sb = kill_anon_super,
303};
89bddce5 304
1da177e4
LT
305static int sockfs_delete_dentry(struct dentry *dentry)
306{
304e61e6
ED
307 /*
308 * At creation time, we pretended this dentry was hashed
309 * (by clearing DCACHE_UNHASHED bit in d_flags)
310 * At delete time, we restore the truth : not hashed.
311 * (so that dput() can proceed correctly)
312 */
313 dentry->d_flags |= DCACHE_UNHASHED;
314 return 0;
1da177e4 315}
c23fbb6b
ED
316
317/*
318 * sockfs_dname() is called from d_path().
319 */
320static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
321{
322 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
323 dentry->d_inode->i_ino);
324}
325
1da177e4 326static struct dentry_operations sockfs_dentry_operations = {
89bddce5 327 .d_delete = sockfs_delete_dentry,
c23fbb6b 328 .d_dname = sockfs_dname,
1da177e4
LT
329};
330
331/*
332 * Obtains the first available file descriptor and sets it up for use.
333 *
39d8c1b6
DM
334 * These functions create file structures and maps them to fd space
335 * of the current process. On success it returns file descriptor
1da177e4
LT
336 * and file struct implicitly stored in sock->file.
337 * Note that another thread may close file descriptor before we return
338 * from this function. We use the fact that now we do not refer
339 * to socket after mapping. If one day we will need it, this
340 * function will increment ref. count on file by 1.
341 *
342 * In any case returned fd MAY BE not valid!
343 * This race condition is unavoidable
344 * with shared fd spaces, we cannot solve it inside kernel,
345 * but we take care of internal coherence yet.
346 */
347
39d8c1b6 348static int sock_alloc_fd(struct file **filep)
1da177e4
LT
349{
350 int fd;
1da177e4
LT
351
352 fd = get_unused_fd();
39d8c1b6 353 if (likely(fd >= 0)) {
1da177e4
LT
354 struct file *file = get_empty_filp();
355
39d8c1b6
DM
356 *filep = file;
357 if (unlikely(!file)) {
1da177e4 358 put_unused_fd(fd);
39d8c1b6 359 return -ENFILE;
1da177e4 360 }
39d8c1b6
DM
361 } else
362 *filep = NULL;
363 return fd;
364}
1da177e4 365
39d8c1b6
DM
366static int sock_attach_fd(struct socket *sock, struct file *file)
367{
c23fbb6b 368 struct qstr name = { .name = "" };
39d8c1b6 369
c23fbb6b 370 file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
3126a42c 371 if (unlikely(!file->f_path.dentry))
39d8c1b6
DM
372 return -ENOMEM;
373
3126a42c 374 file->f_path.dentry->d_op = &sockfs_dentry_operations;
304e61e6
ED
375 /*
376 * We dont want to push this dentry into global dentry hash table.
377 * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
378 * This permits a working /proc/$pid/fd/XXX on sockets
379 */
3126a42c
JS
380 file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED;
381 d_instantiate(file->f_path.dentry, SOCK_INODE(sock));
382 file->f_path.mnt = mntget(sock_mnt);
383 file->f_mapping = file->f_path.dentry->d_inode->i_mapping;
39d8c1b6
DM
384
385 sock->file = file;
386 file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
387 file->f_mode = FMODE_READ | FMODE_WRITE;
388 file->f_flags = O_RDWR;
389 file->f_pos = 0;
390 file->private_data = sock;
1da177e4 391
39d8c1b6
DM
392 return 0;
393}
394
/*
 *	Give @sock a file descriptor in the current process.
 *
 *	Returns the new descriptor, or a negative error from
 *	sock_alloc_fd() or sock_attach_fd().  When attaching fails the
 *	file structure and the descriptor are both released again.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
412
6cb153ca
BL
413static struct socket *sock_from_file(struct file *file, int *err)
414{
6cb153ca
BL
415 if (file->f_op == &socket_file_ops)
416 return file->private_data; /* set in sock_map_fd */
417
23bb80d2
ED
418 *err = -ENOTSOCK;
419 return NULL;
6cb153ca
BL
420}
421
1da177e4
LT
422/**
423 * sockfd_lookup - Go from a file number to its socket slot
424 * @fd: file handle
425 * @err: pointer to an error code return
426 *
427 * The file handle passed in is locked and the socket it is bound
428 * too is returned. If an error occurs the err pointer is overwritten
429 * with a negative errno code and NULL is returned. The function checks
430 * for both invalid handles and passing a handle which is not a socket.
431 *
432 * On a success the socket object pointer is returned.
433 */
434
435struct socket *sockfd_lookup(int fd, int *err)
436{
437 struct file *file;
1da177e4
LT
438 struct socket *sock;
439
89bddce5
SH
440 file = fget(fd);
441 if (!file) {
1da177e4
LT
442 *err = -EBADF;
443 return NULL;
444 }
89bddce5 445
6cb153ca
BL
446 sock = sock_from_file(file, err);
447 if (!sock)
1da177e4 448 fput(file);
6cb153ca
BL
449 return sock;
450}
1da177e4 451
6cb153ca
BL
452static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
453{
454 struct file *file;
455 struct socket *sock;
456
3672558c 457 *err = -EBADF;
6cb153ca
BL
458 file = fget_light(fd, fput_needed);
459 if (file) {
460 sock = sock_from_file(file, err);
461 if (sock)
462 return sock;
463 fput_light(file, *fput_needed);
1da177e4 464 }
6cb153ca 465 return NULL;
1da177e4
LT
466}
467
468/**
469 * sock_alloc - allocate a socket
89bddce5 470 *
1da177e4
LT
471 * Allocate a new inode and socket object. The two are bound together
472 * and initialised. The socket is then returned. If we are out of inodes
473 * NULL is returned.
474 */
475
476static struct socket *sock_alloc(void)
477{
89bddce5
SH
478 struct inode *inode;
479 struct socket *sock;
1da177e4
LT
480
481 inode = new_inode(sock_mnt->mnt_sb);
482 if (!inode)
483 return NULL;
484
485 sock = SOCKET_I(inode);
486
89bddce5 487 inode->i_mode = S_IFSOCK | S_IRWXUGO;
1da177e4
LT
488 inode->i_uid = current->fsuid;
489 inode->i_gid = current->fsgid;
490
491 get_cpu_var(sockets_in_use)++;
492 put_cpu_var(sockets_in_use);
493 return sock;
494}
495
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
506
4b6f5d20 507const struct file_operations bad_sock_fops = {
1da177e4
LT
508 .owner = THIS_MODULE,
509 .open = sock_no_open,
510};
511
512/**
513 * sock_release - close a socket
514 * @sock: socket to close
515 *
516 * The socket is released from the protocol stack if it has a release
517 * callback, and the inode is then released if the socket is bound to
89bddce5 518 * an inode not a file.
1da177e4 519 */
89bddce5 520
1da177e4
LT
521void sock_release(struct socket *sock)
522{
523 if (sock->ops) {
524 struct module *owner = sock->ops->owner;
525
526 sock->ops->release(sock);
527 sock->ops = NULL;
528 module_put(owner);
529 }
530
531 if (sock->fasync_list)
532 printk(KERN_ERR "sock_release: fasync list not empty!\n");
533
534 get_cpu_var(sockets_in_use)--;
535 put_cpu_var(sockets_in_use);
536 if (!sock->file) {
537 iput(SOCK_INODE(sock));
538 return;
539 }
89bddce5 540 sock->file = NULL;
1da177e4
LT
541}
542
89bddce5 543static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
544 struct msghdr *msg, size_t size)
545{
546 struct sock_iocb *si = kiocb_to_siocb(iocb);
547 int err;
548
549 si->sock = sock;
550 si->scm = NULL;
551 si->msg = msg;
552 si->size = size;
553
554 err = security_socket_sendmsg(sock, msg, size);
555 if (err)
556 return err;
557
558 return sock->ops->sendmsg(iocb, sock, msg, size);
559}
560
561int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
562{
563 struct kiocb iocb;
564 struct sock_iocb siocb;
565 int ret;
566
567 init_sync_kiocb(&iocb, NULL);
568 iocb.private = &siocb;
569 ret = __sock_sendmsg(&iocb, sock, msg, size);
570 if (-EIOCBQUEUED == ret)
571 ret = wait_on_sync_kiocb(&iocb);
572 return ret;
573}
574
575int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
576 struct kvec *vec, size_t num, size_t size)
577{
578 mm_segment_t oldfs = get_fs();
579 int result;
580
581 set_fs(KERNEL_DS);
582 /*
583 * the following is safe, since for compiler definitions of kvec and
584 * iovec are identical, yielding the same in-core layout and alignment
585 */
89bddce5 586 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
587 msg->msg_iovlen = num;
588 result = sock_sendmsg(sock, msg, size);
589 set_fs(oldfs);
590 return result;
591}
592
92f37fd2
ED
593/*
594 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
595 */
596void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
597 struct sk_buff *skb)
598{
599 ktime_t kt = skb->tstamp;
600
601 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
602 struct timeval tv;
603 /* Race occurred between timestamp enabling and packet
604 receiving. Fill in the current time for now. */
605 if (kt.tv64 == 0)
606 kt = ktime_get_real();
607 skb->tstamp = kt;
608 tv = ktime_to_timeval(kt);
609 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
610 } else {
611 struct timespec ts;
612 /* Race occurred between timestamp enabling and packet
613 receiving. Fill in the current time for now. */
614 if (kt.tv64 == 0)
615 kt = ktime_get_real();
616 skb->tstamp = kt;
617 ts = ktime_to_timespec(kt);
618 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
619 }
620}
621
7c81fd8b
ACM
622EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
623
89bddce5 624static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
625 struct msghdr *msg, size_t size, int flags)
626{
627 int err;
628 struct sock_iocb *si = kiocb_to_siocb(iocb);
629
630 si->sock = sock;
631 si->scm = NULL;
632 si->msg = msg;
633 si->size = size;
634 si->flags = flags;
635
636 err = security_socket_recvmsg(sock, msg, size, flags);
637 if (err)
638 return err;
639
640 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
641}
642
89bddce5 643int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
644 size_t size, int flags)
645{
646 struct kiocb iocb;
647 struct sock_iocb siocb;
648 int ret;
649
89bddce5 650 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
651 iocb.private = &siocb;
652 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
653 if (-EIOCBQUEUED == ret)
654 ret = wait_on_sync_kiocb(&iocb);
655 return ret;
656}
657
89bddce5
SH
658int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
659 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
660{
661 mm_segment_t oldfs = get_fs();
662 int result;
663
664 set_fs(KERNEL_DS);
665 /*
666 * the following is safe, since for compiler definitions of kvec and
667 * iovec are identical, yielding the same in-core layout and alignment
668 */
89bddce5 669 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
670 result = sock_recvmsg(sock, msg, size, flags);
671 set_fs(oldfs);
672 return result;
673}
674
675static void sock_aio_dtor(struct kiocb *iocb)
676{
677 kfree(iocb->private);
678}
679
ce1d4d3e
CH
680static ssize_t sock_sendpage(struct file *file, struct page *page,
681 int offset, size_t size, loff_t *ppos, int more)
1da177e4 682{
1da177e4
LT
683 struct socket *sock;
684 int flags;
685
ce1d4d3e
CH
686 sock = file->private_data;
687
688 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
689 if (more)
690 flags |= MSG_MORE;
691
692 return sock->ops->sendpage(sock, page, offset, size, flags);
693}
1da177e4 694
ce1d4d3e 695static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 696 struct sock_iocb *siocb)
ce1d4d3e
CH
697{
698 if (!is_sync_kiocb(iocb)) {
699 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
700 if (!siocb)
701 return NULL;
1da177e4
LT
702 iocb->ki_dtor = sock_aio_dtor;
703 }
1da177e4 704
ce1d4d3e 705 siocb->kiocb = iocb;
ce1d4d3e
CH
706 iocb->private = siocb;
707 return siocb;
1da177e4
LT
708}
709
ce1d4d3e 710static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
711 struct file *file, const struct iovec *iov,
712 unsigned long nr_segs)
ce1d4d3e
CH
713{
714 struct socket *sock = file->private_data;
715 size_t size = 0;
716 int i;
1da177e4 717
89bddce5
SH
718 for (i = 0; i < nr_segs; i++)
719 size += iov[i].iov_len;
1da177e4 720
ce1d4d3e
CH
721 msg->msg_name = NULL;
722 msg->msg_namelen = 0;
723 msg->msg_control = NULL;
724 msg->msg_controllen = 0;
89bddce5 725 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
726 msg->msg_iovlen = nr_segs;
727 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
728
729 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
730}
731
027445c3
BP
732static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
733 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
734{
735 struct sock_iocb siocb, *x;
736
1da177e4
LT
737 if (pos != 0)
738 return -ESPIPE;
027445c3
BP
739
740 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
741 return 0;
742
027445c3
BP
743
744 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
745 if (!x)
746 return -ENOMEM;
027445c3 747 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
748}
749
ce1d4d3e 750static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
751 struct file *file, const struct iovec *iov,
752 unsigned long nr_segs)
1da177e4 753{
ce1d4d3e
CH
754 struct socket *sock = file->private_data;
755 size_t size = 0;
756 int i;
1da177e4 757
89bddce5
SH
758 for (i = 0; i < nr_segs; i++)
759 size += iov[i].iov_len;
1da177e4 760
ce1d4d3e
CH
761 msg->msg_name = NULL;
762 msg->msg_namelen = 0;
763 msg->msg_control = NULL;
764 msg->msg_controllen = 0;
89bddce5 765 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
766 msg->msg_iovlen = nr_segs;
767 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
768 if (sock->type == SOCK_SEQPACKET)
769 msg->msg_flags |= MSG_EOR;
1da177e4 770
ce1d4d3e 771 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
772}
773
027445c3
BP
774static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
775 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
776{
777 struct sock_iocb siocb, *x;
1da177e4 778
ce1d4d3e
CH
779 if (pos != 0)
780 return -ESPIPE;
027445c3
BP
781
782 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
ce1d4d3e 783 return 0;
1da177e4 784
027445c3 785 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
786 if (!x)
787 return -ENOMEM;
1da177e4 788
027445c3 789 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
790}
791
1da177e4
LT
792/*
793 * Atomic setting of ioctl hooks to avoid race
794 * with module unload.
795 */
796
4a3e2f71 797static DEFINE_MUTEX(br_ioctl_mutex);
89bddce5 798static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
1da177e4 799
89bddce5 800void brioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 801{
4a3e2f71 802 mutex_lock(&br_ioctl_mutex);
1da177e4 803 br_ioctl_hook = hook;
4a3e2f71 804 mutex_unlock(&br_ioctl_mutex);
1da177e4 805}
89bddce5 806
1da177e4
LT
807EXPORT_SYMBOL(brioctl_set);
808
4a3e2f71 809static DEFINE_MUTEX(vlan_ioctl_mutex);
89bddce5 810static int (*vlan_ioctl_hook) (void __user *arg);
1da177e4 811
89bddce5 812void vlan_ioctl_set(int (*hook) (void __user *))
1da177e4 813{
4a3e2f71 814 mutex_lock(&vlan_ioctl_mutex);
1da177e4 815 vlan_ioctl_hook = hook;
4a3e2f71 816 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 817}
89bddce5 818
1da177e4
LT
819EXPORT_SYMBOL(vlan_ioctl_set);
820
4a3e2f71 821static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 822static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 823
89bddce5 824void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 825{
4a3e2f71 826 mutex_lock(&dlci_ioctl_mutex);
1da177e4 827 dlci_ioctl_hook = hook;
4a3e2f71 828 mutex_unlock(&dlci_ioctl_mutex);
1da177e4 829}
89bddce5 830
1da177e4
LT
831EXPORT_SYMBOL(dlci_ioctl_set);
832
833/*
834 * With an ioctl, arg may well be a user mode pointer, but we don't know
835 * what to do with it - that's up to the protocol still.
836 */
837
838static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
839{
840 struct socket *sock;
841 void __user *argp = (void __user *)arg;
842 int pid, err;
843
b69aee04 844 sock = file->private_data;
1da177e4
LT
845 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
846 err = dev_ioctl(cmd, argp);
847 } else
d86b5e0e 848#ifdef CONFIG_WIRELESS_EXT
1da177e4
LT
849 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
850 err = dev_ioctl(cmd, argp);
851 } else
89bddce5
SH
852#endif /* CONFIG_WIRELESS_EXT */
853 switch (cmd) {
1da177e4
LT
854 case FIOSETOWN:
855 case SIOCSPGRP:
856 err = -EFAULT;
857 if (get_user(pid, (int __user *)argp))
858 break;
859 err = f_setown(sock->file, pid, 1);
860 break;
861 case FIOGETOWN:
862 case SIOCGPGRP:
609d7fa9 863 err = put_user(f_getown(sock->file),
89bddce5 864 (int __user *)argp);
1da177e4
LT
865 break;
866 case SIOCGIFBR:
867 case SIOCSIFBR:
868 case SIOCBRADDBR:
869 case SIOCBRDELBR:
870 err = -ENOPKG;
871 if (!br_ioctl_hook)
872 request_module("bridge");
873
4a3e2f71 874 mutex_lock(&br_ioctl_mutex);
89bddce5 875 if (br_ioctl_hook)
1da177e4 876 err = br_ioctl_hook(cmd, argp);
4a3e2f71 877 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
878 break;
879 case SIOCGIFVLAN:
880 case SIOCSIFVLAN:
881 err = -ENOPKG;
882 if (!vlan_ioctl_hook)
883 request_module("8021q");
884
4a3e2f71 885 mutex_lock(&vlan_ioctl_mutex);
1da177e4
LT
886 if (vlan_ioctl_hook)
887 err = vlan_ioctl_hook(argp);
4a3e2f71 888 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 889 break;
1da177e4
LT
890 case SIOCADDDLCI:
891 case SIOCDELDLCI:
892 err = -ENOPKG;
893 if (!dlci_ioctl_hook)
894 request_module("dlci");
895
896 if (dlci_ioctl_hook) {
4a3e2f71 897 mutex_lock(&dlci_ioctl_mutex);
1da177e4 898 err = dlci_ioctl_hook(cmd, argp);
4a3e2f71 899 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
900 }
901 break;
902 default:
903 err = sock->ops->ioctl(sock, cmd, arg);
b5e5fa5e
CH
904
905 /*
906 * If this ioctl is unknown try to hand it down
907 * to the NIC driver.
908 */
909 if (err == -ENOIOCTLCMD)
910 err = dev_ioctl(cmd, argp);
1da177e4 911 break;
89bddce5 912 }
1da177e4
LT
913 return err;
914}
915
916int sock_create_lite(int family, int type, int protocol, struct socket **res)
917{
918 int err;
919 struct socket *sock = NULL;
89bddce5 920
1da177e4
LT
921 err = security_socket_create(family, type, protocol, 1);
922 if (err)
923 goto out;
924
925 sock = sock_alloc();
926 if (!sock) {
927 err = -ENOMEM;
928 goto out;
929 }
930
1da177e4 931 sock->type = type;
7420ed23
VY
932 err = security_socket_post_create(sock, family, type, protocol, 1);
933 if (err)
934 goto out_release;
935
1da177e4
LT
936out:
937 *res = sock;
938 return err;
7420ed23
VY
939out_release:
940 sock_release(sock);
941 sock = NULL;
942 goto out;
1da177e4
LT
943}
944
945/* No kernel lock held - perfect */
89bddce5 946static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
947{
948 struct socket *sock;
949
950 /*
89bddce5 951 * We can't return errors to poll, so it's either yes or no.
1da177e4 952 */
b69aee04 953 sock = file->private_data;
1da177e4
LT
954 return sock->ops->poll(file, sock, wait);
955}
956
89bddce5 957static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 958{
b69aee04 959 struct socket *sock = file->private_data;
1da177e4
LT
960
961 return sock->ops->mmap(file, sock, vma);
962}
963
20380731 964static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
965{
966 /*
89bddce5
SH
967 * It was possible the inode is NULL we were
968 * closing an unfinished socket.
1da177e4
LT
969 */
970
89bddce5 971 if (!inode) {
1da177e4
LT
972 printk(KERN_DEBUG "sock_close: NULL inode\n");
973 return 0;
974 }
975 sock_fasync(-1, filp, 0);
976 sock_release(SOCKET_I(inode));
977 return 0;
978}
979
980/*
981 * Update the socket async list
982 *
983 * Fasync_list locking strategy.
984 *
985 * 1. fasync_list is modified only under process context socket lock
986 * i.e. under semaphore.
987 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
988 * or under socket lock.
989 * 3. fasync_list can be used from softirq context, so that
990 * modification under socket lock have to be enhanced with
991 * write_lock_bh(&sk->sk_callback_lock).
992 * --ANK (990710)
993 */
994
995static int sock_fasync(int fd, struct file *filp, int on)
996{
89bddce5 997 struct fasync_struct *fa, *fna = NULL, **prev;
1da177e4
LT
998 struct socket *sock;
999 struct sock *sk;
1000
89bddce5 1001 if (on) {
8b3a7005 1002 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
89bddce5 1003 if (fna == NULL)
1da177e4
LT
1004 return -ENOMEM;
1005 }
1006
b69aee04 1007 sock = filp->private_data;
1da177e4 1008
89bddce5
SH
1009 sk = sock->sk;
1010 if (sk == NULL) {
1da177e4
LT
1011 kfree(fna);
1012 return -EINVAL;
1013 }
1014
1015 lock_sock(sk);
1016
89bddce5 1017 prev = &(sock->fasync_list);
1da177e4 1018
89bddce5
SH
1019 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1020 if (fa->fa_file == filp)
1da177e4
LT
1021 break;
1022
89bddce5
SH
1023 if (on) {
1024 if (fa != NULL) {
1da177e4 1025 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1026 fa->fa_fd = fd;
1da177e4
LT
1027 write_unlock_bh(&sk->sk_callback_lock);
1028
1029 kfree(fna);
1030 goto out;
1031 }
89bddce5
SH
1032 fna->fa_file = filp;
1033 fna->fa_fd = fd;
1034 fna->magic = FASYNC_MAGIC;
1035 fna->fa_next = sock->fasync_list;
1da177e4 1036 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1037 sock->fasync_list = fna;
1da177e4 1038 write_unlock_bh(&sk->sk_callback_lock);
89bddce5
SH
1039 } else {
1040 if (fa != NULL) {
1da177e4 1041 write_lock_bh(&sk->sk_callback_lock);
89bddce5 1042 *prev = fa->fa_next;
1da177e4
LT
1043 write_unlock_bh(&sk->sk_callback_lock);
1044 kfree(fa);
1045 }
1046 }
1047
1048out:
1049 release_sock(sock->sk);
1050 return 0;
1051}
1052
1053/* This function may be called only under socket lock or callback_lock */
1054
1055int sock_wake_async(struct socket *sock, int how, int band)
1056{
1057 if (!sock || !sock->fasync_list)
1058 return -1;
89bddce5 1059 switch (how) {
1da177e4 1060 case 1:
89bddce5 1061
1da177e4
LT
1062 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1063 break;
1064 goto call_kill;
1065 case 2:
1066 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1067 break;
1068 /* fall through */
1069 case 0:
89bddce5 1070call_kill:
1da177e4
LT
1071 __kill_fasync(sock->fasync_list, SIGIO, band);
1072 break;
1073 case 3:
1074 __kill_fasync(sock->fasync_list, SIGURG, band);
1075 }
1076 return 0;
1077}
1078
89bddce5
SH
1079static int __sock_create(int family, int type, int protocol,
1080 struct socket **res, int kern)
1da177e4
LT
1081{
1082 int err;
1083 struct socket *sock;
55737fda 1084 const struct net_proto_family *pf;
1da177e4
LT
1085
1086 /*
89bddce5 1087 * Check protocol is in range
1da177e4
LT
1088 */
1089 if (family < 0 || family >= NPROTO)
1090 return -EAFNOSUPPORT;
1091 if (type < 0 || type >= SOCK_MAX)
1092 return -EINVAL;
1093
1094 /* Compatibility.
1095
1096 This uglymoron is moved from INET layer to here to avoid
1097 deadlock in module load.
1098 */
1099 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1100 static int warned;
1da177e4
LT
1101 if (!warned) {
1102 warned = 1;
89bddce5
SH
1103 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1104 current->comm);
1da177e4
LT
1105 }
1106 family = PF_PACKET;
1107 }
1108
1109 err = security_socket_create(family, type, protocol, kern);
1110 if (err)
1111 return err;
89bddce5 1112
55737fda
SH
1113 /*
1114 * Allocate the socket and allow the family to set things up. if
1115 * the protocol is 0, the family is instructed to select an appropriate
1116 * default.
1117 */
1118 sock = sock_alloc();
1119 if (!sock) {
1120 if (net_ratelimit())
1121 printk(KERN_WARNING "socket: no more sockets\n");
1122 return -ENFILE; /* Not exactly a match, but its the
1123 closest posix thing */
1124 }
1125
1126 sock->type = type;
1127
1da177e4 1128#if defined(CONFIG_KMOD)
89bddce5
SH
1129 /* Attempt to load a protocol module if the find failed.
1130 *
1131 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1132 * requested real, full-featured networking support upon configuration.
1133 * Otherwise module support will break!
1134 */
55737fda 1135 if (net_families[family] == NULL)
89bddce5 1136 request_module("net-pf-%d", family);
1da177e4
LT
1137#endif
1138
55737fda
SH
1139 rcu_read_lock();
1140 pf = rcu_dereference(net_families[family]);
1141 err = -EAFNOSUPPORT;
1142 if (!pf)
1143 goto out_release;
1da177e4
LT
1144
1145 /*
1146 * We will call the ->create function, that possibly is in a loadable
1147 * module, so we have to bump that loadable module refcnt first.
1148 */
55737fda 1149 if (!try_module_get(pf->owner))
1da177e4
LT
1150 goto out_release;
1151
55737fda
SH
1152 /* Now protected by module ref count */
1153 rcu_read_unlock();
1154
1155 err = pf->create(sock, protocol);
1156 if (err < 0)
1da177e4 1157 goto out_module_put;
a79af59e 1158
1da177e4
LT
1159 /*
1160 * Now to bump the refcnt of the [loadable] module that owns this
1161 * socket at sock_release time we decrement its refcnt.
1162 */
55737fda
SH
1163 if (!try_module_get(sock->ops->owner))
1164 goto out_module_busy;
1165
1da177e4
LT
1166 /*
1167 * Now that we're done with the ->create function, the [loadable]
1168 * module can have its refcnt decremented
1169 */
55737fda 1170 module_put(pf->owner);
7420ed23
VY
1171 err = security_socket_post_create(sock, family, type, protocol, kern);
1172 if (err)
1173 goto out_release;
55737fda 1174 *res = sock;
1da177e4 1175
55737fda
SH
1176 return 0;
1177
1178out_module_busy:
1179 err = -EAFNOSUPPORT;
1da177e4 1180out_module_put:
55737fda
SH
1181 sock->ops = NULL;
1182 module_put(pf->owner);
1183out_sock_release:
1da177e4 1184 sock_release(sock);
55737fda
SH
1185 return err;
1186
1187out_release:
1188 rcu_read_unlock();
1189 goto out_sock_release;
1da177e4
LT
1190}
1191
/*
 * Create a socket via __sock_create() with its "kern" argument set to 0.
 * Returns whatever __sock_create() returns.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1196
/*
 * Create a socket via __sock_create() with its "kern" argument set to 1.
 * Returns whatever __sock_create() returns.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1201
1202asmlinkage long sys_socket(int family, int type, int protocol)
1203{
1204 int retval;
1205 struct socket *sock;
1206
1207 retval = sock_create(family, type, protocol, &sock);
1208 if (retval < 0)
1209 goto out;
1210
1211 retval = sock_map_fd(sock);
1212 if (retval < 0)
1213 goto out_release;
1214
1215out:
1216 /* It may be already another descriptor 8) Not kernel problem. */
1217 return retval;
1218
1219out_release:
1220 sock_release(sock);
1221 return retval;
1222}
1223
1224/*
1225 * Create a pair of connected sockets.
1226 */
1227
89bddce5
SH
1228asmlinkage long sys_socketpair(int family, int type, int protocol,
1229 int __user *usockvec)
1da177e4
LT
1230{
1231 struct socket *sock1, *sock2;
1232 int fd1, fd2, err;
db349509 1233 struct file *newfile1, *newfile2;
1da177e4
LT
1234
1235 /*
1236 * Obtain the first socket and check if the underlying protocol
1237 * supports the socketpair call.
1238 */
1239
1240 err = sock_create(family, type, protocol, &sock1);
1241 if (err < 0)
1242 goto out;
1243
1244 err = sock_create(family, type, protocol, &sock2);
1245 if (err < 0)
1246 goto out_release_1;
1247
1248 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1249 if (err < 0)
1da177e4
LT
1250 goto out_release_both;
1251
db349509
AV
1252 fd1 = sock_alloc_fd(&newfile1);
1253 if (unlikely(fd1 < 0))
1254 goto out_release_both;
1da177e4 1255
db349509
AV
1256 fd2 = sock_alloc_fd(&newfile2);
1257 if (unlikely(fd2 < 0)) {
1258 put_filp(newfile1);
1259 put_unused_fd(fd1);
1da177e4 1260 goto out_release_both;
db349509 1261 }
1da177e4 1262
db349509
AV
1263 err = sock_attach_fd(sock1, newfile1);
1264 if (unlikely(err < 0)) {
1265 goto out_fd2;
1266 }
1267
1268 err = sock_attach_fd(sock2, newfile2);
1269 if (unlikely(err < 0)) {
1270 fput(newfile1);
1271 goto out_fd1;
1272 }
1273
1274 err = audit_fd_pair(fd1, fd2);
1275 if (err < 0) {
1276 fput(newfile1);
1277 fput(newfile2);
1278 goto out_fd;
1279 }
1da177e4 1280
db349509
AV
1281 fd_install(fd1, newfile1);
1282 fd_install(fd2, newfile2);
1da177e4
LT
1283 /* fd1 and fd2 may be already another descriptors.
1284 * Not kernel problem.
1285 */
1286
89bddce5 1287 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1288 if (!err)
1289 err = put_user(fd2, &usockvec[1]);
1290 if (!err)
1291 return 0;
1292
1293 sys_close(fd2);
1294 sys_close(fd1);
1295 return err;
1296
1da177e4 1297out_release_both:
89bddce5 1298 sock_release(sock2);
1da177e4 1299out_release_1:
89bddce5 1300 sock_release(sock1);
1da177e4
LT
1301out:
1302 return err;
db349509
AV
1303
1304out_fd2:
1305 put_filp(newfile1);
1306 sock_release(sock1);
1307out_fd1:
1308 put_filp(newfile2);
1309 sock_release(sock2);
1310out_fd:
1311 put_unused_fd(fd1);
1312 put_unused_fd(fd2);
1313 goto out;
1da177e4
LT
1314}
1315
1da177e4
LT
1316/*
1317 * Bind a name to a socket. Nothing much to do here since it's
1318 * the protocol's responsibility to handle the local address.
1319 *
1320 * We move the socket address to kernel space before we call
1321 * the protocol layer (having also checked the address is ok).
1322 */
1323
1324asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1325{
1326 struct socket *sock;
1327 char address[MAX_SOCK_ADDR];
6cb153ca 1328 int err, fput_needed;
1da177e4 1329
89bddce5 1330 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1331 if (sock) {
89bddce5
SH
1332 err = move_addr_to_kernel(umyaddr, addrlen, address);
1333 if (err >= 0) {
1334 err = security_socket_bind(sock,
1335 (struct sockaddr *)address,
1336 addrlen);
6cb153ca
BL
1337 if (!err)
1338 err = sock->ops->bind(sock,
89bddce5
SH
1339 (struct sockaddr *)
1340 address, addrlen);
1da177e4 1341 }
6cb153ca 1342 fput_light(sock->file, fput_needed);
89bddce5 1343 }
1da177e4
LT
1344 return err;
1345}
1346
1da177e4
LT
1347/*
1348 * Perform a listen. Basically, we allow the protocol to do anything
1349 * necessary for a listen, and if that works, we mark the socket as
1350 * ready for listening.
1351 */
1352
7a42c217 1353int sysctl_somaxconn __read_mostly = SOMAXCONN;
1da177e4
LT
1354
1355asmlinkage long sys_listen(int fd, int backlog)
1356{
1357 struct socket *sock;
6cb153ca 1358 int err, fput_needed;
89bddce5
SH
1359
1360 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1361 if (sock) {
1362 if ((unsigned)backlog > sysctl_somaxconn)
1da177e4
LT
1363 backlog = sysctl_somaxconn;
1364
1365 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1366 if (!err)
1367 err = sock->ops->listen(sock, backlog);
1da177e4 1368
6cb153ca 1369 fput_light(sock->file, fput_needed);
1da177e4
LT
1370 }
1371 return err;
1372}
1373
1da177e4
LT
1374/*
1375 * For accept, we attempt to create a new socket, set up the link
1376 * with the client, wake up the client, then return the new
1377 * connected fd. We collect the address of the connector in kernel
1378 * space and move it to user at the very end. This is unclean because
1379 * we open the socket then return an error.
1380 *
1381 * 1003.1g adds the ability to recvmsg() to query connection pending
1382 * status to recvmsg. We need to add that support in a way thats
1383 * clean when we restucture accept also.
1384 */
1385
89bddce5
SH
1386asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
1387 int __user *upeer_addrlen)
1da177e4
LT
1388{
1389 struct socket *sock, *newsock;
39d8c1b6 1390 struct file *newfile;
6cb153ca 1391 int err, len, newfd, fput_needed;
1da177e4
LT
1392 char address[MAX_SOCK_ADDR];
1393
6cb153ca 1394 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1395 if (!sock)
1396 goto out;
1397
1398 err = -ENFILE;
89bddce5 1399 if (!(newsock = sock_alloc()))
1da177e4
LT
1400 goto out_put;
1401
1402 newsock->type = sock->type;
1403 newsock->ops = sock->ops;
1404
1da177e4
LT
1405 /*
1406 * We don't need try_module_get here, as the listening socket (sock)
1407 * has the protocol module (sock->ops->owner) held.
1408 */
1409 __module_get(newsock->ops->owner);
1410
39d8c1b6
DM
1411 newfd = sock_alloc_fd(&newfile);
1412 if (unlikely(newfd < 0)) {
1413 err = newfd;
9a1875e6
DM
1414 sock_release(newsock);
1415 goto out_put;
39d8c1b6
DM
1416 }
1417
1418 err = sock_attach_fd(newsock, newfile);
1419 if (err < 0)
79f4f642 1420 goto out_fd_simple;
39d8c1b6 1421
a79af59e
FF
1422 err = security_socket_accept(sock, newsock);
1423 if (err)
39d8c1b6 1424 goto out_fd;
a79af59e 1425
1da177e4
LT
1426 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1427 if (err < 0)
39d8c1b6 1428 goto out_fd;
1da177e4
LT
1429
1430 if (upeer_sockaddr) {
89bddce5
SH
1431 if (newsock->ops->getname(newsock, (struct sockaddr *)address,
1432 &len, 2) < 0) {
1da177e4 1433 err = -ECONNABORTED;
39d8c1b6 1434 goto out_fd;
1da177e4 1435 }
89bddce5
SH
1436 err = move_addr_to_user(address, len, upeer_sockaddr,
1437 upeer_addrlen);
1da177e4 1438 if (err < 0)
39d8c1b6 1439 goto out_fd;
1da177e4
LT
1440 }
1441
1442 /* File flags are not inherited via accept() unlike another OSes. */
1443
39d8c1b6
DM
1444 fd_install(newfd, newfile);
1445 err = newfd;
1da177e4
LT
1446
1447 security_socket_post_accept(sock, newsock);
1448
1449out_put:
6cb153ca 1450 fput_light(sock->file, fput_needed);
1da177e4
LT
1451out:
1452 return err;
79f4f642
AD
1453out_fd_simple:
1454 sock_release(newsock);
1455 put_filp(newfile);
1456 put_unused_fd(newfd);
1457 goto out_put;
39d8c1b6 1458out_fd:
9606a216 1459 fput(newfile);
39d8c1b6 1460 put_unused_fd(newfd);
1da177e4
LT
1461 goto out_put;
1462}
1463
1da177e4
LT
1464/*
1465 * Attempt to connect to a socket with the server address. The address
1466 * is in user space so we verify it is OK and move it to kernel space.
1467 *
1468 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1469 * break bindings
1470 *
1471 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1472 * other SEQPACKET protocols that take time to connect() as it doesn't
1473 * include the -EINPROGRESS status for such sockets.
1474 */
1475
89bddce5
SH
1476asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
1477 int addrlen)
1da177e4
LT
1478{
1479 struct socket *sock;
1480 char address[MAX_SOCK_ADDR];
6cb153ca 1481 int err, fput_needed;
1da177e4 1482
6cb153ca 1483 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1484 if (!sock)
1485 goto out;
1486 err = move_addr_to_kernel(uservaddr, addrlen, address);
1487 if (err < 0)
1488 goto out_put;
1489
89bddce5
SH
1490 err =
1491 security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1da177e4
LT
1492 if (err)
1493 goto out_put;
1494
89bddce5 1495 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1da177e4
LT
1496 sock->file->f_flags);
1497out_put:
6cb153ca 1498 fput_light(sock->file, fput_needed);
1da177e4
LT
1499out:
1500 return err;
1501}
1502
1503/*
1504 * Get the local address ('name') of a socket object. Move the obtained
1505 * name to user space.
1506 */
1507
89bddce5
SH
1508asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
1509 int __user *usockaddr_len)
1da177e4
LT
1510{
1511 struct socket *sock;
1512 char address[MAX_SOCK_ADDR];
6cb153ca 1513 int len, err, fput_needed;
89bddce5 1514
6cb153ca 1515 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1516 if (!sock)
1517 goto out;
1518
1519 err = security_socket_getsockname(sock);
1520 if (err)
1521 goto out_put;
1522
1523 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1524 if (err)
1525 goto out_put;
1526 err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1527
1528out_put:
6cb153ca 1529 fput_light(sock->file, fput_needed);
1da177e4
LT
1530out:
1531 return err;
1532}
1533
1534/*
1535 * Get the remote address ('name') of a socket object. Move the obtained
1536 * name to user space.
1537 */
1538
89bddce5
SH
1539asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1540 int __user *usockaddr_len)
1da177e4
LT
1541{
1542 struct socket *sock;
1543 char address[MAX_SOCK_ADDR];
6cb153ca 1544 int len, err, fput_needed;
1da177e4 1545
89bddce5
SH
1546 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1547 if (sock != NULL) {
1da177e4
LT
1548 err = security_socket_getpeername(sock);
1549 if (err) {
6cb153ca 1550 fput_light(sock->file, fput_needed);
1da177e4
LT
1551 return err;
1552 }
1553
89bddce5
SH
1554 err =
1555 sock->ops->getname(sock, (struct sockaddr *)address, &len,
1556 1);
1da177e4 1557 if (!err)
89bddce5
SH
1558 err = move_addr_to_user(address, len, usockaddr,
1559 usockaddr_len);
6cb153ca 1560 fput_light(sock->file, fput_needed);
1da177e4
LT
1561 }
1562 return err;
1563}
1564
1565/*
1566 * Send a datagram to a given address. We move the address into kernel
1567 * space and check the user space data area is readable before invoking
1568 * the protocol.
1569 */
1570
89bddce5
SH
1571asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
1572 unsigned flags, struct sockaddr __user *addr,
1573 int addr_len)
1da177e4
LT
1574{
1575 struct socket *sock;
1576 char address[MAX_SOCK_ADDR];
1577 int err;
1578 struct msghdr msg;
1579 struct iovec iov;
6cb153ca
BL
1580 int fput_needed;
1581 struct file *sock_file;
1582
1583 sock_file = fget_light(fd, &fput_needed);
4387ff75 1584 err = -EBADF;
6cb153ca 1585 if (!sock_file)
4387ff75 1586 goto out;
6cb153ca
BL
1587
1588 sock = sock_from_file(sock_file, &err);
1da177e4 1589 if (!sock)
6cb153ca 1590 goto out_put;
89bddce5
SH
1591 iov.iov_base = buff;
1592 iov.iov_len = len;
1593 msg.msg_name = NULL;
1594 msg.msg_iov = &iov;
1595 msg.msg_iovlen = 1;
1596 msg.msg_control = NULL;
1597 msg.msg_controllen = 0;
1598 msg.msg_namelen = 0;
6cb153ca 1599 if (addr) {
1da177e4
LT
1600 err = move_addr_to_kernel(addr, addr_len, address);
1601 if (err < 0)
1602 goto out_put;
89bddce5
SH
1603 msg.msg_name = address;
1604 msg.msg_namelen = addr_len;
1da177e4
LT
1605 }
1606 if (sock->file->f_flags & O_NONBLOCK)
1607 flags |= MSG_DONTWAIT;
1608 msg.msg_flags = flags;
1609 err = sock_sendmsg(sock, &msg, len);
1610
89bddce5 1611out_put:
6cb153ca 1612 fput_light(sock_file, fput_needed);
4387ff75 1613out:
1da177e4
LT
1614 return err;
1615}
1616
1617/*
89bddce5 1618 * Send a datagram down a socket.
1da177e4
LT
1619 */
1620
89bddce5 1621asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
1da177e4
LT
1622{
1623 return sys_sendto(fd, buff, len, flags, NULL, 0);
1624}
1625
1626/*
89bddce5 1627 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1628 * sender. We verify the buffers are writable and if needed move the
1629 * sender address from kernel to user space.
1630 */
1631
89bddce5
SH
1632asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
1633 unsigned flags, struct sockaddr __user *addr,
1634 int __user *addr_len)
1da177e4
LT
1635{
1636 struct socket *sock;
1637 struct iovec iov;
1638 struct msghdr msg;
1639 char address[MAX_SOCK_ADDR];
89bddce5 1640 int err, err2;
6cb153ca
BL
1641 struct file *sock_file;
1642 int fput_needed;
1643
1644 sock_file = fget_light(fd, &fput_needed);
4387ff75 1645 err = -EBADF;
6cb153ca 1646 if (!sock_file)
4387ff75 1647 goto out;
1da177e4 1648
6cb153ca 1649 sock = sock_from_file(sock_file, &err);
1da177e4 1650 if (!sock)
4387ff75 1651 goto out_put;
1da177e4 1652
89bddce5
SH
1653 msg.msg_control = NULL;
1654 msg.msg_controllen = 0;
1655 msg.msg_iovlen = 1;
1656 msg.msg_iov = &iov;
1657 iov.iov_len = size;
1658 iov.iov_base = ubuf;
1659 msg.msg_name = address;
1660 msg.msg_namelen = MAX_SOCK_ADDR;
1da177e4
LT
1661 if (sock->file->f_flags & O_NONBLOCK)
1662 flags |= MSG_DONTWAIT;
89bddce5 1663 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1664
89bddce5
SH
1665 if (err >= 0 && addr != NULL) {
1666 err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1667 if (err2 < 0)
1668 err = err2;
1da177e4 1669 }
4387ff75 1670out_put:
6cb153ca 1671 fput_light(sock_file, fput_needed);
4387ff75 1672out:
1da177e4
LT
1673 return err;
1674}
1675
1676/*
89bddce5 1677 * Receive a datagram from a socket.
1da177e4
LT
1678 */
1679
89bddce5
SH
1680asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1681 unsigned flags)
1da177e4
LT
1682{
1683 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1684}
1685
1686/*
1687 * Set a socket option. Because we don't know the option lengths we have
1688 * to pass the user mode parameter for the protocols to sort out.
1689 */
1690
89bddce5
SH
1691asmlinkage long sys_setsockopt(int fd, int level, int optname,
1692 char __user *optval, int optlen)
1da177e4 1693{
6cb153ca 1694 int err, fput_needed;
1da177e4
LT
1695 struct socket *sock;
1696
1697 if (optlen < 0)
1698 return -EINVAL;
89bddce5
SH
1699
1700 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1701 if (sock != NULL) {
1702 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1703 if (err)
1704 goto out_put;
1da177e4
LT
1705
1706 if (level == SOL_SOCKET)
89bddce5
SH
1707 err =
1708 sock_setsockopt(sock, level, optname, optval,
1709 optlen);
1da177e4 1710 else
89bddce5
SH
1711 err =
1712 sock->ops->setsockopt(sock, level, optname, optval,
1713 optlen);
6cb153ca
BL
1714out_put:
1715 fput_light(sock->file, fput_needed);
1da177e4
LT
1716 }
1717 return err;
1718}
1719
1720/*
1721 * Get a socket option. Because we don't know the option lengths we have
1722 * to pass a user mode parameter for the protocols to sort out.
1723 */
1724
89bddce5
SH
1725asmlinkage long sys_getsockopt(int fd, int level, int optname,
1726 char __user *optval, int __user *optlen)
1da177e4 1727{
6cb153ca 1728 int err, fput_needed;
1da177e4
LT
1729 struct socket *sock;
1730
89bddce5
SH
1731 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1732 if (sock != NULL) {
6cb153ca
BL
1733 err = security_socket_getsockopt(sock, level, optname);
1734 if (err)
1735 goto out_put;
1da177e4
LT
1736
1737 if (level == SOL_SOCKET)
89bddce5
SH
1738 err =
1739 sock_getsockopt(sock, level, optname, optval,
1740 optlen);
1da177e4 1741 else
89bddce5
SH
1742 err =
1743 sock->ops->getsockopt(sock, level, optname, optval,
1744 optlen);
6cb153ca
BL
1745out_put:
1746 fput_light(sock->file, fput_needed);
1da177e4
LT
1747 }
1748 return err;
1749}
1750
1da177e4
LT
1751/*
1752 * Shutdown a socket.
1753 */
1754
1755asmlinkage long sys_shutdown(int fd, int how)
1756{
6cb153ca 1757 int err, fput_needed;
1da177e4
LT
1758 struct socket *sock;
1759
89bddce5
SH
1760 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1761 if (sock != NULL) {
1da177e4 1762 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1763 if (!err)
1764 err = sock->ops->shutdown(sock, how);
1765 fput_light(sock->file, fput_needed);
1da177e4
LT
1766 }
1767 return err;
1768}
1769
89bddce5 1770/* A couple of helpful macros for getting the address of the 32/64 bit
1da177e4
LT
1771 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() is not self-contained: its expansion tests a variable
 * named `flags` against MSG_CMSG_COMPAT and, via token pasting, refers to
 * `<msg>_compat` next to `<msg>` itself, so all three must be in scope
 * wherever the macros are used.  The result is the address of @member in
 * whichever of the two structures is selected.
1772 */
1773#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
1774#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
1775#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
1776
1da177e4
LT
1777/*
1778 * BSD sendmsg interface
1779 */
1780
1781asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1782{
89bddce5
SH
1783 struct compat_msghdr __user *msg_compat =
1784 (struct compat_msghdr __user *)msg;
1da177e4
LT
1785 struct socket *sock;
1786 char address[MAX_SOCK_ADDR];
1787 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1788 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1789 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1790 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1791 unsigned char *ctl_buf = ctl;
1792 struct msghdr msg_sys;
1793 int err, ctl_len, iov_size, total_len;
6cb153ca 1794 int fput_needed;
89bddce5 1795
1da177e4
LT
1796 err = -EFAULT;
1797 if (MSG_CMSG_COMPAT & flags) {
1798 if (get_compat_msghdr(&msg_sys, msg_compat))
1799 return -EFAULT;
89bddce5
SH
1800 }
1801 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1802 return -EFAULT;
1803
6cb153ca 1804 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1805 if (!sock)
1da177e4
LT
1806 goto out;
1807
1808 /* do not move before msg_sys is valid */
1809 err = -EMSGSIZE;
1810 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1811 goto out_put;
1812
89bddce5 1813 /* Check whether to allocate the iovec area */
1da177e4
LT
1814 err = -ENOMEM;
1815 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1816 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1817 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1818 if (!iov)
1819 goto out_put;
1820 }
1821
1822 /* This will also move the address data into kernel space */
1823 if (MSG_CMSG_COMPAT & flags) {
1824 err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1825 } else
1826 err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
89bddce5 1827 if (err < 0)
1da177e4
LT
1828 goto out_freeiov;
1829 total_len = err;
1830
1831 err = -ENOBUFS;
1832
1833 if (msg_sys.msg_controllen > INT_MAX)
1834 goto out_freeiov;
89bddce5 1835 ctl_len = msg_sys.msg_controllen;
1da177e4 1836 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1837 err =
1838 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1839 sizeof(ctl));
1da177e4
LT
1840 if (err)
1841 goto out_freeiov;
1842 ctl_buf = msg_sys.msg_control;
8920e8f9 1843 ctl_len = msg_sys.msg_controllen;
1da177e4 1844 } else if (ctl_len) {
89bddce5 1845 if (ctl_len > sizeof(ctl)) {
1da177e4 1846 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1847 if (ctl_buf == NULL)
1da177e4
LT
1848 goto out_freeiov;
1849 }
1850 err = -EFAULT;
1851 /*
1852 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1853 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1854 * checking falls down on this.
1855 */
89bddce5
SH
1856 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1857 ctl_len))
1da177e4
LT
1858 goto out_freectl;
1859 msg_sys.msg_control = ctl_buf;
1860 }
1861 msg_sys.msg_flags = flags;
1862
1863 if (sock->file->f_flags & O_NONBLOCK)
1864 msg_sys.msg_flags |= MSG_DONTWAIT;
1865 err = sock_sendmsg(sock, &msg_sys, total_len);
1866
1867out_freectl:
89bddce5 1868 if (ctl_buf != ctl)
1da177e4
LT
1869 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1870out_freeiov:
1871 if (iov != iovstack)
1872 sock_kfree_s(sock->sk, iov, iov_size);
1873out_put:
6cb153ca 1874 fput_light(sock->file, fput_needed);
89bddce5 1875out:
1da177e4
LT
1876 return err;
1877}
1878
1879/*
1880 * BSD recvmsg interface
1881 */
1882
89bddce5
SH
1883asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
1884 unsigned int flags)
1da177e4 1885{
89bddce5
SH
1886 struct compat_msghdr __user *msg_compat =
1887 (struct compat_msghdr __user *)msg;
1da177e4
LT
1888 struct socket *sock;
1889 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1890 struct iovec *iov = iovstack;
1da177e4
LT
1891 struct msghdr msg_sys;
1892 unsigned long cmsg_ptr;
1893 int err, iov_size, total_len, len;
6cb153ca 1894 int fput_needed;
1da177e4
LT
1895
1896 /* kernel mode address */
1897 char addr[MAX_SOCK_ADDR];
1898
1899 /* user mode address pointers */
1900 struct sockaddr __user *uaddr;
1901 int __user *uaddr_len;
89bddce5 1902
1da177e4
LT
1903 if (MSG_CMSG_COMPAT & flags) {
1904 if (get_compat_msghdr(&msg_sys, msg_compat))
1905 return -EFAULT;
89bddce5
SH
1906 }
1907 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1908 return -EFAULT;
1da177e4 1909
6cb153ca 1910 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1911 if (!sock)
1912 goto out;
1913
1914 err = -EMSGSIZE;
1915 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1916 goto out_put;
89bddce5
SH
1917
1918 /* Check whether to allocate the iovec area */
1da177e4
LT
1919 err = -ENOMEM;
1920 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1921 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1922 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1923 if (!iov)
1924 goto out_put;
1925 }
1926
1927 /*
89bddce5
SH
1928 * Save the user-mode address (verify_iovec will change the
1929 * kernel msghdr to use the kernel address space)
1da177e4 1930 */
89bddce5
SH
1931
1932 uaddr = (void __user *)msg_sys.msg_name;
1da177e4
LT
1933 uaddr_len = COMPAT_NAMELEN(msg);
1934 if (MSG_CMSG_COMPAT & flags) {
1935 err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1936 } else
1937 err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1938 if (err < 0)
1939 goto out_freeiov;
89bddce5 1940 total_len = err;
1da177e4
LT
1941
1942 cmsg_ptr = (unsigned long)msg_sys.msg_control;
1943 msg_sys.msg_flags = 0;
1944 if (MSG_CMSG_COMPAT & flags)
1945 msg_sys.msg_flags = MSG_CMSG_COMPAT;
89bddce5 1946
1da177e4
LT
1947 if (sock->file->f_flags & O_NONBLOCK)
1948 flags |= MSG_DONTWAIT;
1949 err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1950 if (err < 0)
1951 goto out_freeiov;
1952 len = err;
1953
1954 if (uaddr != NULL) {
89bddce5
SH
1955 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
1956 uaddr_len);
1da177e4
LT
1957 if (err < 0)
1958 goto out_freeiov;
1959 }
37f7f421
DM
1960 err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
1961 COMPAT_FLAGS(msg));
1da177e4
LT
1962 if (err)
1963 goto out_freeiov;
1964 if (MSG_CMSG_COMPAT & flags)
89bddce5 1965 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1966 &msg_compat->msg_controllen);
1967 else
89bddce5 1968 err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
1da177e4
LT
1969 &msg->msg_controllen);
1970 if (err)
1971 goto out_freeiov;
1972 err = len;
1973
1974out_freeiov:
1975 if (iov != iovstack)
1976 sock_kfree_s(sock->sk, iov, iov_size);
1977out_put:
6cb153ca 1978 fput_light(sock->file, fput_needed);
1da177e4
LT
1979out:
1980 return err;
1981}
1982
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Argument list sizes for sys_socketcall: the number of bytes to copy
 * from user space for each call number (index 0 is unused).
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Argument checking cleaned up. Saved 20% in size.
 *	This function doesn't need to set the kernel lock because
 *	it is set by the callees.
 *
 *	@call: socket call number, valid range 1..SYS_RECVMSG
 *	@args: user array holding the arguments of the selected call
 *
 *	Returns -EINVAL for an unknown @call, -EFAULT if the arguments
 *	cannot be copied in, the error from audit_socketcall(), or the
 *	result of the dispatched sys_*() function.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
	if (err)
		return err;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *)a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2094
55737fda
SH
2095/**
2096 * sock_register - add a socket protocol handler
2097 * @ops: description of protocol
2098 *
1da177e4
LT
2099 * This function is called by a protocol handler that wants to
2100 * advertise its address family, and have it linked into the
55737fda
SH
2101 * socket interface. The value ops->family coresponds to the
2102 * socket system call protocol family.
1da177e4 2103 */
f0fd27d4 2104int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2105{
2106 int err;
2107
2108 if (ops->family >= NPROTO) {
89bddce5
SH
2109 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2110 NPROTO);
1da177e4
LT
2111 return -ENOBUFS;
2112 }
55737fda
SH
2113
2114 spin_lock(&net_family_lock);
2115 if (net_families[ops->family])
2116 err = -EEXIST;
2117 else {
89bddce5 2118 net_families[ops->family] = ops;
1da177e4
LT
2119 err = 0;
2120 }
55737fda
SH
2121 spin_unlock(&net_family_lock);
2122
89bddce5 2123 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2124 return err;
2125}
2126
55737fda
SH
2127/**
2128 * sock_unregister - remove a protocol handler
2129 * @family: protocol family to remove
2130 *
1da177e4
LT
2131 * This function is called by a protocol handler that wants to
2132 * remove its address family, and have it unlinked from the
55737fda
SH
2133 * new socket creation.
2134 *
2135 * If protocol handler is a module, then it can use module reference
2136 * counts to protect against new references. If protocol handler is not
2137 * a module then it needs to provide its own protection in
2138 * the ops->create routine.
1da177e4 2139 */
f0fd27d4 2140void sock_unregister(int family)
1da177e4 2141{
f0fd27d4 2142 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2143
55737fda 2144 spin_lock(&net_family_lock);
89bddce5 2145 net_families[family] = NULL;
55737fda
SH
2146 spin_unlock(&net_family_lock);
2147
2148 synchronize_rcu();
2149
89bddce5 2150 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4
LT
2151}
2152
77d76ea3 2153static int __init sock_init(void)
1da177e4
LT
2154{
2155 /*
89bddce5 2156 * Initialize sock SLAB cache.
1da177e4 2157 */
89bddce5 2158
1da177e4
LT
2159 sk_init();
2160
1da177e4 2161 /*
89bddce5 2162 * Initialize skbuff SLAB cache
1da177e4
LT
2163 */
2164 skb_init();
1da177e4
LT
2165
2166 /*
89bddce5 2167 * Initialize the protocols module.
1da177e4
LT
2168 */
2169
2170 init_inodecache();
2171 register_filesystem(&sock_fs_type);
2172 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2173
2174 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2175 */
2176
2177#ifdef CONFIG_NETFILTER
2178 netfilter_init();
2179#endif
cbeb321a
DM
2180
2181 return 0;
1da177e4
LT
2182}
2183
77d76ea3
AK
2184core_initcall(sock_init); /* early initcall */
2185
1da177e4
LT
2186#ifdef CONFIG_PROC_FS
2187void socket_seq_show(struct seq_file *seq)
2188{
2189 int cpu;
2190 int counter = 0;
2191
6f912042 2192 for_each_possible_cpu(cpu)
89bddce5 2193 counter += per_cpu(sockets_in_use, cpu);
1da177e4
LT
2194
2195 /* It can be negative, by the way. 8) */
2196 if (counter < 0)
2197 counter = 0;
2198
2199 seq_printf(seq, "sockets: used %d\n", counter);
2200}
89bddce5 2201#endif /* CONFIG_PROC_FS */
1da177e4 2202
89bbfc95
SP
2203#ifdef CONFIG_COMPAT
2204static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2205 unsigned long arg)
89bbfc95
SP
2206{
2207 struct socket *sock = file->private_data;
2208 int ret = -ENOIOCTLCMD;
2209
2210 if (sock->ops->compat_ioctl)
2211 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2212
2213 return ret;
2214}
2215#endif
2216
ac5a488e
SS
2217int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2218{
2219 return sock->ops->bind(sock, addr, addrlen);
2220}
2221
2222int kernel_listen(struct socket *sock, int backlog)
2223{
2224 return sock->ops->listen(sock, backlog);
2225}
2226
2227int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2228{
2229 struct sock *sk = sock->sk;
2230 int err;
2231
2232 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2233 newsock);
2234 if (err < 0)
2235 goto done;
2236
2237 err = sock->ops->accept(sock, *newsock, flags);
2238 if (err < 0) {
2239 sock_release(*newsock);
2240 goto done;
2241 }
2242
2243 (*newsock)->ops = sock->ops;
2244
2245done:
2246 return err;
2247}
2248
2249int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 2250 int flags)
ac5a488e
SS
2251{
2252 return sock->ops->connect(sock, addr, addrlen, flags);
2253}
2254
2255int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2256 int *addrlen)
2257{
2258 return sock->ops->getname(sock, addr, addrlen, 0);
2259}
2260
2261int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2262 int *addrlen)
2263{
2264 return sock->ops->getname(sock, addr, addrlen, 1);
2265}
2266
2267int kernel_getsockopt(struct socket *sock, int level, int optname,
2268 char *optval, int *optlen)
2269{
2270 mm_segment_t oldfs = get_fs();
2271 int err;
2272
2273 set_fs(KERNEL_DS);
2274 if (level == SOL_SOCKET)
2275 err = sock_getsockopt(sock, level, optname, optval, optlen);
2276 else
2277 err = sock->ops->getsockopt(sock, level, optname, optval,
2278 optlen);
2279 set_fs(oldfs);
2280 return err;
2281}
2282
2283int kernel_setsockopt(struct socket *sock, int level, int optname,
2284 char *optval, int optlen)
2285{
2286 mm_segment_t oldfs = get_fs();
2287 int err;
2288
2289 set_fs(KERNEL_DS);
2290 if (level == SOL_SOCKET)
2291 err = sock_setsockopt(sock, level, optname, optval, optlen);
2292 else
2293 err = sock->ops->setsockopt(sock, level, optname, optval,
2294 optlen);
2295 set_fs(oldfs);
2296 return err;
2297}
2298
2299int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2300 size_t size, int flags)
2301{
2302 if (sock->ops->sendpage)
2303 return sock->ops->sendpage(sock, page, offset, size, flags);
2304
2305 return sock_no_sendpage(sock, page, offset, size, flags);
2306}
2307
2308int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2309{
2310 mm_segment_t oldfs = get_fs();
2311 int err;
2312
2313 set_fs(KERNEL_DS);
2314 err = sock->ops->ioctl(sock, cmd, arg);
2315 set_fs(oldfs);
2316
2317 return err;
2318}
2319
1da177e4
LT
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration and message helpers */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() entry points */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);