]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/unix/af_unix.c
net: use skb_queue_empty_lockless() in poll() handlers
[mirror_ubuntu-bionic-kernel.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
113aa838 4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4
LT
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
1da177e4
LT
11 * Fixes:
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
21 * Mike Shaver's work.
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
28 * reference counting
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
31 * Lots of bug fixes.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
43 * dgram receiver.
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
51 *
52 *
53 * Known differences from reference BSD that was tested:
54 *
55 * [TO FIX]
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
60 * [NOT TO FIX]
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
68 *
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
73 *
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
80 * with BSD names.
81 */
82
5cc208be 83#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84
1da177e4 85#include <linux/module.h>
1da177e4 86#include <linux/kernel.h>
1da177e4 87#include <linux/signal.h>
3f07c014 88#include <linux/sched/signal.h>
1da177e4
LT
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
7c0f6ba6 103#include <linux/uaccess.h>
1da177e4
LT
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
457c4cbc 106#include <net/net_namespace.h>
1da177e4 107#include <net/sock.h>
c752f073 108#include <net/tcp_states.h>
1da177e4
LT
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
1da177e4
LT
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
2b15af6f 119#include <linux/freezer.h>
ba94f308 120#include <linux/file.h>
1da177e4 121
7123aaa3 122struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
fa7ff56f
PE
123EXPORT_SYMBOL_GPL(unix_socket_table);
124DEFINE_SPINLOCK(unix_table_lock);
125EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 126static atomic_long_t unix_nr_socks;
1da177e4 127
1da177e4 128
7123aaa3
ED
129static struct hlist_head *unix_sockets_unbound(void *addr)
130{
131 unsigned long hash = (unsigned long)addr;
132
133 hash ^= hash >> 16;
134 hash ^= hash >> 8;
135 hash %= UNIX_HASH_SIZE;
136 return &unix_socket_table[UNIX_HASH_SIZE + hash];
137}
138
139#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 140
877ce7c1 141#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 142static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 143{
37a9a8df 144 UNIXCB(skb).secid = scm->secid;
877ce7c1
CZ
145}
146
147static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148{
37a9a8df
SS
149 scm->secid = UNIXCB(skb).secid;
150}
151
152static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
153{
154 return (scm->secid == UNIXCB(skb).secid);
877ce7c1
CZ
155}
156#else
dc49c1f9 157static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
158{ }
159
160static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
161{ }
37a9a8df
SS
162
163static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
164{
165 return true;
166}
877ce7c1
CZ
167#endif /* CONFIG_SECURITY_NETWORK */
168
1da177e4
LT
169/*
170 * SMP locking strategy:
fbe9cc4a 171 * hash table is protected with spinlock unix_table_lock
663717f6 172 * each socket state is protected by separate spin lock.
1da177e4
LT
173 */
174
95c96174 175static inline unsigned int unix_hash_fold(__wsum n)
1da177e4 176{
0a13404d 177 unsigned int hash = (__force unsigned int)csum_fold(n);
95c96174 178
1da177e4
LT
179 hash ^= hash>>8;
180 return hash&(UNIX_HASH_SIZE-1);
181}
182
183#define unix_peer(sk) (unix_sk(sk)->peer)
184
185static inline int unix_our_peer(struct sock *sk, struct sock *osk)
186{
187 return unix_peer(osk) == sk;
188}
189
190static inline int unix_may_send(struct sock *sk, struct sock *osk)
191{
6eba6a37 192 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
193}
194
3c73419c
RW
195static inline int unix_recvq_full(struct sock const *sk)
196{
197 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
198}
199
/* Return @s's peer with a reference held, or NULL when unconnected.
 * The state lock makes the peer pointer read and sock_hold() atomic
 * with respect to disconnects.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
212
213static inline void unix_release_addr(struct unix_address *addr)
214{
8c9814b9 215 if (refcount_dec_and_test(&addr->refcnt))
1da177e4
LT
216 kfree(addr);
217}
218
219/*
220 * Check unix socket name:
221 * - should be not zero length.
222 * - if started by not zero, should be NULL terminated (FS object)
223 * - if started by zero, it is abstract name.
224 */
ac7bfa62 225
95c96174 226static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
1da177e4
LT
227{
228 if (len <= sizeof(short) || len > sizeof(*sunaddr))
229 return -EINVAL;
230 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
231 return -EINVAL;
232 if (sunaddr->sun_path[0]) {
233 /*
234 * This may look like an off by one error but it is a bit more
235 * subtle. 108 is the longest valid AF_UNIX path for a binding.
25985edc 236 * sun_path[108] doesn't as such exist. However in kernel space
1da177e4
LT
237 * we are guaranteed that it is a valid memory location in our
238 * kernel address buffer.
239 */
e27dfcea 240 ((char *)sunaddr)[len] = 0;
1da177e4
LT
241 len = strlen(sunaddr->sun_path)+1+sizeof(short);
242 return len;
243 }
244
07f0757a 245 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
246 return len;
247}
248
249static void __unix_remove_socket(struct sock *sk)
250{
251 sk_del_node_init(sk);
252}
253
254static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
255{
547b792c 256 WARN_ON(!sk_unhashed(sk));
1da177e4
LT
257 sk_add_node(sk, list);
258}
259
260static inline void unix_remove_socket(struct sock *sk)
261{
fbe9cc4a 262 spin_lock(&unix_table_lock);
1da177e4 263 __unix_remove_socket(sk);
fbe9cc4a 264 spin_unlock(&unix_table_lock);
1da177e4
LT
265}
266
267static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
268{
fbe9cc4a 269 spin_lock(&unix_table_lock);
1da177e4 270 __unix_insert_socket(list, sk);
fbe9cc4a 271 spin_unlock(&unix_table_lock);
1da177e4
LT
272}
273
097e66c5
DL
274static struct sock *__unix_find_socket_byname(struct net *net,
275 struct sockaddr_un *sunname,
95c96174 276 int len, int type, unsigned int hash)
1da177e4
LT
277{
278 struct sock *s;
1da177e4 279
b67bfe0d 280 sk_for_each(s, &unix_socket_table[hash ^ type]) {
1da177e4
LT
281 struct unix_sock *u = unix_sk(s);
282
878628fb 283 if (!net_eq(sock_net(s), net))
097e66c5
DL
284 continue;
285
1da177e4
LT
286 if (u->addr->len == len &&
287 !memcmp(u->addr->name, sunname, len))
288 goto found;
289 }
290 s = NULL;
291found:
292 return s;
293}
294
097e66c5
DL
295static inline struct sock *unix_find_socket_byname(struct net *net,
296 struct sockaddr_un *sunname,
1da177e4 297 int len, int type,
95c96174 298 unsigned int hash)
1da177e4
LT
299{
300 struct sock *s;
301
fbe9cc4a 302 spin_lock(&unix_table_lock);
097e66c5 303 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
304 if (s)
305 sock_hold(s);
fbe9cc4a 306 spin_unlock(&unix_table_lock);
1da177e4
LT
307 return s;
308}
309
6616f788 310static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
311{
312 struct sock *s;
1da177e4 313
fbe9cc4a 314 spin_lock(&unix_table_lock);
b67bfe0d 315 sk_for_each(s,
1da177e4 316 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
40ffe67d 317 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 318
beef5121 319 if (dentry && d_backing_inode(dentry) == i) {
1da177e4
LT
320 sock_hold(s);
321 goto found;
322 }
323 }
324 s = NULL;
325found:
fbe9cc4a 326 spin_unlock(&unix_table_lock);
1da177e4
LT
327 return s;
328}
329
7d267278
RW
330/* Support code for asymmetrically connected dgram sockets
331 *
332 * If a datagram socket is connected to a socket not itself connected
333 * to the first socket (eg, /dev/log), clients may only enqueue more
334 * messages if the present receive queue of the server socket is not
335 * "too large". This means there's a second writeability condition
336 * poll and sendmsg need to test. The dgram recv code will do a wake
337 * up on the peer_wait wait queue of a socket upon reception of a
338 * datagram which needs to be propagated to sleeping would-be writers
339 * since these might not have sent anything so far. This can't be
340 * accomplished via poll_wait because the lifetime of the server
341 * socket might be less than that of its clients if these break their
342 * association with it or if the server socket is closed while clients
343 * are still connected to it and there's no way to inform "a polling
344 * implementation" that it should let go of a certain wait queue
345 *
ac6424b9 346 * In order to propagate a wake up, a wait_queue_entry_t of the client
7d267278
RW
347 * socket is enqueued on the peer_wait queue of the server socket
348 * whose wake function does a wake_up on the ordinary client socket
349 * wait queue. This connection is established whenever a write (or
350 * poll for write) hit the flow control condition and broken when the
351 * association to the server socket is dissolved or after a wake up
352 * was relayed.
353 */
354
ac6424b9 355static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
7d267278
RW
356 void *key)
357{
358 struct unix_sock *u;
359 wait_queue_head_t *u_sleep;
360
361 u = container_of(q, struct unix_sock, peer_wake);
362
363 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
364 q);
365 u->peer_wake.private = NULL;
366
367 /* relaying can only happen while the wq still exists */
368 u_sleep = sk_sleep(&u->sk);
369 if (u_sleep)
370 wake_up_interruptible_poll(u_sleep, key);
371
372 return 0;
373}
374
375static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
376{
377 struct unix_sock *u, *u_other;
378 int rc;
379
380 u = unix_sk(sk);
381 u_other = unix_sk(other);
382 rc = 0;
383 spin_lock(&u_other->peer_wait.lock);
384
385 if (!u->peer_wake.private) {
386 u->peer_wake.private = other;
387 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
388
389 rc = 1;
390 }
391
392 spin_unlock(&u_other->peer_wait.lock);
393 return rc;
394}
395
396static void unix_dgram_peer_wake_disconnect(struct sock *sk,
397 struct sock *other)
398{
399 struct unix_sock *u, *u_other;
400
401 u = unix_sk(sk);
402 u_other = unix_sk(other);
403 spin_lock(&u_other->peer_wait.lock);
404
405 if (u->peer_wake.private == other) {
406 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
407 u->peer_wake.private = NULL;
408 }
409
410 spin_unlock(&u_other->peer_wait.lock);
411}
412
413static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
414 struct sock *other)
415{
416 unix_dgram_peer_wake_disconnect(sk, other);
417 wake_up_interruptible_poll(sk_sleep(sk),
418 POLLOUT |
419 POLLWRNORM |
420 POLLWRBAND);
421}
422
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 when the caller must sleep (peer queue still full and the
 * relay entry is registered), 0 when it may proceed.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected = unix_dgram_peer_wake_connect(sk, other);

	/* Still congested: stay registered, caller should wait */
	if (unix_recvq_full(other))
		return 1;

	/* Queue drained while registering: undo the registration we made */
	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
441
1586a587 442static int unix_writable(const struct sock *sk)
1da177e4 443{
1586a587 444 return sk->sk_state != TCP_LISTEN &&
14afee4b 445 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
1da177e4
LT
446}
447
448static void unix_write_space(struct sock *sk)
449{
43815482
ED
450 struct socket_wq *wq;
451
452 rcu_read_lock();
1da177e4 453 if (unix_writable(sk)) {
43815482 454 wq = rcu_dereference(sk->sk_wq);
1ce0bf50 455 if (skwq_has_sleeper(wq))
67426b75
ED
456 wake_up_interruptible_sync_poll(&wq->wait,
457 POLLOUT | POLLWRNORM | POLLWRBAND);
8d8ad9d7 458 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 459 }
43815482 460 rcu_read_unlock();
1da177e4
LT
461}
462
463/* When dgram socket disconnects (or changes its peer), we clear its receive
464 * queue of packets arrived from previous peer. First, it allows to do
465 * flow control based only on wmem_alloc; second, sk connected to peer
466 * may receive messages only from that peer. */
467static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
468{
b03efcfb 469 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
470 skb_queue_purge(&sk->sk_receive_queue);
471 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
472
473 /* If one link of bidirectional dgram pipe is disconnected,
474 * we signal error. Messages are lost. Do not make this,
475 * when peer was not connected to us.
476 */
477 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
478 other->sk_err = ECONNRESET;
479 other->sk_error_report(other);
480 }
481 }
482}
483
484static void unix_sock_destructor(struct sock *sk)
485{
486 struct unix_sock *u = unix_sk(sk);
487
488 skb_queue_purge(&sk->sk_receive_queue);
489
14afee4b 490 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
547b792c
IJ
491 WARN_ON(!sk_unhashed(sk));
492 WARN_ON(sk->sk_socket);
1da177e4 493 if (!sock_flag(sk, SOCK_DEAD)) {
5cc208be 494 pr_info("Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
495 return;
496 }
497
498 if (u->addr)
499 unix_release_addr(u->addr);
500
518de9b3 501 atomic_long_dec(&unix_nr_socks);
6f756a8c 502 local_bh_disable();
a8076d8d 503 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 504 local_bh_enable();
1da177e4 505#ifdef UNIX_REFCNT_DEBUG
5cc208be 506 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
518de9b3 507 atomic_long_read(&unix_nr_socks));
1da177e4
LT
508#endif
509}
510
ded34e0f 511static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
512{
513 struct unix_sock *u = unix_sk(sk);
40ffe67d 514 struct path path;
1da177e4
LT
515 struct sock *skpair;
516 struct sk_buff *skb;
517 int state;
518
519 unix_remove_socket(sk);
520
521 /* Clear state */
1c92b4e5 522 unix_state_lock(sk);
1da177e4
LT
523 sock_orphan(sk);
524 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
525 path = u->path;
526 u->path.dentry = NULL;
527 u->path.mnt = NULL;
1da177e4
LT
528 state = sk->sk_state;
529 sk->sk_state = TCP_CLOSE;
1c92b4e5 530 unix_state_unlock(sk);
1da177e4
LT
531
532 wake_up_interruptible_all(&u->peer_wait);
533
e27dfcea 534 skpair = unix_peer(sk);
1da177e4 535
e27dfcea 536 if (skpair != NULL) {
1da177e4 537 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 538 unix_state_lock(skpair);
1da177e4
LT
539 /* No more writes */
540 skpair->sk_shutdown = SHUTDOWN_MASK;
541 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
542 skpair->sk_err = ECONNRESET;
1c92b4e5 543 unix_state_unlock(skpair);
1da177e4 544 skpair->sk_state_change(skpair);
8d8ad9d7 545 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 546 }
7d267278
RW
547
548 unix_dgram_peer_wake_disconnect(sk, skpair);
1da177e4
LT
549 sock_put(skpair); /* It may now die */
550 unix_peer(sk) = NULL;
551 }
552
553 /* Try to flush out this socket. Throw out buffers at least */
554
555 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 556 if (state == TCP_LISTEN)
1da177e4
LT
557 unix_release_sock(skb->sk, 1);
558 /* passed fds are erased in the kfree_skb hook */
73ed5d25 559 UNIXCB(skb).consumed = skb->len;
1da177e4
LT
560 kfree_skb(skb);
561 }
562
40ffe67d
AV
563 if (path.dentry)
564 path_put(&path);
1da177e4
LT
565
566 sock_put(sk);
567
568 /* ---- Socket is dead now and most probably destroyed ---- */
569
570 /*
e04dae84 571 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
572 * ECONNRESET and we die on the spot. In Linux we behave
573 * like files and pipes do and wait for the last
574 * dereference.
575 *
576 * Can't we simply set sock->err?
577 *
578 * What the above comment does talk about? --ANK(980817)
579 */
580
9305cfa4 581 if (unix_tot_inflight)
ac7bfa62 582 unix_gc(); /* Garbage collect fds */
1da177e4
LT
583}
584
109f6e39
EB
585static void init_peercred(struct sock *sk)
586{
587 put_pid(sk->sk_peer_pid);
588 if (sk->sk_peer_cred)
589 put_cred(sk->sk_peer_cred);
590 sk->sk_peer_pid = get_pid(task_tgid(current));
591 sk->sk_peer_cred = get_current_cred();
592}
593
594static void copy_peercred(struct sock *sk, struct sock *peersk)
595{
596 put_pid(sk->sk_peer_pid);
597 if (sk->sk_peer_cred)
598 put_cred(sk->sk_peer_cred);
599 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
600 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
601}
602
1da177e4
LT
603static int unix_listen(struct socket *sock, int backlog)
604{
605 int err;
606 struct sock *sk = sock->sk;
607 struct unix_sock *u = unix_sk(sk);
109f6e39 608 struct pid *old_pid = NULL;
1da177e4
LT
609
610 err = -EOPNOTSUPP;
6eba6a37
ED
611 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
612 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
613 err = -EINVAL;
614 if (!u->addr)
6eba6a37 615 goto out; /* No listens on an unbound socket */
1c92b4e5 616 unix_state_lock(sk);
1da177e4
LT
617 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
618 goto out_unlock;
619 if (backlog > sk->sk_max_ack_backlog)
620 wake_up_interruptible_all(&u->peer_wait);
621 sk->sk_max_ack_backlog = backlog;
622 sk->sk_state = TCP_LISTEN;
623 /* set credentials so connect can copy them */
109f6e39 624 init_peercred(sk);
1da177e4
LT
625 err = 0;
626
627out_unlock:
1c92b4e5 628 unix_state_unlock(sk);
109f6e39 629 put_pid(old_pid);
1da177e4
LT
630out:
631 return err;
632}
633
634static int unix_release(struct socket *);
635static int unix_bind(struct socket *, struct sockaddr *, int);
636static int unix_stream_connect(struct socket *, struct sockaddr *,
637 int addr_len, int flags);
638static int unix_socketpair(struct socket *, struct socket *);
cdfbabfb 639static int unix_accept(struct socket *, struct socket *, int, bool);
1da177e4
LT
640static int unix_getname(struct socket *, struct sockaddr *, int *, int);
641static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
ec0d215f
RW
642static unsigned int unix_dgram_poll(struct file *, struct socket *,
643 poll_table *);
1da177e4
LT
644static int unix_ioctl(struct socket *, unsigned int, unsigned long);
645static int unix_shutdown(struct socket *, int);
1b784140
YX
646static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
647static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
869e7c62
HFS
648static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
649 size_t size, int flags);
2b514574
HFS
650static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
651 struct pipe_inode_info *, size_t size,
652 unsigned int flags);
1b784140
YX
653static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
654static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
1da177e4
LT
655static int unix_dgram_connect(struct socket *, struct sockaddr *,
656 int, int);
1b784140
YX
657static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
658static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
659 int);
1da177e4 660
12663bfc 661static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
662{
663 struct unix_sock *u = unix_sk(sk);
664
6e1ce3c3 665 if (mutex_lock_interruptible(&u->iolock))
12663bfc
SL
666 return -EINTR;
667
f55bb7f9 668 sk->sk_peek_off = val;
6e1ce3c3 669 mutex_unlock(&u->iolock);
12663bfc
SL
670
671 return 0;
f55bb7f9
PE
672}
673
674
90ddc4f0 675static const struct proto_ops unix_stream_ops = {
1da177e4
LT
676 .family = PF_UNIX,
677 .owner = THIS_MODULE,
678 .release = unix_release,
679 .bind = unix_bind,
680 .connect = unix_stream_connect,
681 .socketpair = unix_socketpair,
682 .accept = unix_accept,
683 .getname = unix_getname,
684 .poll = unix_poll,
685 .ioctl = unix_ioctl,
686 .listen = unix_listen,
687 .shutdown = unix_shutdown,
688 .setsockopt = sock_no_setsockopt,
689 .getsockopt = sock_no_getsockopt,
690 .sendmsg = unix_stream_sendmsg,
691 .recvmsg = unix_stream_recvmsg,
692 .mmap = sock_no_mmap,
869e7c62 693 .sendpage = unix_stream_sendpage,
2b514574 694 .splice_read = unix_stream_splice_read,
fc0d7536 695 .set_peek_off = unix_set_peek_off,
1da177e4
LT
696};
697
90ddc4f0 698static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
699 .family = PF_UNIX,
700 .owner = THIS_MODULE,
701 .release = unix_release,
702 .bind = unix_bind,
703 .connect = unix_dgram_connect,
704 .socketpair = unix_socketpair,
705 .accept = sock_no_accept,
706 .getname = unix_getname,
ec0d215f 707 .poll = unix_dgram_poll,
1da177e4
LT
708 .ioctl = unix_ioctl,
709 .listen = sock_no_listen,
710 .shutdown = unix_shutdown,
711 .setsockopt = sock_no_setsockopt,
712 .getsockopt = sock_no_getsockopt,
713 .sendmsg = unix_dgram_sendmsg,
714 .recvmsg = unix_dgram_recvmsg,
715 .mmap = sock_no_mmap,
716 .sendpage = sock_no_sendpage,
f55bb7f9 717 .set_peek_off = unix_set_peek_off,
1da177e4
LT
718};
719
90ddc4f0 720static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
721 .family = PF_UNIX,
722 .owner = THIS_MODULE,
723 .release = unix_release,
724 .bind = unix_bind,
725 .connect = unix_stream_connect,
726 .socketpair = unix_socketpair,
727 .accept = unix_accept,
728 .getname = unix_getname,
ec0d215f 729 .poll = unix_dgram_poll,
1da177e4
LT
730 .ioctl = unix_ioctl,
731 .listen = unix_listen,
732 .shutdown = unix_shutdown,
733 .setsockopt = sock_no_setsockopt,
734 .getsockopt = sock_no_getsockopt,
735 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 736 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
737 .mmap = sock_no_mmap,
738 .sendpage = sock_no_sendpage,
f55bb7f9 739 .set_peek_off = unix_set_peek_off,
1da177e4
LT
740};
741
742static struct proto unix_proto = {
248969ae
ED
743 .name = "UNIX",
744 .owner = THIS_MODULE,
248969ae 745 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
746};
747
a09785a2
IM
748/*
749 * AF_UNIX sockets do not interact with hardware, hence they
750 * dont trigger interrupts - so it's safe for them to have
751 * bh-unsafe locking for their sk_receive_queue.lock. Split off
752 * this special lock-class by reinitializing the spinlock key:
753 */
754static struct lock_class_key af_unix_sk_receive_queue_lock_key;
755
11aa9c28 756static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
1da177e4
LT
757{
758 struct sock *sk = NULL;
759 struct unix_sock *u;
760
518de9b3
ED
761 atomic_long_inc(&unix_nr_socks);
762 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
763 goto out;
764
11aa9c28 765 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
1da177e4
LT
766 if (!sk)
767 goto out;
768
6eba6a37 769 sock_init_data(sock, sk);
a09785a2
IM
770 lockdep_set_class(&sk->sk_receive_queue.lock,
771 &af_unix_sk_receive_queue_lock_key);
1da177e4 772
3aa9799e 773 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
1da177e4 774 sk->sk_write_space = unix_write_space;
a0a53c8b 775 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
776 sk->sk_destruct = unix_sock_destructor;
777 u = unix_sk(sk);
40ffe67d
AV
778 u->path.dentry = NULL;
779 u->path.mnt = NULL;
fd19f329 780 spin_lock_init(&u->lock);
516e0cc5 781 atomic_long_set(&u->inflight, 0);
1fd05ba5 782 INIT_LIST_HEAD(&u->link);
6e1ce3c3
LT
783 mutex_init(&u->iolock); /* single task reading lock */
784 mutex_init(&u->bindlock); /* single task binding lock */
1da177e4 785 init_waitqueue_head(&u->peer_wait);
7d267278 786 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
7123aaa3 787 unix_insert_socket(unix_sockets_unbound(sk), sk);
1da177e4 788out:
284b327b 789 if (sk == NULL)
518de9b3 790 atomic_long_dec(&unix_nr_socks);
920de804
ED
791 else {
792 local_bh_disable();
a8076d8d 793 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
794 local_bh_enable();
795 }
1da177e4
LT
796 return sk;
797}
798
3f378b68
EP
799static int unix_create(struct net *net, struct socket *sock, int protocol,
800 int kern)
1da177e4
LT
801{
802 if (protocol && protocol != PF_UNIX)
803 return -EPROTONOSUPPORT;
804
805 sock->state = SS_UNCONNECTED;
806
807 switch (sock->type) {
808 case SOCK_STREAM:
809 sock->ops = &unix_stream_ops;
810 break;
811 /*
812 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
813 * nothing uses it.
814 */
815 case SOCK_RAW:
e27dfcea 816 sock->type = SOCK_DGRAM;
110af3ac 817 /* fall through */
1da177e4
LT
818 case SOCK_DGRAM:
819 sock->ops = &unix_dgram_ops;
820 break;
821 case SOCK_SEQPACKET:
822 sock->ops = &unix_seqpacket_ops;
823 break;
824 default:
825 return -ESOCKTNOSUPPORT;
826 }
827
11aa9c28 828 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
1da177e4
LT
829}
830
831static int unix_release(struct socket *sock)
832{
833 struct sock *sk = sock->sk;
834
835 if (!sk)
836 return 0;
837
ded34e0f 838 unix_release_sock(sk, 0);
1da177e4
LT
839 sock->sk = NULL;
840
ded34e0f 841 return 0;
1da177e4
LT
842}
843
844static int unix_autobind(struct socket *sock)
845{
846 struct sock *sk = sock->sk;
3b1e0a65 847 struct net *net = sock_net(sk);
1da177e4
LT
848 struct unix_sock *u = unix_sk(sk);
849 static u32 ordernum = 1;
6eba6a37 850 struct unix_address *addr;
1da177e4 851 int err;
8df73ff9 852 unsigned int retries = 0;
1da177e4 853
6e1ce3c3 854 err = mutex_lock_interruptible(&u->bindlock);
37ab4fa7
SL
855 if (err)
856 return err;
1da177e4
LT
857
858 err = 0;
859 if (u->addr)
860 goto out;
861
862 err = -ENOMEM;
0da974f4 863 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
864 if (!addr)
865 goto out;
866
1da177e4 867 addr->name->sun_family = AF_UNIX;
8c9814b9 868 refcount_set(&addr->refcnt, 1);
1da177e4
LT
869
870retry:
871 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 872 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 873
fbe9cc4a 874 spin_lock(&unix_table_lock);
1da177e4
LT
875 ordernum = (ordernum+1)&0xFFFFF;
876
097e66c5 877 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 878 addr->hash)) {
fbe9cc4a 879 spin_unlock(&unix_table_lock);
8df73ff9
TH
880 /*
881 * __unix_find_socket_byname() may take long time if many names
882 * are already in use.
883 */
884 cond_resched();
885 /* Give up if all names seems to be in use. */
886 if (retries++ == 0xFFFFF) {
887 err = -ENOSPC;
888 kfree(addr);
889 goto out;
890 }
1da177e4
LT
891 goto retry;
892 }
893 addr->hash ^= sk->sk_type;
894
895 __unix_remove_socket(sk);
3c661530 896 smp_store_release(&u->addr, addr);
1da177e4 897 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 898 spin_unlock(&unix_table_lock);
1da177e4
LT
899 err = 0;
900
6e1ce3c3 901out: mutex_unlock(&u->bindlock);
1da177e4
LT
902 return err;
903}
904
/* Resolve the target of a connect/sendto address: filesystem names go
 * through a path lookup (checking write permission and S_ISSOCK),
 * abstract names through the hash table.  On success a referenced
 * socket is returned; on failure NULL, with the errno in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
959
/*
 * Create the filesystem node for a path-bound unix socket.
 *
 * On success the new dentry/mount pair is stored (with references
 * taken) into *res for the caller to install in its unix_sock; the
 * caller owns dropping them.  Returns 0 or a negative errno from path
 * creation, the security hook, or vfs_mknod() (notably -EEXIST when
 * the name is already taken).
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			/* hand references to the caller via *res */
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	/* drops the parent path and dentry refs taken by kern_path_create */
	done_path_create(&path, dentry);
	return err;
}
1da177e4
LT
988
/*
 * bind(2) for AF_UNIX sockets.
 *
 * A zero-length name triggers autobind; a filesystem name first creates
 * the socket inode via unix_mknod() (outside any socket lock, since it
 * can sleep in the VFS), then hashes the socket by inode number; an
 * abstract name is hashed by the name itself after a uniqueness check.
 *
 * u->bindlock serializes against concurrent bind/autobind; the global
 * unix_table_lock protects the hash tables.  The address is published
 * with smp_store_release() so lockless readers that smp_load_acquire()
 * u->addr see fully initialised contents.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	/* family only, no name: pick an abstract autobind address */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* create the inode before taking bindlock: mknod sleeps */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	/* already bound? */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* path-bound sockets are looked up by inode, not name */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	/* pairs with smp_load_acquire() of u->addr elsewhere */
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	/* on failure drop the inode created above; on success u->path owns it */
	if (err)
		path_put(&path);
out:
	return err;
}
1079
278a3de5
DM
/*
 * Take the unix state locks of two sockets in a deadlock-safe order.
 *
 * For two distinct non-NULL sockets the lower-addressed lock is taken
 * first and the second with the lockdep "nested" annotation.  A NULL
 * or identical sk2 degenerates to a single lock on sk1.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1094
/*
 * Release the locks taken by unix_state_double_lock().  Unlock order
 * is irrelevant for correctness.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
1104
1da177e4
LT
/*
 * connect(2) for SOCK_DGRAM unix sockets: (re)set the default peer.
 *
 * AF_UNSPEC disconnects (1003.1g).  Otherwise the target is looked up,
 * both state locks are taken in address order via
 * unix_state_double_lock(), and permission checks run under them.  If a
 * previous peer existed it is woken/disconnected and its reference
 * dropped.  The peer pointer owns a reference obtained from
 * unix_find_other().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* credential passing requires a bound (autobound) source */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		/* notify the old peer unless we reconnected to it */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1184
/*
 * Sleep until @other's receive queue may have drained, or @timeo
 * expires.
 *
 * Called with other's unix state lock held; it is ALWAYS released here
 * (the sched condition must be evaluated under the lock, the sleep must
 * not be).  Returns the remaining timeout; the caller re-validates
 * everything afterwards.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* only sleep while the peer is alive, readable and still full */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1205
/*
 * connect(2) for SOCK_STREAM/SOCK_SEQPACKET unix sockets.
 *
 * Strategy: allocate everything that can fail (the embryo server-side
 * sock and the skb that announces it) BEFORE taking any state lock,
 * then find the listener, latch its state, latch ours nested inside
 * it, and splice the two together.  Any state change observed after a
 * sleep or relock restarts the whole lookup.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* backlog full: fail for non-blocking, else wait and retry */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* state changed while we were unlocked: redo the whole dance */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock. Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1394
1395static int unix_socketpair(struct socket *socka, struct socket *sockb)
1396{
e27dfcea 1397 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1398
1399 /* Join our sockets back to back */
1400 sock_hold(ska);
1401 sock_hold(skb);
e27dfcea
JK
1402 unix_peer(ska) = skb;
1403 unix_peer(skb) = ska;
109f6e39
EB
1404 init_peercred(ska);
1405 init_peercred(skb);
1da177e4
LT
1406
1407 if (ska->sk_type != SOCK_DGRAM) {
1408 ska->sk_state = TCP_ESTABLISHED;
1409 skb->sk_state = TCP_ESTABLISHED;
1410 socka->state = SS_CONNECTED;
1411 sockb->state = SS_CONNECTED;
1412 }
1413 return 0;
1414}
1415
90c6bd34
DB
1416static void unix_sock_inherit_flags(const struct socket *old,
1417 struct socket *new)
1418{
1419 if (test_bit(SOCK_PASSCRED, &old->flags))
1420 set_bit(SOCK_PASSCRED, &new->flags);
1421 if (test_bit(SOCK_PASSSEC, &old->flags))
1422 set_bit(SOCK_PASSSEC, &new->flags);
1423}
1424
cdfbabfb
DH
/*
 * accept(2) for unix stream/seqpacket sockets.
 *
 * Pending connections sit as skbs on the listener's receive queue,
 * each carrying the embryo sock created by unix_stream_connect().
 * skb_recv_datagram() honours O_NONBLOCK and rcvtimeo; the dequeued
 * embryo is grafted onto @newsock.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* a backlog slot freed up: wake connect()ers blocked on it */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1468
1469
/*
 * getsockname(2)/getpeername(2) for unix sockets.
 *
 * @peer selects the peer's address (fails with -ENOTCONN when there is
 * none).  An unbound socket reports just the AF_UNIX family.  The
 * smp_load_acquire() of ->addr pairs with the smp_store_release() in
 * bind/connect so the address bytes are fully visible.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		/* balance the sock_put() on the common exit path */
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		/* unbound: family only, empty path */
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	sock_put(sk);
out:
	return err;
}
1501
1502static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1503{
1504 int i;
1505
1506 scm->fp = UNIXCB(skb).fp;
1da177e4
LT
1507 UNIXCB(skb).fp = NULL;
1508
6eba6a37 1509 for (i = scm->fp->count-1; i >= 0; i--)
415e3d3e 1510 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1da177e4
LT
1511}
1512
/*
 * skb destructor for unix sockets: release the SCM state (pid ref,
 * passed file descriptors) attached by unix_scm_to_skb(), then do the
 * normal write-memory accounting via sock_wfree().
 */
static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}
1526
712f4aad 1527/*
1528 * The "user->unix_inflight" variable is protected by the garbage
1529 * collection lock, and we just read it locklessly here. If you go
1530 * over the limit, there might be a tiny race in actually noticing
1531 * it across threads. Tough.
1532 */
1533static inline bool too_many_unix_fds(struct task_struct *p)
1534{
1535 struct user_struct *user = current_user();
1536
1537 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1538 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1539 return false;
1540}
1541
/*
 * Attach the file descriptors carried in @scm to @skb for passing.
 *
 * Enforces the per-user in-flight fd limit, duplicates the fp list so
 * the skb owns its own references, and marks each file in-flight for
 * the garbage collector.  Returns 0, -ETOOMANYREFS, or -ENOMEM.
 */
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	if (too_many_unix_fds(current))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	for (i = scm->fp->count - 1; i >= 0; i--)
		unix_inflight(scm->fp->user, scm->fp->fp[i]);
	return 0;
}
1562
/*
 * Copy control-message state (credentials, security data, and — when
 * @send_fds — passed file descriptors) from @scm into the skb's cb,
 * and install unix_destruct_scm so it is released with the skb.
 * Returns 0 or the error from unix_attach_fds().
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	/* destructor runs even on the error path once skb is freed */
	skb->destructor = unix_destruct_scm;
	return err;
}
1578
9490f886
HFS
1579static bool unix_passcred_enabled(const struct socket *sock,
1580 const struct sock *other)
1581{
1582 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1583 !other->sk_socket ||
1584 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1585}
1586
16e57262
ED
/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	/* a pid already attached means scm_send supplied explicit creds */
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
1602
9490f886
HFS
/*
 * Initialise @scm for a path (sendpage) that has no msghdr to pull
 * control data from: run scm_send() with an empty message, then fill
 * in the current task's credentials when either side wants them.
 * Returns 0 or the error from scm_send().
 */
static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}
1620
1621static bool unix_skb_scm_eq(struct sk_buff *skb,
1622 struct scm_cookie *scm)
1623{
1624 const struct unix_skb_parms *u = &UNIXCB(skb);
1625
1626 return u->pid == scm->pid &&
1627 uid_eq(u->uid, scm->creds.uid) &&
1628 gid_eq(u->gid, scm->creds.gid) &&
1629 unix_secdata_eq(scm, skb);
1630}
1631
1da177e4
LT
/*
 * Send AF_UNIX data.
 */

/*
 * sendmsg() for SOCK_DGRAM (and, via unix_seqpacket_sendmsg, for
 * SOCK_SEQPACKET).
 *
 * The skb and its SCM payload are built before any lock is taken.
 * Delivery then latches the receiver's state lock; a dead peer, a full
 * queue, or a disconnect race can force a restart of the lookup, and
 * the poll-based flow control (unix_dgram_peer_wake_me) may require
 * holding BOTH state locks — tracked by sk_locked.  Returns bytes sent
 * or a negative errno.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		/* large datagram: put the tail in page frags */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			/* default peer died: disconnect and report it */
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			/* blocking: sleep on the peer's queue, then redo */
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		/* non-blocking: need both locks to arm the poll wakeup */
		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1836
e370a723
ED
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimun of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))

/*
 * sendmsg() for SOCK_STREAM unix sockets.
 *
 * The byte stream is chopped into skbs (linear head + page frags, see
 * UNIX_SKB_FRAGS_SZ) and appended one at a time to the peer's receive
 * queue under its state lock.  Credentials/fds ride only on the first
 * skb.  A partial send returns the byte count; SIGPIPE is raised only
 * when nothing was sent and MSG_NOSIGNAL is clear.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* explicit destinations make no sense on a stream socket */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	/* partial writes report the byte count, not the error */
	return sent ? : err;
}
1940
869e7c62
HFS
/*
 * sendpage() for SOCK_STREAM unix sockets.
 *
 * Tries to append the page as a fragment to the LAST skb already on the
 * peer's receive queue (if its credentials match, unix_skb_scm_eq());
 * otherwise a fresh zero-length skb is allocated to carry the frags.
 * The peer's iolock serializes against readers since queued skbs are
 * modified in place.  The `if (false) { alloc_skb: ... }` block is a
 * goto target used to drop both locks before the sleeping allocation
 * and then re-enter the locking sequence from the top.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		/* both locks must be dropped: allocation may sleep */
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		/* tail unchanged across realloc: use the fresh skb */
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2056
1b784140
YX
2057static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2058 size_t len)
1da177e4
LT
2059{
2060 int err;
2061 struct sock *sk = sock->sk;
ac7bfa62 2062
1da177e4
LT
2063 err = sock_error(sk);
2064 if (err)
2065 return err;
2066
2067 if (sk->sk_state != TCP_ESTABLISHED)
2068 return -ENOTCONN;
2069
2070 if (msg->msg_namelen)
2071 msg->msg_namelen = 0;
2072
1b784140 2073 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2074}
ac7bfa62 2075
1b784140
YX
2076static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2077 size_t size, int flags)
a05d2ad1
EB
2078{
2079 struct sock *sk = sock->sk;
2080
2081 if (sk->sk_state != TCP_ESTABLISHED)
2082 return -ENOTCONN;
2083
1b784140 2084 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2085}
2086
1da177e4
LT
2087static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2088{
3c661530 2089 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2090
3c661530
AV
2091 if (addr) {
2092 msg->msg_namelen = addr->len;
2093 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2094 }
2095}
2096
1b784140
YX
/* Receive one datagram (SOCK_DGRAM / SOCK_SEQPACKET).
 *
 * Returns the number of bytes copied (or, with MSG_TRUNC, the full
 * datagram length minus the peek offset), 0 for EOF on a disconnected
 * non-blocking SEQPACKET socket, or a negative errno.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Try to dequeue under iolock; if the queue is empty, drop the lock
	 * while sleeping for more packets so writers are not blocked.
	 * On success we break out still HOLDING iolock.
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
					      &err, &last);
		if (skb)
			break;

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* We consumed queue space; wake a peer blocked on our receive queue */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						POLLOUT | POLLWRNORM |
						POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Clamp to the remaining payload; flag truncation otherwise */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Consuming read: detach passed fds and rewind peek offset */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2199
/*
 * Sleep until more data has arrived. But check for races..
 *
 * Called and returns with unix_state_lock(sk) dropped/reacquired around
 * the actual sleep.  @last/@last_len identify the tail skb (and its
 * length) observed by the caller; any change to either means new data.
 * Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* Wake-up conditions: queue tail changed or grew, socket
		 * error, receive shutdown, pending signal, or timeout.
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* Socket torn down while we slept */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2242
e370a723
ED
2243static unsigned int unix_skb_len(const struct sk_buff *skb)
2244{
2245 return skb->len - UNIXCB(skb).consumed;
2246}
2247
2b514574
HFS
/* Shared state for the generic stream read loop.  recv_actor() moves
 * one chunk of skb payload to its destination (msghdr copy or pipe
 * splice) and returns bytes transferred or a negative error.
 */
struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;	/* recvmsg destination (NULL when splicing) */
	struct pipe_inode_info *pipe;	/* splice destination */
	size_t size;		/* total bytes requested */
	int flags;		/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags */
};
2258
06a77b07
WC
/* Core receive loop shared by stream recvmsg and splice.
 *
 * Walks the receive queue under u->iolock, handing each chunk to
 * state->recv_actor.  Sleeps (freezably, if requested) when fewer than
 * @target bytes are available.  Returns bytes copied, or a negative
 * errno if nothing was copied.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Drop iolock while sleeping so writers can queue */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Consume the peek offset across fully-skipped skbs */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a ref: a concurrent reader may unlink the skb while
		 * the actor (possibly sleeping in copy_to_user) runs.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			/* Partially consumed: leave it at the queue head */
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop once fds were collected; they must not be
			 * merged with fds from a following message.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2455
2b514574
HFS
2456static int unix_stream_read_actor(struct sk_buff *skb,
2457 int skip, int chunk,
2458 struct unix_stream_read_state *state)
2459{
2460 int ret;
2461
2462 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2463 state->msg, chunk);
2464 return ret ?: chunk;
2465}
2466
2467static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2468 size_t size, int flags)
2469{
2470 struct unix_stream_read_state state = {
2471 .recv_actor = unix_stream_read_actor,
2472 .socket = sock,
2473 .msg = msg,
2474 .size = size,
2475 .flags = flags
2476 };
2477
06a77b07 2478 return unix_stream_read_generic(&state, true);
2b514574
HFS
2479}
2480
2b514574
HFS
2481static int unix_stream_splice_actor(struct sk_buff *skb,
2482 int skip, int chunk,
2483 struct unix_stream_read_state *state)
2484{
2485 return skb_splice_bits(skb, state->socket->sk,
2486 UNIXCB(skb).consumed + skip,
25869262 2487 state->pipe, chunk, state->splice_flags);
2b514574
HFS
2488}
2489
2490static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2491 struct pipe_inode_info *pipe,
2492 size_t size, unsigned int flags)
2493{
2494 struct unix_stream_read_state state = {
2495 .recv_actor = unix_stream_splice_actor,
2496 .socket = sock,
2497 .pipe = pipe,
2498 .size = size,
2499 .splice_flags = flags,
2500 };
2501
2502 if (unlikely(*ppos))
2503 return -ESPIPE;
2504
2505 if (sock->file->f_flags & O_NONBLOCK ||
2506 flags & SPLICE_F_NONBLOCK)
2507 state.flags = MSG_DONTWAIT;
2508
06a77b07 2509 return unix_stream_read_generic(&state, false);
2b514574
HFS
2510}
2511
1da177e4
LT
/* shutdown(2) handler: record the shutdown direction on this socket and
 * mirror the complementary direction onto the connected peer of a
 * STREAM/SEQPACKET socket, waking both sides.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	/* Pin the peer so it survives after we drop our state lock */
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Our RCV shutdown is the peer's SEND shutdown and v.v. */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2557
885ee74d
PE
/* Bytes readable on @sk (SIOCINQ): for stream/seqpacket the sum of
 * unconsumed payload across the whole receive queue, for datagram the
 * length of the first queued packet.  -EINVAL on a listening socket.
 */
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	/* Hold the queue lock so the walk sees a stable queue */
	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);
2581
/* Bytes queued but not yet received by the peer (SIOCOUTQ). */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2587
ba94f308
AV
/* SIOCUNIXFILE: open the filesystem object a bound socket lives at as an
 * O_PATH fd in the caller's fd table.  Requires CAP_NET_ADMIN in the
 * socket's network namespace.  Returns the new fd or a negative errno.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* Acquire pairs with the release store publishing the address */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	/* Abstract-namespace sockets have an address but no dentry */
	if (!path.dentry)
		return -ENOENT;

	/* Take our own reference; the socket may release its path later */
	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
2623
1da177e4
LT
/* ioctl handler: SIOCOUTQ/SIOCINQ report queued byte counts to user
 * space; SIOCUNIXFILE opens the socket's backing file.  Anything else
 * falls through to -ENOIOCTLCMD so common code can try generic ioctls.
 */
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;	/* e.g. -EINVAL on a listener */
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
2651
6eba6a37 2652static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2653{
2654 struct sock *sk = sock->sk;
2655 unsigned int mask;
2656
aa395145 2657 sock_poll_wait(file, sk_sleep(sk), wait);
1da177e4
LT
2658 mask = 0;
2659
2660 /* exceptional events? */
2661 if (sk->sk_err)
2662 mask |= POLLERR;
2663 if (sk->sk_shutdown == SHUTDOWN_MASK)
2664 mask |= POLLHUP;
f348d70a 2665 if (sk->sk_shutdown & RCV_SHUTDOWN)
db40980f 2666 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
1da177e4
LT
2667
2668 /* readable? */
09f6676b 2669 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
1da177e4
LT
2670 mask |= POLLIN | POLLRDNORM;
2671
2672 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2673 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2674 sk->sk_state == TCP_CLOSE)
1da177e4
LT
2675 mask |= POLLHUP;
2676
2677 /*
2678 * we set writable also when the other side has shut down the
2679 * connection. This prevents stuck sockets.
2680 */
2681 if (unix_writable(sk))
2682 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2683
2684 return mask;
2685}
2686
ec0d215f
RW
/* poll() for SOCK_DGRAM (and connecting SOCK_SEQPACKET) sockets.  Like
 * unix_poll() but additionally suppresses writability while the
 * connected peer's receive queue is full, registering on the peer's
 * wakeup chain via unix_dgram_peer_wake_me().
 */
static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int mask, writable;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/* A connected peer with a full queue blocks our writes
		 * unless it is also connected back to us.
		 */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
1da177e4
LT
2743
2744#ifdef CONFIG_PROC_FS
a53eb3fe 2745
7123aaa3
ED
/* /proc/net/unix iterator cursor encoding: the high bits of *pos hold
 * the hash-table bucket, the low BUCKET_SPACE bits hold a 1-based
 * offset within that bucket.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 2751
7123aaa3 2752static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 2753{
7123aaa3
ED
2754 unsigned long offset = get_offset(*pos);
2755 unsigned long bucket = get_bucket(*pos);
2756 struct sock *sk;
2757 unsigned long count = 0;
1da177e4 2758
7123aaa3
ED
2759 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2760 if (sock_net(sk) != seq_file_net(seq))
097e66c5 2761 continue;
7123aaa3
ED
2762 if (++count == offset)
2763 break;
2764 }
2765
2766 return sk;
2767}
2768
/* Advance the /proc/net/unix iterator: first try the rest of the current
 * bucket's chain, then scan subsequent buckets.  @sk may be
 * SEQ_START_TOKEN (start a fresh scan at *pos).  Returns NULL when all
 * buckets are exhausted.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	/* Continue along the current chain, skipping foreign namespaces */
	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* Move the cursor to offset 1 of the next bucket */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2795
1da177e4 2796static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2797 __acquires(unix_table_lock)
1da177e4 2798{
fbe9cc4a 2799 spin_lock(&unix_table_lock);
7123aaa3
ED
2800
2801 if (!*pos)
2802 return SEQ_START_TOKEN;
2803
2804 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2805 return NULL;
2806
2807 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
2808}
2809
/* seq_file next: bump the position and step to the following socket. */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}
2815
/* seq_file stop: drop the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2821
/* Emit one /proc/net/unix row (or the header for SEQ_START_TOKEN).
 * The column layout is user-visible ABI; do not reformat it.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	// under unix_table_lock here
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Address length excludes the sun_family field; a
			 * pathname address also carries a trailing NUL.
			 */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				/* Abstract names start with a NUL byte,
				 * rendered as '@'.
				 */
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2866
56b3d975 2867static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2868 .start = unix_seq_start,
2869 .next = unix_seq_next,
2870 .stop = unix_seq_stop,
2871 .show = unix_seq_show,
2872};
2873
1da177e4
LT
/* open() for /proc/net/unix: per-netns seq_file with private state. */
static int unix_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &unix_seq_ops,
			    sizeof(struct seq_net_private));
}
2879
da7071d7 2880static const struct file_operations unix_seq_fops = {
1da177e4
LT
2881 .owner = THIS_MODULE,
2882 .open = unix_seq_open,
2883 .read = seq_read,
2884 .llseek = seq_lseek,
e372c414 2885 .release = seq_release_net,
1da177e4
LT
2886};
2887
2888#endif
2889
ec1b4cf7 2890static const struct net_proto_family unix_family_ops = {
1da177e4
LT
2891 .family = PF_UNIX,
2892 .create = unix_create,
2893 .owner = THIS_MODULE,
2894};
2895
097e66c5 2896
2c8c1e72 2897static int __net_init unix_net_init(struct net *net)
097e66c5
DL
2898{
2899 int error = -ENOMEM;
2900
a0a53c8b 2901 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2902 if (unix_sysctl_register(net))
2903 goto out;
d392e497 2904
097e66c5 2905#ifdef CONFIG_PROC_FS
d4beaa66 2906 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
1597fbc0 2907 unix_sysctl_unregister(net);
097e66c5 2908 goto out;
1597fbc0 2909 }
097e66c5
DL
2910#endif
2911 error = 0;
2912out:
48dcc33e 2913 return error;
097e66c5
DL
2914}
2915
2c8c1e72 2916static void __net_exit unix_net_exit(struct net *net)
097e66c5 2917{
1597fbc0 2918 unix_sysctl_unregister(net);
ece31ffd 2919 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
2920}
2921
/* Hooks run for every network namespace created/destroyed. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2926
1da177e4
LT
/* Module init: register the unix_sock proto, the PF_UNIX family, and the
 * per-netns hooks.  The BUILD_BUG_ON guards the assumption that
 * unix_skb_parms fits in skb->cb.
 */
static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}
2944
/* Module exit: undo af_unix_init() registrations. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2951
3d366960
DW
2952/* Earlier than device_initcall() so that other drivers invoking
2953 request_module() don't end up in a loop when modprobe tries
2954 to use a UNIX socket. But later than subsys_initcall() because
2955 we depend on stuff initialised there */
2956fs_initcall(af_unix_init);
1da177e4
LT
2957module_exit(af_unix_exit);
2958
2959MODULE_LICENSE("GPL");
2960MODULE_ALIAS_NETPROTO(PF_UNIX);