]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/unix/af_unix.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
[mirror_ubuntu-jammy-kernel.git] / net / unix / af_unix.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
113aa838 5 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
1da177e4 6 *
1da177e4
LT
7 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
32 * is been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performances reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
1da177e4
LT
48 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
75 * with BSD names.
76 */
77
5cc208be 78#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
1da177e4 80#include <linux/module.h>
1da177e4 81#include <linux/kernel.h>
1da177e4 82#include <linux/signal.h>
3f07c014 83#include <linux/sched/signal.h>
1da177e4
LT
84#include <linux/errno.h>
85#include <linux/string.h>
86#include <linux/stat.h>
87#include <linux/dcache.h>
88#include <linux/namei.h>
89#include <linux/socket.h>
90#include <linux/un.h>
91#include <linux/fcntl.h>
92#include <linux/termios.h>
93#include <linux/sockios.h>
94#include <linux/net.h>
95#include <linux/in.h>
96#include <linux/fs.h>
97#include <linux/slab.h>
7c0f6ba6 98#include <linux/uaccess.h>
1da177e4
LT
99#include <linux/skbuff.h>
100#include <linux/netdevice.h>
457c4cbc 101#include <net/net_namespace.h>
1da177e4 102#include <net/sock.h>
c752f073 103#include <net/tcp_states.h>
1da177e4
LT
104#include <net/af_unix.h>
105#include <linux/proc_fs.h>
106#include <linux/seq_file.h>
107#include <net/scm.h>
108#include <linux/init.h>
109#include <linux/poll.h>
1da177e4
LT
110#include <linux/rtnetlink.h>
111#include <linux/mount.h>
112#include <net/checksum.h>
113#include <linux/security.h>
2b15af6f 114#include <linux/freezer.h>
ba94f308 115#include <linux/file.h>
1da177e4 116
f4e65870
JA
117#include "scm.h"
118
7123aaa3 119struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
fa7ff56f
PE
120EXPORT_SYMBOL_GPL(unix_socket_table);
121DEFINE_SPINLOCK(unix_table_lock);
122EXPORT_SYMBOL_GPL(unix_table_lock);
518de9b3 123static atomic_long_t unix_nr_socks;
1da177e4 124
1da177e4 125
7123aaa3
ED
126static struct hlist_head *unix_sockets_unbound(void *addr)
127{
128 unsigned long hash = (unsigned long)addr;
129
130 hash ^= hash >> 16;
131 hash ^= hash >> 8;
132 hash %= UNIX_HASH_SIZE;
133 return &unix_socket_table[UNIX_HASH_SIZE + hash];
134}
135
136#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
1da177e4 137
877ce7c1 138#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 139static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 140{
37a9a8df 141 UNIXCB(skb).secid = scm->secid;
877ce7c1
CZ
142}
143
144static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145{
37a9a8df
SS
146 scm->secid = UNIXCB(skb).secid;
147}
148
149static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150{
151 return (scm->secid == UNIXCB(skb).secid);
877ce7c1
CZ
152}
153#else
dc49c1f9 154static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1
CZ
155{ }
156
157static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158{ }
37a9a8df
SS
159
160static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161{
162 return true;
163}
877ce7c1
CZ
164#endif /* CONFIG_SECURITY_NETWORK */
165
1da177e4
LT
166/*
167 * SMP locking strategy:
fbe9cc4a 168 * hash table is protected with spinlock unix_table_lock
663717f6 169 * each socket state is protected by separate spin lock.
1da177e4
LT
170 */
171
95c96174 172static inline unsigned int unix_hash_fold(__wsum n)
1da177e4 173{
0a13404d 174 unsigned int hash = (__force unsigned int)csum_fold(n);
95c96174 175
1da177e4
LT
176 hash ^= hash>>8;
177 return hash&(UNIX_HASH_SIZE-1);
178}
179
180#define unix_peer(sk) (unix_sk(sk)->peer)
181
182static inline int unix_our_peer(struct sock *sk, struct sock *osk)
183{
184 return unix_peer(osk) == sk;
185}
186
187static inline int unix_may_send(struct sock *sk, struct sock *osk)
188{
6eba6a37 189 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
1da177e4
LT
190}
191
3c73419c
RW
192static inline int unix_recvq_full(struct sock const *sk)
193{
194 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195}
196
/* Return the peer of @s with a reference held, or NULL if unconnected.
 * The state lock makes the peer-read + sock_hold() pair atomic versus
 * concurrent disconnects.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *other;

	unix_state_lock(s);
	other = unix_peer(s);
	if (other)
		sock_hold(other);
	unix_state_unlock(s);

	return other;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
1da177e4
LT
209
210static inline void unix_release_addr(struct unix_address *addr)
211{
8c9814b9 212 if (refcount_dec_and_test(&addr->refcnt))
1da177e4
LT
213 kfree(addr);
214}
215
216/*
217 * Check unix socket name:
218 * - should be not zero length.
219 * - if started by not zero, should be NULL terminated (FS object)
220 * - if started by zero, it is abstract name.
221 */
ac7bfa62 222
95c96174 223static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
1da177e4 224{
33c4368e
KK
225 *hashp = 0;
226
1da177e4
LT
227 if (len <= sizeof(short) || len > sizeof(*sunaddr))
228 return -EINVAL;
229 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230 return -EINVAL;
231 if (sunaddr->sun_path[0]) {
232 /*
233 * This may look like an off by one error but it is a bit more
234 * subtle. 108 is the longest valid AF_UNIX path for a binding.
25985edc 235 * sun_path[108] doesn't as such exist. However in kernel space
1da177e4
LT
236 * we are guaranteed that it is a valid memory location in our
237 * kernel address buffer.
238 */
e27dfcea 239 ((char *)sunaddr)[len] = 0;
1da177e4
LT
240 len = strlen(sunaddr->sun_path)+1+sizeof(short);
241 return len;
242 }
243
07f0757a 244 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
1da177e4
LT
245 return len;
246}
247
248static void __unix_remove_socket(struct sock *sk)
249{
250 sk_del_node_init(sk);
251}
252
253static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
254{
547b792c 255 WARN_ON(!sk_unhashed(sk));
1da177e4
LT
256 sk_add_node(sk, list);
257}
258
259static inline void unix_remove_socket(struct sock *sk)
260{
fbe9cc4a 261 spin_lock(&unix_table_lock);
1da177e4 262 __unix_remove_socket(sk);
fbe9cc4a 263 spin_unlock(&unix_table_lock);
1da177e4
LT
264}
265
266static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267{
fbe9cc4a 268 spin_lock(&unix_table_lock);
1da177e4 269 __unix_insert_socket(list, sk);
fbe9cc4a 270 spin_unlock(&unix_table_lock);
1da177e4
LT
271}
272
097e66c5
DL
273static struct sock *__unix_find_socket_byname(struct net *net,
274 struct sockaddr_un *sunname,
95c96174 275 int len, int type, unsigned int hash)
1da177e4
LT
276{
277 struct sock *s;
1da177e4 278
b67bfe0d 279 sk_for_each(s, &unix_socket_table[hash ^ type]) {
1da177e4
LT
280 struct unix_sock *u = unix_sk(s);
281
878628fb 282 if (!net_eq(sock_net(s), net))
097e66c5
DL
283 continue;
284
1da177e4
LT
285 if (u->addr->len == len &&
286 !memcmp(u->addr->name, sunname, len))
262ce0af 287 return s;
1da177e4 288 }
262ce0af 289 return NULL;
1da177e4
LT
290}
291
097e66c5
DL
292static inline struct sock *unix_find_socket_byname(struct net *net,
293 struct sockaddr_un *sunname,
1da177e4 294 int len, int type,
95c96174 295 unsigned int hash)
1da177e4
LT
296{
297 struct sock *s;
298
fbe9cc4a 299 spin_lock(&unix_table_lock);
097e66c5 300 s = __unix_find_socket_byname(net, sunname, len, type, hash);
1da177e4
LT
301 if (s)
302 sock_hold(s);
fbe9cc4a 303 spin_unlock(&unix_table_lock);
1da177e4
LT
304 return s;
305}
306
6616f788 307static struct sock *unix_find_socket_byinode(struct inode *i)
1da177e4
LT
308{
309 struct sock *s;
1da177e4 310
fbe9cc4a 311 spin_lock(&unix_table_lock);
b67bfe0d 312 sk_for_each(s,
1da177e4 313 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
40ffe67d 314 struct dentry *dentry = unix_sk(s)->path.dentry;
1da177e4 315
beef5121 316 if (dentry && d_backing_inode(dentry) == i) {
1da177e4
LT
317 sock_hold(s);
318 goto found;
319 }
320 }
321 s = NULL;
322found:
fbe9cc4a 323 spin_unlock(&unix_table_lock);
1da177e4
LT
324 return s;
325}
326
7d267278
RW
327/* Support code for asymmetrically connected dgram sockets
328 *
329 * If a datagram socket is connected to a socket not itself connected
330 * to the first socket (eg, /dev/log), clients may only enqueue more
331 * messages if the present receive queue of the server socket is not
332 * "too large". This means there's a second writeability condition
333 * poll and sendmsg need to test. The dgram recv code will do a wake
334 * up on the peer_wait wait queue of a socket upon reception of a
335 * datagram which needs to be propagated to sleeping would-be writers
336 * since these might not have sent anything so far. This can't be
337 * accomplished via poll_wait because the lifetime of the server
338 * socket might be less than that of its clients if these break their
339 * association with it or if the server socket is closed while clients
340 * are still connected to it and there's no way to inform "a polling
341 * implementation" that it should let go of a certain wait queue
342 *
ac6424b9 343 * In order to propagate a wake up, a wait_queue_entry_t of the client
7d267278
RW
344 * socket is enqueued on the peer_wait queue of the server socket
345 * whose wake function does a wake_up on the ordinary client socket
346 * wait queue. This connection is established whenever a write (or
347 * poll for write) hit the flow control condition and broken when the
348 * association to the server socket is dissolved or after a wake up
349 * was relayed.
350 */
351
ac6424b9 352static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
7d267278
RW
353 void *key)
354{
355 struct unix_sock *u;
356 wait_queue_head_t *u_sleep;
357
358 u = container_of(q, struct unix_sock, peer_wake);
359
360 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
361 q);
362 u->peer_wake.private = NULL;
363
364 /* relaying can only happen while the wq still exists */
365 u_sleep = sk_sleep(&u->sk);
366 if (u_sleep)
3ad6f93e 367 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
7d267278
RW
368
369 return 0;
370}
371
372static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
373{
374 struct unix_sock *u, *u_other;
375 int rc;
376
377 u = unix_sk(sk);
378 u_other = unix_sk(other);
379 rc = 0;
380 spin_lock(&u_other->peer_wait.lock);
381
382 if (!u->peer_wake.private) {
383 u->peer_wake.private = other;
384 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
385
386 rc = 1;
387 }
388
389 spin_unlock(&u_other->peer_wait.lock);
390 return rc;
391}
392
393static void unix_dgram_peer_wake_disconnect(struct sock *sk,
394 struct sock *other)
395{
396 struct unix_sock *u, *u_other;
397
398 u = unix_sk(sk);
399 u_other = unix_sk(other);
400 spin_lock(&u_other->peer_wait.lock);
401
402 if (u->peer_wake.private == other) {
403 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
404 u->peer_wake.private = NULL;
405 }
406
407 spin_unlock(&u_other->peer_wait.lock);
408}
409
410static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
411 struct sock *other)
412{
413 unix_dgram_peer_wake_disconnect(sk, other);
414 wake_up_interruptible_poll(sk_sleep(sk),
a9a08845
LT
415 EPOLLOUT |
416 EPOLLWRNORM |
417 EPOLLWRBAND);
7d267278
RW
418}
419
420/* preconditions:
421 * - unix_peer(sk) == other
422 * - association is stable
423 */
424static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
425{
426 int connected;
427
428 connected = unix_dgram_peer_wake_connect(sk, other);
429
51f7e951
JB
430 /* If other is SOCK_DEAD, we want to make sure we signal
431 * POLLOUT, such that a subsequent write() can get a
432 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
433 * to other and its full, we will hang waiting for POLLOUT.
434 */
435 if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
7d267278
RW
436 return 1;
437
438 if (connected)
439 unix_dgram_peer_wake_disconnect(sk, other);
440
441 return 0;
442}
443
1586a587 444static int unix_writable(const struct sock *sk)
1da177e4 445{
1586a587 446 return sk->sk_state != TCP_LISTEN &&
14afee4b 447 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
1da177e4
LT
448}
449
450static void unix_write_space(struct sock *sk)
451{
43815482
ED
452 struct socket_wq *wq;
453
454 rcu_read_lock();
1da177e4 455 if (unix_writable(sk)) {
43815482 456 wq = rcu_dereference(sk->sk_wq);
1ce0bf50 457 if (skwq_has_sleeper(wq))
67426b75 458 wake_up_interruptible_sync_poll(&wq->wait,
a9a08845 459 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
8d8ad9d7 460 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1da177e4 461 }
43815482 462 rcu_read_unlock();
1da177e4
LT
463}
464
465/* When dgram socket disconnects (or changes its peer), we clear its receive
466 * queue of packets arrived from previous peer. First, it allows to do
467 * flow control based only on wmem_alloc; second, sk connected to peer
468 * may receive messages only from that peer. */
469static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
470{
b03efcfb 471 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
472 skb_queue_purge(&sk->sk_receive_queue);
473 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
474
475 /* If one link of bidirectional dgram pipe is disconnected,
476 * we signal error. Messages are lost. Do not make this,
477 * when peer was not connected to us.
478 */
479 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
480 other->sk_err = ECONNRESET;
481 other->sk_error_report(other);
482 }
483 }
484}
485
486static void unix_sock_destructor(struct sock *sk)
487{
488 struct unix_sock *u = unix_sk(sk);
489
490 skb_queue_purge(&sk->sk_receive_queue);
491
14afee4b 492 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
547b792c
IJ
493 WARN_ON(!sk_unhashed(sk));
494 WARN_ON(sk->sk_socket);
1da177e4 495 if (!sock_flag(sk, SOCK_DEAD)) {
5cc208be 496 pr_info("Attempt to release alive unix socket: %p\n", sk);
1da177e4
LT
497 return;
498 }
499
500 if (u->addr)
501 unix_release_addr(u->addr);
502
518de9b3 503 atomic_long_dec(&unix_nr_socks);
6f756a8c 504 local_bh_disable();
a8076d8d 505 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
6f756a8c 506 local_bh_enable();
1da177e4 507#ifdef UNIX_REFCNT_DEBUG
5cc208be 508 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
518de9b3 509 atomic_long_read(&unix_nr_socks));
1da177e4
LT
510#endif
511}
512
ded34e0f 513static void unix_release_sock(struct sock *sk, int embrion)
1da177e4
LT
514{
515 struct unix_sock *u = unix_sk(sk);
40ffe67d 516 struct path path;
1da177e4
LT
517 struct sock *skpair;
518 struct sk_buff *skb;
519 int state;
520
521 unix_remove_socket(sk);
522
523 /* Clear state */
1c92b4e5 524 unix_state_lock(sk);
1da177e4
LT
525 sock_orphan(sk);
526 sk->sk_shutdown = SHUTDOWN_MASK;
40ffe67d
AV
527 path = u->path;
528 u->path.dentry = NULL;
529 u->path.mnt = NULL;
1da177e4
LT
530 state = sk->sk_state;
531 sk->sk_state = TCP_CLOSE;
1c92b4e5 532 unix_state_unlock(sk);
1da177e4
LT
533
534 wake_up_interruptible_all(&u->peer_wait);
535
e27dfcea 536 skpair = unix_peer(sk);
1da177e4 537
e27dfcea 538 if (skpair != NULL) {
1da177e4 539 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
1c92b4e5 540 unix_state_lock(skpair);
1da177e4
LT
541 /* No more writes */
542 skpair->sk_shutdown = SHUTDOWN_MASK;
543 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
544 skpair->sk_err = ECONNRESET;
1c92b4e5 545 unix_state_unlock(skpair);
1da177e4 546 skpair->sk_state_change(skpair);
8d8ad9d7 547 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
1da177e4 548 }
7d267278
RW
549
550 unix_dgram_peer_wake_disconnect(sk, skpair);
1da177e4
LT
551 sock_put(skpair); /* It may now die */
552 unix_peer(sk) = NULL;
553 }
554
555 /* Try to flush out this socket. Throw out buffers at least */
556
557 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
e27dfcea 558 if (state == TCP_LISTEN)
1da177e4
LT
559 unix_release_sock(skb->sk, 1);
560 /* passed fds are erased in the kfree_skb hook */
73ed5d25 561 UNIXCB(skb).consumed = skb->len;
1da177e4
LT
562 kfree_skb(skb);
563 }
564
40ffe67d
AV
565 if (path.dentry)
566 path_put(&path);
1da177e4
LT
567
568 sock_put(sk);
569
570 /* ---- Socket is dead now and most probably destroyed ---- */
571
572 /*
e04dae84 573 * Fixme: BSD difference: In BSD all sockets connected to us get
1da177e4
LT
574 * ECONNRESET and we die on the spot. In Linux we behave
575 * like files and pipes do and wait for the last
576 * dereference.
577 *
578 * Can't we simply set sock->err?
579 *
580 * What the above comment does talk about? --ANK(980817)
581 */
582
9305cfa4 583 if (unix_tot_inflight)
ac7bfa62 584 unix_gc(); /* Garbage collect fds */
1da177e4
LT
585}
586
109f6e39
EB
587static void init_peercred(struct sock *sk)
588{
589 put_pid(sk->sk_peer_pid);
590 if (sk->sk_peer_cred)
591 put_cred(sk->sk_peer_cred);
592 sk->sk_peer_pid = get_pid(task_tgid(current));
593 sk->sk_peer_cred = get_current_cred();
594}
595
596static void copy_peercred(struct sock *sk, struct sock *peersk)
597{
598 put_pid(sk->sk_peer_pid);
599 if (sk->sk_peer_cred)
600 put_cred(sk->sk_peer_cred);
601 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
602 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
603}
604
1da177e4
LT
605static int unix_listen(struct socket *sock, int backlog)
606{
607 int err;
608 struct sock *sk = sock->sk;
609 struct unix_sock *u = unix_sk(sk);
109f6e39 610 struct pid *old_pid = NULL;
1da177e4
LT
611
612 err = -EOPNOTSUPP;
6eba6a37
ED
613 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
614 goto out; /* Only stream/seqpacket sockets accept */
1da177e4
LT
615 err = -EINVAL;
616 if (!u->addr)
6eba6a37 617 goto out; /* No listens on an unbound socket */
1c92b4e5 618 unix_state_lock(sk);
1da177e4
LT
619 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
620 goto out_unlock;
621 if (backlog > sk->sk_max_ack_backlog)
622 wake_up_interruptible_all(&u->peer_wait);
623 sk->sk_max_ack_backlog = backlog;
624 sk->sk_state = TCP_LISTEN;
625 /* set credentials so connect can copy them */
109f6e39 626 init_peercred(sk);
1da177e4
LT
627 err = 0;
628
629out_unlock:
1c92b4e5 630 unix_state_unlock(sk);
109f6e39 631 put_pid(old_pid);
1da177e4
LT
632out:
633 return err;
634}
635
636static int unix_release(struct socket *);
637static int unix_bind(struct socket *, struct sockaddr *, int);
638static int unix_stream_connect(struct socket *, struct sockaddr *,
639 int addr_len, int flags);
640static int unix_socketpair(struct socket *, struct socket *);
cdfbabfb 641static int unix_accept(struct socket *, struct socket *, int, bool);
9b2c45d4 642static int unix_getname(struct socket *, struct sockaddr *, int);
a11e1d43
LT
643static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
644static __poll_t unix_dgram_poll(struct file *, struct socket *,
645 poll_table *);
1da177e4
LT
646static int unix_ioctl(struct socket *, unsigned int, unsigned long);
647static int unix_shutdown(struct socket *, int);
1b784140
YX
648static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
649static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
869e7c62
HFS
650static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
651 size_t size, int flags);
2b514574
HFS
652static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
653 struct pipe_inode_info *, size_t size,
654 unsigned int flags);
1b784140
YX
655static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
656static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
1da177e4
LT
657static int unix_dgram_connect(struct socket *, struct sockaddr *,
658 int, int);
1b784140
YX
659static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
660static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
661 int);
1da177e4 662
12663bfc 663static int unix_set_peek_off(struct sock *sk, int val)
f55bb7f9
PE
664{
665 struct unix_sock *u = unix_sk(sk);
666
6e1ce3c3 667 if (mutex_lock_interruptible(&u->iolock))
12663bfc
SL
668 return -EINTR;
669
f55bb7f9 670 sk->sk_peek_off = val;
6e1ce3c3 671 mutex_unlock(&u->iolock);
12663bfc
SL
672
673 return 0;
f55bb7f9
PE
674}
675
676
90ddc4f0 677static const struct proto_ops unix_stream_ops = {
1da177e4
LT
678 .family = PF_UNIX,
679 .owner = THIS_MODULE,
680 .release = unix_release,
681 .bind = unix_bind,
682 .connect = unix_stream_connect,
683 .socketpair = unix_socketpair,
684 .accept = unix_accept,
685 .getname = unix_getname,
a11e1d43 686 .poll = unix_poll,
1da177e4
LT
687 .ioctl = unix_ioctl,
688 .listen = unix_listen,
689 .shutdown = unix_shutdown,
690 .setsockopt = sock_no_setsockopt,
691 .getsockopt = sock_no_getsockopt,
692 .sendmsg = unix_stream_sendmsg,
693 .recvmsg = unix_stream_recvmsg,
694 .mmap = sock_no_mmap,
869e7c62 695 .sendpage = unix_stream_sendpage,
2b514574 696 .splice_read = unix_stream_splice_read,
fc0d7536 697 .set_peek_off = unix_set_peek_off,
1da177e4
LT
698};
699
90ddc4f0 700static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
701 .family = PF_UNIX,
702 .owner = THIS_MODULE,
703 .release = unix_release,
704 .bind = unix_bind,
705 .connect = unix_dgram_connect,
706 .socketpair = unix_socketpair,
707 .accept = sock_no_accept,
708 .getname = unix_getname,
a11e1d43 709 .poll = unix_dgram_poll,
1da177e4
LT
710 .ioctl = unix_ioctl,
711 .listen = sock_no_listen,
712 .shutdown = unix_shutdown,
713 .setsockopt = sock_no_setsockopt,
714 .getsockopt = sock_no_getsockopt,
715 .sendmsg = unix_dgram_sendmsg,
716 .recvmsg = unix_dgram_recvmsg,
717 .mmap = sock_no_mmap,
718 .sendpage = sock_no_sendpage,
f55bb7f9 719 .set_peek_off = unix_set_peek_off,
1da177e4
LT
720};
721
90ddc4f0 722static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
723 .family = PF_UNIX,
724 .owner = THIS_MODULE,
725 .release = unix_release,
726 .bind = unix_bind,
727 .connect = unix_stream_connect,
728 .socketpair = unix_socketpair,
729 .accept = unix_accept,
730 .getname = unix_getname,
a11e1d43 731 .poll = unix_dgram_poll,
1da177e4
LT
732 .ioctl = unix_ioctl,
733 .listen = unix_listen,
734 .shutdown = unix_shutdown,
735 .setsockopt = sock_no_setsockopt,
736 .getsockopt = sock_no_getsockopt,
737 .sendmsg = unix_seqpacket_sendmsg,
a05d2ad1 738 .recvmsg = unix_seqpacket_recvmsg,
1da177e4
LT
739 .mmap = sock_no_mmap,
740 .sendpage = sock_no_sendpage,
f55bb7f9 741 .set_peek_off = unix_set_peek_off,
1da177e4
LT
742};
743
744static struct proto unix_proto = {
248969ae
ED
745 .name = "UNIX",
746 .owner = THIS_MODULE,
248969ae 747 .obj_size = sizeof(struct unix_sock),
1da177e4
LT
748};
749
11aa9c28 750static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
1da177e4
LT
751{
752 struct sock *sk = NULL;
753 struct unix_sock *u;
754
518de9b3
ED
755 atomic_long_inc(&unix_nr_socks);
756 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
1da177e4
LT
757 goto out;
758
11aa9c28 759 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
1da177e4
LT
760 if (!sk)
761 goto out;
762
6eba6a37 763 sock_init_data(sock, sk);
1da177e4 764
3aa9799e 765 sk->sk_allocation = GFP_KERNEL_ACCOUNT;
1da177e4 766 sk->sk_write_space = unix_write_space;
a0a53c8b 767 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
1da177e4
LT
768 sk->sk_destruct = unix_sock_destructor;
769 u = unix_sk(sk);
40ffe67d
AV
770 u->path.dentry = NULL;
771 u->path.mnt = NULL;
fd19f329 772 spin_lock_init(&u->lock);
516e0cc5 773 atomic_long_set(&u->inflight, 0);
1fd05ba5 774 INIT_LIST_HEAD(&u->link);
6e1ce3c3
LT
775 mutex_init(&u->iolock); /* single task reading lock */
776 mutex_init(&u->bindlock); /* single task binding lock */
1da177e4 777 init_waitqueue_head(&u->peer_wait);
7d267278 778 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
7123aaa3 779 unix_insert_socket(unix_sockets_unbound(sk), sk);
1da177e4 780out:
284b327b 781 if (sk == NULL)
518de9b3 782 atomic_long_dec(&unix_nr_socks);
920de804
ED
783 else {
784 local_bh_disable();
a8076d8d 785 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
920de804
ED
786 local_bh_enable();
787 }
1da177e4
LT
788 return sk;
789}
790
3f378b68
EP
791static int unix_create(struct net *net, struct socket *sock, int protocol,
792 int kern)
1da177e4
LT
793{
794 if (protocol && protocol != PF_UNIX)
795 return -EPROTONOSUPPORT;
796
797 sock->state = SS_UNCONNECTED;
798
799 switch (sock->type) {
800 case SOCK_STREAM:
801 sock->ops = &unix_stream_ops;
802 break;
803 /*
804 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
805 * nothing uses it.
806 */
807 case SOCK_RAW:
e27dfcea 808 sock->type = SOCK_DGRAM;
110af3ac 809 /* fall through */
1da177e4
LT
810 case SOCK_DGRAM:
811 sock->ops = &unix_dgram_ops;
812 break;
813 case SOCK_SEQPACKET:
814 sock->ops = &unix_seqpacket_ops;
815 break;
816 default:
817 return -ESOCKTNOSUPPORT;
818 }
819
11aa9c28 820 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
1da177e4
LT
821}
822
823static int unix_release(struct socket *sock)
824{
825 struct sock *sk = sock->sk;
826
827 if (!sk)
828 return 0;
829
ded34e0f 830 unix_release_sock(sk, 0);
1da177e4
LT
831 sock->sk = NULL;
832
ded34e0f 833 return 0;
1da177e4
LT
834}
835
836static int unix_autobind(struct socket *sock)
837{
838 struct sock *sk = sock->sk;
3b1e0a65 839 struct net *net = sock_net(sk);
1da177e4
LT
840 struct unix_sock *u = unix_sk(sk);
841 static u32 ordernum = 1;
6eba6a37 842 struct unix_address *addr;
1da177e4 843 int err;
8df73ff9 844 unsigned int retries = 0;
1da177e4 845
6e1ce3c3 846 err = mutex_lock_interruptible(&u->bindlock);
37ab4fa7
SL
847 if (err)
848 return err;
1da177e4
LT
849
850 err = 0;
851 if (u->addr)
852 goto out;
853
854 err = -ENOMEM;
0da974f4 855 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
1da177e4
LT
856 if (!addr)
857 goto out;
858
1da177e4 859 addr->name->sun_family = AF_UNIX;
8c9814b9 860 refcount_set(&addr->refcnt, 1);
1da177e4
LT
861
862retry:
863 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
07f0757a 864 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
1da177e4 865
fbe9cc4a 866 spin_lock(&unix_table_lock);
1da177e4
LT
867 ordernum = (ordernum+1)&0xFFFFF;
868
097e66c5 869 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
1da177e4 870 addr->hash)) {
fbe9cc4a 871 spin_unlock(&unix_table_lock);
8df73ff9
TH
872 /*
873 * __unix_find_socket_byname() may take long time if many names
874 * are already in use.
875 */
876 cond_resched();
877 /* Give up if all names seems to be in use. */
878 if (retries++ == 0xFFFFF) {
879 err = -ENOSPC;
880 kfree(addr);
881 goto out;
882 }
1da177e4
LT
883 goto retry;
884 }
885 addr->hash ^= sk->sk_type;
886
887 __unix_remove_socket(sk);
ae3b5641 888 smp_store_release(&u->addr, addr);
1da177e4 889 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 890 spin_unlock(&unix_table_lock);
1da177e4
LT
891 err = 0;
892
6e1ce3c3 893out: mutex_unlock(&u->bindlock);
1da177e4
LT
894 return err;
895}
896
097e66c5
DL
/* Resolve a connect/sendto target address to a live socket.
 *
 * Filesystem names go through a path lookup plus a MAY_WRITE permission
 * check on the inode; abstract names are looked up by hash.  On success
 * a referenced sock is returned; on failure NULL, with the errno stored
 * in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;

		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!u)
			goto fail;

		if (unix_sk(u)->path.dentry)
			touch_atime(&unix_sk(u)->path);
	}

	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
951
/* Create the filesystem node for a path-bound unix socket.
 *
 * On success *res holds a reference to the freshly created dentry/mount
 * (mntget/dget); the caller owns that reference.  Returns 0 or a
 * negative errno (e.g. -EEXIST when the path already exists).
 */
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.  The security hook must run
	 * before the actual mknod.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	/* drops the locks/references taken by kern_path_create() */
	done_path_create(&path, dentry);
	return err;
}
1da177e4
LT
980
/* bind(2) for AF_UNIX sockets.
 *
 * A bare sun_family (addr_len == sizeof(short)) triggers autobind.
 * A filesystem address first creates the socket inode via unix_mknod()
 * (outside any lock), then publishes the address; an abstract address
 * is checked for uniqueness under unix_table_lock.  u->bindlock
 * serializes against concurrent bind/autobind on the same socket.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* create the inode before taking bindlock; an existing
		 * path maps to EADDRINUSE as mandated for bind(2)
		 */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	/* already bound? */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* path-bound sockets hash by inode, not by name */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	/* pairs with smp_load_acquire() of u->addr elsewhere: readers that
	 * see addr also see its fully initialized contents
	 */
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}
1071
278a3de5
DM
/* Take the state locks of two sockets in a deadlock-free order.
 *
 * The lock with the lower address is always taken first, so two CPUs
 * locking the same pair in opposite argument order cannot deadlock.
 * A NULL or identical second socket degenerates to a single lock.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	first  = (sk1 < sk2) ? sk1 : sk2;
	second = (sk1 < sk2) ? sk2 : sk1;
	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1086
/* Release the pair of state locks taken by unix_state_double_lock().
 * Unlock order does not matter; the degenerate single-lock cases are
 * handled first.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_unlock(sk1);
		return;
	}

	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
1096
1da177e4
LT
/* connect(2) for SOCK_DGRAM unix sockets.
 *
 * Sets (or, for AF_UNSPEC, clears) the default peer of @sk.  Both
 * sockets' state locks are taken via unix_state_double_lock() so the
 * peer pointer and the destination's liveness are checked atomically.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SOCK_PASSCRED requires our own address to exist */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);	/* drop the old peer's reference */
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1176
/* Sleep until the peer's receive queue may have drained.
 *
 * Called with unix_state_lock(other) held; the lock is DROPPED here
 * before sleeping and is NOT re-taken — callers must re-lock and
 * re-validate @other afterwards.  Returns the remaining timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* decide whether sleeping is still worthwhile while the peer's
	 * state is stable (we still hold its lock here)
	 */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1197
/* connect(2) for SOCK_STREAM/SOCK_SEQPACKET unix sockets.
 *
 * Allocates the server-side sock (newsk) and a notification skb up
 * front, then finds the listener, waits for backlog space if needed,
 * and finally links everything together under both state locks.  The
 * skb carrying newsk is queued on the listener for accept(2) to pick
 * up.  Any failure path releases whatever was allocated.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* drops other's lock; we must restart the lookup */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* state changed while we were unlocked — start over */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* queue newsk's carrier skb and tell the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1386
1387static int unix_socketpair(struct socket *socka, struct socket *sockb)
1388{
e27dfcea 1389 struct sock *ska = socka->sk, *skb = sockb->sk;
1da177e4
LT
1390
1391 /* Join our sockets back to back */
1392 sock_hold(ska);
1393 sock_hold(skb);
e27dfcea
JK
1394 unix_peer(ska) = skb;
1395 unix_peer(skb) = ska;
109f6e39
EB
1396 init_peercred(ska);
1397 init_peercred(skb);
1da177e4
LT
1398
1399 if (ska->sk_type != SOCK_DGRAM) {
1400 ska->sk_state = TCP_ESTABLISHED;
1401 skb->sk_state = TCP_ESTABLISHED;
1402 socka->state = SS_CONNECTED;
1403 sockb->state = SS_CONNECTED;
1404 }
1405 return 0;
1406}
1407
90c6bd34
DB
1408static void unix_sock_inherit_flags(const struct socket *old,
1409 struct socket *new)
1410{
1411 if (test_bit(SOCK_PASSCRED, &old->flags))
1412 set_bit(SOCK_PASSCRED, &new->flags);
1413 if (test_bit(SOCK_PASSSEC, &old->flags))
1414 set_bit(SOCK_PASSSEC, &new->flags);
1415}
1416
cdfbabfb
DH
/* accept(2) for unix sockets.
 *
 * The listener's backlog is its receive queue: each queued skb carries
 * the server-side sock created by unix_stream_connect().  Dequeue one,
 * graft it onto @newsock, and wake anyone waiting in
 * unix_wait_for_peer() for backlog space.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* a backlog slot was freed — wake blocked connectors */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1460
1461
/* getsockname(2)/getpeername(2) for unix sockets.
 *
 * With @peer set, reports the peer's address (ENOTCONN if there is
 * none).  An unbound socket yields just the family with an empty path.
 * Returns the address length on success, negative errno on failure.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);	/* takes a reference */

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	/* pairs with smp_store_release() in bind/connect paths so the
	 * address contents are fully visible once the pointer is seen
	 */
	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
1493
/* Attach control-message state (pid, creds, security data, and
 * optionally the passed file descriptors) from @scm to @skb, and set
 * the destructor that releases them when the skb is freed.  Returns 0
 * or the negative error from unix_attach_fds().
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	/* releases the pid/fd references taken above */
	skb->destructor = unix_destruct_scm;
	return err;
}
1509
9490f886
HFS
1510static bool unix_passcred_enabled(const struct socket *sock,
1511 const struct sock *other)
1512{
1513 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1514 !other->sk_socket ||
1515 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1516}
1517
16e57262
ED
1518/*
1519 * Some apps rely on write() giving SCM_CREDENTIALS
1520 * We include credentials if source or destination socket
1521 * asserted SOCK_PASSCRED.
1522 */
1523static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1524 const struct sock *other)
1525{
6b0ee8c0 1526 if (UNIXCB(skb).pid)
16e57262 1527 return;
9490f886 1528 if (unix_passcred_enabled(sock, other)) {
16e57262 1529 UNIXCB(skb).pid = get_pid(task_tgid(current));
6e0895c2 1530 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
16e57262
ED
1531 }
1532}
1533
9490f886
HFS
1534static int maybe_init_creds(struct scm_cookie *scm,
1535 struct socket *socket,
1536 const struct sock *other)
1537{
1538 int err;
1539 struct msghdr msg = { .msg_controllen = 0 };
1540
1541 err = scm_send(socket, &msg, scm, false);
1542 if (err)
1543 return err;
1544
1545 if (unix_passcred_enabled(socket, other)) {
1546 scm->pid = get_pid(task_tgid(current));
1547 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1548 }
1549 return err;
1550}
1551
1552static bool unix_skb_scm_eq(struct sk_buff *skb,
1553 struct scm_cookie *scm)
1554{
1555 const struct unix_skb_parms *u = &UNIXCB(skb);
1556
1557 return u->pid == scm->pid &&
1558 uid_eq(u->uid, scm->creds.uid) &&
1559 gid_eq(u->gid, scm->creds.gid) &&
1560 unix_secdata_eq(scm, skb);
1561}
1562
1da177e4
LT
1563/*
1564 * Send AF_UNIX data.
1565 */
1566
1b784140
YX
/* sendmsg(2) for SOCK_DGRAM (and, via unix_seqpacket_sendmsg, for
 * SOCK_SEQPACKET) unix sockets.
 *
 * The destination is either msg_name or the connected peer.  The skb
 * is built before any locks are taken; the restart/restart_locked
 * labels handle a peer that died or a full receive queue, where
 * sk_locked tracks whether our own state lock is additionally held.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		/* put the tail of a large datagram into page frags */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			/* our connected peer died: disconnect and fail */
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
		if (timeo) {
			/* drops other's lock; restart the send */
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1767
e370a723 1768/* We use paged skbs for stream sockets, and limit occupancy to 32768
d4e9a408 1769 * bytes, and a minimum of a full page.
e370a723
ED
1770 */
1771#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 1772
1b784140
YX
/* sendmsg(2) for SOCK_STREAM unix sockets.
 *
 * The payload is chopped into skbs sized to keep at least two messages
 * in flight; file descriptors from @msg travel only in the first skb.
 * Returns the number of bytes sent if any were queued, otherwise the
 * error (with SIGPIPE on a broken pipe unless MSG_NOSIGNAL).
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	/* partial success wins over the error */
	return sent ? : err;
}
1871
869e7c62
HFS
/* sendpage() for SOCK_STREAM unix sockets (zero-copy path).
 *
 * Tries to append the page as a fragment to the last skb already on
 * the peer's receive queue when the attached credentials match;
 * otherwise a fresh zero-length skb is allocated and queued.  The
 * `if (false) { alloc_skb: ... }` construct is a jump target only:
 * it re-enters the function after dropping both locks to allocate,
 * then the normal locking sequence below runs again.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		/* reached only via goto: both locks are held here and
		 * must be dropped before a sleeping allocation
		 */
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		/* queue tail unchanged since we allocated: use newskb */
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
1987
1b784140
YX
1988static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
1989 size_t len)
1da177e4
LT
1990{
1991 int err;
1992 struct sock *sk = sock->sk;
ac7bfa62 1993
1da177e4
LT
1994 err = sock_error(sk);
1995 if (err)
1996 return err;
1997
1998 if (sk->sk_state != TCP_ESTABLISHED)
1999 return -ENOTCONN;
2000
2001 if (msg->msg_namelen)
2002 msg->msg_namelen = 0;
2003
1b784140 2004 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2005}
ac7bfa62 2006
1b784140
YX
2007static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2008 size_t size, int flags)
a05d2ad1
EB
2009{
2010 struct sock *sk = sock->sk;
2011
2012 if (sk->sk_state != TCP_ESTABLISHED)
2013 return -ENOTCONN;
2014
1b784140 2015 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2016}
2017
1da177e4
LT
2018static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2019{
ae3b5641 2020 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2021
ae3b5641
AV
2022 if (addr) {
2023 msg->msg_namelen = addr->len;
2024 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2025 }
2026}
2027
1b784140
YX
/* Receive one datagram on a SOCK_DGRAM/SOCK_SEQPACKET AF_UNIX socket.
 *
 * Takes u->iolock around the actual dequeue so that MSG_PEEK offsets and
 * fd-passing state are manipulated consistently; the lock is dropped while
 * waiting for data to arrive. Returns number of bytes copied (the full skb
 * length when MSG_TRUNC is set) or a negative errno.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	/* Out-of-band data has no meaning on datagram sockets. */
	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Try to dequeue under iolock; on -EAGAIN, sleep (lock dropped)
	 * until more packets arrive or the timeout/signal fires.
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
					      &last);
		if (skb)
			break;		/* success: iolock still held */

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* We consumed queue space; let a blocked peer sender make progress. */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Clamp to the remaining payload; flag truncation otherwise. */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	/* Hand sender credentials / security data to the receiver. */
	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Real read: transfer any passed fds and rewind peek offset
		 * past the whole datagram.
		 */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2130
/*
 * Sleep until more data has arrived. But check for races..
 *
 * Sleeps under unix_state_lock (dropped across the actual schedule).
 * Wakes up early when the tail of the receive queue changed relative to
 * @last/@last_len, on error, shutdown, pending signal or expired timeout.
 * @freezable selects freezer-friendly sleeping (used by recvmsg, not by
 * splice). Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* A new skb at the tail, or growth of the last skb, means
		 * there is fresh data to read.
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* Socket was torn down while we slept. */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2173
e370a723
ED
2174static unsigned int unix_skb_len(const struct sk_buff *skb)
2175{
2176 return skb->len - UNIXCB(skb).consumed;
2177}
2178
2b514574
HFS
/* Shared argument bundle for the generic stream receive path and its two
 * front ends (recvmsg and splice). recv_actor copies a chunk of an skb
 * either into @msg or into @pipe and returns bytes consumed (or < 0).
 * Exactly one of @msg / @pipe is used, depending on the front end.
 */
struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;		/* bytes requested by the caller */
	int flags;		/* MSG_* receive flags */
	unsigned int splice_flags;	/* SPLICE_F_* (splice path only) */
};
2189
06a77b07
WC
/* Core receive loop for SOCK_STREAM AF_UNIX sockets, shared by recvmsg
 * and splice via state->recv_actor.
 *
 * Walks the receive queue under u->iolock, copying up to state->size
 * bytes. Blocks (unless MSG_DONTWAIT) until at least the rcvlowat target
 * has been copied. Stops early at fd-passing (SCM_RIGHTS) or credential
 * boundaries so ancillary data is never merged across writers.
 * Returns bytes copied, or a negative errno when nothing was copied.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	/* MSG_PEEK may start partway into the queue (SO_PEEK_OFF). */
	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			/* Queue empty: done if target met, otherwise report
			 * error/EOF or go to sleep for more data.
			 */
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Skip over fully-peeked skbs to reach the read position. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a reference: the actor may sleep and a concurrent
		 * reader may consume and unlink this skb meanwhile.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(&scm, skb);

			/* Partially consumed skb stays queued; stop here. */
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop at an fd boundary so SCM_RIGHTS is delivered
			 * with exactly the data it was sent with.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2386
2b514574
HFS
2387static int unix_stream_read_actor(struct sk_buff *skb,
2388 int skip, int chunk,
2389 struct unix_stream_read_state *state)
2390{
2391 int ret;
2392
2393 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2394 state->msg, chunk);
2395 return ret ?: chunk;
2396}
2397
/* recvmsg() entry point for SOCK_STREAM: wrap the arguments in a
 * unix_stream_read_state and run the generic receive loop (freezable,
 * since this path may sleep across a system suspend).
 */
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_read_actor,
		.socket = sock,
		.msg = msg,
		.size = size,
		.flags = flags
	};

	return unix_stream_read_generic(&state, true);
}
2411
2b514574
HFS
/* recv_actor for splice(): move @chunk bytes of @skb (past the consumed
 * prefix plus @skip) into state->pipe without copying through userspace.
 */
static int unix_stream_splice_actor(struct sk_buff *skb,
				    int skip, int chunk,
				    struct unix_stream_read_state *state)
{
	return skb_splice_bits(skb, state->socket->sk,
			       UNIXCB(skb).consumed + skip,
			       state->pipe, chunk, state->splice_flags);
}
2420
/* splice() entry point for SOCK_STREAM sockets. Sockets are not
 * seekable, so any non-zero *ppos is rejected with -ESPIPE.
 * Non-blocking behaviour is requested either via O_NONBLOCK on the
 * socket file or SPLICE_F_NONBLOCK on the splice call itself.
 */
static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state state = {
		.recv_actor = unix_stream_splice_actor,
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
	};

	if (unlikely(*ppos))
		return -ESPIPE;

	if (sock->file->f_flags & O_NONBLOCK ||
	    flags & SPLICE_F_NONBLOCK)
		state.flags = MSG_DONTWAIT;

	/* Not freezable: splice must not block the freezer. */
	return unix_stream_read_generic(&state, false);
}
2442
1da177e4
LT
/* shutdown(2) for AF_UNIX. Marks this socket's shutdown bits and, for
 * connection-oriented types, mirrors them onto the peer (our RCV becomes
 * the peer's SEND and vice versa), waking both ends' waiters.
 * The peer is pinned with sock_hold() across the unlocked window so it
 * cannot be freed while we update it.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Our receive shutdown is the peer's send shutdown. */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2488
885ee74d
PE
/* Number of readable bytes queued on @sk (SIOCINQ).
 *
 * For stream/seqpacket sockets this sums the unconsumed part of every
 * queued skb; for datagram sockets it reports only the size of the next
 * datagram, matching recvmsg() semantics. Returns -EINVAL on a
 * listening socket.
 */
long unix_inq_len(struct sock *sk)
{
	struct sk_buff *skb;
	long amount = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -EINVAL;

	spin_lock(&sk->sk_receive_queue.lock);
	if (sk->sk_type == SOCK_STREAM ||
	    sk->sk_type == SOCK_SEQPACKET) {
		skb_queue_walk(&sk->sk_receive_queue, skb)
			amount += unix_skb_len(skb);
	} else {
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);
2512
/* Number of bytes currently committed to @sk's send buffer (SIOCOUTQ). */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2518
ba94f308
AV
/* SIOCUNIXFILE: open the filesystem object a socket is bound to and
 * return a new O_PATH file descriptor for it. Requires CAP_NET_ADMIN
 * in the socket's network namespace; fails with -ENOENT for unbound or
 * abstract-namespace sockets (no dentry). Returns the fd or -errno.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* Acquire-load pairs with the release publishing the bind address. */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	path_put(&path);

	return fd;
}
2554
1da177e4
LT
/* ioctl(2) handler for AF_UNIX sockets.
 * SIOCOUTQ/SIOCINQ report queued byte counts to an int __user *;
 * SIOCUNIXFILE opens the bound filesystem object (see unix_open_file()).
 * Anything else falls through to -ENOIOCTLCMD.
 */
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;	/* e.g. -EINVAL on a listener */
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
2582
a11e1d43 2583static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
2584{
2585 struct sock *sk = sock->sk;
a11e1d43
LT
2586 __poll_t mask;
2587
89ab066d 2588 sock_poll_wait(file, sock, wait);
a11e1d43 2589 mask = 0;
1da177e4
LT
2590
2591 /* exceptional events? */
2592 if (sk->sk_err)
a9a08845 2593 mask |= EPOLLERR;
1da177e4 2594 if (sk->sk_shutdown == SHUTDOWN_MASK)
a9a08845 2595 mask |= EPOLLHUP;
f348d70a 2596 if (sk->sk_shutdown & RCV_SHUTDOWN)
a9a08845 2597 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
1da177e4
LT
2598
2599 /* readable? */
3ef7cf57 2600 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 2601 mask |= EPOLLIN | EPOLLRDNORM;
1da177e4
LT
2602
2603 /* Connection-based need to check for termination and startup */
6eba6a37
ED
2604 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2605 sk->sk_state == TCP_CLOSE)
a9a08845 2606 mask |= EPOLLHUP;
1da177e4
LT
2607
2608 /*
2609 * we set writable also when the other side has shut down the
2610 * connection. This prevents stuck sockets.
2611 */
2612 if (unix_writable(sk))
a9a08845 2613 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1da177e4
LT
2614
2615 return mask;
2616}
2617
a11e1d43
LT
/* poll() for SOCK_DGRAM/SOCK_SEQPACKET AF_UNIX sockets.
 *
 * Differs from unix_poll() in that writability also depends on the
 * *peer's* receive queue: a sender to a full, non-mutually-connected
 * peer is not writable, and registers on the peer's wake list
 * (unix_dgram_peer_wake_me) to be woken when space appears.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/* A congested peer (that is not also connected back to us)
		 * masks writability; arrange to be woken when it drains.
		 */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
1da177e4
LT
2675
2676#ifdef CONFIG_PROC_FS
a53eb3fe 2677
7123aaa3
ED
/* The /proc/net/unix iterator encodes (hash bucket, offset-in-bucket)
 * into a single loff_t position: the high bits select the bucket, the
 * low BUCKET_SPACE bits hold a 1-based offset within that bucket.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 2683
7123aaa3 2684static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 2685{
7123aaa3
ED
2686 unsigned long offset = get_offset(*pos);
2687 unsigned long bucket = get_bucket(*pos);
2688 struct sock *sk;
2689 unsigned long count = 0;
1da177e4 2690
7123aaa3
ED
2691 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2692 if (sock_net(sk) != seq_file_net(seq))
097e66c5 2693 continue;
7123aaa3
ED
2694 if (++count == offset)
2695 break;
2696 }
2697
2698 return sk;
2699}
2700
/* Advance the /proc iterator: continue within the current bucket's chain
 * (skipping other namespaces), then move on to subsequent buckets.
 * @sk may be SEQ_START_TOKEN-ish for a fresh start. Updates *pos to the
 * bucket/offset encoding of the returned socket; returns NULL at the end
 * of the table. Caller holds unix_table_lock.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		bucket = get_bucket(*pos) + 1;
		/* restart at offset 1 of the next bucket */
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2727
1da177e4 2728static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2729 __acquires(unix_table_lock)
1da177e4 2730{
fbe9cc4a 2731 spin_lock(&unix_table_lock);
7123aaa3
ED
2732
2733 if (!*pos)
2734 return SEQ_START_TOKEN;
2735
2736 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2737 return NULL;
2738
2739 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
2740}
2741
/* seq_file next: step the iterator to the following in-namespace socket. */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return unix_next_socket(seq, v, pos);
}
2747
/* seq_file stop: release the table lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2753
/* Emit one /proc/net/unix row (or the header for SEQ_START_TOKEN):
 * kernel address, refcount, protocol, flags, type, pseudo-state, inode
 * and, if bound, the path (abstract names rendered with a leading '@'
 * and embedded NULs shown as '@').
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num RefCount Protocol Flags Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* safe: under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* sun_path length = total - sizeof(sun_family);
			 * pathname sockets include a trailing NUL, abstract
			 * ones start with a NUL rendered as '@'.
			 */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2798
56b3d975 2799static const struct seq_operations unix_seq_ops = {
1da177e4
LT
2800 .start = unix_seq_start,
2801 .next = unix_seq_next,
2802 .stop = unix_seq_stop,
2803 .show = unix_seq_show,
2804};
1da177e4
LT
2805#endif
2806
ec1b4cf7 2807static const struct net_proto_family unix_family_ops = {
1da177e4
LT
2808 .family = PF_UNIX,
2809 .create = unix_create,
2810 .owner = THIS_MODULE,
2811};
2812
097e66c5 2813
2c8c1e72 2814static int __net_init unix_net_init(struct net *net)
097e66c5
DL
2815{
2816 int error = -ENOMEM;
2817
a0a53c8b 2818 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
2819 if (unix_sysctl_register(net))
2820 goto out;
d392e497 2821
097e66c5 2822#ifdef CONFIG_PROC_FS
c3506372
CH
2823 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2824 sizeof(struct seq_net_private))) {
1597fbc0 2825 unix_sysctl_unregister(net);
097e66c5 2826 goto out;
1597fbc0 2827 }
097e66c5
DL
2828#endif
2829 error = 0;
2830out:
48dcc33e 2831 return error;
097e66c5
DL
2832}
2833
2c8c1e72 2834static void __net_exit unix_net_exit(struct net *net)
097e66c5 2835{
1597fbc0 2836 unix_sysctl_unregister(net);
ece31ffd 2837 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
2838}
2839
/* Hooks run for every network namespace created/destroyed. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2844
1da177e4
LT
/* Module/boot-time init: register the unix_sock proto (creating its slab
 * cache), the PF_UNIX family and the per-netns operations.
 * The BUILD_BUG_ON guarantees unix_skb_parms fits in skb->cb.
 */
static int __init af_unix_init(void)
{
	int rc = -1;

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	sock_register(&unix_family_ops);
	register_pernet_subsys(&unix_net_ops);
out:
	return rc;
}
2862
/* Module unload: unwind af_unix_init() registrations. */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2869
3d366960
DW
2870/* Earlier than device_initcall() so that other drivers invoking
2871 request_module() don't end up in a loop when modprobe tries
2872 to use a UNIX socket. But later than subsys_initcall() because
2873 we depend on stuff initialised there */
2874fs_initcall(af_unix_init);
1da177e4
LT
2875module_exit(af_unix_exit);
2876
2877MODULE_LICENSE("GPL");
2878MODULE_ALIAS_NETPROTO(PF_UNIX);