// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:        Implementation of BSD Unix domain sockets.
 *
 * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *      Linus Torvalds          :       Assorted bug cures.
 *      Niibe Yutaka            :       async I/O support.
 *      Carsten Paeth           :       PF_UNIX check, address fixes.
 *      Alan Cox                :       Limit size of allocated blocks.
 *      Alan Cox                :       Fixed the stupid socketpair bug.
 *      Alan Cox                :       BSD compatibility fine tuning.
 *      Alan Cox                :       Fixed a bug in connect when interrupted.
 *      Alan Cox                :       Sorted out a proper draft version of
 *                                      file descriptor passing hacked up from
 *                                      Mike Shaver's work.
 *      Marty Leisner           :       Fixes to fd passing.
 *      Nick Nevin              :       recvmsg bugfix.
 *      Alan Cox                :       Started proper garbage collector.
 *      Heiko EiBfeldt          :       Missing verify_area check.
 *      Alan Cox                :       Started POSIXisms.
 *      Andreas Schwab          :       Replace inode by dentry for proper
 *                                      reference counting.
 *      Kirk Petersen           :       Made this a module.
 *      Christoph Rohland       :       Elegant non-blocking accept/connect
 *                                      algorithm. Lots of bug fixes.
 *      Alexey Kuznetsov        :       Repaired (I hope) bugs introduced
 *                                      by the above two patches.
 *      Andrea Arcangeli        :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid huge amounts
 *                                      of socks hashed (this is for unix_gc()
 *                                      performance reasons).
 *                                      Security fix that limits the max
 *                                      number of socks to 2*max_files and
 *                                      the number of skbs queueable in the
 *                                      dgram receiver.
 *      Artur Skawina           :       Hash function optimizations.
 *      Alexey Kuznetsov        :       Full scale SMP. Lots of bugs are introduced 8)
 *      Malcolm Beattie         :       Set peercred for socketpair.
 *      Michal Ostrowski        :       Module initialization cleanup.
 *      Arnaldo C. Melo         :       Remove MOD_{INC,DEC}_USE_COUNT,
 *                                      the core infrastructure is doing that
 *                                      for all net proto families now (2.5.69+).
 *
 * Known differences from reference BSD that was tested:
 *
 *      [TO FIX]
 *      ECONNREFUSED is not returned from one end of a connected() socket to the
 *              other the moment one end closes.
 *      fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *      [NOT TO FIX]
 *      accept() returns a path name even if the connecting socket has closed
 *              in the meantime (BSD loses the path and gives up).
 *      accept() returns 0 length path for an unbound connector. BSD returns 16
 *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *      BSD af_unix apparently has connect forgetting to block properly.
 *              (need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *      Bug fixes and improvements.
 *              - client shutdown killed server socket.
 *              - removed all useless cli/sti pairs.
 *
 *      Semantic changes/extensions.
 *              - generic control message passing.
 *              - SCM_CREDENTIALS control message.
 *              - "Abstract" (not FS based) socket bindings.
 *                Abstract names are sequences of bytes (not zero terminated)
 *                started by 0, so that this name space does not intersect
 *                with BSD names.
 */

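/* For illustration (userspace sketch, not part of this file): an abstract
 * name is selected purely by a leading NUL byte plus an explicit address
 * length, e.g.
 *
 *      struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *      memcpy(sun.sun_path, "\0example", 8);
 *      bind(fd, (struct sockaddr *)&sun,
 *           offsetof(struct sockaddr_un, sun_path) + 8);
 */
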
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
        unsigned long hash = (unsigned long)addr;

        hash ^= hash >> 16;
        hash ^= hash >> 8;
        hash %= UNIX_HASH_SIZE;
        return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

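/* The table above is split in two halves: chains [0, UNIX_HASH_SIZE) hold
 * bound sockets (abstract names hashed by checksum, filesystem names by
 * inode number), while chains [UNIX_HASH_SIZE, 2 * UNIX_HASH_SIZE) hold
 * unbound sockets hashed by socket pointer as computed above.
 */
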
#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        UNIXCB(skb).lsmblob = scm->lsmblob;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->lsmblob = UNIXCB(skb).lsmblob;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return lsmblob_equal(&scm->lsmblob, &(UNIXCB(skb).lsmblob));
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
        unsigned int hash = (__force unsigned int)csum_fold(n);

        hash ^= hash >> 8;
        return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
        return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
        return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
        return skb_queue_len_lockless(&sk->sk_receive_queue) >
                READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
        struct sock *peer;

        unix_state_lock(s);
        peer = unix_peer(s);
        if (peer)
                sock_hold(peer);
        unix_state_unlock(s);
        return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
        if (refcount_dec_and_test(&addr->refcnt))
                kfree(addr);
}

/*
 *      Check unix socket name:
 *              - it must not be zero length.
 *              - if it does not start with zero, it must be NUL terminated (FS object)
 *              - if it starts with zero, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
        *hashp = 0;

        if (len <= sizeof(short) || len > sizeof(*sunaddr))
                return -EINVAL;
        if (!sunaddr || sunaddr->sun_family != AF_UNIX)
                return -EINVAL;
        if (sunaddr->sun_path[0]) {
                /*
                 * This may look like an off by one error but it is a bit more
                 * subtle. 108 is the longest valid AF_UNIX path for a binding.
                 * sun_path[108] doesn't as such exist. However in kernel space
                 * we are guaranteed that it is a valid memory location in our
                 * kernel address buffer.
                 */
                ((char *)sunaddr)[len] = 0;
                len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
                return len;
        }

        *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
        return len;
}
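
/* Worked example of the path case above: sun_family (2 bytes) plus the
 * string "/tmp/sock" and its NUL gives a returned len of 12, regardless of
 * how much of sun_path the caller actually supplied. The write to
 * ((char *)sunaddr)[len] may touch one byte past struct sockaddr_un, which
 * is safe because the syscall path stages addresses in a larger kernel
 * buffer (struct sockaddr_storage).
 */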

static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}

static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
                            unsigned hash)
{
        __unix_remove_socket(sk);
        smp_store_release(&unix_sk(sk)->addr, addr);
        __unix_insert_socket(&unix_socket_table[hash], sk);
}

static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
                                              struct sockaddr_un *sunname,
                                              int len, unsigned int hash)
{
        struct sock *s;

        sk_for_each(s, &unix_socket_table[hash]) {
                struct unix_sock *u = unix_sk(s);

                if (!net_eq(sock_net(s), net))
                        continue;

                if (u->addr->len == len &&
                    !memcmp(u->addr->name, sunname, len))
                        return s;
        }
        return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
                                                   struct sockaddr_un *sunname,
                                                   int len, unsigned int hash)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        s = __unix_find_socket_byname(net, sunname, len, hash);
        if (s)
                sock_hold(s);
        spin_unlock(&unix_table_lock);
        return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
        struct sock *s;

        spin_lock(&unix_table_lock);
        sk_for_each(s,
                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
                struct dentry *dentry = unix_sk(s)->path.dentry;

                if (dentry && d_backing_inode(dentry) == i) {
                        sock_hold(s);
                        goto found;
                }
        }
        s = NULL;
found:
        spin_unlock(&unix_table_lock);
        return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and broken when the
 * association to the server socket is dissolved or after a wake up
 * was relayed.
 */

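/* Concretely: client C is connected to server S (unix_peer(C) == S) while
 * unix_peer(S) != C, as with many writers sharing /dev/log. When C hits
 * the flow-control limit, C's peer_wake entry is hooked onto S's peer_wait
 * queue; once S reads a datagram, the relay function below wakes C's own
 * wait queue so a sleeping poll()/sendmsg() in C can retry.
 */
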
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
                                      void *key)
{
        struct unix_sock *u;
        wait_queue_head_t *u_sleep;

        u = container_of(q, struct unix_sock, peer_wake);

        __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
                            q);
        u->peer_wake.private = NULL;

        /* relaying can only happen while the wq still exists */
        u_sleep = sk_sleep(&u->sk);
        if (u_sleep)
                wake_up_interruptible_poll(u_sleep, key_to_poll(key));

        return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
        struct unix_sock *u, *u_other;
        int rc;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        rc = 0;
        spin_lock(&u_other->peer_wait.lock);

        if (!u->peer_wake.private) {
                u->peer_wake.private = other;
                __add_wait_queue(&u_other->peer_wait, &u->peer_wake);

                rc = 1;
        }

        spin_unlock(&u_other->peer_wait.lock);
        return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
                                            struct sock *other)
{
        struct unix_sock *u, *u_other;

        u = unix_sk(sk);
        u_other = unix_sk(other);
        spin_lock(&u_other->peer_wait.lock);

        if (u->peer_wake.private == other) {
                __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
                u->peer_wake.private = NULL;
        }

        spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
                                                   struct sock *other)
{
        unix_dgram_peer_wake_disconnect(sk, other);
        wake_up_interruptible_poll(sk_sleep(sk),
                                   EPOLLOUT |
                                   EPOLLWRNORM |
                                   EPOLLWRBAND);
}

/* preconditions:
 *      - unix_peer(sk) == other
 *      - association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
        int connected;

        connected = unix_dgram_peer_wake_connect(sk, other);

        /* If other is SOCK_DEAD, we want to make sure we signal
         * POLLOUT, such that a subsequent write() can get a
         * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
         * to other and it's full, we will hang waiting for POLLOUT.
         */
        if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
                return 1;

        if (connected)
                unix_dgram_peer_wake_disconnect(sk, other);

        return 0;
}

static int unix_writable(const struct sock *sk)
{
        return sk->sk_state != TCP_LISTEN &&
               (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
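
/* Note on the shift above: the socket counts as writable only while
 * outstanding write memory stays at or below a quarter of sk_sndbuf
 * (4 * wmem_alloc <= sndbuf), so writers get real headroom instead of a
 * wakeup on every freed byte.
 */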

static void unix_write_space(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        if (unix_writable(sk)) {
                wq = rcu_dereference(sk->sk_wq);
                if (skwq_has_sleeper(wq))
                        wake_up_interruptible_sync_poll(&wq->wait,
                                EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
                sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        }
        rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
        if (!skb_queue_empty(&sk->sk_receive_queue)) {
                skb_queue_purge(&sk->sk_receive_queue);
                wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

                /* If one link of a bidirectional dgram pipe is disconnected,
                 * we signal an error. Messages are lost. Do not do this when
                 * the peer was not connected to us.
                 */
                if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
                        other->sk_err = ECONNRESET;
                        sk_error_report(other);
                }
        }
        other->sk_state = TCP_CLOSE;
}

static void unix_sock_destructor(struct sock *sk)
{
        struct unix_sock *u = unix_sk(sk);

        skb_queue_purge(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
        if (u->oob_skb) {
                kfree_skb(u->oob_skb);
                u->oob_skb = NULL;
        }
#endif
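        /* The oob_skb cleanup above belongs to the SOCK_STREAM MSG_OOB
         * extension: the most recently sent out-of-band byte is parked in
         * u->oob_skb and would leak here if it was never consumed.
         */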
        WARN_ON(refcount_read(&sk->sk_wmem_alloc));
        WARN_ON(!sk_unhashed(sk));
        WARN_ON(sk->sk_socket);
        if (!sock_flag(sk, SOCK_DEAD)) {
                pr_info("Attempt to release alive unix socket: %p\n", sk);
                return;
        }

        if (u->addr)
                unix_release_addr(u->addr);

        atomic_long_dec(&unix_nr_socks);
        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
        pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
                 atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        path = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;

        skpair = unix_peer(sk);
        unix_peer(sk) = NULL;

        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What does the above comment talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
        const struct cred *old_cred;
        struct pid *old_pid;

        spin_lock(&sk->sk_peer_lock);
        old_pid = sk->sk_peer_pid;
        old_cred = sk->sk_peer_cred;
        sk->sk_peer_pid = get_pid(task_tgid(current));
        sk->sk_peer_cred = get_current_cred();
        spin_unlock(&sk->sk_peer_lock);

        put_pid(old_pid);
        put_cred(old_cred);
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
        const struct cred *old_cred;
        struct pid *old_pid;

        if (sk < peersk) {
                spin_lock(&sk->sk_peer_lock);
                spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
        } else {
                spin_lock(&peersk->sk_peer_lock);
                spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
        }
        old_pid = sk->sk_peer_pid;
        old_cred = sk->sk_peer_cred;
        sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
        sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);

        spin_unlock(&sk->sk_peer_lock);
        spin_unlock(&peersk->sk_peer_lock);

        put_pid(old_pid);
        put_cred(old_cred);
}
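
/* Taking the two sk_peer_lock spinlocks in pointer order above imposes a
 * global acquisition order, so concurrent copy_peercred() calls on the same
 * pair of sockets cannot deadlock; spin_lock_nested() merely tells lockdep
 * that holding two locks of one class is intentional here.
 */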

static int unix_listen(struct socket *sock, int backlog)
{
        int err;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;       /* Only stream/seqpacket sockets accept */
        err = -EINVAL;
        if (!u->addr)
                goto out;       /* No listens on an unbound socket */
        unix_state_lock(sk);
        if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (backlog > sk->sk_max_ack_backlog)
                wake_up_interruptible_all(&u->peer_wait);
        sk->sk_max_ack_backlog = backlog;
        sk->sk_state = TCP_LISTEN;
        /* set credentials so connect can copy them */
        init_peercred(sk);
        err = 0;

out_unlock:
        unix_state_unlock(sk);
out:
        return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
                                poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
                          sk_read_actor_t recv_actor);
static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
                                 sk_read_actor_t recv_actor);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
        struct unix_sock *u = unix_sk(sk);

        if (mutex_lock_interruptible(&u->iolock))
                return -EINTR;

        sk->sk_peek_off = val;
        mutex_unlock(&u->iolock);

        return 0;
}

#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct unix_sock *u;

        if (sk) {
                u = unix_sk(sock->sk);
                seq_printf(m, "scm_fds: %u\n",
                           atomic_read(&u->scm_stat.nr_fds));
        }
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .read_sock =    unix_stream_read_sock,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
        .show_fdinfo =  unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .sendmsg =      unix_dgram_sendmsg,
        .read_sock =    unix_read_sock,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
        .show_fdinfo =  unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = unix_compat_ioctl,
#endif
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
        .show_fdinfo =  unix_show_fdinfo,
};

static void unix_close(struct sock *sk, long timeout)
{
        /* Nothing to do here, unix socket does not need a ->close().
         * This is merely for sockmap.
         */
}

static void unix_unhash(struct sock *sk)
{
        /* Nothing to do here, unix socket does not need a ->unhash().
         * This is merely for sockmap.
         */
}

struct proto unix_dgram_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
        .close                  = unix_close,
#ifdef CONFIG_BPF_SYSCALL
        .psock_update_sk_prot   = unix_dgram_bpf_update_proto,
#endif
};

struct proto unix_stream_proto = {
        .name                   = "UNIX-STREAM",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
        .close                  = unix_close,
        .unhash                 = unix_unhash,
#ifdef CONFIG_BPF_SYSCALL
        .psock_update_sk_prot   = unix_stream_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
        struct unix_sock *u;
        struct sock *sk;
        int err;

        atomic_long_inc(&unix_nr_socks);
        if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
                err = -ENFILE;
                goto err;
        }

        if (type == SOCK_STREAM)
                sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
        else /* dgram and seqpacket */
                sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

        if (!sk) {
                err = -ENOMEM;
                goto err;
        }

        sock_init_data(sock, sk);

        sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
        sk->sk_write_space      = unix_write_space;
        sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
        sk->sk_destruct         = unix_sock_destructor;
        u = unix_sk(sk);
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        spin_lock_init(&u->lock);
        atomic_long_set(&u->inflight, 0);
        INIT_LIST_HEAD(&u->link);
        mutex_init(&u->iolock); /* single task reading lock */
        mutex_init(&u->bindlock); /* single task binding lock */
        init_waitqueue_head(&u->peer_wait);
        init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
        memset(&u->scm_stat, 0, sizeof(struct scm_stat));
        unix_insert_socket(unix_sockets_unbound(sk), sk);

        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
        local_bh_enable();

        return sk;

err:
        atomic_long_dec(&unix_nr_socks);
        return ERR_PTR(err);
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
                       int kern)
{
        struct sock *sk;

        if (protocol && protocol != PF_UNIX)
                return -EPROTONOSUPPORT;

        sock->state = SS_UNCONNECTED;

        switch (sock->type) {
        case SOCK_STREAM:
                sock->ops = &unix_stream_ops;
                break;
                /*
                 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
                 * nothing uses it.
                 */
        case SOCK_RAW:
                sock->type = SOCK_DGRAM;
                fallthrough;
        case SOCK_DGRAM:
                sock->ops = &unix_dgram_ops;
                break;
        case SOCK_SEQPACKET:
                sock->ops = &unix_seqpacket_ops;
                break;
        default:
                return -ESOCKTNOSUPPORT;
        }

        sk = unix_create1(net, sock, kern, sock->type);
        if (IS_ERR(sk))
                return PTR_ERR(sk);

        return 0;
}

static int unix_release(struct socket *sock)
{
        struct sock *sk = sock->sk;

        if (!sk)
                return 0;

        sk->sk_prot->close(sk, 0);
        unix_release_sock(sk, 0);
        sock->sk = NULL;

        return 0;
}

static int unix_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        static u32 ordernum = 1;
        struct unix_address *addr;
        int err;
        unsigned int retries = 0;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;

        if (u->addr)
                goto out;

        err = -ENOMEM;
        addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
        if (!addr)
                goto out;

        addr->name->sun_family = AF_UNIX;
        refcount_set(&addr->refcnt, 1);

retry:
        addr->len = sprintf(addr->name->sun_path + 1, "%05x", ordernum) + 1 + sizeof(short);
        addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
        addr->hash ^= sk->sk_type;

        spin_lock(&unix_table_lock);
        ordernum = (ordernum + 1) & 0xFFFFF;

        if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
                spin_unlock(&unix_table_lock);
                /*
                 * __unix_find_socket_byname() may take a long time if many
                 * names are already in use.
                 */
                cond_resched();
                /* Give up if all names seem to be in use. */
                if (retries++ == 0xFFFFF) {
                        err = -ENOSPC;
                        kfree(addr);
                        goto out;
                }
                goto retry;
        }

        __unix_set_addr(sk, addr, addr->hash);
        spin_unlock(&unix_table_lock);
        err = 0;

out:    mutex_unlock(&u->bindlock);
        return err;
}
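
/* Autobind therefore yields abstract names of the form "\0xxxxx", xxxxx
 * being five lowercase hex digits of the ordernum counter; up to 0x100000
 * candidates are probed before the bind gives up with -ENOSPC.
 */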

static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *u;
        struct path path;
        int err = 0;

        if (sunname->sun_path[0]) {
                struct inode *inode;
                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                if (err)
                        goto fail;
                inode = d_backing_inode(path.dentry);
                err = path_permission(&path, MAY_WRITE);
                if (err)
                        goto put_fail;

                err = -ECONNREFUSED;
                if (!S_ISSOCK(inode->i_mode))
                        goto put_fail;
                u = unix_find_socket_byinode(inode);
                if (!u)
                        goto put_fail;

                if (u->sk_type == type)
                        touch_atime(&path);

                path_put(&path);

                err = -EPROTOTYPE;
                if (u->sk_type != type) {
                        sock_put(u);
                        goto fail;
                }
        } else {
                err = -ECONNREFUSED;
                u = unix_find_socket_byname(net, sunname, len, type ^ hash);
                if (u) {
                        struct dentry *dentry;
                        dentry = unix_sk(u)->path.dentry;
                        if (dentry)
                                touch_atime(&unix_sk(u)->path);
                } else
                        goto fail;
        }
        return u;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}

static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
{
        struct unix_sock *u = unix_sk(sk);
        umode_t mode = S_IFSOCK |
               (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
        struct user_namespace *ns; // barf...
        struct path parent;
        struct dentry *dentry;
        unsigned int hash;
        int err;

        /*
         * Get the parent directory, calculate the hash for last
         * component.
         */
        dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
        ns = mnt_user_ns(parent.mnt);

        /*
         * All right, let's create it.
         */
        err = security_path_mknod(&parent, dentry, mode, 0);
        if (!err)
                err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
        if (err)
                goto out;
        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                goto out_unlink;
        if (u->addr)
                goto out_unlock;

        addr->hash = UNIX_HASH_SIZE;
        hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
        spin_lock(&unix_table_lock);
        u->path.mnt = mntget(parent.mnt);
        u->path.dentry = dget(dentry);
        __unix_set_addr(sk, addr, hash);
        spin_unlock(&unix_table_lock);
        mutex_unlock(&u->bindlock);
        done_path_create(&parent, dentry);
        return 0;

out_unlock:
        mutex_unlock(&u->bindlock);
        err = -EINVAL;
out_unlink:
        /* failed after successful mknod? unlink what we'd created... */
        vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
out:
        done_path_create(&parent, dentry);
        return err;
}

static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
{
        struct unix_sock *u = unix_sk(sk);
        int err;

        err = mutex_lock_interruptible(&u->bindlock);
        if (err)
                return err;

        if (u->addr) {
                mutex_unlock(&u->bindlock);
                return -EINVAL;
        }

        spin_lock(&unix_table_lock);
        if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
                                      addr->hash)) {
                spin_unlock(&unix_table_lock);
                mutex_unlock(&u->bindlock);
                return -EADDRINUSE;
        }
        __unix_set_addr(sk, addr, addr->hash);
        spin_unlock(&unix_table_lock);
        mutex_unlock(&u->bindlock);
        return 0;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
        struct sock *sk = sock->sk;
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        char *sun_path = sunaddr->sun_path;
        int err;
        unsigned int hash;
        struct unix_address *addr;

        if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
            sunaddr->sun_family != AF_UNIX)
                return -EINVAL;

        if (addr_len == sizeof(short))
                return unix_autobind(sock);

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                return err;
        addr_len = err;
        addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
        if (!addr)
                return -ENOMEM;

        memcpy(addr->name, sunaddr, addr_len);
        addr->len = addr_len;
        addr->hash = hash ^ sk->sk_type;
        refcount_set(&addr->refcnt, 1);

        if (sun_path[0])
                err = unix_bind_bsd(sk, addr);
        else
                err = unix_bind_abstract(sk, addr);
        if (err)
                unix_release_addr(addr);
        return err == -EEXIST ? -EADDRINUSE : err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }
        if (sk1 < sk2) {
                unix_state_lock(sk1);
                unix_state_lock_nested(sk2);
        } else {
                unix_state_lock(sk2);
                unix_state_lock_nested(sk1);
        }
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_unlock(sk1);
                return;
        }
        unix_state_unlock(sk1);
        unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

                sk->sk_state = other->sk_state = TCP_ESTABLISHED;
        } else {
                /*
                 * 1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);

                unix_peer(sk) = other;
                if (!other)
                        sk->sk_state = TCP_CLOSE;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }

        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}
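
/* For illustration (userspace sketch): the AF_UNSPEC branch above is what
 * disconnects a previously connected datagram socket:
 *
 *      struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *      connect(fd, &sa, sizeof(sa));
 */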

static long unix_wait_for_peer(struct sock *other, long timeo)
        __releases(&unix_sk(other)->lock)
{
        struct unix_sock *u = unix_sk(other);
        int sched;
        DEFINE_WAIT(wait);

        prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

        sched = !sock_flag(other, SOCK_DEAD) &&
                !(other->sk_shutdown & RCV_SHUTDOWN) &&
                unix_recvq_full(other);

        unix_state_unlock(other);

        if (sched)
                timeo = schedule_timeout(timeo);

        finish_wait(&u->peer_wait, &wait);
        return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we will make it after state is locked,
           we will have to recheck all again in any case.
         */

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
        if (IS_ERR(newsk)) {
                err = PTR_ERR(newsk);
                newsk = NULL;
                goto out;
        }

        err = -ENOMEM;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /* Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           This is a tricky place. We need to grab our state lock and cannot
           drop the lock on the peer. It is dangerous because deadlock is
           possible. Connect to self case and simultaneous
           attempt to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempting to grab the lock.

           Well, and we have to recheck the state after the socket is locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set all the necessary fields... */

        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock
         *
         * The contents of *(otheru->addr) and otheru->path
         * are seen fully set up here, since we have found
         * otheru in hash under unix_table_lock. Insertion
         * into the hash chain we'd found it in had been done
         * in an earlier critical area protected by unix_table_lock,
         * the same one where we'd set *(otheru->addr) contents,
         * as well as otheru->path and otheru->addr itself.
         *
         * Using smp_store_release() here to set newu->addr
         * is enough to make those stores, as well as stores
         * to newu->path visible to anyone who gets newu->addr
         * by smp_load_acquire(). IOW, the same guarantees
         * as for unix_sock instances bound in unix_bind() or
         * in unix_autobind().
         */
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }
        refcount_inc(&otheru->addr->refcnt);
        smp_store_release(&newu->addr, otheru->addr);

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
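
/* Handshake summary for the function above: the connecting side builds the
 * embryo socket (newsk), makes its own sk the peer of newsk, and queues a
 * one-byte skb owned by newsk on the listener's receive queue;
 * unix_accept() later dequeues that skb and grafts skb->sk onto the
 * accepting socket.
 */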

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
        struct sock *ska = socka->sk, *skb = sockb->sk;

        /* Join our sockets back to back */
        sock_hold(ska);
        sock_hold(skb);
        unix_peer(ska) = skb;
        unix_peer(skb) = ska;
        init_peercred(ska);
        init_peercred(skb);

        ska->sk_state = TCP_ESTABLISHED;
        skb->sk_state = TCP_ESTABLISHED;
        socka->state  = SS_CONNECTED;
        sockb->state  = SS_CONNECTED;
        return 0;
}

static void unix_sock_inherit_flags(const struct socket *old,
                                    struct socket *new)
{
        if (test_bit(SOCK_PASSCRED, &old->flags))
                set_bit(SOCK_PASSCRED, &new->flags);
        if (test_bit(SOCK_PASSSEC, &old->flags))
                set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
                       bool kern)
{
        struct sock *sk = sock->sk;
        struct sock *tsk;
        struct sk_buff *skb;
        int err;

        err = -EOPNOTSUPP;
        if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
                goto out;

        err = -EINVAL;
        if (sk->sk_state != TCP_LISTEN)
                goto out;

        /* If socket state is TCP_LISTEN it cannot change (for now...),
         * so that no locks are necessary.
         */

        skb = skb_recv_datagram(sk, 0, flags & O_NONBLOCK, &err);
        if (!skb) {
                /* This means receive shutdown. */
                if (err == 0)
                        err = -EINVAL;
                goto out;
        }

        tsk = skb->sk;
        skb_free_datagram(sk, skb);
        wake_up_interruptible(&unix_sk(sk)->peer_wait);

        /* attach accepted sock to socket */
        unix_state_lock(tsk);
        newsock->state = SS_CONNECTED;
        unix_sock_inherit_flags(sock, newsock);
        sock_graft(tsk, newsock);
        unix_state_unlock(tsk);
        return 0;

out:
        return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
        struct sock *sk = sock->sk;
        struct unix_address *addr;
        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
        int err = 0;

        if (peer) {
                sk = unix_peer_get(sk);

                err = -ENOTCONN;
                if (!sk)
                        goto out;
                err = 0;
        } else {
                sock_hold(sk);
        }

        addr = smp_load_acquire(&unix_sk(sk)->addr);
        if (!addr) {
                sunaddr->sun_family = AF_UNIX;
                sunaddr->sun_path[0] = 0;
                err = sizeof(short);
        } else {
                err = addr->len;
                memcpy(sunaddr, addr->name, addr->len);
        }
        sock_put(sk);
out:
        return err;
}

static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->fp = scm_fp_dup(UNIXCB(skb).fp);

        /*
         * Garbage collection of unix sockets starts by selecting a set of
         * candidate sockets which have reference only from being in flight
         * (total_refs == inflight_refs). This condition is checked once during
         * the candidate collection phase, and candidates are marked as such, so
         * that non-candidates can later be ignored. While inflight_refs is
         * protected by unix_gc_lock, total_refs (file count) is not, hence this
         * is an instantaneous decision.
         *
         * Once a candidate, however, the socket must not be reinstalled into a
         * file descriptor while the garbage collection is in progress.
         *
         * If the above conditions are met, then the directed graph of
         * candidates (*) does not change while unix_gc_lock is held.
         *
         * Any operation that changes the file count through file descriptors
         * (dup, close, sendmsg) does not change the graph since candidates are
         * not installed in fds.
         *
         * Dequeuing a candidate via recvmsg would install it into an fd, but
         * that takes unix_gc_lock to decrement the inflight count, so it's
         * serialized with garbage collection.
         *
         * MSG_PEEK is special in that it does not change the inflight count,
         * yet does install the socket into an fd. The following lock/unlock
         * pair is to ensure serialization with garbage collection. It must be
         * done between incrementing the file count and installing the file into
         * an fd.
         *
         * If garbage collection starts after the barrier provided by the
         * lock/unlock, then it will see the elevated refcount and not mark this
         * as a candidate. If a garbage collection is already in progress
         * before the file count was incremented, then the lock/unlock pair will
         * ensure that garbage collection is finished before progressing to
         * installing the fd.
         *
         * (*) A -> B where B is on the queue of A or B is on the queue of C
         *     which is on the queue of listening socket A.
         */
        spin_lock(&unix_gc_lock);
        spin_unlock(&unix_gc_lock);
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
        int err = 0;

        UNIXCB(skb).pid = get_pid(scm->pid);
        UNIXCB(skb).uid = scm->creds.uid;
        UNIXCB(skb).gid = scm->creds.gid;
        UNIXCB(skb).fp = NULL;
        unix_get_secdata(scm, skb);
        if (scm->fp && send_fds)
                err = unix_attach_fds(scm, skb);

        skb->destructor = unix_destruct_scm;
        return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
                                  const struct sock *other)
{
        return test_bit(SOCK_PASSCRED, &sock->flags) ||
               !other->sk_socket ||
               test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
                            const struct sock *other)
{
        if (UNIXCB(skb).pid)
                return;
        if (unix_passcred_enabled(sock, other)) {
                UNIXCB(skb).pid = get_pid(task_tgid(current));
                current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
        }
}

static int maybe_init_creds(struct scm_cookie *scm,
                            struct socket *socket,
                            const struct sock *other)
{
        int err;
        struct msghdr msg = { .msg_controllen = 0 };

        err = scm_send(socket, &msg, scm, false);
        if (err)
                return err;

        if (unix_passcred_enabled(socket, other)) {
                scm->pid = get_pid(task_tgid(current));
                current_uid_gid(&scm->creds.uid, &scm->creds.gid);
        }
        return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
                            struct scm_cookie *scm)
{
        const struct unix_skb_parms *u = &UNIXCB(skb);

        return u->pid == scm->pid &&
               uid_eq(u->uid, scm->creds.uid) &&
               gid_eq(u->gid, scm->creds.gid) &&
               unix_secdata_eq(scm, skb);
}
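
/* unix_skb_scm_eq() is used on the receive side to decide whether a queued
 * skb carries the same pid, uid/gid and security label as the scm state
 * already being reported; when they differ, batching stops so each set of
 * credentials reaches userspace separately.
 */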
1732
3c32da19
KT
1733static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1734{
1735 struct scm_fp_list *fp = UNIXCB(skb).fp;
1736 struct unix_sock *u = unix_sk(sk);
1737
3c32da19 1738 if (unlikely(fp && fp->count))
7782040b 1739 atomic_add(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1740}
1741
1742static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1743{
1744 struct scm_fp_list *fp = UNIXCB(skb).fp;
1745 struct unix_sock *u = unix_sk(sk);
1746
3c32da19 1747 if (unlikely(fp && fp->count))
7782040b 1748 atomic_sub(fp->count, &u->scm_stat.nr_fds);
3c32da19
KT
1749}
1750
1da177e4
LT
1751/*
1752 * Send AF_UNIX data.
1753 */
1754
1b784140
YX
1755static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1756 size_t len)
1da177e4 1757{
1da177e4 1758 struct sock *sk = sock->sk;
3b1e0a65 1759 struct net *net = sock_net(sk);
1da177e4 1760 struct unix_sock *u = unix_sk(sk);
342dfc30 1761 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1da177e4
LT
1762 struct sock *other = NULL;
 1763 int namelen = 0; /* silences a bogus GCC uninitialized-use warning */
1764 int err;
95c96174 1765 unsigned int hash;
f78a5fda 1766 struct sk_buff *skb;
1da177e4 1767 long timeo;
7cc05662 1768 struct scm_cookie scm;
eb6a2481 1769 int data_len = 0;
7d267278 1770 int sk_locked;
1da177e4 1771
5f23b734 1772 wait_for_unix_gc();
7cc05662 1773 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
1774 if (err < 0)
1775 return err;
1776
1777 err = -EOPNOTSUPP;
1778 if (msg->msg_flags&MSG_OOB)
1779 goto out;
1780
1781 if (msg->msg_namelen) {
1782 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1783 if (err < 0)
1784 goto out;
1785 namelen = err;
1786 } else {
1787 sunaddr = NULL;
1788 err = -ENOTCONN;
1789 other = unix_peer_get(sk);
1790 if (!other)
1791 goto out;
1792 }
1793
f64f9e71
JP
1794 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1795 && (err = unix_autobind(sock)) != 0)
1da177e4
LT
1796 goto out;
1797
1798 err = -EMSGSIZE;
1799 if (len > sk->sk_sndbuf - 32)
1800 goto out;
1801
31ff6aa5 1802 if (len > SKB_MAX_ALLOC) {
eb6a2481
ED
1803 data_len = min_t(size_t,
1804 len - SKB_MAX_ALLOC,
1805 MAX_SKB_FRAGS * PAGE_SIZE);
31ff6aa5
KT
1806 data_len = PAGE_ALIGN(data_len);
1807
1808 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1809 }
eb6a2481
ED
1810
1811 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
28d64271
ED
1812 msg->msg_flags & MSG_DONTWAIT, &err,
1813 PAGE_ALLOC_COSTLY_ORDER);
e27dfcea 1814 if (skb == NULL)
1da177e4
LT
1815 goto out;
1816
7cc05662 1817 err = unix_scm_to_skb(&scm, skb, true);
25888e30 1818 if (err < 0)
7361c36c 1819 goto out_free;
877ce7c1 1820
eb6a2481
ED
1821 skb_put(skb, len - data_len);
1822 skb->data_len = data_len;
1823 skb->len = len;
c0371da6 1824 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1da177e4
LT
1825 if (err)
1826 goto out_free;
1827
1828 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1829
1830restart:
1831 if (!other) {
1832 err = -ECONNRESET;
1833 if (sunaddr == NULL)
1834 goto out_free;
1835
097e66c5 1836 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1da177e4 1837 hash, &err);
e27dfcea 1838 if (other == NULL)
1da177e4
LT
1839 goto out_free;
1840 }
1841
d6ae3bae
AC
1842 if (sk_filter(other, skb) < 0) {
1843 /* Toss the packet but do not return any error to the sender */
1844 err = len;
1845 goto out_free;
1846 }
1847
7d267278 1848 sk_locked = 0;
1c92b4e5 1849 unix_state_lock(other);
7d267278 1850restart_locked:
1da177e4
LT
1851 err = -EPERM;
1852 if (!unix_may_send(sk, other))
1853 goto out_unlock;
1854
7d267278 1855 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1da177e4
LT
1856 /*
 1857 * Check with POSIX 1003.1g - what should a
 1858 * datagram send to a dead peer return?
1859 */
1c92b4e5 1860 unix_state_unlock(other);
1da177e4
LT
1861 sock_put(other);
1862
7d267278
RW
1863 if (!sk_locked)
1864 unix_state_lock(sk);
1865
1da177e4 1866 err = 0;
1da177e4 1867 if (unix_peer(sk) == other) {
e27dfcea 1868 unix_peer(sk) = NULL;
7d267278
RW
1869 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1870
1c92b4e5 1871 unix_state_unlock(sk);
1da177e4 1872
dc56ad70 1873 sk->sk_state = TCP_CLOSE;
1da177e4
LT
1874 unix_dgram_disconnected(sk, other);
1875 sock_put(other);
1876 err = -ECONNREFUSED;
1877 } else {
1c92b4e5 1878 unix_state_unlock(sk);
1da177e4
LT
1879 }
1880
1881 other = NULL;
1882 if (err)
1883 goto out_free;
1884 goto restart;
1885 }
1886
1887 err = -EPIPE;
1888 if (other->sk_shutdown & RCV_SHUTDOWN)
1889 goto out_unlock;
1890
1891 if (sk->sk_type != SOCK_SEQPACKET) {
1892 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1893 if (err)
1894 goto out_unlock;
1895 }
1896
a5527dda
RW
1897 /* other == sk && unix_peer(other) != sk if
1898 * - unix_peer(sk) == NULL, destination address bound to sk
1899 * - unix_peer(sk) == sk by time of get but disconnected before lock
1900 */
1901 if (other != sk &&
86b18aaa
QC
1902 unlikely(unix_peer(other) != sk &&
1903 unix_recvq_full_lockless(other))) {
7d267278
RW
1904 if (timeo) {
1905 timeo = unix_wait_for_peer(other, timeo);
1906
1907 err = sock_intr_errno(timeo);
1908 if (signal_pending(current))
1909 goto out_free;
1910
1911 goto restart;
1da177e4
LT
1912 }
1913
7d267278
RW
1914 if (!sk_locked) {
1915 unix_state_unlock(other);
1916 unix_state_double_lock(sk, other);
1917 }
1da177e4 1918
7d267278
RW
1919 if (unix_peer(sk) != other ||
1920 unix_dgram_peer_wake_me(sk, other)) {
1921 err = -EAGAIN;
1922 sk_locked = 1;
1923 goto out_unlock;
1924 }
1da177e4 1925
7d267278
RW
1926 if (!sk_locked) {
1927 sk_locked = 1;
1928 goto restart_locked;
1929 }
1da177e4
LT
1930 }
1931
7d267278
RW
1932 if (unlikely(sk_locked))
1933 unix_state_unlock(sk);
1934
3f66116e
AC
1935 if (sock_flag(other, SOCK_RCVTSTAMP))
1936 __net_timestamp(skb);
16e57262 1937 maybe_add_creds(skb, sock, other);
3c32da19 1938 scm_stat_add(other, skb);
7782040b 1939 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 1940 unix_state_unlock(other);
676d2369 1941 other->sk_data_ready(other);
1da177e4 1942 sock_put(other);
7cc05662 1943 scm_destroy(&scm);
1da177e4
LT
1944 return len;
1945
1946out_unlock:
7d267278
RW
1947 if (sk_locked)
1948 unix_state_unlock(sk);
1c92b4e5 1949 unix_state_unlock(other);
1da177e4
LT
1950out_free:
1951 kfree_skb(skb);
1952out:
1953 if (other)
1954 sock_put(other);
7cc05662 1955 scm_destroy(&scm);
1da177e4
LT
1956 return err;
1957}
1958
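/*
 * Usage sketch (userspace): unix_dgram_sendmsg() above resolves msg_name
 * when one is supplied, otherwise falls back to the connected peer. An
 * unconnected send to a filesystem-bound peer; "/tmp/peer.sock" is a
 * hypothetical path:
 */
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

static int dgram_send(const char *payload)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	int ret = -1;

	if (fd < 0)
		return -1;
	strncpy(addr.sun_path, "/tmp/peer.sock", sizeof(addr.sun_path) - 1);
	/* Blocks in unix_wait_for_peer() while the receiver's queue is
	 * full; fails with EAGAIN instead under MSG_DONTWAIT. */
	if (sendto(fd, payload, strlen(payload), 0,
		   (struct sockaddr *)&addr, sizeof(addr)) >= 0)
		ret = 0;
	close(fd);
	return ret;
}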
e370a723 1959/* We use paged skbs for stream sockets, and limit occupancy to 32768
d4e9a408 1960 * bytes, with a minimum of a full page.
e370a723
ED
1961 */
1962#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
ac7bfa62 1963
314001f0
RS
1964#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
1965static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
1966{
1967 struct unix_sock *ousk = unix_sk(other);
1968 struct sk_buff *skb;
1969 int err = 0;
1970
1971 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
1972
1973 if (!skb)
1974 return err;
1975
1976 skb_put(skb, 1);
314001f0
RS
1977 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
1978
1979 if (err) {
1980 kfree_skb(skb);
1981 return err;
1982 }
1983
1984 unix_state_lock(other);
19eed721
RS
1985
1986 if (sock_flag(other, SOCK_DEAD) ||
1987 (other->sk_shutdown & RCV_SHUTDOWN)) {
1988 unix_state_unlock(other);
1989 kfree_skb(skb);
1990 return -EPIPE;
1991 }
1992
314001f0
RS
1993 maybe_add_creds(skb, sock, other);
1994 skb_get(skb);
1995
1996 if (ousk->oob_skb)
19eed721 1997 consume_skb(ousk->oob_skb);
314001f0 1998
e51b87c5 1999 WRITE_ONCE(ousk->oob_skb, skb);
314001f0
RS
2000
2001 scm_stat_add(other, skb);
2002 skb_queue_tail(&other->sk_receive_queue, skb);
2003 sk_send_sigurg(other);
2004 unix_state_unlock(other);
2005 other->sk_data_ready(other);
2006
2007 return err;
2008}
2009#endif
2010
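/*
 * Usage sketch (userspace): with CONFIG_AF_UNIX_OOB, the last byte of a
 * MSG_OOB send is diverted into queue_oob() above and stored as the
 * receiver's oob_skb. Assuming "sock" is a connected SOCK_STREAM
 * AF_UNIX socket:
 */
#include <sys/socket.h>

static int send_urgent_byte(int sock, char c)
{
	/* Bytes before the last one travel as ordinary stream data;
	 * only the final byte is marked out-of-band. */
	return send(sock, &c, 1, MSG_OOB);
}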
1b784140
YX
2011static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2012 size_t len)
1da177e4 2013{
1da177e4
LT
2014 struct sock *sk = sock->sk;
2015 struct sock *other = NULL;
6eba6a37 2016 int err, size;
f78a5fda 2017 struct sk_buff *skb;
e27dfcea 2018 int sent = 0;
7cc05662 2019 struct scm_cookie scm;
8ba69ba6 2020 bool fds_sent = false;
e370a723 2021 int data_len;
1da177e4 2022
5f23b734 2023 wait_for_unix_gc();
7cc05662 2024 err = scm_send(sock, msg, &scm, false);
1da177e4
LT
2025 if (err < 0)
2026 return err;
2027
2028 err = -EOPNOTSUPP;
314001f0
RS
2029 if (msg->msg_flags & MSG_OOB) {
2030#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2031 if (len)
2032 len--;
2033 else
2034#endif
2035 goto out_err;
2036 }
1da177e4
LT
2037
2038 if (msg->msg_namelen) {
2039 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2040 goto out_err;
2041 } else {
1da177e4 2042 err = -ENOTCONN;
830a1e5c 2043 other = unix_peer(sk);
1da177e4
LT
2044 if (!other)
2045 goto out_err;
2046 }
2047
2048 if (sk->sk_shutdown & SEND_SHUTDOWN)
2049 goto pipe_err;
2050
6eba6a37 2051 while (sent < len) {
e370a723 2052 size = len - sent;
1da177e4
LT
2053
2054 /* Keep two messages in the pipe so it schedules better */
e370a723 2055 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1da177e4 2056
e370a723
ED
2057 /* allow fallback to order-0 allocations */
2058 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
ac7bfa62 2059
e370a723 2060 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1da177e4 2061
31ff6aa5
KT
2062 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2063
e370a723 2064 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
28d64271
ED
2065 msg->msg_flags & MSG_DONTWAIT, &err,
2066 get_order(UNIX_SKB_FRAGS_SZ));
e370a723 2067 if (!skb)
1da177e4
LT
2068 goto out_err;
2069
f78a5fda 2070 /* Only send the fds in the first buffer */
7cc05662 2071 err = unix_scm_to_skb(&scm, skb, !fds_sent);
25888e30 2072 if (err < 0) {
7361c36c 2073 kfree_skb(skb);
f78a5fda 2074 goto out_err;
6209344f 2075 }
7361c36c 2076 fds_sent = true;
1da177e4 2077
e370a723
ED
2078 skb_put(skb, size - data_len);
2079 skb->data_len = data_len;
2080 skb->len = size;
c0371da6 2081 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
6eba6a37 2082 if (err) {
1da177e4 2083 kfree_skb(skb);
f78a5fda 2084 goto out_err;
1da177e4
LT
2085 }
2086
1c92b4e5 2087 unix_state_lock(other);
1da177e4
LT
2088
2089 if (sock_flag(other, SOCK_DEAD) ||
2090 (other->sk_shutdown & RCV_SHUTDOWN))
2091 goto pipe_err_free;
2092
16e57262 2093 maybe_add_creds(skb, sock, other);
3c32da19 2094 scm_stat_add(other, skb);
7782040b 2095 skb_queue_tail(&other->sk_receive_queue, skb);
1c92b4e5 2096 unix_state_unlock(other);
676d2369 2097 other->sk_data_ready(other);
e27dfcea 2098 sent += size;
1da177e4 2099 }
1da177e4 2100
314001f0
RS
2101#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2102 if (msg->msg_flags & MSG_OOB) {
2103 err = queue_oob(sock, msg, other);
2104 if (err)
2105 goto out_err;
2106 sent++;
2107 }
2108#endif
2109
7cc05662 2110 scm_destroy(&scm);
1da177e4
LT
2111
2112 return sent;
2113
2114pipe_err_free:
1c92b4e5 2115 unix_state_unlock(other);
1da177e4
LT
2116 kfree_skb(skb);
2117pipe_err:
6eba6a37
ED
2118 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2119 send_sig(SIGPIPE, current, 0);
1da177e4
LT
2120 err = -EPIPE;
2121out_err:
7cc05662 2122 scm_destroy(&scm);
1da177e4
LT
2123 return sent ? : err;
2124}
2125
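/*
 * Usage sketch (userspace): the pipe_err path above raises SIGPIPE when
 * nothing could be sent; writers that prefer a plain EPIPE errno pass
 * MSG_NOSIGNAL. Assuming "sock" is a connected stream socket:
 */
#include <errno.h>
#include <sys/socket.h>

static ssize_t send_nosigpipe(int sock, const void *buf, size_t len)
{
	ssize_t n = send(sock, buf, len, MSG_NOSIGNAL);

	if (n < 0 && errno == EPIPE) {
		/* peer is shut down for reading; no signal was raised */
	}
	return n;
}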
869e7c62
HFS
2126static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2127 int offset, size_t size, int flags)
2128{
9490f886
HFS
2129 int err;
2130 bool send_sigpipe = false;
2131 bool init_scm = true;
2132 struct scm_cookie scm;
869e7c62
HFS
2133 struct sock *other, *sk = socket->sk;
2134 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2135
2136 if (flags & MSG_OOB)
2137 return -EOPNOTSUPP;
2138
2139 other = unix_peer(sk);
2140 if (!other || sk->sk_state != TCP_ESTABLISHED)
2141 return -ENOTCONN;
2142
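	/* Never entered directly: reached only via "goto alloc_skb", which
	 * drops both locks before allocating a fresh skb for the retry. */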
2143 if (false) {
2144alloc_skb:
2145 unix_state_unlock(other);
6e1ce3c3 2146 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2147 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2148 &err, 0);
2149 if (!newskb)
9490f886 2150 goto err;
869e7c62
HFS
2151 }
2152
6e1ce3c3 2153 /* we must acquire iolock as we modify already present
869e7c62
HFS
2154 * skbs in the sk_receive_queue and mess with skb->len
2155 */
6e1ce3c3 2156 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
869e7c62
HFS
2157 if (err) {
2158 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
869e7c62
HFS
2159 goto err;
2160 }
2161
2162 if (sk->sk_shutdown & SEND_SHUTDOWN) {
2163 err = -EPIPE;
9490f886 2164 send_sigpipe = true;
869e7c62
HFS
2165 goto err_unlock;
2166 }
2167
2168 unix_state_lock(other);
2169
2170 if (sock_flag(other, SOCK_DEAD) ||
2171 other->sk_shutdown & RCV_SHUTDOWN) {
2172 err = -EPIPE;
9490f886 2173 send_sigpipe = true;
869e7c62
HFS
2174 goto err_state_unlock;
2175 }
2176
9490f886
HFS
2177 if (init_scm) {
2178 err = maybe_init_creds(&scm, socket, other);
2179 if (err)
2180 goto err_state_unlock;
2181 init_scm = false;
2182 }
2183
869e7c62
HFS
2184 skb = skb_peek_tail(&other->sk_receive_queue);
2185 if (tail && tail == skb) {
2186 skb = newskb;
9490f886
HFS
2187 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2188 if (newskb) {
869e7c62 2189 skb = newskb;
9490f886
HFS
2190 } else {
2191 tail = skb;
869e7c62 2192 goto alloc_skb;
9490f886 2193 }
869e7c62
HFS
2194 } else if (newskb) {
 2195 /* Fast path: the existing tail skb is usable, so a
 2196 * previously allocated newskb is unneeded; consume it.
 2197 * (consume_skb(NULL) would be harmless anyway.)
 2198 */
2199 consume_skb(newskb);
8844f972 2200 newskb = NULL;
869e7c62
HFS
2201 }
2202
2203 if (skb_append_pagefrags(skb, page, offset, size)) {
2204 tail = skb;
2205 goto alloc_skb;
2206 }
2207
2208 skb->len += size;
2209 skb->data_len += size;
2210 skb->truesize += size;
14afee4b 2211 refcount_add(size, &sk->sk_wmem_alloc);
869e7c62 2212
a3a116e0 2213 if (newskb) {
9490f886
HFS
2214 err = unix_scm_to_skb(&scm, skb, false);
2215 if (err)
2216 goto err_state_unlock;
a3a116e0 2217 spin_lock(&other->sk_receive_queue.lock);
869e7c62 2218 __skb_queue_tail(&other->sk_receive_queue, newskb);
a3a116e0
HFS
2219 spin_unlock(&other->sk_receive_queue.lock);
2220 }
869e7c62
HFS
2221
2222 unix_state_unlock(other);
6e1ce3c3 2223 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2224
2225 other->sk_data_ready(other);
9490f886 2226 scm_destroy(&scm);
869e7c62
HFS
2227 return size;
2228
2229err_state_unlock:
2230 unix_state_unlock(other);
2231err_unlock:
6e1ce3c3 2232 mutex_unlock(&unix_sk(other)->iolock);
869e7c62
HFS
2233err:
2234 kfree_skb(newskb);
2235 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2236 send_sig(SIGPIPE, current, 0);
9490f886
HFS
2237 if (!init_scm)
2238 scm_destroy(&scm);
869e7c62
HFS
2239 return err;
2240}
2241
1b784140
YX
2242static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2243 size_t len)
1da177e4
LT
2244{
2245 int err;
2246 struct sock *sk = sock->sk;
ac7bfa62 2247
1da177e4
LT
2248 err = sock_error(sk);
2249 if (err)
2250 return err;
2251
2252 if (sk->sk_state != TCP_ESTABLISHED)
2253 return -ENOTCONN;
2254
2255 if (msg->msg_namelen)
2256 msg->msg_namelen = 0;
2257
1b784140 2258 return unix_dgram_sendmsg(sock, msg, len);
1da177e4 2259}
ac7bfa62 2260
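/*
 * Usage sketch (userspace): unix_seqpacket_sendmsg() reuses the datagram
 * path on a connected socket, so record boundaries are preserved. A
 * minimal round trip over a socketpair:
 */
#include <assert.h>
#include <string.h>
#include <sys/socket.h>

static void seqpacket_roundtrip(void)
{
	int sv[2];
	char buf[64];

	assert(socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv) == 0);
	assert(send(sv[0], "one", 3, 0) == 3);
	assert(send(sv[0], "two", 3, 0) == 3);
	/* Each recv() returns exactly one record, never a concatenation. */
	assert(recv(sv[1], buf, sizeof(buf), 0) == 3);
	assert(memcmp(buf, "one", 3) == 0);
}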
1b784140
YX
2261static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2262 size_t size, int flags)
a05d2ad1
EB
2263{
2264 struct sock *sk = sock->sk;
2265
2266 if (sk->sk_state != TCP_ESTABLISHED)
2267 return -ENOTCONN;
2268
1b784140 2269 return unix_dgram_recvmsg(sock, msg, size, flags);
a05d2ad1
EB
2270}
2271
1da177e4
LT
2272static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2273{
ae3b5641 2274 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
1da177e4 2275
ae3b5641
AV
2276 if (addr) {
2277 msg->msg_namelen = addr->len;
2278 memcpy(msg->msg_name, addr->name, addr->len);
1da177e4
LT
2279 }
2280}
2281
9825d866
CW
2282int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2283 int flags)
1da177e4 2284{
7cc05662 2285 struct scm_cookie scm;
9825d866 2286 struct socket *sock = sk->sk_socket;
1da177e4 2287 struct unix_sock *u = unix_sk(sk);
64874280
RW
2288 struct sk_buff *skb, *last;
2289 long timeo;
fd69c399 2290 int skip;
1da177e4
LT
2291 int err;
2292
2293 err = -EOPNOTSUPP;
2294 if (flags&MSG_OOB)
2295 goto out;
2296
64874280 2297 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1da177e4 2298
64874280 2299 do {
6e1ce3c3 2300 mutex_lock(&u->iolock);
f55bb7f9 2301
64874280 2302 skip = sk_peek_offset(sk, flags);
b50b0580 2303 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
e427cad6
PA
2304 &skip, &err, &last);
2305 if (skb) {
2306 if (!(flags & MSG_PEEK))
2307 scm_stat_del(sk, skb);
64874280 2308 break;
e427cad6 2309 }
64874280 2310
6e1ce3c3 2311 mutex_unlock(&u->iolock);
64874280
RW
2312
2313 if (err != -EAGAIN)
2314 break;
2315 } while (timeo &&
b50b0580
SD
2316 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2317 &err, &timeo, last));
64874280 2318
6e1ce3c3 2319 if (!skb) { /* implies iolock unlocked */
0a112258
FZ
2320 unix_state_lock(sk);
2321 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2322 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2323 (sk->sk_shutdown & RCV_SHUTDOWN))
2324 err = 0;
2325 unix_state_unlock(sk);
64874280 2326 goto out;
0a112258 2327 }
1da177e4 2328
77b75f4d
RW
2329 if (wq_has_sleeper(&u->peer_wait))
2330 wake_up_interruptible_sync_poll(&u->peer_wait,
a9a08845
LT
2331 EPOLLOUT | EPOLLWRNORM |
2332 EPOLLWRBAND);
1da177e4
LT
2333
2334 if (msg->msg_name)
2335 unix_copy_addr(msg, skb->sk);
2336
f55bb7f9
PE
2337 if (size > skb->len - skip)
2338 size = skb->len - skip;
2339 else if (size < skb->len - skip)
1da177e4
LT
2340 msg->msg_flags |= MSG_TRUNC;
2341
51f3d02b 2342 err = skb_copy_datagram_msg(skb, skip, msg, size);
1da177e4
LT
2343 if (err)
2344 goto out_free;
2345
3f66116e
AC
2346 if (sock_flag(sk, SOCK_RCVTSTAMP))
2347 __sock_recv_timestamp(msg, sk, skb);
2348
7cc05662
CH
2349 memset(&scm, 0, sizeof(scm));
2350
2351 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2352 unix_set_secdata(&scm, skb);
1da177e4 2353
6eba6a37 2354 if (!(flags & MSG_PEEK)) {
1da177e4 2355 if (UNIXCB(skb).fp)
7cc05662 2356 unix_detach_fds(&scm, skb);
f55bb7f9
PE
2357
2358 sk_peek_offset_bwd(sk, skb->len);
6eba6a37 2359 } else {
1da177e4
LT
2360 /* It is questionable: on PEEK we could:
 2361 - not return fds - good, but too simple 8)
 2362 - return fds, but not return them again on read (old strategy,
 2363 apparently wrong)
 2364 - clone fds (I chose this for now; it is the most universal
 2365 solution)
ac7bfa62
YH
2366
2367 POSIX 1003.1g does not actually define this clearly
2368 at all. POSIX 1003.1g doesn't define a lot of things
2369 clearly however!
2370
1da177e4 2371 */
f55bb7f9
PE
2372
2373 sk_peek_offset_fwd(sk, size);
2374
1da177e4 2375 if (UNIXCB(skb).fp)
cbcf0112 2376 unix_peek_fds(&scm, skb);
1da177e4 2377 }
9f6f9af7 2378 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1da177e4 2379
7cc05662 2380 scm_recv(sock, msg, &scm, flags);
1da177e4
LT
2381
2382out_free:
6eba6a37 2383 skb_free_datagram(sk, skb);
6e1ce3c3 2384 mutex_unlock(&u->iolock);
1da177e4
LT
2385out:
2386 return err;
2387}
29df44fa 2388
9825d866
CW
2389static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2390 int flags)
2391{
2392 struct sock *sk = sock->sk;
2393
2394#ifdef CONFIG_BPF_SYSCALL
94531cfc
JW
2395 const struct proto *prot = READ_ONCE(sk->sk_prot);
2396
2397 if (prot != &unix_dgram_proto)
2398 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
9825d866
CW
2399 flags & ~MSG_DONTWAIT, NULL);
2400#endif
2401 return __unix_dgram_recvmsg(sk, msg, size, flags);
2402}
2403
29df44fa
CW
2404static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2405 sk_read_actor_t recv_actor)
2406{
2407 int copied = 0;
2408
2409 while (1) {
2410 struct unix_sock *u = unix_sk(sk);
2411 struct sk_buff *skb;
2412 int used, err;
2413
2414 mutex_lock(&u->iolock);
2415 skb = skb_recv_datagram(sk, 0, 1, &err);
2416 mutex_unlock(&u->iolock);
2417 if (!skb)
2418 return err;
2419
2420 used = recv_actor(desc, skb, 0, skb->len);
2421 if (used <= 0) {
2422 if (!copied)
2423 copied = used;
2424 kfree_skb(skb);
2425 break;
2426 } else if (used <= skb->len) {
2427 copied += used;
2428 }
2429
2430 kfree_skb(skb);
2431 if (!desc->count)
2432 break;
2433 }
2434
2435 return copied;
2436}
1da177e4
LT
2437
2438/*
79f632c7 2439 * Sleep until more data has arrived. But check for races..
1da177e4 2440 */
79f632c7 2441static long unix_stream_data_wait(struct sock *sk, long timeo,
06a77b07
WC
2442 struct sk_buff *last, unsigned int last_len,
2443 bool freezable)
1da177e4 2444{
2b514574 2445 struct sk_buff *tail;
1da177e4
LT
2446 DEFINE_WAIT(wait);
2447
1c92b4e5 2448 unix_state_lock(sk);
1da177e4
LT
2449
2450 for (;;) {
aa395145 2451 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1da177e4 2452
2b514574
HFS
2453 tail = skb_peek_tail(&sk->sk_receive_queue);
2454 if (tail != last ||
2455 (tail && tail->len != last_len) ||
1da177e4
LT
2456 sk->sk_err ||
2457 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2458 signal_pending(current) ||
2459 !timeo)
2460 break;
2461
9cd3e072 2462 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1c92b4e5 2463 unix_state_unlock(sk);
06a77b07
WC
2464 if (freezable)
2465 timeo = freezable_schedule_timeout(timeo);
2466 else
2467 timeo = schedule_timeout(timeo);
1c92b4e5 2468 unix_state_lock(sk);
b48732e4
MS
2469
2470 if (sock_flag(sk, SOCK_DEAD))
2471 break;
2472
9cd3e072 2473 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
1da177e4
LT
2474 }
2475
aa395145 2476 finish_wait(sk_sleep(sk), &wait);
1c92b4e5 2477 unix_state_unlock(sk);
1da177e4
LT
2478 return timeo;
2479}
2480
e370a723
ED
2481static unsigned int unix_skb_len(const struct sk_buff *skb)
2482{
2483 return skb->len - UNIXCB(skb).consumed;
2484}
2485
2b514574
HFS
2486struct unix_stream_read_state {
2487 int (*recv_actor)(struct sk_buff *, int, int,
2488 struct unix_stream_read_state *);
2489 struct socket *socket;
2490 struct msghdr *msg;
2491 struct pipe_inode_info *pipe;
2492 size_t size;
2493 int flags;
2494 unsigned int splice_flags;
2495};
2496
314001f0
RS
2497#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2498static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2499{
2500 struct socket *sock = state->socket;
2501 struct sock *sk = sock->sk;
2502 struct unix_sock *u = unix_sk(sk);
2503 int chunk = 1;
876c14ad 2504 struct sk_buff *oob_skb;
314001f0 2505
876c14ad
RS
2506 mutex_lock(&u->iolock);
2507 unix_state_lock(sk);
2508
2509 if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2510 unix_state_unlock(sk);
2511 mutex_unlock(&u->iolock);
314001f0 2512 return -EINVAL;
876c14ad 2513 }
314001f0 2514
876c14ad 2515 oob_skb = u->oob_skb;
314001f0 2516
e51b87c5
KI
2517 if (!(state->flags & MSG_PEEK))
2518 WRITE_ONCE(u->oob_skb, NULL);
876c14ad
RS
2519
2520 unix_state_unlock(sk);
2521
2522 chunk = state->recv_actor(oob_skb, 0, chunk, state);
2523
2524 if (!(state->flags & MSG_PEEK)) {
2525 UNIXCB(oob_skb).consumed += 1;
2526 kfree_skb(oob_skb);
2527 }
2528
2529 mutex_unlock(&u->iolock);
2530
2531 if (chunk < 0)
2532 return -EFAULT;
2533
314001f0
RS
2534 state->msg->msg_flags |= MSG_OOB;
2535 return 1;
2536}
2537
2538static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2539 int flags, int copied)
2540{
2541 struct unix_sock *u = unix_sk(sk);
2542
2543 if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2544 skb_unlink(skb, &sk->sk_receive_queue);
2545 consume_skb(skb);
2546 skb = NULL;
2547 } else {
2548 if (skb == u->oob_skb) {
2549 if (copied) {
2550 skb = NULL;
2551 } else if (sock_flag(sk, SOCK_URGINLINE)) {
2552 if (!(flags & MSG_PEEK)) {
e51b87c5 2553 WRITE_ONCE(u->oob_skb, NULL);
314001f0
RS
2554 consume_skb(skb);
2555 }
2556 } else if (!(flags & MSG_PEEK)) {
2557 skb_unlink(skb, &sk->sk_receive_queue);
2558 consume_skb(skb);
2559 skb = skb_peek(&sk->sk_receive_queue);
2560 }
2561 }
2562 }
2563 return skb;
2564}
2565#endif
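/*
 * Usage sketch (userspace): unix_stream_recv_urg() above serves
 * recv(..., MSG_OOB); once SO_OOBINLINE is set it returns EINVAL and
 * manage_oob() leaves the byte inline in the normal stream instead.
 * Assuming "sock" has a pending OOB byte:
 */
#include <sys/socket.h>

static int read_urgent_byte(int sock, char *out)
{
	/* Returns 1 on success; fails with EINVAL if SO_OOBINLINE is
	 * enabled or no OOB byte is queued. */
	return recv(sock, out, 1, MSG_OOB);
}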
2566
77462de1
JW
2567static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2568 sk_read_actor_t recv_actor)
2569{
2570 if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2571 return -ENOTCONN;
2572
2573 return unix_read_sock(sk, desc, recv_actor);
2574}
2575
06a77b07
WC
2576static int unix_stream_read_generic(struct unix_stream_read_state *state,
2577 bool freezable)
1da177e4 2578{
7cc05662 2579 struct scm_cookie scm;
2b514574 2580 struct socket *sock = state->socket;
1da177e4
LT
2581 struct sock *sk = sock->sk;
2582 struct unix_sock *u = unix_sk(sk);
1da177e4 2583 int copied = 0;
2b514574 2584 int flags = state->flags;
de144391 2585 int noblock = flags & MSG_DONTWAIT;
2b514574 2586 bool check_creds = false;
1da177e4
LT
2587 int target;
2588 int err = 0;
2589 long timeo;
fc0d7536 2590 int skip;
2b514574
HFS
2591 size_t size = state->size;
2592 unsigned int last_len;
1da177e4 2593
1b92ee3d
RW
2594 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2595 err = -EINVAL;
1da177e4 2596 goto out;
1b92ee3d 2597 }
1da177e4 2598
1b92ee3d
RW
2599 if (unlikely(flags & MSG_OOB)) {
2600 err = -EOPNOTSUPP;
314001f0 2601#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
314001f0 2602 err = unix_stream_recv_urg(state);
314001f0 2603#endif
1da177e4 2604 goto out;
1b92ee3d 2605 }
1da177e4 2606
2b514574 2607 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
de144391 2608 timeo = sock_rcvtimeo(sk, noblock);
1da177e4 2609
2b514574
HFS
2610 memset(&scm, 0, sizeof(scm));
2611
1da177e4
LT
 2612 /* Lock the socket to prevent the queue from being reordered
 2613 * while we sleep copying data out to the message
2614 */
6e1ce3c3 2615 mutex_lock(&u->iolock);
1da177e4 2616
a0917e0b 2617 skip = max(sk_peek_offset(sk, flags), 0);
e9193d60 2618
6eba6a37 2619 do {
1da177e4 2620 int chunk;
73ed5d25 2621 bool drop_skb;
79f632c7 2622 struct sk_buff *skb, *last;
1da177e4 2623
18eceb81 2624redo:
3c0d2f37 2625 unix_state_lock(sk);
b48732e4
MS
2626 if (sock_flag(sk, SOCK_DEAD)) {
2627 err = -ECONNRESET;
2628 goto unlock;
2629 }
79f632c7 2630 last = skb = skb_peek(&sk->sk_receive_queue);
2b514574 2631 last_len = last ? last->len : 0;
314001f0
RS
2632
2633#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2634 if (skb) {
2635 skb = manage_oob(skb, sk, flags, copied);
2636 if (!skb) {
2637 unix_state_unlock(sk);
2638 if (copied)
2639 break;
2640 goto redo;
2641 }
2642 }
2643#endif
fc0d7536 2644again:
6eba6a37 2645 if (skb == NULL) {
1da177e4 2646 if (copied >= target)
3c0d2f37 2647 goto unlock;
1da177e4
LT
2648
2649 /*
2650 * POSIX 1003.1g mandates this order.
2651 */
ac7bfa62 2652
6eba6a37
ED
2653 err = sock_error(sk);
2654 if (err)
3c0d2f37 2655 goto unlock;
1da177e4 2656 if (sk->sk_shutdown & RCV_SHUTDOWN)
3c0d2f37
MS
2657 goto unlock;
2658
2659 unix_state_unlock(sk);
1b92ee3d
RW
2660 if (!timeo) {
2661 err = -EAGAIN;
1da177e4 2662 break;
1b92ee3d
RW
2663 }
2664
6e1ce3c3 2665 mutex_unlock(&u->iolock);
1da177e4 2666
2b514574 2667 timeo = unix_stream_data_wait(sk, timeo, last,
06a77b07 2668 last_len, freezable);
1da177e4 2669
3822b5c2 2670 if (signal_pending(current)) {
1da177e4 2671 err = sock_intr_errno(timeo);
fa0dc04d 2672 scm_destroy(&scm);
1da177e4
LT
2673 goto out;
2674 }
b3ca9b02 2675
6e1ce3c3 2676 mutex_lock(&u->iolock);
18eceb81 2677 goto redo;
2b514574 2678unlock:
3c0d2f37
MS
2679 unix_state_unlock(sk);
2680 break;
1da177e4 2681 }
fc0d7536 2682
e370a723
ED
2683 while (skip >= unix_skb_len(skb)) {
2684 skip -= unix_skb_len(skb);
79f632c7 2685 last = skb;
2b514574 2686 last_len = skb->len;
fc0d7536 2687 skb = skb_peek_next(skb, &sk->sk_receive_queue);
79f632c7
BP
2688 if (!skb)
2689 goto again;
fc0d7536
PE
2690 }
2691
3c0d2f37 2692 unix_state_unlock(sk);
1da177e4
LT
2693
2694 if (check_creds) {
2695 /* Never glue messages from different writers */
9490f886 2696 if (!unix_skb_scm_eq(skb, &scm))
1da177e4 2697 break;
0e82e7f6 2698 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
1da177e4 2699 /* Copy credentials */
7cc05662 2700 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
37a9a8df 2701 unix_set_secdata(&scm, skb);
2b514574 2702 check_creds = true;
1da177e4
LT
2703 }
2704
2705 /* Copy address just once */
2b514574
HFS
2706 if (state->msg && state->msg->msg_name) {
2707 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2708 state->msg->msg_name);
2709 unix_copy_addr(state->msg, skb->sk);
1da177e4
LT
2710 sunaddr = NULL;
2711 }
2712
e370a723 2713 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
73ed5d25 2714 skb_get(skb);
2b514574 2715 chunk = state->recv_actor(skb, skip, chunk, state);
73ed5d25
HFS
2716 drop_skb = !unix_skb_len(skb);
2717 /* skb is only safe to use if !drop_skb */
2718 consume_skb(skb);
2b514574 2719 if (chunk < 0) {
1da177e4
LT
2720 if (copied == 0)
2721 copied = -EFAULT;
2722 break;
2723 }
2724 copied += chunk;
2725 size -= chunk;
2726
73ed5d25
HFS
2727 if (drop_skb) {
2728 /* the skb was touched by a concurrent reader;
2729 * we should not expect anything from this skb
2730 * anymore and assume it invalid - we can be
2731 * sure it was dropped from the socket queue
2732 *
2733 * let's report a short read
2734 */
2735 err = 0;
2736 break;
2737 }
2738
1da177e4 2739 /* Mark read part of skb as used */
6eba6a37 2740 if (!(flags & MSG_PEEK)) {
e370a723 2741 UNIXCB(skb).consumed += chunk;
1da177e4 2742
fc0d7536
PE
2743 sk_peek_offset_bwd(sk, chunk);
2744
3c32da19 2745 if (UNIXCB(skb).fp) {
3c32da19 2746 scm_stat_del(sk, skb);
7cc05662 2747 unix_detach_fds(&scm, skb);
3c32da19 2748 }
1da177e4 2749
e370a723 2750 if (unix_skb_len(skb))
1da177e4 2751 break;
1da177e4 2752
6f01fd6e 2753 skb_unlink(skb, &sk->sk_receive_queue);
70d4bf6d 2754 consume_skb(skb);
1da177e4 2755
7cc05662 2756 if (scm.fp)
1da177e4 2757 break;
6eba6a37 2758 } else {
1da177e4
LT
2759 /* It is questionable, see note in unix_dgram_recvmsg.
2760 */
2761 if (UNIXCB(skb).fp)
cbcf0112 2762 unix_peek_fds(&scm, skb);
1da177e4 2763
e9193d60 2764 sk_peek_offset_fwd(sk, chunk);
fc0d7536 2765
9f389e35
AC
2766 if (UNIXCB(skb).fp)
2767 break;
2768
e9193d60 2769 skip = 0;
9f389e35
AC
2770 last = skb;
2771 last_len = skb->len;
2772 unix_state_lock(sk);
2773 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2774 if (skb)
2775 goto again;
2776 unix_state_unlock(sk);
1da177e4
LT
2777 break;
2778 }
2779 } while (size);
2780
6e1ce3c3 2781 mutex_unlock(&u->iolock);
2b514574
HFS
2782 if (state->msg)
2783 scm_recv(sock, state->msg, &scm, flags);
2784 else
2785 scm_destroy(&scm);
1da177e4
LT
2786out:
2787 return copied ? : err;
2788}
2789
2b514574
HFS
2790static int unix_stream_read_actor(struct sk_buff *skb,
2791 int skip, int chunk,
2792 struct unix_stream_read_state *state)
2793{
2794 int ret;
2795
2796 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2797 state->msg, chunk);
2798 return ret ?: chunk;
2799}
2800
94531cfc
JW
2801int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2802 size_t size, int flags)
2803{
2804 struct unix_stream_read_state state = {
2805 .recv_actor = unix_stream_read_actor,
2806 .socket = sk->sk_socket,
2807 .msg = msg,
2808 .size = size,
2809 .flags = flags
2810 };
2811
2812 return unix_stream_read_generic(&state, true);
2813}
2814
2b514574
HFS
2815static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2816 size_t size, int flags)
2817{
2818 struct unix_stream_read_state state = {
2819 .recv_actor = unix_stream_read_actor,
2820 .socket = sock,
2821 .msg = msg,
2822 .size = size,
2823 .flags = flags
2824 };
2825
94531cfc
JW
2826#ifdef CONFIG_BPF_SYSCALL
2827 struct sock *sk = sock->sk;
2828 const struct proto *prot = READ_ONCE(sk->sk_prot);
2829
2830 if (prot != &unix_stream_proto)
2831 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2832 flags & ~MSG_DONTWAIT, NULL);
2833#endif
06a77b07 2834 return unix_stream_read_generic(&state, true);
2b514574
HFS
2835}
2836
2b514574
HFS
2837static int unix_stream_splice_actor(struct sk_buff *skb,
2838 int skip, int chunk,
2839 struct unix_stream_read_state *state)
2840{
2841 return skb_splice_bits(skb, state->socket->sk,
2842 UNIXCB(skb).consumed + skip,
25869262 2843 state->pipe, chunk, state->splice_flags);
2b514574
HFS
2844}
2845
2846static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2847 struct pipe_inode_info *pipe,
2848 size_t size, unsigned int flags)
2849{
2850 struct unix_stream_read_state state = {
2851 .recv_actor = unix_stream_splice_actor,
2852 .socket = sock,
2853 .pipe = pipe,
2854 .size = size,
2855 .splice_flags = flags,
2856 };
2857
2858 if (unlikely(*ppos))
2859 return -ESPIPE;
2860
2861 if (sock->file->f_flags & O_NONBLOCK ||
2862 flags & SPLICE_F_NONBLOCK)
2863 state.flags = MSG_DONTWAIT;
2864
06a77b07 2865 return unix_stream_read_generic(&state, false);
2b514574
HFS
2866}
2867
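/*
 * Usage sketch (userspace): unix_stream_splice_read() above moves skb
 * pages into a pipe without a round trip through userspace buffers.
 * Assuming "sock" is a connected AF_UNIX stream socket with data queued:
 */
#define _GNU_SOURCE	/* for splice() */
#include <fcntl.h>
#include <unistd.h>

static long drain_to_pipe(int sock)
{
	int p[2];

	if (pipe(p) < 0)
		return -1;
	/* SPLICE_F_NONBLOCK maps to MSG_DONTWAIT in the state above */
	return splice(sock, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
}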
1da177e4
LT
2868static int unix_shutdown(struct socket *sock, int mode)
2869{
2870 struct sock *sk = sock->sk;
2871 struct sock *other;
2872
fc61b928
XW
2873 if (mode < SHUT_RD || mode > SHUT_RDWR)
2874 return -EINVAL;
2875 /* This maps:
2876 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2877 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2878 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2879 */
2880 ++mode;
7180a031
AC
2881
2882 unix_state_lock(sk);
2883 sk->sk_shutdown |= mode;
2884 other = unix_peer(sk);
2885 if (other)
2886 sock_hold(other);
2887 unix_state_unlock(sk);
2888 sk->sk_state_change(sk);
2889
2890 if (other &&
2891 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2892
2893 int peer_mode = 0;
94531cfc 2894 const struct proto *prot = READ_ONCE(other->sk_prot);
7180a031 2895
d359902d
JW
2896 if (prot->unhash)
2897 prot->unhash(other);
7180a031
AC
2898 if (mode&RCV_SHUTDOWN)
2899 peer_mode |= SEND_SHUTDOWN;
2900 if (mode&SEND_SHUTDOWN)
2901 peer_mode |= RCV_SHUTDOWN;
2902 unix_state_lock(other);
2903 other->sk_shutdown |= peer_mode;
2904 unix_state_unlock(other);
2905 other->sk_state_change(other);
d0c6416b 2906 if (peer_mode == SHUTDOWN_MASK)
7180a031 2907 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
d0c6416b 2908 else if (peer_mode & RCV_SHUTDOWN)
7180a031 2909 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1da177e4 2910 }
7180a031
AC
2911 if (other)
2912 sock_put(other);
2913
1da177e4
LT
2914 return 0;
2915}
2916
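/*
 * Usage sketch (userspace): unix_shutdown() mirrors the shutdown onto the
 * peer, so closing our write side sets RCV_SHUTDOWN there and its reads
 * return 0 (EOF) once the queue drains. Assuming a connected pair:
 */
#include <sys/socket.h>
#include <unistd.h>

static void half_close(int writer, int reader)
{
	char buf[16];

	shutdown(writer, SHUT_WR);	/* mode 1 -> SEND_SHUTDOWN */
	while (read(reader, buf, sizeof(buf)) > 0)
		;			/* drain remaining data, then EOF */
}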
885ee74d
PE
2917long unix_inq_len(struct sock *sk)
2918{
2919 struct sk_buff *skb;
2920 long amount = 0;
2921
2922 if (sk->sk_state == TCP_LISTEN)
2923 return -EINVAL;
2924
2925 spin_lock(&sk->sk_receive_queue.lock);
2926 if (sk->sk_type == SOCK_STREAM ||
2927 sk->sk_type == SOCK_SEQPACKET) {
2928 skb_queue_walk(&sk->sk_receive_queue, skb)
e370a723 2929 amount += unix_skb_len(skb);
885ee74d
PE
2930 } else {
2931 skb = skb_peek(&sk->sk_receive_queue);
2932 if (skb)
2933 amount = skb->len;
2934 }
2935 spin_unlock(&sk->sk_receive_queue.lock);
2936
2937 return amount;
2938}
2939EXPORT_SYMBOL_GPL(unix_inq_len);
2940
2941long unix_outq_len(struct sock *sk)
2942{
2943 return sk_wmem_alloc_get(sk);
2944}
2945EXPORT_SYMBOL_GPL(unix_outq_len);
2946
ba94f308
AV
2947static int unix_open_file(struct sock *sk)
2948{
2949 struct path path;
2950 struct file *f;
2951 int fd;
2952
2953 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2954 return -EPERM;
2955
ae3b5641
AV
2956 if (!smp_load_acquire(&unix_sk(sk)->addr))
2957 return -ENOENT;
2958
ba94f308 2959 path = unix_sk(sk)->path;
ae3b5641 2960 if (!path.dentry)
ba94f308 2961 return -ENOENT;
ba94f308
AV
2962
2963 path_get(&path);
ba94f308
AV
2964
2965 fd = get_unused_fd_flags(O_CLOEXEC);
2966 if (fd < 0)
2967 goto out;
2968
2969 f = dentry_open(&path, O_PATH, current_cred());
2970 if (IS_ERR(f)) {
2971 put_unused_fd(fd);
2972 fd = PTR_ERR(f);
2973 goto out;
2974 }
2975
2976 fd_install(fd, f);
2977out:
2978 path_put(&path);
2979
2980 return fd;
2981}
2982
1da177e4
LT
2983static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2984{
2985 struct sock *sk = sock->sk;
e27dfcea 2986 long amount = 0;
1da177e4
LT
2987 int err;
2988
6eba6a37
ED
2989 switch (cmd) {
2990 case SIOCOUTQ:
885ee74d 2991 amount = unix_outq_len(sk);
6eba6a37
ED
2992 err = put_user(amount, (int __user *)arg);
2993 break;
2994 case SIOCINQ:
885ee74d
PE
2995 amount = unix_inq_len(sk);
2996 if (amount < 0)
2997 err = amount;
2998 else
1da177e4 2999 err = put_user(amount, (int __user *)arg);
885ee74d 3000 break;
ba94f308
AV
3001 case SIOCUNIXFILE:
3002 err = unix_open_file(sk);
3003 break;
314001f0
RS
3004#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3005 case SIOCATMARK:
3006 {
3007 struct sk_buff *skb;
314001f0
RS
3008 int answ = 0;
3009
3010 skb = skb_peek(&sk->sk_receive_queue);
e51b87c5 3011 if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
314001f0
RS
3012 answ = 1;
3013 err = put_user(answ, (int __user *)arg);
3014 }
3015 break;
3016#endif
6eba6a37
ED
3017 default:
3018 err = -ENOIOCTLCMD;
3019 break;
1da177e4
LT
3020 }
3021 return err;
3022}
3023
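/*
 * Usage sketch (userspace): the ioctls above are directly reachable;
 * SIOCINQ reports queued bytes via unix_inq_len() and SIOCATMARK tells
 * whether the next read would start at the OOB byte:
 */
#include <linux/sockios.h>
#include <sys/ioctl.h>

static int at_oob_mark(int sock)
{
	int pending = 0, atmark = 0;

	ioctl(sock, SIOCINQ, &pending);		/* bytes in sk_receive_queue */
	ioctl(sock, SIOCATMARK, &atmark);	/* 1 if head skb is oob_skb */
	return atmark;
}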
5f6beb9e
AB
3024#ifdef CONFIG_COMPAT
3025static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3026{
3027 return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3028}
3029#endif
3030
a11e1d43 3031static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
1da177e4
LT
3032{
3033 struct sock *sk = sock->sk;
a11e1d43
LT
3034 __poll_t mask;
3035
89ab066d 3036 sock_poll_wait(file, sock, wait);
a11e1d43 3037 mask = 0;
1da177e4
LT
3038
3039 /* exceptional events? */
3040 if (sk->sk_err)
a9a08845 3041 mask |= EPOLLERR;
1da177e4 3042 if (sk->sk_shutdown == SHUTDOWN_MASK)
a9a08845 3043 mask |= EPOLLHUP;
f348d70a 3044 if (sk->sk_shutdown & RCV_SHUTDOWN)
a9a08845 3045 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
1da177e4
LT
3046
3047 /* readable? */
3ef7cf57 3048 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3049 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3050 if (sk_is_readable(sk))
3051 mask |= EPOLLIN | EPOLLRDNORM;
cc668f4a
KI
3052#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3053 if (READ_ONCE(unix_sk(sk)->oob_skb))
3054 mask |= EPOLLPRI;
3055#endif
1da177e4
LT
3056
3057 /* Connection-based need to check for termination and startup */
6eba6a37
ED
3058 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3059 sk->sk_state == TCP_CLOSE)
a9a08845 3060 mask |= EPOLLHUP;
1da177e4
LT
3061
3062 /*
3063 * we set writable also when the other side has shut down the
3064 * connection. This prevents stuck sockets.
3065 */
3066 if (unix_writable(sk))
a9a08845 3067 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
1da177e4
LT
3068
3069 return mask;
3070}
3071
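/*
 * Usage sketch (userspace): the oob_skb check above is what lets a
 * receiver poll for urgent data; a pending OOB byte now reports
 * EPOLLPRI/POLLPRI:
 */
#include <poll.h>

static int wait_for_oob(int sock, int timeout_ms)
{
	struct pollfd pfd = { .fd = sock, .events = POLLPRI };

	if (poll(&pfd, 1, timeout_ms) > 0 && (pfd.revents & POLLPRI))
		return 1;	/* an oob_skb is queued */
	return 0;
}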
a11e1d43
LT
3072static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3073 poll_table *wait)
3c73419c 3074{
ec0d215f 3075 struct sock *sk = sock->sk, *other;
a11e1d43
LT
3076 unsigned int writable;
3077 __poll_t mask;
3078
89ab066d 3079 sock_poll_wait(file, sock, wait);
a11e1d43 3080 mask = 0;
3c73419c
RW
3081
3082 /* exceptional events? */
3ef7cf57 3083 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
a9a08845
LT
3084 mask |= EPOLLERR |
3085 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
7d4c04fc 3086
3c73419c 3087 if (sk->sk_shutdown & RCV_SHUTDOWN)
a9a08845 3088 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3c73419c 3089 if (sk->sk_shutdown == SHUTDOWN_MASK)
a9a08845 3090 mask |= EPOLLHUP;
3c73419c
RW
3091
3092 /* readable? */
3ef7cf57 3093 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
a9a08845 3094 mask |= EPOLLIN | EPOLLRDNORM;
af493388
CW
3095 if (sk_is_readable(sk))
3096 mask |= EPOLLIN | EPOLLRDNORM;
3c73419c
RW
3097
3098 /* Connection-based need to check for termination and startup */
3099 if (sk->sk_type == SOCK_SEQPACKET) {
3100 if (sk->sk_state == TCP_CLOSE)
a9a08845 3101 mask |= EPOLLHUP;
3c73419c
RW
3102 /* connection hasn't started yet? */
3103 if (sk->sk_state == TCP_SYN_SENT)
3104 return mask;
3105 }
3106
973a34aa 3107 /* No write status requested, avoid expensive OUT tests. */
a11e1d43 3108 if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
973a34aa
ED
3109 return mask;
3110
ec0d215f 3111 writable = unix_writable(sk);
7d267278
RW
3112 if (writable) {
3113 unix_state_lock(sk);
3114
3115 other = unix_peer(sk);
3116 if (other && unix_peer(other) != sk &&
04f08eb4 3117 unix_recvq_full_lockless(other) &&
7d267278
RW
3118 unix_dgram_peer_wake_me(sk, other))
3119 writable = 0;
3120
3121 unix_state_unlock(sk);
ec0d215f
RW
3122 }
3123
3124 if (writable)
a9a08845 3125 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3c73419c 3126 else
9cd3e072 3127 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3c73419c 3128
3c73419c
RW
3129 return mask;
3130}
1da177e4
LT
3131
3132#ifdef CONFIG_PROC_FS
a53eb3fe 3133
7123aaa3
ED
3134#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3135
3136#define get_bucket(x) ((x) >> BUCKET_SPACE)
3137#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
3138#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
a53eb3fe 3139
7123aaa3 3140static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
1da177e4 3141{
7123aaa3
ED
3142 unsigned long offset = get_offset(*pos);
3143 unsigned long bucket = get_bucket(*pos);
3144 struct sock *sk;
3145 unsigned long count = 0;
1da177e4 3146
7123aaa3
ED
3147 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3148 if (sock_net(sk) != seq_file_net(seq))
097e66c5 3149 continue;
7123aaa3
ED
3150 if (++count == offset)
3151 break;
3152 }
3153
3154 return sk;
3155}
3156
3157static struct sock *unix_next_socket(struct seq_file *seq,
3158 struct sock *sk,
3159 loff_t *pos)
3160{
3161 unsigned long bucket;
3162
3163 while (sk > (struct sock *)SEQ_START_TOKEN) {
3164 sk = sk_next(sk);
3165 if (!sk)
3166 goto next_bucket;
3167 if (sock_net(sk) == seq_file_net(seq))
3168 return sk;
1da177e4 3169 }
7123aaa3
ED
3170
3171 do {
3172 sk = unix_from_bucket(seq, pos);
3173 if (sk)
3174 return sk;
3175
3176next_bucket:
3177 bucket = get_bucket(*pos) + 1;
3178 *pos = set_bucket_offset(bucket, 1);
3179 } while (bucket < ARRAY_SIZE(unix_socket_table));
3180
1da177e4
LT
3181 return NULL;
3182}
3183
1da177e4 3184static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 3185 __acquires(unix_table_lock)
1da177e4 3186{
fbe9cc4a 3187 spin_lock(&unix_table_lock);
7123aaa3
ED
3188
3189 if (!*pos)
3190 return SEQ_START_TOKEN;
3191
3192 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
3193 return NULL;
3194
3195 return unix_next_socket(seq, NULL, pos);
1da177e4
LT
3196}
3197
3198static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3199{
3200 ++*pos;
7123aaa3 3201 return unix_next_socket(seq, v, pos);
1da177e4
LT
3202}
3203
3204static void unix_seq_stop(struct seq_file *seq, void *v)
9a429c49 3205 __releases(unix_table_lock)
1da177e4 3206{
fbe9cc4a 3207 spin_unlock(&unix_table_lock);
1da177e4
LT
3208}
3209
3210static int unix_seq_show(struct seq_file *seq, void *v)
3211{
ac7bfa62 3212
b9f3124f 3213 if (v == SEQ_START_TOKEN)
1da177e4
LT
3214 seq_puts(seq, "Num RefCount Protocol Flags Type St "
3215 "Inode Path\n");
3216 else {
3217 struct sock *s = v;
3218 struct unix_sock *u = unix_sk(s);
1c92b4e5 3219 unix_state_lock(s);
1da177e4 3220
71338aa7 3221 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
1da177e4 3222 s,
41c6d650 3223 refcount_read(&s->sk_refcnt),
1da177e4
LT
3224 0,
3225 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3226 s->sk_type,
3227 s->sk_socket ?
3228 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3229 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3230 sock_i_ino(s));
3231
ae3b5641 3232 if (u->addr) { // under unix_table_lock here
1da177e4
LT
3233 int i, len;
3234 seq_putc(seq, ' ');
3235
3236 i = 0;
3237 len = u->addr->len - sizeof(short);
3238 if (!UNIX_ABSTRACT(s))
3239 len--;
3240 else {
3241 seq_putc(seq, '@');
3242 i++;
3243 }
3244 for ( ; i < len; i++)
e7947ea7
IB
3245 seq_putc(seq, u->addr->name->sun_path[i] ?:
3246 '@');
1da177e4 3247 }
1c92b4e5 3248 unix_state_unlock(s);
1da177e4
LT
3249 seq_putc(seq, '\n');
3250 }
3251
3252 return 0;
3253}
3254
56b3d975 3255static const struct seq_operations unix_seq_ops = {
1da177e4
LT
3256 .start = unix_seq_start,
3257 .next = unix_seq_next,
3258 .stop = unix_seq_stop,
3259 .show = unix_seq_show,
3260};
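/*
 * Output sketch: unix_seq_show() above renders /proc/net/unix; a listening
 * stream socket shows up roughly as below ("/run/example.sock" is a
 * hypothetical entry):
 *
 *   Num       RefCount Protocol Flags    Type St Inode Path
 *   00000000abcdef12: 00000002 00000000 00010000 0001 01 12345 /run/example.sock
 */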
2c860a43
KI
3261
3262#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3263struct bpf_iter__unix {
3264 __bpf_md_ptr(struct bpf_iter_meta *, meta);
3265 __bpf_md_ptr(struct unix_sock *, unix_sk);
3266 uid_t uid __aligned(8);
3267};
3268
3269static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3270 struct unix_sock *unix_sk, uid_t uid)
3271{
3272 struct bpf_iter__unix ctx;
3273
3274 meta->seq_num--; /* skip SEQ_START_TOKEN */
3275 ctx.meta = meta;
3276 ctx.unix_sk = unix_sk;
3277 ctx.uid = uid;
3278 return bpf_iter_run_prog(prog, &ctx);
3279}
3280
3281static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3282{
3283 struct bpf_iter_meta meta;
3284 struct bpf_prog *prog;
3285 struct sock *sk = v;
3286 uid_t uid;
3287
3288 if (v == SEQ_START_TOKEN)
3289 return 0;
3290
3291 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3292 meta.seq = seq;
3293 prog = bpf_iter_get_info(&meta, false);
3294 return unix_prog_seq_show(prog, &meta, v, uid);
3295}
3296
3297static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3298{
3299 struct bpf_iter_meta meta;
3300 struct bpf_prog *prog;
3301
3302 if (!v) {
3303 meta.seq = seq;
3304 prog = bpf_iter_get_info(&meta, true);
3305 if (prog)
3306 (void)unix_prog_seq_show(prog, &meta, v, 0);
3307 }
3308
3309 unix_seq_stop(seq, v);
3310}
3311
3312static const struct seq_operations bpf_iter_unix_seq_ops = {
3313 .start = unix_seq_start,
3314 .next = unix_seq_next,
3315 .stop = bpf_iter_unix_seq_stop,
3316 .show = bpf_iter_unix_seq_show,
3317};
3318#endif
1da177e4
LT
3319#endif
3320
ec1b4cf7 3321static const struct net_proto_family unix_family_ops = {
1da177e4
LT
3322 .family = PF_UNIX,
3323 .create = unix_create,
3324 .owner = THIS_MODULE,
3325};
3326
097e66c5 3327
2c8c1e72 3328static int __net_init unix_net_init(struct net *net)
097e66c5
DL
3329{
3330 int error = -ENOMEM;
3331
a0a53c8b 3332 net->unx.sysctl_max_dgram_qlen = 10;
1597fbc0
PE
3333 if (unix_sysctl_register(net))
3334 goto out;
d392e497 3335
097e66c5 3336#ifdef CONFIG_PROC_FS
c3506372
CH
3337 if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3338 sizeof(struct seq_net_private))) {
1597fbc0 3339 unix_sysctl_unregister(net);
097e66c5 3340 goto out;
1597fbc0 3341 }
097e66c5
DL
3342#endif
3343 error = 0;
3344out:
48dcc33e 3345 return error;
097e66c5
DL
3346}
3347
2c8c1e72 3348static void __net_exit unix_net_exit(struct net *net)
097e66c5 3349{
1597fbc0 3350 unix_sysctl_unregister(net);
ece31ffd 3351 remove_proc_entry("unix", net->proc_net);
097e66c5
DL
3352}
3353
3354static struct pernet_operations unix_net_ops = {
3355 .init = unix_net_init,
3356 .exit = unix_net_exit,
3357};
3358
2c860a43
KI
3359#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3360DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3361 struct unix_sock *unix_sk, uid_t uid)
3362
3363static const struct bpf_iter_seq_info unix_seq_info = {
3364 .seq_ops = &bpf_iter_unix_seq_ops,
3365 .init_seq_private = bpf_iter_init_seq_net,
3366 .fini_seq_private = bpf_iter_fini_seq_net,
3367 .seq_priv_size = sizeof(struct seq_net_private),
3368};
3369
3370static struct bpf_iter_reg unix_reg_info = {
3371 .target = "unix",
3372 .ctx_arg_info_size = 1,
3373 .ctx_arg_info = {
3374 { offsetof(struct bpf_iter__unix, unix_sk),
3375 PTR_TO_BTF_ID_OR_NULL },
3376 },
3377 .seq_info = &unix_seq_info,
3378};
3379
3380static void __init bpf_iter_register(void)
3381{
3382 unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3383 if (bpf_iter_reg_target(&unix_reg_info))
3384 pr_warn("Warning: could not register bpf iterator unix\n");
3385}
3386#endif
3387
1da177e4
LT
3388static int __init af_unix_init(void)
3389{
3390 int rc = -1;
1da177e4 3391
c593642c 3392 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
1da177e4 3393
94531cfc
JW
3394 rc = proto_register(&unix_dgram_proto, 1);
3395 if (rc != 0) {
3396 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3397 goto out;
3398 }
3399
3400 rc = proto_register(&unix_stream_proto, 1);
ac7bfa62 3401 if (rc != 0) {
5cc208be 3402 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
1da177e4
LT
3403 goto out;
3404 }
3405
3406 sock_register(&unix_family_ops);
097e66c5 3407 register_pernet_subsys(&unix_net_ops);
c6382918 3408 unix_bpf_build_proto();
2c860a43
KI
3409
3410#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3411 bpf_iter_register();
3412#endif
3413
1da177e4
LT
3414out:
3415 return rc;
3416}
3417
3418static void __exit af_unix_exit(void)
3419{
3420 sock_unregister(PF_UNIX);
94531cfc
JW
3421 proto_unregister(&unix_dgram_proto);
3422 proto_unregister(&unix_stream_proto);
097e66c5 3423 unregister_pernet_subsys(&unix_net_ops);
1da177e4
LT
3424}
3425
3d366960
DW
3426/* Earlier than device_initcall() so that other drivers invoking
3427 request_module() don't end up in a loop when modprobe tries
3428 to use a UNIX socket. But later than subsys_initcall() because
3429 we depend on stuff initialised there */
3430fs_initcall(af_unix_init);
1da177e4
LT
3431module_exit(af_unix_exit);
3432
3433MODULE_LICENSE("GPL");
3434MODULE_ALIAS_NETPROTO(PF_UNIX);