net/unix/af_unix.c

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * NET4:        Implementation of BSD Unix domain sockets.
   4  *
   5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   6  *
   7  * Fixes:
   8  *              Linus Torvalds  :       Assorted bug cures.
   9  *              Niibe Yutaka    :       async I/O support.
  10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  11  *              Alan Cox        :       Limit size of allocated blocks.
  12  *              Alan Cox        :       Fixed the stupid socketpair bug.
  13  *              Alan Cox        :       BSD compatibility fine tuning.
  14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  15  *              Alan Cox        :       Sorted out a proper draft version of
  16  *                                      file descriptor passing hacked up from
  17  *                                      Mike Shaver's work.
  18  *              Marty Leisner   :       Fixes to fd passing
  19  *              Nick Nevin      :       recvmsg bugfix.
  20  *              Alan Cox        :       Started proper garbage collector
  21  *              Heiko EiBfeldt  :       Missing verify_area check
  22  *              Alan Cox        :       Started POSIXisms
  23  *              Andreas Schwab  :       Replace inode by dentry for proper
  24  *                                      reference counting
  25  *              Kirk Petersen   :       Made this a module
  26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  27  *                                      Lots of bug fixes.
   28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
   29  *                                      by above two patches.
   30  *           Andrea Arcangeli   :       If possible we block in connect(2)
   31  *                                      if the max backlog of the listen socket
   32  *                                      has been reached. This won't break
   33  *                                      old apps and it will avoid a huge number
   34  *                                      of socks hashed (this is for unix_gc()
   35  *                                      performance reasons).
  36  *                                      Security fix that limits the max
  37  *                                      number of socks to 2*max_files and
  38  *                                      the number of skb queueable in the
  39  *                                      dgram receiver.
  40  *              Artur Skawina   :       Hash function optimizations
  41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  42  *            Malcolm Beattie   :       Set peercred for socketpair
  43  *           Michal Ostrowski   :       Module initialization cleanup.
  44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  45  *                                      the core infrastructure is doing that
  46  *                                      for all net proto families now (2.5.69+)
  47  *
  48  * Known differences from reference BSD that was tested:
  49  *
  50  *      [TO FIX]
  51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  52  *              other the moment one end closes.
  53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  55  *      [NOT TO FIX]
  56  *      accept() returns a path name even if the connecting socket has closed
  57  *              in the meantime (BSD loses the path and gives up).
  58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  61  *      BSD af_unix apparently has connect forgetting to block properly.
  62  *              (need to check this with the POSIX spec in detail)
  63  *
  64  * Differences from 2.0.0-11-... (ANK)
  65  *      Bug fixes and improvements.
  66  *              - client shutdown killed server socket.
  67  *              - removed all useless cli/sti pairs.
  68  *
  69  *      Semantic changes/extensions.
  70  *              - generic control message passing.
  71  *              - SCM_CREDENTIALS control message.
  72  *              - "Abstract" (not FS based) socket bindings.
  73  *                Abstract names are sequences of bytes (not zero terminated)
  74  *                started by 0, so that this name space does not intersect
  75  *                with BSD names.
  76  */
  77
  78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  79
  80 #include <linux/module.h>
  81 #include <linux/kernel.h>
  82 #include <linux/signal.h>
  83 #include <linux/sched/signal.h>
  84 #include <linux/errno.h>
  85 #include <linux/string.h>
  86 #include <linux/stat.h>
  87 #include <linux/dcache.h>
  88 #include <linux/namei.h>
  89 #include <linux/socket.h>
  90 #include <linux/un.h>
  91 #include <linux/fcntl.h>
  92 #include <linux/termios.h>
  93 #include <linux/sockios.h>
  94 #include <linux/net.h>
  95 #include <linux/in.h>
  96 #include <linux/fs.h>
  97 #include <linux/slab.h>
  98 #include <linux/uaccess.h>
  99 #include <linux/skbuff.h>
 100 #include <linux/netdevice.h>
 101 #include <net/net_namespace.h>
 102 #include <net/sock.h>
 103 #include <net/tcp_states.h>
 104 #include <net/af_unix.h>
 105 #include <linux/proc_fs.h>
 106 #include <linux/seq_file.h>
 107 #include <net/scm.h>
 108 #include <linux/init.h>
 109 #include <linux/poll.h>
 110 #include <linux/rtnetlink.h>
 111 #include <linux/mount.h>
 112 #include <net/checksum.h>
 113 #include <linux/security.h>
 114 #include <linux/freezer.h>
 115 #include <linux/file.h>
 116 #include <linux/btf_ids.h>
 117
 118 #include "scm.h"
 119
/*
 * Hash table holding the unix sockets, 2 * UNIX_HASH_SIZE chains long.
 * unix_sockets_unbound() hands out chains at index UNIX_HASH_SIZE and
 * above; UNIX_ABSTRACT() classifies an address by whether its hash is
 * below UNIX_HASH_SIZE.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock held by the helpers in this file while they walk or edit the table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Number of unix sockets accounted for: bumped in unix_create1() and
 * dropped again in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
 125
 126
 127 static struct hlist_head *unix_sockets_unbound(void *addr)
 128 {
 129         unsigned long hash = (unsigned long)addr;
 130
 131         hash ^= hash >> 16;
 132         hash ^= hash >> 8;
 133         hash %= UNIX_HASH_SIZE;
 134         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 135 }
 136
/*
 * True when the hash stored in @sk's address is below UNIX_HASH_SIZE.
 * Dereferences unix_sk(sk)->addr without a NULL check, so it must only
 * be used on sockets that have an address.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 138
 139 #ifdef CONFIG_SECURITY_NETWORK
 140 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 141 {
 142         UNIXCB(skb).lsmblob = scm->lsmblob;
 143 }
 144
 145 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 146 {
 147         scm->lsmblob = UNIXCB(skb).lsmblob;
 148 }
 149
 150 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 151 {
 152         return lsmblob_equal(&scm->lsmblob, &(UNIXCB(skb).lsmblob));
 153 }
 154 #else
 155 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 156 { }
 157
 158 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 159 { }
 160
 161 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 162 {
 163         return true;
 164 }
 165 #endif /* CONFIG_SECURITY_NETWORK */
 166
 167 /*
 168  *  SMP locking strategy:
 169  *    hash table is protected with spinlock unix_table_lock
 170  *    each socket state is protected by separate spin lock.
 171  */
 172
 173 static inline unsigned int unix_hash_fold(__wsum n)
 174 {
 175         unsigned int hash = (__force unsigned int)csum_fold(n);
 176
 177         hash ^= hash>>8;
 178         return hash&(UNIX_HASH_SIZE-1);
 179 }
 180
/* The peer pointer of @sk; expands to an lvalue, so it can be assigned to. */
#define unix_peer(sk) (unix_sk(sk)->peer)
 182
 183 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 184 {
 185         return unix_peer(osk) == sk;
 186 }
 187
 188 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 189 {
 190         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 191 }
 192
 193 static inline int unix_recvq_full(const struct sock *sk)
 194 {
 195         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 196 }
 197
 198 static inline int unix_recvq_full_lockless(const struct sock *sk)
 199 {
 200         return skb_queue_len_lockless(&sk->sk_receive_queue) >
 201                 READ_ONCE(sk->sk_max_ack_backlog);
 202 }
 203
 204 struct sock *unix_peer_get(struct sock *s)
 205 {
 206         struct sock *peer;
 207
 208         unix_state_lock(s);
 209         peer = unix_peer(s);
 210         if (peer)
 211                 sock_hold(peer);
 212         unix_state_unlock(s);
 213         return peer;
 214 }
 215 EXPORT_SYMBOL_GPL(unix_peer_get);
 216
 217 static inline void unix_release_addr(struct unix_address *addr)
 218 {
 219         if (refcount_dec_and_test(&addr->refcnt))
 220                 kfree(addr);
 221 }
 222
 223 /*
 224  *      Check unix socket name:
 225  *              - should be not zero length.
 226  *              - if started by not zero, should be NULL terminated (FS object)
 227  *              - if started by zero, it is abstract name.
 228  */
 229
 230 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 231 {
 232         *hashp = 0;
 233
 234         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 235                 return -EINVAL;
 236         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 237                 return -EINVAL;
 238         if (sunaddr->sun_path[0]) {
 239                 /*
 240                  * This may look like an off by one error but it is a bit more
 241                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 242                  * sun_path[108] doesn't as such exist.  However in kernel space
 243                  * we are guaranteed that it is a valid memory location in our
 244                  * kernel address buffer.
 245                  */
 246                 ((char *)sunaddr)[len] = 0;
 247                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 248                 return len;
 249         }
 250
 251         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 252         return len;
 253 }
 254
 255 static void __unix_remove_socket(struct sock *sk)
 256 {
 257         sk_del_node_init(sk);
 258 }
 259
 260 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 261 {
 262         WARN_ON(!sk_unhashed(sk));
 263         sk_add_node(sk, list);
 264 }
 265
 266 static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
 267                             unsigned hash)
 268 {
 269         __unix_remove_socket(sk);
 270         smp_store_release(&unix_sk(sk)->addr, addr);
 271         __unix_insert_socket(&unix_socket_table[hash], sk);
 272 }
 273
 274 static inline void unix_remove_socket(struct sock *sk)
 275 {
 276         spin_lock(&unix_table_lock);
 277         __unix_remove_socket(sk);
 278         spin_unlock(&unix_table_lock);
 279 }
 280
 281 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 282 {
 283         spin_lock(&unix_table_lock);
 284         __unix_insert_socket(list, sk);
 285         spin_unlock(&unix_table_lock);
 286 }
 287
 288 static struct sock *__unix_find_socket_byname(struct net *net,
 289                                               struct sockaddr_un *sunname,
 290                                               int len, unsigned int hash)
 291 {
 292         struct sock *s;
 293
 294         sk_for_each(s, &unix_socket_table[hash]) {
 295                 struct unix_sock *u = unix_sk(s);
 296
 297                 if (!net_eq(sock_net(s), net))
 298                         continue;
 299
 300                 if (u->addr->len == len &&
 301                     !memcmp(u->addr->name, sunname, len))
 302                         return s;
 303         }
 304         return NULL;
 305 }
 306
 307 static inline struct sock *unix_find_socket_byname(struct net *net,
 308                                                    struct sockaddr_un *sunname,
 309                                                    int len, unsigned int hash)
 310 {
 311         struct sock *s;
 312
 313         spin_lock(&unix_table_lock);
 314         s = __unix_find_socket_byname(net, sunname, len, hash);
 315         if (s)
 316                 sock_hold(s);
 317         spin_unlock(&unix_table_lock);
 318         return s;
 319 }
 320
 321 static struct sock *unix_find_socket_byinode(struct inode *i)
 322 {
 323         struct sock *s;
 324
 325         spin_lock(&unix_table_lock);
 326         sk_for_each(s,
 327                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 328                 struct dentry *dentry = unix_sk(s)->path.dentry;
 329
 330                 if (dentry && d_backing_inode(dentry) == i) {
 331                         sock_hold(s);
 332                         goto found;
 333                 }
 334         }
 335         s = NULL;
 336 found:
 337         spin_unlock(&unix_table_lock);
 338         return s;
 339 }
 340
 341 /* Support code for asymmetrically connected dgram sockets
 342  *
 343  * If a datagram socket is connected to a socket not itself connected
 344  * to the first socket (eg, /dev/log), clients may only enqueue more
 345  * messages if the present receive queue of the server socket is not
 346  * "too large". This means there's a second writeability condition
 347  * poll and sendmsg need to test. The dgram recv code will do a wake
 348  * up on the peer_wait wait queue of a socket upon reception of a
 349  * datagram which needs to be propagated to sleeping would-be writers
 350  * since these might not have sent anything so far. This can't be
 351  * accomplished via poll_wait because the lifetime of the server
 352  * socket might be less than that of its clients if these break their
 353  * association with it or if the server socket is closed while clients
 354  * are still connected to it and there's no way to inform "a polling
 355  * implementation" that it should let go of a certain wait queue
 356  *
 357  * In order to propagate a wake up, a wait_queue_entry_t of the client
 358  * socket is enqueued on the peer_wait queue of the server socket
 359  * whose wake function does a wake_up on the ordinary client socket
 360  * wait queue. This connection is established whenever a write (or
 361  * poll for write) hit the flow control condition and broken when the
 362  * association to the server socket is dissolved or after a wake up
 363  * was relayed.
 364  */
 365
 366 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 367                                       void *key)
 368 {
 369         struct unix_sock *u;
 370         wait_queue_head_t *u_sleep;
 371
 372         u = container_of(q, struct unix_sock, peer_wake);
 373
 374         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 375                             q);
 376         u->peer_wake.private = NULL;
 377
 378         /* relaying can only happen while the wq still exists */
 379         u_sleep = sk_sleep(&u->sk);
 380         if (u_sleep)
 381                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
 382
 383         return 0;
 384 }
 385
 386 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 387 {
 388         struct unix_sock *u, *u_other;
 389         int rc;
 390
 391         u = unix_sk(sk);
 392         u_other = unix_sk(other);
 393         rc = 0;
 394         spin_lock(&u_other->peer_wait.lock);
 395
 396         if (!u->peer_wake.private) {
 397                 u->peer_wake.private = other;
 398                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 399
 400                 rc = 1;
 401         }
 402
 403         spin_unlock(&u_other->peer_wait.lock);
 404         return rc;
 405 }
 406
 407 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 408                                             struct sock *other)
 409 {
 410         struct unix_sock *u, *u_other;
 411
 412         u = unix_sk(sk);
 413         u_other = unix_sk(other);
 414         spin_lock(&u_other->peer_wait.lock);
 415
 416         if (u->peer_wake.private == other) {
 417                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 418                 u->peer_wake.private = NULL;
 419         }
 420
 421         spin_unlock(&u_other->peer_wait.lock);
 422 }
 423
 424 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 425                                                    struct sock *other)
 426 {
 427         unix_dgram_peer_wake_disconnect(sk, other);
 428         wake_up_interruptible_poll(sk_sleep(sk),
 429                                    EPOLLOUT |
 430                                    EPOLLWRNORM |
 431                                    EPOLLWRBAND);
 432 }
 433
 434 /* preconditions:
 435  *      - unix_peer(sk) == other
 436  *      - association is stable
 437  */
 438 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 439 {
 440         int connected;
 441
 442         connected = unix_dgram_peer_wake_connect(sk, other);
 443
 444         /* If other is SOCK_DEAD, we want to make sure we signal
 445          * POLLOUT, such that a subsequent write() can get a
 446          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
 447          * to other and its full, we will hang waiting for POLLOUT.
 448          */
 449         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
 450                 return 1;
 451
 452         if (connected)
 453                 unix_dgram_peer_wake_disconnect(sk, other);
 454
 455         return 0;
 456 }
 457
 458 static int unix_writable(const struct sock *sk)
 459 {
 460         return sk->sk_state != TCP_LISTEN &&
 461                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 462 }
 463
 464 static void unix_write_space(struct sock *sk)
 465 {
 466         struct socket_wq *wq;
 467
 468         rcu_read_lock();
 469         if (unix_writable(sk)) {
 470                 wq = rcu_dereference(sk->sk_wq);
 471                 if (skwq_has_sleeper(wq))
 472                         wake_up_interruptible_sync_poll(&wq->wait,
 473                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
 474                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 475         }
 476         rcu_read_unlock();
 477 }
 478
 479 /* When dgram socket disconnects (or changes its peer), we clear its receive
 480  * queue of packets arrived from previous peer. First, it allows to do
 481  * flow control based only on wmem_alloc; second, sk connected to peer
 482  * may receive messages only from that peer. */
 483 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 484 {
 485         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 486                 skb_queue_purge(&sk->sk_receive_queue);
 487                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 488
 489                 /* If one link of bidirectional dgram pipe is disconnected,
 490                  * we signal error. Messages are lost. Do not make this,
 491                  * when peer was not connected to us.
 492                  */
 493                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 494                         other->sk_err = ECONNRESET;
 495                         sk_error_report(other);
 496                 }
 497         }
 498         other->sk_state = TCP_CLOSE;
 499 }
 500
 501 static void unix_sock_destructor(struct sock *sk)
 502 {
 503         struct unix_sock *u = unix_sk(sk);
 504
 505         skb_queue_purge(&sk->sk_receive_queue);
 506
 507 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
 508         if (u->oob_skb) {
 509                 kfree_skb(u->oob_skb);
 510                 u->oob_skb = NULL;
 511         }
 512 #endif
 513         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 514         WARN_ON(!sk_unhashed(sk));
 515         WARN_ON(sk->sk_socket);
 516         if (!sock_flag(sk, SOCK_DEAD)) {
 517                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 518                 return;
 519         }
 520
 521         if (u->addr)
 522                 unix_release_addr(u->addr);
 523
 524         atomic_long_dec(&unix_nr_socks);
 525         local_bh_disable();
 526         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 527         local_bh_enable();
 528 #ifdef UNIX_REFCNT_DEBUG
 529         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 530                 atomic_long_read(&unix_nr_socks));
 531 #endif
 532 }
 533
 534 static void unix_release_sock(struct sock *sk, int embrion)
 535 {
 536         struct unix_sock *u = unix_sk(sk);
 537         struct path path;
 538         struct sock *skpair;
 539         struct sk_buff *skb;
 540         int state;
 541
 542         unix_remove_socket(sk);
 543
 544         /* Clear state */
 545         unix_state_lock(sk);
 546         sock_orphan(sk);
 547         sk->sk_shutdown = SHUTDOWN_MASK;
 548         path         = u->path;
 549         u->path.dentry = NULL;
 550         u->path.mnt = NULL;
 551         state = sk->sk_state;
 552         sk->sk_state = TCP_CLOSE;
 553
 554         skpair = unix_peer(sk);
 555         unix_peer(sk) = NULL;
 556
 557         unix_state_unlock(sk);
 558
 559         wake_up_interruptible_all(&u->peer_wait);
 560
 561         if (skpair != NULL) {
 562                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 563                         unix_state_lock(skpair);
 564                         /* No more writes */
 565                         skpair->sk_shutdown = SHUTDOWN_MASK;
 566                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 567                                 skpair->sk_err = ECONNRESET;
 568                         unix_state_unlock(skpair);
 569                         skpair->sk_state_change(skpair);
 570                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 571                 }
 572
 573                 unix_dgram_peer_wake_disconnect(sk, skpair);
 574                 sock_put(skpair); /* It may now die */
 575         }
 576
 577         /* Try to flush out this socket. Throw out buffers at least */
 578
 579         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 580                 if (state == TCP_LISTEN)
 581                         unix_release_sock(skb->sk, 1);
 582                 /* passed fds are erased in the kfree_skb hook        */
 583                 UNIXCB(skb).consumed = skb->len;
 584                 kfree_skb(skb);
 585         }
 586
 587         if (path.dentry)
 588                 path_put(&path);
 589
 590         sock_put(sk);
 591
 592         /* ---- Socket is dead now and most probably destroyed ---- */
 593
 594         /*
 595          * Fixme: BSD difference: In BSD all sockets connected to us get
 596          *        ECONNRESET and we die on the spot. In Linux we behave
 597          *        like files and pipes do and wait for the last
 598          *        dereference.
 599          *
 600          * Can't we simply set sock->err?
 601          *
 602          *        What the above comment does talk about? --ANK(980817)
 603          */
 604
 605         if (unix_tot_inflight)
 606                 unix_gc();              /* Garbage collect fds */
 607 }
 608
 609 static void init_peercred(struct sock *sk)
 610 {
 611         const struct cred *old_cred;
 612         struct pid *old_pid;
 613
 614         spin_lock(&sk->sk_peer_lock);
 615         old_pid = sk->sk_peer_pid;
 616         old_cred = sk->sk_peer_cred;
 617         sk->sk_peer_pid  = get_pid(task_tgid(current));
 618         sk->sk_peer_cred = get_current_cred();
 619         spin_unlock(&sk->sk_peer_lock);
 620
 621         put_pid(old_pid);
 622         put_cred(old_cred);
 623 }
 624
 625 static void copy_peercred(struct sock *sk, struct sock *peersk)
 626 {
 627         const struct cred *old_cred;
 628         struct pid *old_pid;
 629
 630         if (sk < peersk) {
 631                 spin_lock(&sk->sk_peer_lock);
 632                 spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
 633         } else {
 634                 spin_lock(&peersk->sk_peer_lock);
 635                 spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
 636         }
 637         old_pid = sk->sk_peer_pid;
 638         old_cred = sk->sk_peer_cred;
 639         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 640         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 641
 642         spin_unlock(&sk->sk_peer_lock);
 643         spin_unlock(&peersk->sk_peer_lock);
 644
 645         put_pid(old_pid);
 646         put_cred(old_cred);
 647 }
 648
 649 static int unix_listen(struct socket *sock, int backlog)
 650 {
 651         int err;
 652         struct sock *sk = sock->sk;
 653         struct unix_sock *u = unix_sk(sk);
 654
 655         err = -EOPNOTSUPP;
 656         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 657                 goto out;       /* Only stream/seqpacket sockets accept */
 658         err = -EINVAL;
 659         if (!u->addr)
 660                 goto out;       /* No listens on an unbound socket */
 661         unix_state_lock(sk);
 662         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 663                 goto out_unlock;
 664         if (backlog > sk->sk_max_ack_backlog)
 665                 wake_up_interruptible_all(&u->peer_wait);
 666         sk->sk_max_ack_backlog  = backlog;
 667         sk->sk_state            = TCP_LISTEN;
 668         /* set credentials so connect can copy them */
 669         init_peercred(sk);
 670         err = 0;
 671
 672 out_unlock:
 673         unix_state_unlock(sk);
 674 out:
 675         return err;
 676 }
 677
 678 static int unix_release(struct socket *);
 679 static int unix_bind(struct socket *, struct sockaddr *, int);
 680 static int unix_stream_connect(struct socket *, struct sockaddr *,
 681                                int addr_len, int flags);
 682 static int unix_socketpair(struct socket *, struct socket *);
 683 static int unix_accept(struct socket *, struct socket *, int, bool);
 684 static int unix_getname(struct socket *, struct sockaddr *, int);
 685 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
 686 static __poll_t unix_dgram_poll(struct file *, struct socket *,
 687                                     poll_table *);
 688 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 689 #ifdef CONFIG_COMPAT
 690 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 691 #endif
 692 static int unix_shutdown(struct socket *, int);
 693 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 694 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 695 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 696                                     size_t size, int flags);
 697 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 698                                        struct pipe_inode_info *, size_t size,
 699                                        unsigned int flags);
 700 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 701 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 702 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
 703                           sk_read_actor_t recv_actor);
 704 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
 705                                  sk_read_actor_t recv_actor);
 706 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 707                               int, int);
 708 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 709 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 710                                   int);
 711
 712 static int unix_set_peek_off(struct sock *sk, int val)
 713 {
 714         struct unix_sock *u = unix_sk(sk);
 715
 716         if (mutex_lock_interruptible(&u->iolock))
 717                 return -EINTR;
 718
 719         sk->sk_peek_off = val;
 720         mutex_unlock(&u->iolock);
 721
 722         return 0;
 723 }
 724
 725 #ifdef CONFIG_PROC_FS
 726 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
 727 {
 728         struct sock *sk = sock->sk;
 729         struct unix_sock *u;
 730
 731         if (sk) {
 732                 u = unix_sk(sock->sk);
 733                 seq_printf(m, "scm_fds: %u\n",
 734                            atomic_read(&u->scm_stat.nr_fds));
 735         }
 736 }
 737 #else
 738 #define unix_show_fdinfo NULL
 739 #endif
 740
 741 static const struct proto_ops unix_stream_ops = {
 742         .family =       PF_UNIX,
 743         .owner =        THIS_MODULE,
 744         .release =      unix_release,
 745         .bind =         unix_bind,
 746         .connect =      unix_stream_connect,
 747         .socketpair =   unix_socketpair,
 748         .accept =       unix_accept,
 749         .getname =      unix_getname,
 750         .poll =         unix_poll,
 751         .ioctl =        unix_ioctl,
 752 #ifdef CONFIG_COMPAT
 753         .compat_ioctl = unix_compat_ioctl,
 754 #endif
 755         .listen =       unix_listen,
 756         .shutdown =     unix_shutdown,
 757         .sendmsg =      unix_stream_sendmsg,
 758         .recvmsg =      unix_stream_recvmsg,
 759         .read_sock =    unix_stream_read_sock,
 760         .mmap =         sock_no_mmap,
 761         .sendpage =     unix_stream_sendpage,
 762         .splice_read =  unix_stream_splice_read,
 763         .set_peek_off = unix_set_peek_off,
 764         .show_fdinfo =  unix_show_fdinfo,
 765 };
 766
 767 static const struct proto_ops unix_dgram_ops = {
 768         .family =       PF_UNIX,
 769         .owner =        THIS_MODULE,
 770         .release =      unix_release,
 771         .bind =         unix_bind,
 772         .connect =      unix_dgram_connect,
 773         .socketpair =   unix_socketpair,
 774         .accept =       sock_no_accept,
 775         .getname =      unix_getname,
 776         .poll =         unix_dgram_poll,
 777         .ioctl =        unix_ioctl,
 778 #ifdef CONFIG_COMPAT
 779         .compat_ioctl = unix_compat_ioctl,
 780 #endif
 781         .listen =       sock_no_listen,
 782         .shutdown =     unix_shutdown,
 783         .sendmsg =      unix_dgram_sendmsg,
 784         .read_sock =    unix_read_sock,
 785         .recvmsg =      unix_dgram_recvmsg,
 786         .mmap =         sock_no_mmap,
 787         .sendpage =     sock_no_sendpage,
 788         .set_peek_off = unix_set_peek_off,
 789         .show_fdinfo =  unix_show_fdinfo,
 790 };
 791
 792 static const struct proto_ops unix_seqpacket_ops = {
 793         .family =       PF_UNIX,
 794         .owner =        THIS_MODULE,
 795         .release =      unix_release,
 796         .bind =         unix_bind,
 797         .connect =      unix_stream_connect,
 798         .socketpair =   unix_socketpair,
 799         .accept =       unix_accept,
 800         .getname =      unix_getname,
 801         .poll =         unix_dgram_poll,
 802         .ioctl =        unix_ioctl,
 803 #ifdef CONFIG_COMPAT
 804         .compat_ioctl = unix_compat_ioctl,
 805 #endif
 806         .listen =       unix_listen,
 807         .shutdown =     unix_shutdown,
 808         .sendmsg =      unix_seqpacket_sendmsg,
 809         .recvmsg =      unix_seqpacket_recvmsg,
 810         .mmap =         sock_no_mmap,
 811         .sendpage =     sock_no_sendpage,
 812         .set_peek_off = unix_set_peek_off,
 813         .show_fdinfo =  unix_show_fdinfo,
 814 };
 815
 816 static void unix_close(struct sock *sk, long timeout)
 817 {
 818         /* Nothing to do here, unix socket does not need a ->close().
 819          * This is merely for sockmap.
 820          */
 821 }
 822
 823 static void unix_unhash(struct sock *sk)
 824 {
 825         /* Nothing to do here, unix socket does not need a ->unhash().
 826          * This is merely for sockmap.
 827          */
 828 }
 829
 830 struct proto unix_dgram_proto = {
 831         .name                   = "UNIX",
 832         .owner                  = THIS_MODULE,
 833         .obj_size               = sizeof(struct unix_sock),
 834         .close                  = unix_close,
 835 #ifdef CONFIG_BPF_SYSCALL
 836         .psock_update_sk_prot   = unix_dgram_bpf_update_proto,
 837 #endif
 838 };
 839
 840 struct proto unix_stream_proto = {
 841         .name                   = "UNIX-STREAM",
 842         .owner                  = THIS_MODULE,
 843         .obj_size               = sizeof(struct unix_sock),
 844         .close                  = unix_close,
 845         .unhash                 = unix_unhash,
 846 #ifdef CONFIG_BPF_SYSCALL
 847         .psock_update_sk_prot   = unix_stream_bpf_update_proto,
 848 #endif
 849 };
 850
 851 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
 852 {
 853         struct unix_sock *u;
 854         struct sock *sk;
 855         int err;
 856
 857         atomic_long_inc(&unix_nr_socks);
 858         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
 859                 err = -ENFILE;
 860                 goto err;
 861         }
 862
 863         if (type == SOCK_STREAM)
 864                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
 865         else /*dgram and  seqpacket */
 866                 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
 867
 868         if (!sk) {
 869                 err = -ENOMEM;
 870                 goto err;
 871         }
 872
 873         sock_init_data(sock, sk);
 874
 875         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 876         sk->sk_write_space      = unix_write_space;
 877         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 878         sk->sk_destruct         = unix_sock_destructor;
 879         u         = unix_sk(sk);
 880         u->path.dentry = NULL;
 881         u->path.mnt = NULL;
 882         spin_lock_init(&u->lock);
 883         atomic_long_set(&u->inflight, 0);
 884         INIT_LIST_HEAD(&u->link);
 885         mutex_init(&u->iolock); /* single task reading lock */
 886         mutex_init(&u->bindlock); /* single task binding lock */
 887         init_waitqueue_head(&u->peer_wait);
 888         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 889         memset(&u->scm_stat, 0, sizeof(struct scm_stat));
 890         unix_insert_socket(unix_sockets_unbound(sk), sk);
 891
 892         local_bh_disable();
 893         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 894         local_bh_enable();
 895
 896         return sk;
 897
 898 err:
 899         atomic_long_dec(&unix_nr_socks);
 900         return ERR_PTR(err);
 901 }
 902
 903 static int unix_create(struct net *net, struct socket *sock, int protocol,
 904                        int kern)
 905 {
 906         struct sock *sk;
 907
 908         if (protocol && protocol != PF_UNIX)
 909                 return -EPROTONOSUPPORT;
 910
 911         sock->state = SS_UNCONNECTED;
 912
 913         switch (sock->type) {
 914         case SOCK_STREAM:
 915                 sock->ops = &unix_stream_ops;
 916                 break;
 917                 /*
 918                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 919                  *      nothing uses it.
 920                  */
 921         case SOCK_RAW:
 922                 sock->type = SOCK_DGRAM;
 923                 fallthrough;
 924         case SOCK_DGRAM:
 925                 sock->ops = &unix_dgram_ops;
 926                 break;
 927         case SOCK_SEQPACKET:
 928                 sock->ops = &unix_seqpacket_ops;
 929                 break;
 930         default:
 931                 return -ESOCKTNOSUPPORT;
 932         }
 933
 934         sk = unix_create1(net, sock, kern, sock->type);
 935         if (IS_ERR(sk))
 936                 return PTR_ERR(sk);
 937
 938         return 0;
 939 }
 940
 941 static int unix_release(struct socket *sock)
 942 {
 943         struct sock *sk = sock->sk;
 944
 945         if (!sk)
 946                 return 0;
 947
 948         sk->sk_prot->close(sk, 0);
 949         unix_release_sock(sk, 0);
 950         sock->sk = NULL;
 951
 952         return 0;
 953 }
 954
 955 static int unix_autobind(struct socket *sock)
 956 {
 957         struct sock *sk = sock->sk;
 958         struct net *net = sock_net(sk);
 959         struct unix_sock *u = unix_sk(sk);
 960         static u32 ordernum = 1;
 961         struct unix_address *addr;
 962         int err;
 963         unsigned int retries = 0;
 964
 965         err = mutex_lock_interruptible(&u->bindlock);
 966         if (err)
 967                 return err;
 968
 969         if (u->addr)
 970                 goto out;
 971
 972         err = -ENOMEM;
 973         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 974         if (!addr)
 975                 goto out;
 976
 977         addr->name->sun_family = AF_UNIX;
 978         refcount_set(&addr->refcnt, 1);
 979
 980 retry:
 981         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 982         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 983         addr->hash ^= sk->sk_type;
 984
 985         spin_lock(&unix_table_lock);
 986         ordernum = (ordernum+1)&0xFFFFF;
 987
 988         if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
 989                 spin_unlock(&unix_table_lock);
 990                 /*
 991                  * __unix_find_socket_byname() may take long time if many names
 992                  * are already in use.
 993                  */
 994                 cond_resched();
 995                 /* Give up if all names seems to be in use. */
 996                 if (retries++ == 0xFFFFF) {
 997                         err = -ENOSPC;
 998                         kfree(addr);
 999                         goto out;
1000                 }
1001                 goto retry;
1002         }
1003
1004         __unix_set_addr(sk, addr, addr->hash);
1005         spin_unlock(&unix_table_lock);
1006         err = 0;
1007
1008 out:    mutex_unlock(&u->bindlock);
1009         return err;
1010 }
1011
/*
 * Look up the socket named by @sunname.
 *
 * A name whose sun_path starts with a NUL byte is searched by name with
 * hash @type ^ @hash; any other name is resolved as a filesystem path,
 * which must be writable by the caller and refer to a socket inode whose
 * socket has type @type.
 *
 * Returns the socket found, or NULL with *@error set to a negative errno
 * (-ECONNREFUSED, -EPROTOTYPE, or whatever kern_path() or
 * path_permission() reported).  *@error is left untouched on success.
 */
static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *found;
        struct inode *inode;
        struct path path;
        bool type_matches;
        int err;

        if (!sunname->sun_path[0]) {
                /* abstract name: plain lookup by name */
                found = unix_find_socket_byname(net, sunname, len, type ^ hash);
                if (!found) {
                        err = -ECONNREFUSED;
                        goto fail;
                }
                if (unix_sk(found)->path.dentry)
                        touch_atime(&unix_sk(found)->path);
                return found;
        }

        /* filesystem name: resolve the path and find the socket by inode */
        err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
        if (err)
                goto fail;

        inode = d_backing_inode(path.dentry);
        err = path_permission(&path, MAY_WRITE);
        if (err)
                goto put_fail;

        err = -ECONNREFUSED;
        if (!S_ISSOCK(inode->i_mode))
                goto put_fail;

        found = unix_find_socket_byinode(inode);
        if (!found)
                goto put_fail;

        /* the access time is only updated for a socket of the right type */
        type_matches = (found->sk_type == type);
        if (type_matches)
                touch_atime(&path);

        path_put(&path);

        if (!type_matches) {
                sock_put(found);
                err = -EPROTOTYPE;
                goto fail;
        }
        return found;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}
1066
1067 static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
1068 {
1069         struct unix_sock *u = unix_sk(sk);
1070         umode_t mode = S_IFSOCK |
1071                (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1072         struct user_namespace *ns; // barf...
1073         struct path parent;
1074         struct dentry *dentry;
1075         unsigned int hash;
1076         int err;
1077
1078         /*
1079          * Get the parent directory, calculate the hash for last
1080          * component.
1081          */
1082         dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1083         if (IS_ERR(dentry))
1084                 return PTR_ERR(dentry);
1085         ns = mnt_user_ns(parent.mnt);
1086
1087         /*
1088          * All right, let's create it.
1089          */
1090         err = security_path_mknod(&parent, dentry, mode, 0);
1091         if (!err)
1092                 err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
1093         if (err)
1094                 goto out;
1095         err = mutex_lock_interruptible(&u->bindlock);
1096         if (err)
1097                 goto out_unlink;
1098         if (u->addr)
1099                 goto out_unlock;
1100
1101         addr->hash = UNIX_HASH_SIZE;
1102         hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1103         spin_lock(&unix_table_lock);
1104         u->path.mnt = mntget(parent.mnt);
1105         u->path.dentry = dget(dentry);
1106         __unix_set_addr(sk, addr, hash);
1107         spin_unlock(&unix_table_lock);
1108         mutex_unlock(&u->bindlock);
1109         done_path_create(&parent, dentry);
1110         return 0;
1111
1112 out_unlock:
1113         mutex_unlock(&u->bindlock);
1114         err = -EINVAL;
1115 out_unlink:
1116         /* failed after successful mknod?  unlink what we'd created... */
1117         vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
1118 out:
1119         done_path_create(&parent, dentry);
1120         return err;
1121 }
1122
1123 static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
1124 {
1125         struct unix_sock *u = unix_sk(sk);
1126         int err;
1127
1128         err = mutex_lock_interruptible(&u->bindlock);
1129         if (err)
1130                 return err;
1131
1132         if (u->addr) {
1133                 mutex_unlock(&u->bindlock);
1134                 return -EINVAL;
1135         }
1136
1137         spin_lock(&unix_table_lock);
1138         if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1139                                       addr->hash)) {
1140                 spin_unlock(&unix_table_lock);
1141                 mutex_unlock(&u->bindlock);
1142                 return -EADDRINUSE;
1143         }
1144         __unix_set_addr(sk, addr, addr->hash);
1145         spin_unlock(&unix_table_lock);
1146         mutex_unlock(&u->bindlock);
1147         return 0;
1148 }
1149
1150 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1151 {
1152         struct sock *sk = sock->sk;
1153         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1154         char *sun_path = sunaddr->sun_path;
1155         int err;
1156         unsigned int hash;
1157         struct unix_address *addr;
1158
1159         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1160             sunaddr->sun_family != AF_UNIX)
1161                 return -EINVAL;
1162
1163         if (addr_len == sizeof(short))
1164                 return unix_autobind(sock);
1165
1166         err = unix_mkname(sunaddr, addr_len, &hash);
1167         if (err < 0)
1168                 return err;
1169         addr_len = err;
1170         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1171         if (!addr)
1172                 return -ENOMEM;
1173
1174         memcpy(addr->name, sunaddr, addr_len);
1175         addr->len = addr_len;
1176         addr->hash = hash ^ sk->sk_type;
1177         refcount_set(&addr->refcnt, 1);
1178
1179         if (sun_path[0])
1180                 err = unix_bind_bsd(sk, addr);
1181         else
1182                 err = unix_bind_abstract(sk, addr);
1183         if (err)
1184                 unix_release_addr(addr);
1185         return err == -EEXIST ? -EADDRINUSE : err;
1186 }
1187
/*
 * Take the state locks of @sk1 and @sk2.  When @sk2 is NULL or the same
 * sock as @sk1 only @sk1 is locked.  Otherwise the sock at the lower
 * address is locked first and the other one with the nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1202
/*
 * Counterpart of unix_state_double_lock(): always unlocks @sk1, and also
 * @sk2 when it is a different, non-NULL sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (!sk2 || unlikely(sk1 == sk2))
                return;

        unix_state_unlock(sk2);
}
1212
/*
 * ->connect() handler for datagram sockets.
 *
 * With an address family other than AF_UNSPEC the target socket is looked
 * up and installed as the peer of @sock.  With AF_UNSPEC the peer is set
 * to NULL, which drops an existing association.
 *
 * Returns 0 on success.  Failures are -EINVAL when @alen does not cover
 * the family field, -EPERM when unix_may_send() refuses the target, or
 * the error reported by unix_mkname(), unix_autobind(),
 * unix_find_other() or security_unix_may_send().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                              int alen, int flags)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
        struct sock *other;
        unsigned int hash;
        int err;

        /* The address must at least contain the family field. */
        err = -EINVAL;
        if (alen < offsetofend(struct sockaddr, sa_family))
                goto out;

        if (addr->sa_family != AF_UNSPEC) {
                /* On success unix_mkname() returns the length used below. */
                err = unix_mkname(sunaddr, alen, &hash);
                if (err < 0)
                        goto out;
                alen = err;

                /* With SOCK_PASSCRED set, an unbound socket is autobound. */
                if (test_bit(SOCK_PASSCRED, &sock->flags) &&
                    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
                        goto out;

restart:
                /*
                 * The sock returned here is released with sock_put() on
                 * every path below that does not keep it as the peer.
                 */
                other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
                if (!other)
                        goto out;

                unix_state_double_lock(sk, other);

                /* Apparently VFS overslept socket death. Retry. */
                if (sock_flag(other, SOCK_DEAD)) {
                        unix_state_double_unlock(sk, other);
                        sock_put(other);
                        goto restart;
                }

                err = -EPERM;
                if (!unix_may_send(sk, other))
                        goto out_unlock;

                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;

                /* Both ends are marked as established. */
                sk->sk_state = other->sk_state = TCP_ESTABLISHED;
        } else {
                /*
                 *      1003.1g breaking connected state with AF_UNSPEC
                 */
                other = NULL;
                /* With a NULL second sock only sk's state lock is taken. */
                unix_state_double_lock(sk, other);
        }

        /*
         * If it was connected, reconnect.
         */
        if (unix_peer(sk)) {
                struct sock *old_peer = unix_peer(sk);

                unix_peer(sk) = other;
                /* Disconnecting (AF_UNSPEC) returns the socket to TCP_CLOSE. */
                if (!other)
                        sk->sk_state = TCP_CLOSE;
                unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

                unix_state_double_unlock(sk, other);

                /* Only run the disconnect handling if the peer changed. */
                if (other != old_peer)
                        unix_dgram_disconnected(sk, old_peer);
                /* Drop the reference that was held on the previous peer. */
                sock_put(old_peer);
        } else {
                unix_peer(sk) = other;
                unix_state_double_unlock(sk, other);
        }

        return 0;

out_unlock:
        unix_state_double_unlock(sk, other);
        sock_put(other);
out:
        return err;
}
1297
1298 static long unix_wait_for_peer(struct sock *other, long timeo)
1299         __releases(&unix_sk(other)->lock)
1300 {
1301         struct unix_sock *u = unix_sk(other);
1302         int sched;
1303         DEFINE_WAIT(wait);
1304
1305         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1306
1307         sched = !sock_flag(other, SOCK_DEAD) &&
1308                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1309                 unix_recvq_full(other);
1310
1311         unix_state_unlock(other);
1312
1313         if (sched)
1314                 timeo = schedule_timeout(timeo);
1315
1316         finish_wait(&u->peer_wait, &wait);
1317         return timeo;
1318 }
1319
/*
 * ->connect() handler for connection-oriented sockets (installed in
 * unix_seqpacket_ops; the name suggests stream sockets use it too).
 *
 * Outline:
 *  - normalise the address and, with SOCK_PASSCRED set on an unbound
 *    socket, autobind it;
 *  - allocate the sock that becomes the server-side end (newsk) and a
 *    one-byte skb charged to it;
 *  - find the listening socket, waiting on it when its receive queue is
 *    full and the caller may block;
 *  - with both state locks held, pair sk with newsk, copy the listener's
 *    address/path to newsk and mark both established;
 *  - queue the skb on the listener's receive queue and signal it.
 *
 * Returns 0 on success or a negative errno.  On failure the skb and
 * newsk are released and any reference held on the listener is dropped.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        /* On success unix_mkname() returns the length used from here on. */
        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /*
         * First of all allocate resources.  If we did this after the
         * state is locked, we would have to recheck everything again
         * in any case.
         */

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
        if (IS_ERR(newsk)) {
                err = PTR_ERR(newsk);
                /* keep the cleanup at "out" from touching an ERR_PTR */
                newsk = NULL;
                goto out;
        }

        err = -ENOMEM;

        /*
         * Allocate skb for sending to listening sock.
         * NOTE(review): presumably sock_wmalloc() ties the skb to newsk,
         * which is how unix_accept() recovers the sock through skb->sk;
         * confirm against sock_wmalloc().
         */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                /* A zero timeout means the caller must not block. */
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* unix_wait_for_peer() releases other's state lock. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /*
         * Latch our state.
         *
         * It is a tricky place.  We need to grab our state lock and cannot
         * drop the lock on the peer.  It is dangerous because deadlock is
         * possible.  The connect-to-self case and simultaneous attempts to
         * connect are eliminated by checking the socket state: other is
         * TCP_LISTEN, and if sk is TCP_LISTEN we check this before
         * attempting to grab the lock.
         *
         * Well, and we have to recheck the state after the socket is
         * locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* Our state changed before we got the lock: start over. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set all the necessary fields... */

        /* newsk's peer pointer holds a reference on sk */
        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock
         *
         * The contents of *(otheru->addr) and otheru->path
         * are seen fully set up here, since we have found
         * otheru in hash under unix_table_lock.  Insertion
         * into the hash chain we'd found it in had been done
         * in an earlier critical area protected by unix_table_lock,
         * the same one where we'd set *(otheru->addr) contents,
         * as well as otheru->path and otheru->addr itself.
         *
         * Using smp_store_release() here to set newu->addr
         * is enough to make those stores, as well as stores
         * to newu->path visible to anyone who gets newu->addr
         * by smp_load_acquire().  IOW, the same warranties
         * as for unix_sock instances bound in unix_bind() or
         * in unix_autobind().
         */
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }
        refcount_inc(&otheru->addr->refcnt);
        smp_store_release(&newu->addr, otheru->addr);

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        /* sk's peer pointer holds a reference on newsk */
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* take ten and send info to listening sock */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* skb, newsk and other may each still be NULL at this point */
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1511
1512 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1513 {
1514         struct sock *ska = socka->sk, *skb = sockb->sk;
1515
1516         /* Join our sockets back to back */
1517         sock_hold(ska);
1518         sock_hold(skb);
1519         unix_peer(ska) = skb;
1520         unix_peer(skb) = ska;
1521         init_peercred(ska);
1522         init_peercred(skb);
1523
1524         ska->sk_state = TCP_ESTABLISHED;
1525         skb->sk_state = TCP_ESTABLISHED;
1526         socka->state  = SS_CONNECTED;
1527         sockb->state  = SS_CONNECTED;
1528         return 0;
1529 }
1530
1531 static void unix_sock_inherit_flags(const struct socket *old,
1532                                     struct socket *new)
1533 {
1534         if (test_bit(SOCK_PASSCRED, &old->flags))
1535                 set_bit(SOCK_PASSCRED, &new->flags);
1536         if (test_bit(SOCK_PASSSEC, &old->flags))
1537                 set_bit(SOCK_PASSSEC, &new->flags);
1538 }
1539
1540 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1541                        bool kern)
1542 {
1543         struct sock *sk = sock->sk;
1544         struct sock *tsk;
1545         struct sk_buff *skb;
1546         int err;
1547
1548         err = -EOPNOTSUPP;
1549         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1550                 goto out;
1551
1552         err = -EINVAL;
1553         if (sk->sk_state != TCP_LISTEN)
1554                 goto out;
1555
1556         /* If socket state is TCP_LISTEN it cannot change (for now...),
1557          * so that no locks are necessary.
1558          */
1559
1560         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1561         if (!skb) {
1562                 /* This means receive shutdown. */
1563                 if (err == 0)
1564                         err = -EINVAL;
1565                 goto out;
1566         }
1567
1568         tsk = skb->sk;
1569         skb_free_datagram(sk, skb);
1570         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1571
1572         /* attach accepted sock to socket */
1573         unix_state_lock(tsk);
1574         newsock->state = SS_CONNECTED;
1575         unix_sock_inherit_flags(sock, newsock);
1576         sock_graft(tsk, newsock);
1577         unix_state_unlock(tsk);
1578         return 0;
1579
1580 out:
1581         return err;
1582 }
1583
1584
1585 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1586 {
1587         struct sock *sk = sock->sk;
1588         struct unix_address *addr;
1589         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1590         int err = 0;
1591
1592         if (peer) {
1593                 sk = unix_peer_get(sk);
1594
1595                 err = -ENOTCONN;
1596                 if (!sk)
1597                         goto out;
1598                 err = 0;
1599         } else {
1600                 sock_hold(sk);
1601         }
1602
1603         addr = smp_load_acquire(&unix_sk(sk)->addr);
1604         if (!addr) {
1605                 sunaddr->sun_family = AF_UNIX;
1606                 sunaddr->sun_path[0] = 0;
1607                 err = sizeof(short);
1608         } else {
1609                 err = addr->len;
1610                 memcpy(sunaddr, addr->name, addr->len);
1611         }
1612         sock_put(sk);
1613 out:
1614         return err;
1615 }
1616
1617 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1618 {
1619         scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1620
1621         /*
1622          * Garbage collection of unix sockets starts by selecting a set of
1623          * candidate sockets which have reference only from being in flight
1624          * (total_refs == inflight_refs).  This condition is checked once during
1625          * the candidate collection phase, and candidates are marked as such, so
1626          * that non-candidates can later be ignored.  While inflight_refs is
1627          * protected by unix_gc_lock, total_refs (file count) is not, hence this
1628          * is an instantaneous decision.
1629          *
1630          * Once a candidate, however, the socket must not be reinstalled into a
1631          * file descriptor while the garbage collection is in progress.
1632          *
1633          * If the above conditions are met, then the directed graph of
1634          * candidates (*) does not change while unix_gc_lock is held.
1635          *
1636          * Any operations that changes the file count through file descriptors
1637          * (dup, close, sendmsg) does not change the graph since candidates are
1638          * not installed in fds.
1639          *
1640          * Dequeing a candidate via recvmsg would install it into an fd, but
1641          * that takes unix_gc_lock to decrement the inflight count, so it's
1642          * serialized with garbage collection.
1643          *
1644          * MSG_PEEK is special in that it does not change the inflight count,
1645          * yet does install the socket into an fd.  The following lock/unlock
1646          * pair is to ensure serialization with garbage collection.  It must be
1647          * done between incrementing the file count and installing the file into
1648          * an fd.
1649          *
1650          * If garbage collection starts after the barrier provided by the
1651          * lock/unlock, then it will see the elevated refcount and not mark this
1652          * as a candidate.  If a garbage collection is already in progress
1653          * before the file count was incremented, then the lock/unlock pair will
1654          * ensure that garbage collection is finished before progressing to
1655          * installing the fd.
1656          *
1657          * (*) A -> B where B is on the queue of A or B is on the queue of C
1658          * which is on the queue of listening socket A.
1659          */
1660         spin_lock(&unix_gc_lock);
1661         spin_unlock(&unix_gc_lock);
1662 }
1663
1664 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1665 {
1666         int err = 0;
1667
1668         UNIXCB(skb).pid  = get_pid(scm->pid);
1669         UNIXCB(skb).uid = scm->creds.uid;
1670         UNIXCB(skb).gid = scm->creds.gid;
1671         UNIXCB(skb).fp = NULL;
1672         unix_get_secdata(scm, skb);
1673         if (scm->fp && send_fds)
1674                 err = unix_attach_fds(scm, skb);
1675
1676         skb->destructor = unix_destruct_scm;
1677         return err;
1678 }
1679
1680 static bool unix_passcred_enabled(const struct socket *sock,
1681                                   const struct sock *other)
1682 {
1683         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1684                !other->sk_socket ||
1685                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1686 }
1687
1688 /*
1689  * Some apps rely on write() giving SCM_CREDENTIALS
1690  * We include credentials if source or destination socket
1691  * asserted SOCK_PASSCRED.
1692  */
1693 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1694                             const struct sock *other)
1695 {
1696         if (UNIXCB(skb).pid)
1697                 return;
1698         if (unix_passcred_enabled(sock, other)) {
1699                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1700                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1701         }
1702 }
1703
1704 static int maybe_init_creds(struct scm_cookie *scm,
1705                             struct socket *socket,
1706                             const struct sock *other)
1707 {
1708         int err;
1709         struct msghdr msg = { .msg_controllen = 0 };
1710
1711         err = scm_send(socket, &msg, scm, false);
1712         if (err)
1713                 return err;
1714
1715         if (unix_passcred_enabled(socket, other)) {
1716                 scm->pid = get_pid(task_tgid(current));
1717                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1718         }
1719         return err;
1720 }
1721
1722 static bool unix_skb_scm_eq(struct sk_buff *skb,
1723                             struct scm_cookie *scm)
1724 {
1725         const struct unix_skb_parms *u = &UNIXCB(skb);
1726
1727         return u->pid == scm->pid &&
1728                uid_eq(u->uid, scm->creds.uid) &&
1729                gid_eq(u->gid, scm->creds.gid) &&
1730                unix_secdata_eq(scm, skb);
1731 }
1732
1733 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1734 {
1735         struct scm_fp_list *fp = UNIXCB(skb).fp;
1736         struct unix_sock *u = unix_sk(sk);
1737
1738         if (unlikely(fp && fp->count))
1739                 atomic_add(fp->count, &u->scm_stat.nr_fds);
1740 }
1741
1742 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1743 {
1744         struct scm_fp_list *fp = UNIXCB(skb).fp;
1745         struct unix_sock *u = unix_sk(sk);
1746
1747         if (unlikely(fp && fp->count))
1748                 atomic_sub(fp->count, &u->scm_stat.nr_fds);
1749 }
1750
/*
 *      Send AF_UNIX data.
 *
 *      unix_dgram_sendmsg - send a single datagram.
 *      @sock: sending socket
 *      @msg:  message; a non-zero msg_namelen selects an explicit
 *             destination address, otherwise the connected peer is used
 *      @len:  number of payload bytes to send
 *
 *      Returns @len when the datagram was queued on the receiver (or was
 *      silently dropped by the receiver's socket filter), otherwise a
 *      negative errno.  Errors set directly in this function are
 *      -EOPNOTSUPP (MSG_OOB), -ENOTCONN, -EMSGSIZE, -ECONNRESET, -EPERM,
 *      -ECONNREFUSED, -EPIPE and -EAGAIN; errors from the helpers that
 *      are called are passed through.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
                              size_t len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
        struct sock *other = NULL;
        int namelen = 0; /* fake GCC */
        int err;
        unsigned int hash;
        struct sk_buff *skb;
        long timeo;
        struct scm_cookie scm;
        int data_len = 0;
        int sk_locked;  /* non-zero while sk's state lock is held too */

        wait_for_unix_gc();
        err = scm_send(sock, msg, &scm, false);
        if (err < 0)
                return err;

        /* From here on every exit goes through "out" to scm_destroy(). */
        err = -EOPNOTSUPP;
        if (msg->msg_flags&MSG_OOB)
                goto out;

        if (msg->msg_namelen) {
                /* Explicit destination: a non-negative result is the name length. */
                err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
                if (err < 0)
                        goto out;
                namelen = err;
        } else {
                /* No address given: must already have a peer. */
                sunaddr = NULL;
                err = -ENOTCONN;
                other = unix_peer_get(sk);
                if (!other)
                        goto out;
        }

        /* A SOCK_PASSCRED sender without a bound address is autobound first. */
        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
            && (err = unix_autobind(sock)) != 0)
                goto out;

        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;

        /*
         * For large datagrams, data_len is the page-aligned part handed to
         * sock_alloc_send_pskb() as its data_len argument; the remainder
         * (len - data_len) is passed as the header length.
         */
        if (len > SKB_MAX_ALLOC) {
                data_len = min_t(size_t,
                                 len - SKB_MAX_ALLOC,
                                 MAX_SKB_FRAGS * PAGE_SIZE);
                data_len = PAGE_ALIGN(data_len);

                BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
        }

        skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
                                   msg->msg_flags & MSG_DONTWAIT, &err,
                                   PAGE_ALLOC_COSTLY_ORDER);
        if (skb == NULL)
                goto out;

        /* Attach credentials and any passed fds to the skb. */
        err = unix_scm_to_skb(&scm, skb, true);
        if (err < 0)
                goto out_free;

        skb_put(skb, len - data_len);
        skb->data_len = data_len;
        skb->len = len;
        err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
        if (err)
                goto out_free;

        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
        /*
         * (Re)resolve the destination.  "other" is NULL here either on the
         * first pass with an explicit address, or after a dead receiver
         * was dropped below.
         */
        if (!other) {
                err = -ECONNRESET;
                if (sunaddr == NULL)
                        goto out_free;

                other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
                                        hash, &err);
                if (other == NULL)
                        goto out_free;
        }

        if (sk_filter(other, skb) < 0) {
                /* Toss the packet but do not return any error to the sender */
                err = len;
                goto out_free;
        }

        sk_locked = 0;
        unix_state_lock(other);
restart_locked:
        /* other's state lock is held here; sk's as well iff sk_locked. */
        err = -EPERM;
        if (!unix_may_send(sk, other))
                goto out_unlock;

        if (unlikely(sock_flag(other, SOCK_DEAD))) {
                /*
                 *      Check with 1003.1g - what should
                 *      datagram error
                 */
                /* Drop the lock and the reference this function held. */
                unix_state_unlock(other);
                sock_put(other);

                if (!sk_locked)
                        unix_state_lock(sk);

                err = 0;
                if (unix_peer(sk) == other) {
                        /* The dead socket was our connected peer: disconnect. */
                        unix_peer(sk) = NULL;
                        unix_dgram_peer_wake_disconnect_wakeup(sk, other);

                        unix_state_unlock(sk);

                        sk->sk_state = TCP_CLOSE;
                        unix_dgram_disconnected(sk, other);
                        /*
                         * NOTE(review): presumably this drops the reference
                         * that the unix_peer(sk) pointer itself held - confirm.
                         */
                        sock_put(other);
                        err = -ECONNREFUSED;
                } else {
                        unix_state_unlock(sk);
                }

                /* other is cleared so "out" does not sock_put() it again. */
                other = NULL;
                if (err)
                        goto out_free;
                goto restart;
        }

        err = -EPIPE;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (sk->sk_type != SOCK_SEQPACKET) {
                err = security_unix_may_send(sk->sk_socket, other->sk_socket);
                if (err)
                        goto out_unlock;
        }

        /* other == sk && unix_peer(other) != sk if
         * - unix_peer(sk) == NULL, destination address bound to sk
         * - unix_peer(sk) == sk by time of get but disconnected before lock
         */
        if (other != sk &&
            unlikely(unix_peer(other) != sk &&
            unix_recvq_full_lockless(other))) {
                /* Receiver's queue is full and it is not connected back to us. */
                if (timeo) {
                        /*
                         * Blocking send: wait, then start over.
                         * NOTE(review): the paths below skip out_unlock, so
                         * unix_wait_for_peer() is presumably expected to
                         * return with other's state lock released - confirm.
                         */
                        timeo = unix_wait_for_peer(other, timeo);

                        err = sock_intr_errno(timeo);
                        if (signal_pending(current))
                                goto out_free;

                        goto restart;
                }

                /* Non-blocking send: take both state locks. */
                if (!sk_locked) {
                        unix_state_unlock(other);
                        unix_state_double_lock(sk, other);
                }

                if (unix_peer(sk) != other ||
                    unix_dgram_peer_wake_me(sk, other)) {
                        err = -EAGAIN;
                        sk_locked = 1;
                        goto out_unlock;
                }

                /*
                 * Locks were just retaken via the double lock: redo all the
                 * checks from restart_locked with both locks held.
                 */
                if (!sk_locked) {
                        sk_locked = 1;
                        goto restart_locked;
                }
        }

        if (unlikely(sk_locked))
                unix_state_unlock(sk);

        /* Deliver: timestamp, add creds if wanted, account fds, queue, wake. */
        if (sock_flag(other, SOCK_RCVTSTAMP))
                __net_timestamp(skb);
        maybe_add_creds(skb, sock, other);
        scm_stat_add(other, skb);
        skb_queue_tail(&other->sk_receive_queue, skb);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        scm_destroy(&scm);
        return len;

        /* Error unwinding: each label releases less than the one above it. */
out_unlock:
        if (sk_locked)
                unix_state_unlock(sk);
        unix_state_unlock(other);
out_free:
        kfree_skb(skb);
out:
        if (other)
                sock_put(other);
        scm_destroy(&scm);
        return err;
}
1958
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 *
 * The value is PAGE_SIZE shifted left by get_order(32768).  It is used
 * below both as a cap on the per-skb send size and, via get_order(), as
 * the maximum page order passed to sock_alloc_send_pskb().
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1963
1964 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
1965 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
1966 {
1967         struct unix_sock *ousk = unix_sk(other);
1968         struct sk_buff *skb;
1969         int err = 0;
1970
1971         skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
1972
1973         if (!skb)
1974                 return err;
1975
1976         skb_put(skb, 1);
1977         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
1978
1979         if (err) {
1980                 kfree_skb(skb);
1981                 return err;
1982         }
1983
1984         unix_state_lock(other);
1985
1986         if (sock_flag(other, SOCK_DEAD) ||
1987             (other->sk_shutdown & RCV_SHUTDOWN)) {
1988                 unix_state_unlock(other);
1989                 kfree_skb(skb);
1990                 return -EPIPE;
1991         }
1992
1993         maybe_add_creds(skb, sock, other);
1994         skb_get(skb);
1995
1996         if (ousk->oob_skb)
1997                 consume_skb(ousk->oob_skb);
1998
1999         WRITE_ONCE(ousk->oob_skb, skb);
2000
2001         scm_stat_add(other, skb);
2002         skb_queue_tail(&other->sk_receive_queue, skb);
2003         sk_send_sigurg(other);
2004         unix_state_unlock(other);
2005         other->sk_data_ready(other);
2006
2007         return err;
2008 }
2009 #endif
2010
/*
 * unix_stream_sendmsg - send data on a connected stream socket.
 * @sock: sending socket
 * @msg:  message to send; must not carry a destination address
 * @len:  number of bytes to send
 *
 * The payload is split into skbs which are queued one by one on the
 * peer's receive queue.  Passed fds travel with the first skb only.
 * With CONFIG_AF_UNIX_OOB and MSG_OOB, the last byte is held back from
 * the main loop and sent through queue_oob() afterwards.
 *
 * Returns the number of bytes sent if any were sent (even when a later
 * chunk failed), otherwise a negative errno.  SIGPIPE is raised on a
 * broken pipe only when nothing was sent and MSG_NOSIGNAL is not set.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
                               size_t len)
{
        struct sock *sk = sock->sk;
        struct sock *other = NULL;
        int err, size;
        struct sk_buff *skb;
        int sent = 0;
        struct scm_cookie scm;
        bool fds_sent = false;
        int data_len;

        wait_for_unix_gc();
        err = scm_send(sock, msg, &scm, false);
        if (err < 0)
                return err;

        err = -EOPNOTSUPP;
        if (msg->msg_flags & MSG_OOB) {
                /*
                 * With OOB support a non-empty send reserves its last byte
                 * for queue_oob(); an empty OOB send, or any OOB send
                 * without OOB support, fails with -EOPNOTSUPP.
                 */
#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
                if (len)
                        len--;
                else
#endif
                        goto out_err;
        }

        if (msg->msg_namelen) {
                /* Stream sockets do not accept a destination address. */
                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
                goto out_err;
        } else {
                err = -ENOTCONN;
                /* The peer pointer is used as is; no sock_put() is done on it here. */
                other = unix_peer(sk);
                if (!other)
                        goto out_err;
        }

        if (sk->sk_shutdown & SEND_SHUTDOWN)
                goto pipe_err;

        while (sent < len) {
                size = len - sent;

                /* Keep two messages in the pipe so it schedules better */
                size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

                /* allow fallback to order-0 allocations */
                size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

                /* Whatever exceeds the linear head goes into page frags. */
                data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

                data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

                skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
                                           msg->msg_flags & MSG_DONTWAIT, &err,
                                           get_order(UNIX_SKB_FRAGS_SZ));
                if (!skb)
                        goto out_err;

                /* Only send the fds in the first buffer */
                err = unix_scm_to_skb(&scm, skb, !fds_sent);
                if (err < 0) {
                        kfree_skb(skb);
                        goto out_err;
                }
                fds_sent = true;

                skb_put(skb, size - data_len);
                skb->data_len = data_len;
                skb->len = size;
                err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
                if (err) {
                        kfree_skb(skb);
                        goto out_err;
                }

                unix_state_lock(other);

                /* Re-check the receiver under its state lock for every chunk. */
                if (sock_flag(other, SOCK_DEAD) ||
                    (other->sk_shutdown & RCV_SHUTDOWN))
                        goto pipe_err_free;

                maybe_add_creds(skb, sock, other);
                scm_stat_add(other, skb);
                skb_queue_tail(&other->sk_receive_queue, skb);
                unix_state_unlock(other);
                other->sk_data_ready(other);
                sent += size;
        }

#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
        if (msg->msg_flags & MSG_OOB) {
                /* Send the byte that was held back above. */
                err = queue_oob(sock, msg, other);
                if (err)
                        goto out_err;
                sent++;
        }
#endif

        scm_destroy(&scm);

        return sent;

pipe_err_free:
        unix_state_unlock(other);
        kfree_skb(skb);
pipe_err:
        if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
                send_sig(SIGPIPE, current, 0);
        err = -EPIPE;
out_err:
        scm_destroy(&scm);
        /* Partial success wins over the error. */
        return sent ? : err;
}
2125
/*
 * unix_stream_sendpage - append a page fragment to the peer's receive queue.
 * @socket: sending socket
 * @page:   page holding the data
 * @offset: offset of the data within @page
 * @size:   number of bytes
 * @flags:  MSG_* flags; MSG_OOB is rejected
 *
 * The page is appended to the skb at the tail of the peer's receive queue
 * when that skb carries the same credentials and still accepts the
 * fragment; otherwise a fresh, empty skb is allocated (with both locks
 * dropped) and queued.
 *
 * Returns @size on success or a negative errno.  SIGPIPE is raised for
 * the -EPIPE cases unless MSG_NOSIGNAL is set.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
                                    int offset, size_t size, int flags)
{
        int err;
        bool send_sigpipe = false;
        bool init_scm = true;   /* scm not yet initialised by maybe_init_creds() */
        struct scm_cookie scm;
        struct sock *other, *sk = socket->sk;
        /*
         * newskb: skb allocated by the alloc_skb path, if any.
         * tail:   queue tail seen when the last pass jumped to alloc_skb.
         */
        struct sk_buff *skb, *newskb = NULL, *tail = NULL;

        if (flags & MSG_OOB)
                return -EOPNOTSUPP;

        other = unix_peer(sk);
        if (!other || sk->sk_state != TCP_ESTABLISHED)
                return -ENOTCONN;

        /*
         * Never entered from the top; reached only through "goto alloc_skb"
         * below.  Both locks are dropped for the allocation and execution
         * then falls through to retake them and redo the checks.
         */
        if (false) {
alloc_skb:
                unix_state_unlock(other);
                mutex_unlock(&unix_sk(other)->iolock);
                newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
                                              &err, 0);
                if (!newskb)
                        goto err;
        }

        /* we must acquire iolock as we modify already present
         * skbs in the sk_receive_queue and mess with skb->len
         */
        err = mutex_lock_interruptible(&unix_sk(other)->iolock);
        if (err) {
                err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
                goto err;
        }

        if (sk->sk_shutdown & SEND_SHUTDOWN) {
                err = -EPIPE;
                send_sigpipe = true;
                goto err_unlock;
        }

        unix_state_lock(other);

        if (sock_flag(other, SOCK_DEAD) ||
            other->sk_shutdown & RCV_SHUTDOWN) {
                err = -EPIPE;
                send_sigpipe = true;
                goto err_state_unlock;
        }

        /* Initialise scm once, even if this code is re-run after alloc_skb. */
        if (init_scm) {
                err = maybe_init_creds(&scm, socket, other);
                if (err)
                        goto err_state_unlock;
                init_scm = false;
        }

        /*
         * Pick the skb to append to.  After this chain either
         * skb == newskb or newskb == NULL.
         */
        skb = skb_peek_tail(&other->sk_receive_queue);
        if (tail && tail == skb) {
                /* Tail unchanged since it was found unusable: use the new skb. */
                skb = newskb;
        } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
                /* Empty queue or different credentials: a new skb is needed. */
                if (newskb) {
                        skb = newskb;
                } else {
                        tail = skb;
                        goto alloc_skb;
                }
        } else if (newskb) {
                /* this is fast path, we don't necessarily need to
                 * call to kfree_skb even though with newskb == NULL
                 * this - does no harm
                 */
                consume_skb(newskb);
                newskb = NULL;
        }

        /* The chosen skb cannot take the fragment: retry with a fresh skb. */
        if (skb_append_pagefrags(skb, page, offset, size)) {
                tail = skb;
                goto alloc_skb;
        }

        /* Account the appended bytes on the skb and charge the sender. */
        skb->len += size;
        skb->data_len += size;
        skb->truesize += size;
        refcount_add(size, &sk->sk_wmem_alloc);

        if (newskb) {
                /* skb == newskb here: stamp credentials and queue it. */
                err = unix_scm_to_skb(&scm, skb, false);
                if (err)
                        goto err_state_unlock;
                spin_lock(&other->sk_receive_queue.lock);
                __skb_queue_tail(&other->sk_receive_queue, newskb);
                spin_unlock(&other->sk_receive_queue.lock);
        }

        unix_state_unlock(other);
        mutex_unlock(&unix_sk(other)->iolock);

        other->sk_data_ready(other);
        scm_destroy(&scm);
        return size;

err_state_unlock:
        unix_state_unlock(other);
err_unlock:
        mutex_unlock(&unix_sk(other)->iolock);
err:
        kfree_skb(newskb);
        if (send_sigpipe && !(flags & MSG_NOSIGNAL))
                send_sig(SIGPIPE, current, 0);
        /* scm is only destroyed if maybe_init_creds() succeeded. */
        if (!init_scm)
                scm_destroy(&scm);
        return err;
}
2241
2242 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2243                                   size_t len)
2244 {
2245         int err;
2246         struct sock *sk = sock->sk;
2247
2248         err = sock_error(sk);
2249         if (err)
2250                 return err;
2251
2252         if (sk->sk_state != TCP_ESTABLISHED)
2253                 return -ENOTCONN;
2254
2255         if (msg->msg_namelen)
2256                 msg->msg_namelen = 0;
2257
2258         return unix_dgram_sendmsg(sock, msg, len);
2259 }
2260
2261 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2262                                   size_t size, int flags)
2263 {
2264         struct sock *sk = sock->sk;
2265
2266         if (sk->sk_state != TCP_ESTABLISHED)
2267                 return -ENOTCONN;
2268
2269         return unix_dgram_recvmsg(sock, msg, size, flags);
2270 }
2271
2272 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2273 {
2274         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2275
2276         if (addr) {
2277                 msg->msg_namelen = addr->len;
2278                 memcpy(msg->msg_name, addr->name, addr->len);
2279         }
2280 }
2281
/*
 * __unix_dgram_recvmsg - receive one datagram from @sk's receive queue.
 * @sk:    receiving socket
 * @msg:   destination message; msg_name, if set, receives the sender address
 * @size:  size of the caller's buffer
 * @flags: MSG_* flags; MSG_OOB is rejected with -EOPNOTSUPP
 *
 * Returns the number of bytes copied, or - when MSG_TRUNC is set in
 * @flags - the full remaining length of the datagram.  On failure a
 * negative errno is returned; a SEQPACKET socket that would return
 * -EAGAIN but has RCV_SHUTDOWN set returns 0 instead.
 */
int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
                         int flags)
{
        struct scm_cookie scm;
        struct socket *sock = sk->sk_socket;
        struct unix_sock *u = unix_sk(sk);
        struct sk_buff *skb, *last;
        long timeo;
        int skip;
        int err;

        err = -EOPNOTSUPP;
        if (flags&MSG_OOB)
                goto out;

        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

        /*
         * Try to dequeue under iolock; the loop is left with iolock still
         * held if and only if an skb was obtained.
         */
        do {
                mutex_lock(&u->iolock);

                skip = sk_peek_offset(sk, flags);
                skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
                                              &skip, &err, &last);
                if (skb) {
                        /* A real (non-peek) read removes the skb's fds from the stats. */
                        if (!(flags & MSG_PEEK))
                                scm_stat_del(sk, skb);
                        break;
                }

                mutex_unlock(&u->iolock);

                if (err != -EAGAIN)
                        break;
        } while (timeo &&
                 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
                                              &err, &timeo, last));

        if (!skb) { /* implies iolock unlocked */
                unix_state_lock(sk);
                /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
                if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
                    (sk->sk_shutdown & RCV_SHUTDOWN))
                        err = 0;
                unix_state_unlock(sk);
                goto out;
        }

        /* Wake anyone sleeping on this socket's peer_wait queue. */
        if (wq_has_sleeper(&u->peer_wait))
                wake_up_interruptible_sync_poll(&u->peer_wait,
                                                EPOLLOUT | EPOLLWRNORM |
                                                EPOLLWRBAND);

        if (msg->msg_name)
                unix_copy_addr(msg, skb->sk);

        /* Clamp to what is left of the datagram, or flag truncation. */
        if (size > skb->len - skip)
                size = skb->len - skip;
        else if (size < skb->len - skip)
                msg->msg_flags |= MSG_TRUNC;

        err = skb_copy_datagram_msg(skb, skip, msg, size);
        if (err)
                goto out_free;

        if (sock_flag(sk, SOCK_RCVTSTAMP))
                __sock_recv_timestamp(msg, sk, skb);

        memset(&scm, 0, sizeof(scm));

        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
        unix_set_secdata(&scm, skb);

        if (!(flags & MSG_PEEK)) {
                /* Real read: the fds move from the skb to the scm cookie. */
                if (UNIXCB(skb).fp)
                        unix_detach_fds(&scm, skb);

                sk_peek_offset_bwd(sk, skb->len);
        } else {
                /* It is questionable: on PEEK we could:
                   - do not return fds - good, but too simple 8)
                   - return fds, and do not return them on read (old strategy,
                     apparently wrong)
                   - clone fds (I chose it for now, it is the most universal
                     solution)

                   POSIX 1003.1g does not actually define this clearly
                   at all. POSIX 1003.1g doesn't define a lot of things
                   clearly however!

                */

                sk_peek_offset_fwd(sk, size);

                if (UNIXCB(skb).fp)
                        unix_peek_fds(&scm, skb);
        }
        /* With MSG_TRUNC the caller gets the real length, not the copied one. */
        err = (flags & MSG_TRUNC) ? skb->len - skip : size;

        scm_recv(sock, msg, &scm, flags);

out_free:
        skb_free_datagram(sk, skb);
        mutex_unlock(&u->iolock);
out:
        return err;
}
2388
2389 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2390                               int flags)
2391 {
2392         struct sock *sk = sock->sk;
2393
2394 #ifdef CONFIG_BPF_SYSCALL
2395         const struct proto *prot = READ_ONCE(sk->sk_prot);
2396
2397         if (prot != &unix_dgram_proto)
2398                 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2399                                             flags & ~MSG_DONTWAIT, NULL);
2400 #endif
2401         return __unix_dgram_recvmsg(sk, msg, size, flags);
2402 }
2403
2404 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2405                           sk_read_actor_t recv_actor)
2406 {
2407         int copied = 0;
2408
2409         while (1) {
2410                 struct unix_sock *u = unix_sk(sk);
2411                 struct sk_buff *skb;
2412                 int used, err;
2413
2414                 mutex_lock(&u->iolock);
2415                 skb = skb_recv_datagram(sk, 0, 1, &err);
2416                 mutex_unlock(&u->iolock);
2417                 if (!skb)
2418                         return err;
2419
2420                 used = recv_actor(desc, skb, 0, skb->len);
2421                 if (used <= 0) {
2422                         if (!copied)
2423                                 copied = used;
2424                         kfree_skb(skb);
2425                         break;
2426                 } else if (used <= skb->len) {
2427                         copied += used;
2428                 }
2429
2430                 kfree_skb(skb);
2431                 if (!desc->count)
2432                         break;
2433         }
2434
2435         return copied;
2436 }
2437
/*
 *      Sleep until more data has arrived. But check for races..
 *
 *      @sk:        socket to wait on
 *      @timeo:     remaining timeout
 *      @last:      receive-queue tail observed by the caller
 *      @last_len:  length of @last as observed by the caller
 *      @freezable: use freezable_schedule_timeout() instead of
 *                  schedule_timeout()
 *
 *      Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
                                  struct sk_buff *last, unsigned int last_len,
                                  bool freezable)
{
        struct sk_buff *tail;
        DEFINE_WAIT(wait);

        unix_state_lock(sk);

        for (;;) {
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

                /*
                 * Stop waiting when the queue tail changed or grew, on a
                 * socket error, receive shutdown, pending signal, or when
                 * the timeout has run out.
                 */
                tail = skb_peek_tail(&sk->sk_receive_queue);
                if (tail != last ||
                    (tail && tail->len != last_len) ||
                    sk->sk_err ||
                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
                    signal_pending(current) ||
                    !timeo)
                        break;

                /* The state lock is dropped only while actually sleeping. */
                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                unix_state_unlock(sk);
                if (freezable)
                        timeo = freezable_schedule_timeout(timeo);
                else
                        timeo = schedule_timeout(timeo);
                unix_state_lock(sk);

                /* On a dead socket, leave without clearing the WAITDATA bit. */
                if (sock_flag(sk, SOCK_DEAD))
                        break;

                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
        }

        finish_wait(sk_sleep(sk), &wait);
        unix_state_unlock(sk);
        return timeo;
}
2480
2481 static unsigned int unix_skb_len(const struct sk_buff *skb)
2482 {
2483         return skb->len - UNIXCB(skb).consumed;
2484 }
2485
/* Parameters of one stream read request, shared by the stream read helpers. */
struct unix_stream_read_state {
        /*
         * Callback that consumes data from an skb; invoked as
         * recv_actor(skb, offset, chunk, state).  A negative return value
         * is treated as a failure by unix_stream_recv_urg().
         */
        int (*recv_actor)(struct sk_buff *, int, int,
                          struct unix_stream_read_state *);
        struct socket *socket;          /* socket being read */
        struct msghdr *msg;             /* message whose msg_flags get updated */
        /* NOTE(review): presumably the splice target - not used in the code visible here */
        struct pipe_inode_info *pipe;
        size_t size;                    /* number of bytes requested */
        int flags;                      /* MSG_* flags of the request */
        /* NOTE(review): presumably SPLICE_F_* flags - not used in the code visible here */
        unsigned int splice_flags;
};
2496
2497 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
/*
 * unix_stream_recv_urg - read the pending out-of-band byte.
 * @state: read request; uses ->socket, ->flags, ->recv_actor and ->msg
 *
 * Returns 1 when the OOB byte was delivered (MSG_OOB is then set in
 * msg_flags), -EINVAL when the socket has SOCK_URGINLINE set or no OOB
 * skb is pending, and -EFAULT when the actor reported a failure.
 */
static int unix_stream_recv_urg(struct unix_stream_read_state *state)
{
        struct socket *sock = state->socket;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        int chunk = 1;
        struct sk_buff *oob_skb;

        mutex_lock(&u->iolock);
        unix_state_lock(sk);

        if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
                unix_state_unlock(sk);
                mutex_unlock(&u->iolock);
                return -EINVAL;
        }

        oob_skb = u->oob_skb;

        /* A real (non-peek) read takes the skb over from the socket. */
        if (!(state->flags & MSG_PEEK))
                WRITE_ONCE(u->oob_skb, NULL);

        unix_state_unlock(sk);

        /* The copy runs with only iolock held. */
        chunk = state->recv_actor(oob_skb, 0, chunk, state);

        if (!(state->flags & MSG_PEEK)) {
                /* Mark the byte consumed and drop the reference taken over above. */
                UNIXCB(oob_skb).consumed += 1;
                kfree_skb(oob_skb);
        }

        mutex_unlock(&u->iolock);

        if (chunk < 0)
                return -EFAULT;

        state->msg->msg_flags |= MSG_OOB;
        return 1;
}
2537
2538 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2539                                   int flags, int copied)
2540 {
2541         struct unix_sock *u = unix_sk(sk);
2542
2543         if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2544                 skb_unlink(skb, &sk->sk_receive_queue);
2545                 consume_skb(skb);
2546                 skb = NULL;
2547         } else {
2548                 if (skb == u->oob_skb) {
2549                         if (copied) {
2550                                 skb = NULL;
2551                         } else if (sock_flag(sk, SOCK_URGINLINE)) {
2552                                 if (!(flags & MSG_PEEK)) {
2553                                         WRITE_ONCE(u->oob_skb, NULL);
2554                                         consume_skb(skb);
2555                                 }
2556                         } else if (!(flags & MSG_PEEK)) {
2557                                 skb_unlink(skb, &sk->sk_receive_queue);
2558                                 consume_skb(skb);
2559                                 skb = skb_peek(&sk->sk_receive_queue);
2560                         }
2561                 }
2562         }
2563         return skb;
2564 }
2565 #endif
2566
2567 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2568                                  sk_read_actor_t recv_actor)
2569 {
2570         if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2571                 return -ENOTCONN;
2572
2573         return unix_read_sock(sk, desc, recv_actor);
2574 }
2575
/*
 * Common receive loop behind the stream recvmsg and splice-read entry
 * points.
 *
 * @state:     the read request: socket, flags, byte budget (->size) and
 *             the ->recv_actor callback that moves data out of each skb.
 * @freezable: forwarded unchanged to unix_stream_data_wait().
 *
 * Walks sk->sk_receive_queue with u->iolock held, handing each skb to
 * state->recv_actor until the byte budget is used up or one of the
 * break conditions in the loop is hit.
 *
 * Returns 'copied' when it is non-zero (a byte count, or -EFAULT if the
 * very first actor call failed); otherwise returns 'err' (0 or a
 * negative errno).
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
                                    bool freezable)
{
        struct scm_cookie scm;
        struct socket *sock = state->socket;
        struct sock *sk = sock->sk;
        struct unix_sock *u = unix_sk(sk);
        int copied = 0;
        int flags = state->flags;
        int noblock = flags & MSG_DONTWAIT;
        bool check_creds = false;
        int target;
        int err = 0;
        long timeo;
        int skip;
        size_t size = state->size;
        unsigned int last_len;

        if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
                err = -EINVAL;
                goto out;
        }

        /* MSG_OOB never enters the queue loop below: it is either served
         * by unix_stream_recv_urg() or rejected with -EOPNOTSUPP when
         * CONFIG_AF_UNIX_OOB is not enabled.
         */
        if (unlikely(flags & MSG_OOB)) {
                err = -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
                err = unix_stream_recv_urg(state);
#endif
                goto out;
        }

        target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
        timeo = sock_rcvtimeo(sk, noblock);

        memset(&scm, 0, sizeof(scm));

        /* Hold the I/O mutex so the queue cannot be disordered
         * while we sleep in memcpy_tomsg
         */
        mutex_lock(&u->iolock);

        /* Bytes to step over at the head of the queue; a negative peek
         * offset is clamped to 0.
         */
        skip = max(sk_peek_offset(sk, flags), 0);

        do {
                int chunk;
                bool drop_skb;
                struct sk_buff *skb, *last;

redo:
                unix_state_lock(sk);
                if (sock_flag(sk, SOCK_DEAD)) {
                        err = -ECONNRESET;
                        goto unlock;
                }
                /* Remember the queue head and its length; both are handed
                 * to unix_stream_data_wait() if we end up sleeping.
                 */
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
                if (skb) {
                        skb = manage_oob(skb, sk, flags, copied);
                        if (!skb) {
                                /* manage_oob() returned no skb: finish
                                 * with what we already copied, otherwise
                                 * look at the queue again.
                                 */
                                unix_state_unlock(sk);
                                if (copied)
                                        break;
                                goto redo;
                        }
                }
#endif
again:
                if (skb == NULL) {
                        if (copied >= target)
                                goto unlock;

                        /*
                         *      POSIX 1003.1g mandates this order.
                         */

                        err = sock_error(sk);
                        if (err)
                                goto unlock;
                        if (sk->sk_shutdown & RCV_SHUTDOWN)
                                goto unlock;

                        unix_state_unlock(sk);
                        if (!timeo) {
                                err = -EAGAIN;
                                break;
                        }

                        /* Drop the I/O mutex while waiting for data */
                        mutex_unlock(&u->iolock);

                        timeo = unix_stream_data_wait(sk, timeo, last,
                                                      last_len, freezable);

                        if (signal_pending(current)) {
                                /* iolock is not held here, so leave via
                                 * 'out' and release scm by hand instead of
                                 * going through the common exit below.
                                 */
                                err = sock_intr_errno(timeo);
                                scm_destroy(&scm);
                                goto out;
                        }

                        mutex_lock(&u->iolock);
                        goto redo;
unlock:
                        unix_state_unlock(sk);
                        break;
                }

                /* Step over skbs that lie entirely inside the skip region */
                while (skip >= unix_skb_len(skb)) {
                        skip -= unix_skb_len(skb);
                        last = skb;
                        last_len = skb->len;
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (!skb)
                                goto again;
                }

                unix_state_unlock(sk);

                if (check_creds) {
                        /* Never glue messages from different writers */
                        if (!unix_skb_scm_eq(skb, &scm))
                                break;
                } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
                        /* Copy credentials */
                        scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
                        unix_set_secdata(&scm, skb);
                        check_creds = true;
                }

                /* Copy address just once */
                /* NOTE(review): sunaddr looks like a block-local declared
                 * by DECLARE_SOCKADDR(); setting it to NULL does not touch
                 * state->msg->msg_name, so this branch may be re-entered
                 * on later iterations - confirm against the macro.
                 */
                if (state->msg && state->msg->msg_name) {
                        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
                                         state->msg->msg_name);
                        unix_copy_addr(state->msg, skb->sk);
                        sunaddr = NULL;
                }

                chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                /* Take an extra reference for the duration of the actor
                 * call; it is dropped by the consume_skb() right after.
                 */
                skb_get(skb);
                chunk = state->recv_actor(skb, skip, chunk, state);
                drop_skb = !unix_skb_len(skb);
                /* skb is only safe to use if !drop_skb */
                consume_skb(skb);
                if (chunk < 0) {
                        if (copied == 0)
                                copied = -EFAULT;
                        break;
                }
                copied += chunk;
                size -= chunk;

                if (drop_skb) {
                        /* the skb was touched by a concurrent reader;
                         * we should not expect anything from this skb
                         * anymore and assume it invalid - we can be
                         * sure it was dropped from the socket queue
                         *
                         * let's report a short read
                         */
                        err = 0;
                        break;
                }

                /* Mark read part of skb as used */
                if (!(flags & MSG_PEEK)) {
                        UNIXCB(skb).consumed += chunk;

                        sk_peek_offset_bwd(sk, chunk);

                        if (UNIXCB(skb).fp) {
                                scm_stat_del(sk, skb);
                                unix_detach_fds(&scm, skb);
                        }

                        /* skb still has unread data: stop here */
                        if (unix_skb_len(skb))
                                break;

                        /* Fully consumed: remove it from the queue */
                        skb_unlink(skb, &sk->sk_receive_queue);
                        consume_skb(skb);

                        /* Stop once file descriptors have been collected */
                        if (scm.fp)
                                break;
                } else {
                        /* It is questionable, see note in unix_dgram_recvmsg.
                         */
                        if (UNIXCB(skb).fp)
                                unix_peek_fds(&scm, skb);

                        sk_peek_offset_fwd(sk, chunk);

                        if (UNIXCB(skb).fp)
                                break;

                        /* Peeking: move on to the next skb without
                         * unlinking this one.
                         */
                        skip = 0;
                        last = skb;
                        last_len = skb->len;
                        unix_state_lock(sk);
                        skb = skb_peek_next(skb, &sk->sk_receive_queue);
                        if (skb)
                                goto again;
                        unix_state_unlock(sk);
                        break;
                }
        } while (size);

        mutex_unlock(&u->iolock);
        /* Hand scm to the caller's msghdr if there is one, else free it */
        if (state->msg)
                scm_recv(sock, state->msg, &scm, flags);
        else
                scm_destroy(&scm);
out:
        return copied ? : err;
}
2789
2790 static int unix_stream_read_actor(struct sk_buff *skb,
2791                                   int skip, int chunk,
2792                                   struct unix_stream_read_state *state)
2793 {
2794         int ret;
2795
2796         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2797                                     state->msg, chunk);
2798         return ret ?: chunk;
2799 }
2800
2801 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2802                           size_t size, int flags)
2803 {
2804         struct unix_stream_read_state state = {
2805                 .recv_actor = unix_stream_read_actor,
2806                 .socket = sk->sk_socket,
2807                 .msg = msg,
2808                 .size = size,
2809                 .flags = flags
2810         };
2811
2812         return unix_stream_read_generic(&state, true);
2813 }
2814
2815 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2816                                size_t size, int flags)
2817 {
2818         struct unix_stream_read_state state = {
2819                 .recv_actor = unix_stream_read_actor,
2820                 .socket = sock,
2821                 .msg = msg,
2822                 .size = size,
2823                 .flags = flags
2824         };
2825
2826 #ifdef CONFIG_BPF_SYSCALL
2827         struct sock *sk = sock->sk;
2828         const struct proto *prot = READ_ONCE(sk->sk_prot);
2829
2830         if (prot != &unix_stream_proto)
2831                 return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2832                                             flags & ~MSG_DONTWAIT, NULL);
2833 #endif
2834         return unix_stream_read_generic(&state, true);
2835 }
2836
2837 static int unix_stream_splice_actor(struct sk_buff *skb,
2838                                     int skip, int chunk,
2839                                     struct unix_stream_read_state *state)
2840 {
2841         return skb_splice_bits(skb, state->socket->sk,
2842                                UNIXCB(skb).consumed + skip,
2843                                state->pipe, chunk, state->splice_flags);
2844 }
2845
2846 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2847                                        struct pipe_inode_info *pipe,
2848                                        size_t size, unsigned int flags)
2849 {
2850         struct unix_stream_read_state state = {
2851                 .recv_actor = unix_stream_splice_actor,
2852                 .socket = sock,
2853                 .pipe = pipe,
2854                 .size = size,
2855                 .splice_flags = flags,
2856         };
2857
2858         if (unlikely(*ppos))
2859                 return -ESPIPE;
2860
2861         if (sock->file->f_flags & O_NONBLOCK ||
2862             flags & SPLICE_F_NONBLOCK)
2863                 state.flags = MSG_DONTWAIT;
2864
2865         return unix_stream_read_generic(&state, false);
2866 }
2867
2868 static int unix_shutdown(struct socket *sock, int mode)
2869 {
2870         struct sock *sk = sock->sk;
2871         struct sock *other;
2872
2873         if (mode < SHUT_RD || mode > SHUT_RDWR)
2874                 return -EINVAL;
2875         /* This maps:
2876          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2877          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2878          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2879          */
2880         ++mode;
2881
2882         unix_state_lock(sk);
2883         sk->sk_shutdown |= mode;
2884         other = unix_peer(sk);
2885         if (other)
2886                 sock_hold(other);
2887         unix_state_unlock(sk);
2888         sk->sk_state_change(sk);
2889
2890         if (other &&
2891                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2892
2893                 int peer_mode = 0;
2894                 const struct proto *prot = READ_ONCE(other->sk_prot);
2895
2896                 if (prot->unhash)
2897                         prot->unhash(other);
2898                 if (mode&RCV_SHUTDOWN)
2899                         peer_mode |= SEND_SHUTDOWN;
2900                 if (mode&SEND_SHUTDOWN)
2901                         peer_mode |= RCV_SHUTDOWN;
2902                 unix_state_lock(other);
2903                 other->sk_shutdown |= peer_mode;
2904                 unix_state_unlock(other);
2905                 other->sk_state_change(other);
2906                 if (peer_mode == SHUTDOWN_MASK)
2907                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2908                 else if (peer_mode & RCV_SHUTDOWN)
2909                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2910         }
2911         if (other)
2912                 sock_put(other);
2913
2914         return 0;
2915 }
2916
2917 long unix_inq_len(struct sock *sk)
2918 {
2919         struct sk_buff *skb;
2920         long amount = 0;
2921
2922         if (sk->sk_state == TCP_LISTEN)
2923                 return -EINVAL;
2924
2925         spin_lock(&sk->sk_receive_queue.lock);
2926         if (sk->sk_type == SOCK_STREAM ||
2927             sk->sk_type == SOCK_SEQPACKET) {
2928                 skb_queue_walk(&sk->sk_receive_queue, skb)
2929                         amount += unix_skb_len(skb);
2930         } else {
2931                 skb = skb_peek(&sk->sk_receive_queue);
2932                 if (skb)
2933                         amount = skb->len;
2934         }
2935         spin_unlock(&sk->sk_receive_queue.lock);
2936
2937         return amount;
2938 }
2939 EXPORT_SYMBOL_GPL(unix_inq_len);
2940
/*
 * Outgoing queue size of @sk, reported as whatever
 * sk_wmem_alloc_get() returns for the socket.
 */
long unix_outq_len(struct sock *sk)
{
        return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2946
2947 static int unix_open_file(struct sock *sk)
2948 {
2949         struct path path;
2950         struct file *f;
2951         int fd;
2952
2953         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2954                 return -EPERM;
2955
2956         if (!smp_load_acquire(&unix_sk(sk)->addr))
2957                 return -ENOENT;
2958
2959         path = unix_sk(sk)->path;
2960         if (!path.dentry)
2961                 return -ENOENT;
2962
2963         path_get(&path);
2964
2965         fd = get_unused_fd_flags(O_CLOEXEC);
2966         if (fd < 0)
2967                 goto out;
2968
2969         f = dentry_open(&path, O_PATH, current_cred());
2970         if (IS_ERR(f)) {
2971                 put_unused_fd(fd);
2972                 fd = PTR_ERR(f);
2973                 goto out;
2974         }
2975
2976         fd_install(fd, f);
2977 out:
2978         path_put(&path);
2979
2980         return fd;
2981 }
2982
2983 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2984 {
2985         struct sock *sk = sock->sk;
2986         long amount = 0;
2987         int err;
2988
2989         switch (cmd) {
2990         case SIOCOUTQ:
2991                 amount = unix_outq_len(sk);
2992                 err = put_user(amount, (int __user *)arg);
2993                 break;
2994         case SIOCINQ:
2995                 amount = unix_inq_len(sk);
2996                 if (amount < 0)
2997                         err = amount;
2998                 else
2999                         err = put_user(amount, (int __user *)arg);
3000                 break;
3001         case SIOCUNIXFILE:
3002                 err = unix_open_file(sk);
3003                 break;
3004 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3005         case SIOCATMARK:
3006                 {
3007                         struct sk_buff *skb;
3008                         int answ = 0;
3009
3010                         skb = skb_peek(&sk->sk_receive_queue);
3011                         if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3012                                 answ = 1;
3013                         err = put_user(answ, (int __user *)arg);
3014                 }
3015                 break;
3016 #endif
3017         default:
3018                 err = -ENOIOCTLCMD;
3019                 break;
3020         }
3021         return err;
3022 }
3023
3024 #ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: converts @arg with compat_ptr() and
 * forwards the call unchanged to unix_ioctl().
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
3029 #endif
3030
/*
 * poll handler that builds the event mask from the socket's own state
 * only: error, shutdown bits, receive queue, pending OOB skb, connection
 * state and unix_writable().
 *
 * Registers the caller on the socket wait queue via sock_poll_wait()
 * and returns the resulting EPOLL* mask.
 */
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
        struct sock *sk = sock->sk;
        __poll_t mask;

        sock_poll_wait(file, sock, wait);
        mask = 0;

        /* exceptional events? */
        if (sk->sk_err)
                mask |= EPOLLERR;
        if (sk->sk_shutdown == SHUTDOWN_MASK)
                mask |= EPOLLHUP;
        /* a shut-down receive side is also reported as readable */
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

        /* readable? */
        if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
        if (sk_is_readable(sk))
                mask |= EPOLLIN | EPOLLRDNORM;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
        /* a pending out-of-band skb is signalled as priority data */
        if (READ_ONCE(unix_sk(sk)->oob_skb))
                mask |= EPOLLPRI;
#endif

        /* Connection-based need to check for termination and startup */
        if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
            sk->sk_state == TCP_CLOSE)
                mask |= EPOLLHUP;

        /*
         * we set writable also when the other side has shut down the
         * connection. This prevents stuck sockets.
         */
        if (unix_writable(sk))
                mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

        return mask;
}
3071
/*
 * poll handler that, unlike unix_poll(), also takes the peer's receive
 * queue into account when deciding whether the socket is writable.
 *
 * Readable/exceptional bits are computed from the socket itself.  The
 * writable test is skipped entirely when the caller did not request any
 * write event, and also for a SOCK_SEQPACKET socket that is still in
 * TCP_SYN_SENT state.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
                                    poll_table *wait)
{
        struct sock *sk = sock->sk, *other;
        unsigned int writable;
        __poll_t mask;

        sock_poll_wait(file, sock, wait);
        mask = 0;

        /* exceptional events? */
        if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
                mask |= EPOLLERR |
                        (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
        if (sk->sk_shutdown == SHUTDOWN_MASK)
                mask |= EPOLLHUP;

        /* readable? */
        if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
                mask |= EPOLLIN | EPOLLRDNORM;
        if (sk_is_readable(sk))
                mask |= EPOLLIN | EPOLLRDNORM;

        /* Connection-based need to check for termination and startup */
        if (sk->sk_type == SOCK_SEQPACKET) {
                if (sk->sk_state == TCP_CLOSE)
                        mask |= EPOLLHUP;
                /* connection hasn't started yet? */
                if (sk->sk_state == TCP_SYN_SENT)
                        return mask;
        }

        /* No write status requested, avoid expensive OUT tests. */
        if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
                return mask;

        writable = unix_writable(sk);
        if (writable) {
                unix_state_lock(sk);

                /*
                 * Not writable if we have a peer that is not connected
                 * back to us, its receive queue is full, and
                 * unix_dgram_peer_wake_me() returns non-zero.
                 */
                other = unix_peer(sk);
                if (other && unix_peer(other) != sk &&
                    unix_recvq_full_lockless(other) &&
                    unix_dgram_peer_wake_me(sk, other))
                        writable = 0;

                unix_state_unlock(sk);
        }

        if (writable)
                mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
        else
                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

        return mask;
}
3131
3132 #ifdef CONFIG_PROC_FS
3133
/*
 * The seq_file position packs a hash bucket index and an offset inside
 * that bucket into a single value: the low BUCKET_SPACE bits hold the
 * offset, the bits above them hold the bucket number.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket b and offset o */
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3139
3140 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3141 {
3142         unsigned long offset = get_offset(*pos);
3143         unsigned long bucket = get_bucket(*pos);
3144         struct sock *sk;
3145         unsigned long count = 0;
3146
3147         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3148                 if (sock_net(sk) != seq_file_net(seq))
3149                         continue;
3150                 if (++count == offset)
3151                         break;
3152         }
3153
3154         return sk;
3155 }
3156
/*
 * Advance the /proc iterator.
 *
 * @sk is the previously returned element (NULL or SEQ_START_TOKEN when
 * there is none) and *pos the packed bucket/offset position.
 *
 * Returns the next socket belonging to the seq_file's network
 * namespace, or NULL once every bucket of unix_socket_table has been
 * visited.  *pos is moved to offset 1 of the following bucket each time
 * a bucket is exhausted.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
                                     struct sock *sk,
                                     loff_t *pos)
{
        unsigned long bucket;

        /*
         * Continue along the current chain.  The comparison is false for
         * NULL and for SEQ_START_TOKEN itself, so in those cases we go
         * straight to the position-based lookup below.
         */
        while (sk > (struct sock *)SEQ_START_TOKEN) {
                sk = sk_next(sk);
                if (!sk)
                        goto next_bucket;
                if (sock_net(sk) == seq_file_net(seq))
                        return sk;
        }

        do {
                sk = unix_from_bucket(seq, pos);
                if (sk)
                        return sk;

                /* also entered from the chain walk above when it runs out */
next_bucket:
                bucket = get_bucket(*pos) + 1;
                *pos = set_bucket_offset(bucket, 1);
        } while (bucket < ARRAY_SIZE(unix_socket_table));

        return NULL;
}
3183
/*
 * seq_file ->start: takes unix_table_lock (on every return path, it is
 * released again in unix_seq_stop()) and returns the element at *pos.
 *
 * Position 0 yields SEQ_START_TOKEN; a position whose bucket index is
 * past the end of unix_socket_table yields NULL.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(unix_table_lock)
{
        spin_lock(&unix_table_lock);

        if (!*pos)
                return SEQ_START_TOKEN;

        if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
                return NULL;

        return unix_next_socket(seq, NULL, pos);
}
3197
/*
 * seq_file ->next: bump the packed position by one and let
 * unix_next_socket() find the element following @v.
 */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        ++*pos;
        return unix_next_socket(seq, v, pos);
}
3203
/* seq_file ->stop: drop unix_table_lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
        __releases(unix_table_lock)
{
        spin_unlock(&unix_table_lock);
}
3209
3210 static int unix_seq_show(struct seq_file *seq, void *v)
3211 {
3212
3213         if (v == SEQ_START_TOKEN)
3214                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3215                          "Inode Path\n");
3216         else {
3217                 struct sock *s = v;
3218                 struct unix_sock *u = unix_sk(s);
3219                 unix_state_lock(s);
3220
3221                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3222                         s,
3223                         refcount_read(&s->sk_refcnt),
3224                         0,
3225                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3226                         s->sk_type,
3227                         s->sk_socket ?
3228                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3229                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3230                         sock_i_ino(s));
3231
3232                 if (u->addr) {  // under unix_table_lock here
3233                         int i, len;
3234                         seq_putc(seq, ' ');
3235
3236                         i = 0;
3237                         len = u->addr->len - sizeof(short);
3238                         if (!UNIX_ABSTRACT(s))
3239                                 len--;
3240                         else {
3241                                 seq_putc(seq, '@');
3242                                 i++;
3243                         }
3244                         for ( ; i < len; i++)
3245                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
3246                                          '@');
3247                 }
3248                 unix_state_unlock(s);
3249                 seq_putc(seq, '\n');
3250         }
3251
3252         return 0;
3253 }
3254
/* seq_file operations used for the "unix" proc entry. */
static const struct seq_operations unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = unix_seq_stop,
        .show   = unix_seq_show,
};
3261
3262 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
/*
 * Context handed to a BPF iterator program for each element:
 * the iterator metadata, the socket being visited and a uid value
 * supplied by the caller of unix_prog_seq_show().
 */
struct bpf_iter__unix {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct unix_sock *, unix_sk);
        uid_t uid __aligned(8);
};
3268
/*
 * Fill a struct bpf_iter__unix from the arguments and run @prog on it.
 * Returns whatever bpf_iter_run_prog() returns.
 */
static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
                              struct unix_sock *unix_sk, uid_t uid)
{
        struct bpf_iter__unix ctx;

        meta->seq_num--;  /* skip SEQ_START_TOKEN */
        ctx.meta = meta;
        ctx.unix_sk = unix_sk;
        ctx.uid = uid;
        return bpf_iter_run_prog(prog, &ctx);
}
3280
/*
 * seq_file ->show for the BPF iterator: SEQ_START_TOKEN is skipped
 * (returns 0); for a socket, the owning uid is translated into the
 * seq_file's user namespace and the iterator program is run on it.
 */
static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
{
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;
        struct sock *sk = v;
        uid_t uid;

        if (v == SEQ_START_TOKEN)
                return 0;

        uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, false);
        return unix_prog_seq_show(prog, &meta, v, uid);
}
3296
/*
 * seq_file ->stop for the BPF iterator.
 *
 * When called with a NULL element, the program (if one is returned by
 * bpf_iter_get_info()) is run one more time with a NULL socket and uid
 * 0; its return value is ignored.  The table lock is then released via
 * unix_seq_stop() in all cases.
 */
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;

        if (!v) {
                meta.seq = seq;
                prog = bpf_iter_get_info(&meta, true);
                if (prog)
                        (void)unix_prog_seq_show(prog, &meta, v, 0);
        }

        unix_seq_stop(seq, v);
}
3311
/*
 * seq_file operations for the BPF iterator: same ->start/->next as the
 * proc file, with BPF-specific ->stop and ->show.
 */
static const struct seq_operations bpf_iter_unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = bpf_iter_unix_seq_stop,
        .show   = bpf_iter_unix_seq_show,
};
3318 #endif
3319 #endif
3320
/* Protocol family descriptor: PF_UNIX sockets are created by unix_create(). */
static const struct net_proto_family unix_family_ops = {
        .family = PF_UNIX,
        .create = unix_create,
        .owner  = THIS_MODULE,
};
3326
3327
3328 static int __net_init unix_net_init(struct net *net)
3329 {
3330         int error = -ENOMEM;
3331
3332         net->unx.sysctl_max_dgram_qlen = 10;
3333         if (unix_sysctl_register(net))
3334                 goto out;
3335
3336 #ifdef CONFIG_PROC_FS
3337         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3338                         sizeof(struct seq_net_private))) {
3339                 unix_sysctl_unregister(net);
3340                 goto out;
3341         }
3342 #endif
3343         error = 0;
3344 out:
3345         return error;
3346 }
3347
/*
 * Per-network-namespace teardown: unregister the sysctl table and
 * remove the "unix" proc entry.
 */
static void __net_exit unix_net_exit(struct net *net)
{
        unix_sysctl_unregister(net);
        remove_proc_entry("unix", net->proc_net);
}
3353
/* Per-netns hooks, registered from af_unix_init(). */
static struct pernet_operations unix_net_ops = {
        .init = unix_net_init,
        .exit = unix_net_exit,
};
3358
3359 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/*
 * Declares the "unix" BPF iterator entry; the argument list mirrors the
 * fields of struct bpf_iter__unix.
 */
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
                     struct unix_sock *unix_sk, uid_t uid)
3362
/*
 * seq_file description for the BPF iterator: which operations to use
 * and how to set up / tear down the per-netns private data.
 */
static const struct bpf_iter_seq_info unix_seq_info = {
        .seq_ops                = &bpf_iter_unix_seq_ops,
        .init_seq_private       = bpf_iter_init_seq_net,
        .fini_seq_private       = bpf_iter_fini_seq_net,
        .seq_priv_size          = sizeof(struct seq_net_private),
};
3369
/*
 * Registration record for the "unix" BPF iterator target.  The single
 * ctx_arg_info entry describes the unix_sk field of the context; its
 * btf_id is filled in at runtime by bpf_iter_register().
 */
static struct bpf_iter_reg unix_reg_info = {
        .target                 = "unix",
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__unix, unix_sk),
                  PTR_TO_BTF_ID_OR_NULL },
        },
        .seq_info               = &unix_seq_info,
};
3379
/*
 * Fill in the BTF id of the unix socket type and register the "unix"
 * iterator target.  A registration failure is only logged.
 */
static void __init bpf_iter_register(void)
{
        unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
        if (bpf_iter_reg_target(&unix_reg_info))
                pr_warn("Warning: could not register bpf iterator unix\n");
}
3386 #endif
3387
3388 static int __init af_unix_init(void)
3389 {
3390         int rc = -1;
3391
3392         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3393
3394         rc = proto_register(&unix_dgram_proto, 1);
3395         if (rc != 0) {
3396                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3397                 goto out;
3398         }
3399
3400         rc = proto_register(&unix_stream_proto, 1);
3401         if (rc != 0) {
3402                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3403                 goto out;
3404         }
3405
3406         sock_register(&unix_family_ops);
3407         register_pernet_subsys(&unix_net_ops);
3408         unix_bpf_build_proto();
3409
3410 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3411         bpf_iter_register();
3412 #endif
3413
3414 out:
3415         return rc;
3416 }
3417
/*
 * Module unload: unregister the PF_UNIX family, both protos and the
 * per-netns operations that af_unix_init() registered.
 */
static void __exit af_unix_exit(void)
{
        sock_unregister(PF_UNIX);
        proto_unregister(&unix_dgram_proto);
        proto_unregister(&unix_stream_proto);
        unregister_pernet_subsys(&unix_net_ops);
}
3425
3426 /* Earlier than device_initcall() so that other drivers invoking
3427    request_module() don't end up in a loop when modprobe tries
3428    to use a UNIX socket. But later than subsys_initcall() because
3429    we depend on stuff initialised there */
3430 fs_initcall(af_unix_init);
3431 module_exit(af_unix_exit);
3432
3433 MODULE_LICENSE("GPL");
3434 MODULE_ALIAS_NETPROTO(PF_UNIX);