net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
  32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
  33  *                                      by above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
  36  *                                      has been reached. This won't break
  37  *                                      old apps and it will avoid a huge number
  38  *                                      of socks being hashed (this is for
  39  *                                      unix_gc() performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
/*
 * Global hash table of unix sockets. It holds 2 * UNIX_HASH_SIZE buckets;
 * unix_sockets_unbound() hands out buckets from the upper half (index
 * UNIX_HASH_SIZE and above).
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock taken around lookups, insertions and removals on the table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Number of unix sockets accounted for: incremented in unix_create1() and
 * decremented in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
 127
 128
 129 static struct hlist_head *unix_sockets_unbound(void *addr)
 130 {
 131         unsigned long hash = (unsigned long)addr;
 132
 133         hash ^= hash >> 16;
 134         hash ^= hash >> 8;
 135         hash %= UNIX_HASH_SIZE;
 136         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137 }
 138
/*
 * True when the address attached to @sk carries a hash below UNIX_HASH_SIZE.
 * The macro dereferences unix_sk(sk)->addr unconditionally, so it must only
 * be used on a socket that already has an address.
 * NOTE(review): presumably only abstract-namespace names receive such a hash,
 * hence the macro name - confirm against the bind paths.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141 #ifdef CONFIG_SECURITY_NETWORK
 142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 {
 144         UNIXCB(skb).secid = scm->secid;
 145 }
 146
 147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148 {
 149         scm->secid = UNIXCB(skb).secid;
 150 }
 151
 152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153 {
 154         return (scm->secid == UNIXCB(skb).secid);
 155 }
 156 #else
 157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161 { }
 162
 163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164 {
 165         return true;
 166 }
 167 #endif /* CONFIG_SECURITY_NETWORK */
 168
 169 /*
 170  *  SMP locking strategy:
 171  *    hash table is protected with spinlock unix_table_lock
 172  *    each socket state is protected by separate spin lock.
 173  */
 174
 175 static inline unsigned int unix_hash_fold(__wsum n)
 176 {
 177         unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179         hash ^= hash>>8;
 180         return hash&(UNIX_HASH_SIZE-1);
 181 }
 182
/*
 * Peer socket pointer of @sk. The macro expands to the struct member itself,
 * so it is usable both as a value and as an assignment target (see
 * unix_release_sock(), which assigns NULL through it).
 */
#define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186 {
 187         return unix_peer(osk) == sk;
 188 }
 189
 190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191 {
 192         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193 }
 194
 195 static inline int unix_recvq_full(struct sock const *sk)
 196 {
 197         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198 }
 199
 200 struct sock *unix_peer_get(struct sock *s)
 201 {
 202         struct sock *peer;
 203
 204         unix_state_lock(s);
 205         peer = unix_peer(s);
 206         if (peer)
 207                 sock_hold(peer);
 208         unix_state_unlock(s);
 209         return peer;
 210 }
 211 EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213 static inline void unix_release_addr(struct unix_address *addr)
 214 {
 215         if (atomic_dec_and_test(&addr->refcnt))
 216                 kfree(addr);
 217 }
 218
 219 /*
 220  *      Check unix socket name:
 221  *              - should be not zero length.
 222  *              - if started by not zero, should be NULL terminated (FS object)
 223  *              - if started by zero, it is abstract name.
 224  */
 225
 226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227 {
 228         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                 return -EINVAL;
 230         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                 return -EINVAL;
 232         if (sunaddr->sun_path[0]) {
 233                 /*
 234                  * This may look like an off by one error but it is a bit more
 235                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                  * sun_path[108] doesn't as such exist.  However in kernel space
 237                  * we are guaranteed that it is a valid memory location in our
 238                  * kernel address buffer.
 239                  */
 240                 ((char *)sunaddr)[len] = 0;
 241                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                 return len;
 243         }
 244
 245         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246         return len;
 247 }
 248
/*
 * Unlink @sk from its hash chain with sk_del_node_init(). This variant takes
 * no lock itself; unix_remove_socket() is the wrapper that holds
 * unix_table_lock around it.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
 253
/*
 * Add @sk to the hash chain @list. Warns if @sk is still hashed somewhere.
 * This variant takes no lock itself; unix_insert_socket() is the wrapper
 * that holds unix_table_lock around it.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}
 259
/* Unlink @sk from the socket table while holding unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
 266
/* Add @sk to the hash chain @list while holding unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
 273
 274 static struct sock *__unix_find_socket_byname(struct net *net,
 275                                               struct sockaddr_un *sunname,
 276                                               int len, int type, unsigned int hash)
 277 {
 278         struct sock *s;
 279
 280         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                 struct unix_sock *u = unix_sk(s);
 282
 283                 if (!net_eq(sock_net(s), net))
 284                         continue;
 285
 286                 if (u->addr->len == len &&
 287                     !memcmp(u->addr->name, sunname, len))
 288                         goto found;
 289         }
 290         s = NULL;
 291 found:
 292         return s;
 293 }
 294
 295 static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                    struct sockaddr_un *sunname,
 297                                                    int len, int type,
 298                                                    unsigned int hash)
 299 {
 300         struct sock *s;
 301
 302         spin_lock(&unix_table_lock);
 303         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304         if (s)
 305                 sock_hold(s);
 306         spin_unlock(&unix_table_lock);
 307         return s;
 308 }
 309
 310 static struct sock *unix_find_socket_byinode(struct inode *i)
 311 {
 312         struct sock *s;
 313
 314         spin_lock(&unix_table_lock);
 315         sk_for_each(s,
 316                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                 struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                 if (dentry && d_backing_inode(dentry) == i) {
 320                         sock_hold(s);
 321                         goto found;
 322                 }
 323         }
 324         s = NULL;
 325 found:
 326         spin_unlock(&unix_table_lock);
 327         return s;
 328 }
 329
 330 /* Support code for asymmetrically connected dgram sockets
 331  *
 332  * If a datagram socket is connected to a socket not itself connected
 333  * to the first socket (eg, /dev/log), clients may only enqueue more
 334  * messages if the present receive queue of the server socket is not
 335  * "too large". This means there's a second writeability condition
 336  * poll and sendmsg need to test. The dgram recv code will do a wake
 337  * up on the peer_wait wait queue of a socket upon reception of a
 338  * datagram which needs to be propagated to sleeping would-be writers
 339  * since these might not have sent anything so far. This can't be
 340  * accomplished via poll_wait because the lifetime of the server
 341  * socket might be less than that of its clients if these break their
 342  * association with it or if the server socket is closed while clients
 343  * are still connected to it and there's no way to inform "a polling
 344  * implementation" that it should let go of a certain wait queue
 345  *
 346  * In order to propagate a wake up, a wait_queue_t of the client
 347  * socket is enqueued on the peer_wait queue of the server socket
 348  * whose wake function does a wake_up on the ordinary client socket
 349  * wait queue. This connection is established whenever a write (or
 350  * poll for write) hit the flow control condition and broken when the
 351  * association to the server socket is dissolved or after a wake up
 352  * was relayed.
 353  */
 354
 355 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
 356                                       void *key)
 357 {
 358         struct unix_sock *u;
 359         wait_queue_head_t *u_sleep;
 360
 361         u = container_of(q, struct unix_sock, peer_wake);
 362
 363         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                             q);
 365         u->peer_wake.private = NULL;
 366
 367         /* relaying can only happen while the wq still exists */
 368         u_sleep = sk_sleep(&u->sk);
 369         if (u_sleep)
 370                 wake_up_interruptible_poll(u_sleep, key);
 371
 372         return 0;
 373 }
 374
 375 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376 {
 377         struct unix_sock *u, *u_other;
 378         int rc;
 379
 380         u = unix_sk(sk);
 381         u_other = unix_sk(other);
 382         rc = 0;
 383         spin_lock(&u_other->peer_wait.lock);
 384
 385         if (!u->peer_wake.private) {
 386                 u->peer_wake.private = other;
 387                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                 rc = 1;
 390         }
 391
 392         spin_unlock(&u_other->peer_wait.lock);
 393         return rc;
 394 }
 395
 396 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                             struct sock *other)
 398 {
 399         struct unix_sock *u, *u_other;
 400
 401         u = unix_sk(sk);
 402         u_other = unix_sk(other);
 403         spin_lock(&u_other->peer_wait.lock);
 404
 405         if (u->peer_wake.private == other) {
 406                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                 u->peer_wake.private = NULL;
 408         }
 409
 410         spin_unlock(&u_other->peer_wait.lock);
 411 }
 412
 413 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                    struct sock *other)
 415 {
 416         unix_dgram_peer_wake_disconnect(sk, other);
 417         wake_up_interruptible_poll(sk_sleep(sk),
 418                                    POLLOUT |
 419                                    POLLWRNORM |
 420                                    POLLWRBAND);
 421 }
 422
 423 /* preconditions:
 424  *      - unix_peer(sk) == other
 425  *      - association is stable
 426  */
 427 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428 {
 429         int connected;
 430
 431         connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433         if (unix_recvq_full(other))
 434                 return 1;
 435
 436         if (connected)
 437                 unix_dgram_peer_wake_disconnect(sk, other);
 438
 439         return 0;
 440 }
 441
 442 static int unix_writable(const struct sock *sk)
 443 {
 444         return sk->sk_state != TCP_LISTEN &&
 445                (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 446 }
 447
 448 static void unix_write_space(struct sock *sk)
 449 {
 450         struct socket_wq *wq;
 451
 452         rcu_read_lock();
 453         if (unix_writable(sk)) {
 454                 wq = rcu_dereference(sk->sk_wq);
 455                 if (skwq_has_sleeper(wq))
 456                         wake_up_interruptible_sync_poll(&wq->wait,
 457                                 POLLOUT | POLLWRNORM | POLLWRBAND);
 458                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 459         }
 460         rcu_read_unlock();
 461 }
 462
 463 /* When dgram socket disconnects (or changes its peer), we clear its receive
 464  * queue of packets arrived from previous peer. First, it allows to do
 465  * flow control based only on wmem_alloc; second, sk connected to peer
 466  * may receive messages only from that peer. */
 467 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 468 {
 469         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 470                 skb_queue_purge(&sk->sk_receive_queue);
 471                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 472
 473                 /* If one link of bidirectional dgram pipe is disconnected,
 474                  * we signal error. Messages are lost. Do not make this,
 475                  * when peer was not connected to us.
 476                  */
 477                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 478                         other->sk_err = ECONNRESET;
 479                         other->sk_error_report(other);
 480                 }
 481         }
 482 }
 483
 484 static void unix_sock_destructor(struct sock *sk)
 485 {
 486         struct unix_sock *u = unix_sk(sk);
 487
 488         skb_queue_purge(&sk->sk_receive_queue);
 489
 490         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 491         WARN_ON(!sk_unhashed(sk));
 492         WARN_ON(sk->sk_socket);
 493         if (!sock_flag(sk, SOCK_DEAD)) {
 494                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 495                 return;
 496         }
 497
 498         if (u->addr)
 499                 unix_release_addr(u->addr);
 500
 501         atomic_long_dec(&unix_nr_socks);
 502         local_bh_disable();
 503         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 504         local_bh_enable();
 505 #ifdef UNIX_REFCNT_DEBUG
 506         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 507                 atomic_long_read(&unix_nr_socks));
 508 #endif
 509 }
 510
/*
 * Tear down the unix socket @sk.
 *
 * @embrion is non-zero when @sk was found queued on a listening socket that
 * is itself being released (see the recursive call below); unix_release()
 * passes 0. For stream/seqpacket sockets a non-zero @embrion makes the peer
 * see ECONNRESET even when @sk has no unread data.
 *
 * Steps, in order: unhash the socket, orphan it and mark it closed under its
 * state lock, wake peer_wait sleepers, shut down and detach the peer, drain
 * the receive queue, drop the path reference and finally drop the caller's
 * socket reference. unix_gc() is run afterwards when unix_tot_inflight is
 * non-zero.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        /* Take over the path; it is released after the lock is dropped. */
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        /* Remember the old state: a listener's queue needs special draining. */
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* Unread data or an embryo socket: report a reset. */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /*
                 * For a listening socket each queued skb refers to a socket
                 * through skb->sk; release that one too, as an embryo.
                 */
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
 584
 585 static void init_peercred(struct sock *sk)
 586 {
 587         put_pid(sk->sk_peer_pid);
 588         if (sk->sk_peer_cred)
 589                 put_cred(sk->sk_peer_cred);
 590         sk->sk_peer_pid  = get_pid(task_tgid(current));
 591         sk->sk_peer_cred = get_current_cred();
 592 }
 593
 594 static void copy_peercred(struct sock *sk, struct sock *peersk)
 595 {
 596         put_pid(sk->sk_peer_pid);
 597         if (sk->sk_peer_cred)
 598                 put_cred(sk->sk_peer_cred);
 599         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 600         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 601 }
 602
 603 static int unix_listen(struct socket *sock, int backlog)
 604 {
 605         int err;
 606         struct sock *sk = sock->sk;
 607         struct unix_sock *u = unix_sk(sk);
 608         struct pid *old_pid = NULL;
 609
 610         err = -EOPNOTSUPP;
 611         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 612                 goto out;       /* Only stream/seqpacket sockets accept */
 613         err = -EINVAL;
 614         if (!u->addr)
 615                 goto out;       /* No listens on an unbound socket */
 616         unix_state_lock(sk);
 617         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 618                 goto out_unlock;
 619         if (backlog > sk->sk_max_ack_backlog)
 620                 wake_up_interruptible_all(&u->peer_wait);
 621         sk->sk_max_ack_backlog  = backlog;
 622         sk->sk_state            = TCP_LISTEN;
 623         /* set credentials so connect can copy them */
 624         init_peercred(sk);
 625         err = 0;
 626
 627 out_unlock:
 628         unix_state_unlock(sk);
 629         put_pid(old_pid);
 630 out:
 631         return err;
 632 }
 633
/*
 * Forward declarations of the socket operations referenced by the proto_ops
 * tables below. unix_listen() and unix_set_peek_off() are defined ahead of
 * the tables and therefore need no declaration here.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
 660
 661 static int unix_set_peek_off(struct sock *sk, int val)
 662 {
 663         struct unix_sock *u = unix_sk(sk);
 664
 665         if (mutex_lock_interruptible(&u->iolock))
 666                 return -EINTR;
 667
 668         sk->sk_peek_off = val;
 669         mutex_unlock(&u->iolock);
 670
 671         return 0;
 672 }
 673
 674
/*
 * Socket operations for SOCK_STREAM sockets (selected in unix_create()).
 * This is the only one of the three tables that provides its own sendpage
 * and a splice_read handler, and the only one that polls with unix_poll().
 */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};
 697
/*
 * Socket operations for SOCK_DGRAM sockets (selected in unix_create(), which
 * also maps SOCK_RAW onto this table). Datagram sockets use the sock_no_*
 * stubs for accept and listen.
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 719
/*
 * Socket operations for SOCK_SEQPACKET sockets (selected in unix_create()).
 * Connection handling (connect, accept, listen) is shared with the stream
 * table, polling is shared with the datagram table, and sendmsg/recvmsg have
 * seqpacket-specific handlers.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 741
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(). Its obj_size
 * is sizeof(struct unix_sock).
 */
static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};
 747
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key
 * (done with lockdep_set_class() in unix_create1()):
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 755
 756 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 757 {
 758         struct sock *sk = NULL;
 759         struct unix_sock *u;
 760
 761         atomic_long_inc(&unix_nr_socks);
 762         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 763                 goto out;
 764
 765         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 766         if (!sk)
 767                 goto out;
 768
 769         sock_init_data(sock, sk);
 770         lockdep_set_class(&sk->sk_receive_queue.lock,
 771                                 &af_unix_sk_receive_queue_lock_key);
 772
 773         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 774         sk->sk_write_space      = unix_write_space;
 775         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 776         sk->sk_destruct         = unix_sock_destructor;
 777         u         = unix_sk(sk);
 778         u->path.dentry = NULL;
 779         u->path.mnt = NULL;
 780         spin_lock_init(&u->lock);
 781         atomic_long_set(&u->inflight, 0);
 782         INIT_LIST_HEAD(&u->link);
 783         mutex_init(&u->iolock); /* single task reading lock */
 784         mutex_init(&u->bindlock); /* single task binding lock */
 785         init_waitqueue_head(&u->peer_wait);
 786         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 787         unix_insert_socket(unix_sockets_unbound(sk), sk);
 788 out:
 789         if (sk == NULL)
 790                 atomic_long_dec(&unix_nr_socks);
 791         else {
 792                 local_bh_disable();
 793                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 794                 local_bh_enable();
 795         }
 796         return sk;
 797 }
 798
 799 static int unix_create(struct net *net, struct socket *sock, int protocol,
 800                        int kern)
 801 {
 802         if (protocol && protocol != PF_UNIX)
 803                 return -EPROTONOSUPPORT;
 804
 805         sock->state = SS_UNCONNECTED;
 806
 807         switch (sock->type) {
 808         case SOCK_STREAM:
 809                 sock->ops = &unix_stream_ops;
 810                 break;
 811                 /*
 812                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 813                  *      nothing uses it.
 814                  */
 815         case SOCK_RAW:
 816                 sock->type = SOCK_DGRAM;
 817         case SOCK_DGRAM:
 818                 sock->ops = &unix_dgram_ops;
 819                 break;
 820         case SOCK_SEQPACKET:
 821                 sock->ops = &unix_seqpacket_ops;
 822                 break;
 823         default:
 824                 return -ESOCKTNOSUPPORT;
 825         }
 826
 827         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 828 }
 829
 830 static int unix_release(struct socket *sock)
 831 {
 832         struct sock *sk = sock->sk;
 833
 834         if (!sk)
 835                 return 0;
 836
 837         unix_release_sock(sk, 0);
 838         sock->sk = NULL;
 839
 840         return 0;
 841 }
 842
 843 static int unix_autobind(struct socket *sock)
 844 {
 845         struct sock *sk = sock->sk;
 846         struct net *net = sock_net(sk);
 847         struct unix_sock *u = unix_sk(sk);
 848         static u32 ordernum = 1;
 849         struct unix_address *addr;
 850         int err;
 851         unsigned int retries = 0;
 852
 853         err = mutex_lock_interruptible(&u->bindlock);
 854         if (err)
 855                 return err;
 856
 857         err = 0;
 858         if (u->addr)
 859                 goto out;
 860
 861         err = -ENOMEM;
 862         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 863         if (!addr)
 864                 goto out;
 865
 866         addr->name->sun_family = AF_UNIX;
 867         atomic_set(&addr->refcnt, 1);
 868
 869 retry:
 870         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 871         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 872
 873         spin_lock(&unix_table_lock);
 874         ordernum = (ordernum+1)&0xFFFFF;
 875
 876         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 877                                       addr->hash)) {
 878                 spin_unlock(&unix_table_lock);
 879                 /*
 880                  * __unix_find_socket_byname() may take long time if many names
 881                  * are already in use.
 882                  */
 883                 cond_resched();
 884                 /* Give up if all names seems to be in use. */
 885                 if (retries++ == 0xFFFFF) {
 886                         err = -ENOSPC;
 887                         kfree(addr);
 888                         goto out;
 889                 }
 890                 goto retry;
 891         }
 892         addr->hash ^= sk->sk_type;
 893
 894         __unix_remove_socket(sk);
 895         u->addr = addr;
 896         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 897         spin_unlock(&unix_table_lock);
 898         err = 0;
 899
 900 out:    mutex_unlock(&u->bindlock);
 901         return err;
 902 }
 903
 904 static struct sock *unix_find_other(struct net *net,
 905                                     struct sockaddr_un *sunname, int len,
 906                                     int type, unsigned int hash, int *error)
 907 {
 908         struct sock *u;
 909         struct path path;
 910         int err = 0;
 911
 912         if (sunname->sun_path[0]) {
 913                 struct inode *inode;
 914                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
 915                 if (err)
 916                         goto fail;
 917                 inode = d_backing_inode(path.dentry);
 918                 err = inode_permission(inode, MAY_WRITE);
 919                 if (err)
 920                         goto put_fail;
 921
 922                 err = -ECONNREFUSED;
 923                 if (!S_ISSOCK(inode->i_mode))
 924                         goto put_fail;
 925                 u = unix_find_socket_byinode(inode);
 926                 if (!u)
 927                         goto put_fail;
 928
 929                 if (u->sk_type == type)
 930                         touch_atime(&path);
 931
 932                 path_put(&path);
 933
 934                 err = -EPROTOTYPE;
 935                 if (u->sk_type != type) {
 936                         sock_put(u);
 937                         goto fail;
 938                 }
 939         } else {
 940                 err = -ECONNREFUSED;
 941                 u = unix_find_socket_byname(net, sunname, len, type, hash);
 942                 if (u) {
 943                         struct dentry *dentry;
 944                         dentry = unix_sk(u)->path.dentry;
 945                         if (dentry)
 946                                 touch_atime(&unix_sk(u)->path);
 947                 } else
 948                         goto fail;
 949         }
 950         return u;
 951
 952 put_fail:
 953         path_put(&path);
 954 fail:
 955         *error = err;
 956         return NULL;
 957 }
 958
 959 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 960 {
 961         struct dentry *dentry;
 962         struct path path;
 963         int err = 0;
 964         /*
 965          * Get the parent directory, calculate the hash for last
 966          * component.
 967          */
 968         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 969         err = PTR_ERR(dentry);
 970         if (IS_ERR(dentry))
 971                 return err;
 972
 973         /*
 974          * All right, let's create it.
 975          */
 976         err = security_path_mknod(&path, dentry, mode, 0);
 977         if (!err) {
 978                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 979                 if (!err) {
 980                         res->mnt = mntget(path.mnt);
 981                         res->dentry = dget(dentry);
 982                 }
 983         }
 984         done_path_create(&path, dentry);
 985         return err;
 986 }
 987
 988 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 989 {
 990         struct sock *sk = sock->sk;
 991         struct net *net = sock_net(sk);
 992         struct unix_sock *u = unix_sk(sk);
 993         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 994         char *sun_path = sunaddr->sun_path;
 995         int err;
 996         unsigned int hash;
 997         struct unix_address *addr;
 998         struct hlist_head *list;
 999         struct path path = { NULL, NULL };
1000
1001         err = -EINVAL;
1002         if (sunaddr->sun_family != AF_UNIX)
1003                 goto out;
1004
1005         if (addr_len == sizeof(short)) {
1006                 err = unix_autobind(sock);
1007                 goto out;
1008         }
1009
1010         err = unix_mkname(sunaddr, addr_len, &hash);
1011         if (err < 0)
1012                 goto out;
1013         addr_len = err;
1014
1015         if (sun_path[0]) {
1016                 umode_t mode = S_IFSOCK |
1017                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1018                 err = unix_mknod(sun_path, mode, &path);
1019                 if (err) {
1020                         if (err == -EEXIST)
1021                                 err = -EADDRINUSE;
1022                         goto out;
1023                 }
1024         }
1025
1026         err = mutex_lock_interruptible(&u->bindlock);
1027         if (err)
1028                 goto out_put;
1029
1030         err = -EINVAL;
1031         if (u->addr)
1032                 goto out_up;
1033
1034         err = -ENOMEM;
1035         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1036         if (!addr)
1037                 goto out_up;
1038
1039         memcpy(addr->name, sunaddr, addr_len);
1040         addr->len = addr_len;
1041         addr->hash = hash ^ sk->sk_type;
1042         atomic_set(&addr->refcnt, 1);
1043
1044         if (sun_path[0]) {
1045                 addr->hash = UNIX_HASH_SIZE;
1046                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1047                 spin_lock(&unix_table_lock);
1048                 u->path = path;
1049                 list = &unix_socket_table[hash];
1050         } else {
1051                 spin_lock(&unix_table_lock);
1052                 err = -EADDRINUSE;
1053                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1054                                               sk->sk_type, hash)) {
1055                         unix_release_addr(addr);
1056                         goto out_unlock;
1057                 }
1058
1059                 list = &unix_socket_table[addr->hash];
1060         }
1061
1062         err = 0;
1063         __unix_remove_socket(sk);
1064         u->addr = addr;
1065         __unix_insert_socket(list, sk);
1066
1067 out_unlock:
1068         spin_unlock(&unix_table_lock);
1069 out_up:
1070         mutex_unlock(&u->bindlock);
1071 out_put:
1072         if (err)
1073                 path_put(&path);
1074 out:
1075         return err;
1076 }
1077
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * If @sk2 is NULL or the same socket as @sk1, only @sk1 is locked.
 * Otherwise both are locked, lower address first, so that two tasks
 * locking the same pair always agree on the order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        first  = sk1 < sk2 ? sk1 : sk2;
        second = sk1 < sk2 ? sk2 : sk1;

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1092
/*
 * Counterpart of unix_state_double_lock(): drop the state lock of @sk1
 * and, when @sk2 is a distinct non-NULL socket, of @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (unlikely(sk1 == sk2) || !sk2)
                return;

        unix_state_unlock(sk2);
}
1102
1103 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1104                               int alen, int flags)
1105 {
1106         struct sock *sk = sock->sk;
1107         struct net *net = sock_net(sk);
1108         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1109         struct sock *other;
1110         unsigned int hash;
1111         int err;
1112
1113         if (addr->sa_family != AF_UNSPEC) {
1114                 err = unix_mkname(sunaddr, alen, &hash);
1115                 if (err < 0)
1116                         goto out;
1117                 alen = err;
1118
1119                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1120                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1121                         goto out;
1122
1123 restart:
1124                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1125                 if (!other)
1126                         goto out;
1127
1128                 unix_state_double_lock(sk, other);
1129
1130                 /* Apparently VFS overslept socket death. Retry. */
1131                 if (sock_flag(other, SOCK_DEAD)) {
1132                         unix_state_double_unlock(sk, other);
1133                         sock_put(other);
1134                         goto restart;
1135                 }
1136
1137                 err = -EPERM;
1138                 if (!unix_may_send(sk, other))
1139                         goto out_unlock;
1140
1141                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1142                 if (err)
1143                         goto out_unlock;
1144
1145         } else {
1146                 /*
1147                  *      1003.1g breaking connected state with AF_UNSPEC
1148                  */
1149                 other = NULL;
1150                 unix_state_double_lock(sk, other);
1151         }
1152
1153         /*
1154          * If it was connected, reconnect.
1155          */
1156         if (unix_peer(sk)) {
1157                 struct sock *old_peer = unix_peer(sk);
1158                 unix_peer(sk) = other;
1159                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1160
1161                 unix_state_double_unlock(sk, other);
1162
1163                 if (other != old_peer)
1164                         unix_dgram_disconnected(sk, old_peer);
1165                 sock_put(old_peer);
1166         } else {
1167                 unix_peer(sk) = other;
1168                 unix_state_double_unlock(sk, other);
1169         }
1170         return 0;
1171
1172 out_unlock:
1173         unix_state_double_unlock(sk, other);
1174         sock_put(other);
1175 out:
1176         return err;
1177 }
1178
1179 static long unix_wait_for_peer(struct sock *other, long timeo)
1180 {
1181         struct unix_sock *u = unix_sk(other);
1182         int sched;
1183         DEFINE_WAIT(wait);
1184
1185         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1186
1187         sched = !sock_flag(other, SOCK_DEAD) &&
1188                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1189                 unix_recvq_full(other);
1190
1191         unix_state_unlock(other);
1192
1193         if (sched)
1194                 timeo = schedule_timeout(timeo);
1195
1196         finish_wait(&u->peer_wait, &wait);
1197         return timeo;
1198 }
1199
     /*
      * unix_stream_connect - connect a socket to a listening AF_UNIX socket
      * @sock:     socket being connected
      * @uaddr:    address of the listener
      * @addr_len: length of @uaddr in bytes
      * @flags:    only O_NONBLOCK is looked at here (selects the send timeout)
      *
      * A new sock (newsk) and a one-byte skb are allocated up front.  The
      * listener ("other") is then looked up and locked; if its receive queue
      * is full the caller waits (or gets -EAGAIN when the timeout is zero).
      * Once both state locks are held, newsk is set up as the peer of sk and
      * vice versa, and the skb is queued on the listener's receive queue.
      *
      * Returns 0 on success or a negative errno: errors from unix_mkname(),
      * unix_autobind(), unix_find_other() or security_unix_stream_connect(),
      * -ENOMEM, -ECONNREFUSED, -EAGAIN, -EISCONN, -EINVAL, or the result of
      * sock_intr_errno() when a signal interrupts the wait.
      */
1200 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1201                                int addr_len, int flags)
1202 {
1203         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1204         struct sock *sk = sock->sk;
1205         struct net *net = sock_net(sk);
1206         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1207         struct sock *newsk = NULL;
1208         struct sock *other = NULL;
1209         struct sk_buff *skb = NULL;
1210         unsigned int hash;
1211         int st;
1212         int err;
1213         long timeo;
1214
1215         err = unix_mkname(sunaddr, addr_len, &hash);
1216         if (err < 0)
1217                 goto out;
1218         addr_len = err;
1219
             /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
1220         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1221             (err = unix_autobind(sock)) != 0)
1222                 goto out;
1223
1224         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1225
1226         /* First of all allocate resources.
1227            If we will make it after state is locked,
1228            we will have to recheck all again in any case.
1229          */
1230
1231         err = -ENOMEM;
1232
1233         /* create new sock for complete connection */
1234         newsk = unix_create1(sock_net(sk), NULL, 0);
1235         if (newsk == NULL)
1236                 goto out;
1237
1238         /* Allocate skb for sending to listening sock */
1239         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1240         if (skb == NULL)
1241                 goto out;
1242
1243 restart:
1244         /*  Find listening sock. */
1245         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1246         if (!other)
1247                 goto out;
1248
1249         /* Latch state of peer */
1250         unix_state_lock(other);
1251
1252         /* Apparently VFS overslept socket death. Retry. */
1253         if (sock_flag(other, SOCK_DEAD)) {
1254                 unix_state_unlock(other);
1255                 sock_put(other);
1256                 goto restart;
1257         }
1258
1259         err = -ECONNREFUSED;
1260         if (other->sk_state != TCP_LISTEN)
1261                 goto out_unlock;
1262         if (other->sk_shutdown & RCV_SHUTDOWN)
1263                 goto out_unlock;
1264
1265         if (unix_recvq_full(other)) {
1266                 err = -EAGAIN;
1267                 if (!timeo)
1268                         goto out_unlock;
1269
1270                 timeo = unix_wait_for_peer(other, timeo);
1271
                     /*
                      * unix_wait_for_peer() has already released other's state
                      * lock, so leave through "out" rather than "out_unlock".
                      */
1272                 err = sock_intr_errno(timeo);
1273                 if (signal_pending(current))
1274                         goto out;
1275                 sock_put(other);
1276                 goto restart;
1277         }
1278
1279         /* Latch our state.
1280
1281            It is tricky place. We need to grab our state lock and cannot
1282            drop lock on peer. It is dangerous because deadlock is
1283            possible. Connect to self case and simultaneous
1284            attempt to connect are eliminated by checking socket
1285            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1286            check this before attempt to grab lock.
1287
1288            Well, and we have to recheck the state after socket locked.
1289          */
1290         st = sk->sk_state;
1291
1292         switch (st) {
1293         case TCP_CLOSE:
1294                 /* This is ok... continue with connect */
1295                 break;
1296         case TCP_ESTABLISHED:
1297                 /* Socket is already connected */
1298                 err = -EISCONN;
1299                 goto out_unlock;
1300         default:
1301                 err = -EINVAL;
1302                 goto out_unlock;
1303         }
1304
1305         unix_state_lock_nested(sk);
1306
             /* sk's state changed before we got its lock: start over. */
1307         if (sk->sk_state != st) {
1308                 unix_state_unlock(sk);
1309                 unix_state_unlock(other);
1310                 sock_put(other);
1311                 goto restart;
1312         }
1313
1314         err = security_unix_stream_connect(sk, other, newsk);
1315         if (err) {
1316                 unix_state_unlock(sk);
1317                 goto out_unlock;
1318         }
1319
1320         /* The way is open! Fastly set all the necessary fields... */
1321
             /* Reference on sk backing newsk's peer pointer. */
1322         sock_hold(sk);
1323         unix_peer(newsk)        = sk;
1324         newsk->sk_state         = TCP_ESTABLISHED;
1325         newsk->sk_type          = sk->sk_type;
1326         init_peercred(newsk);
1327         newu = unix_sk(newsk);
1328         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1329         otheru = unix_sk(other);
1330
1331         /* copy address information from listening to new sock*/
1332         if (otheru->addr) {
1333                 atomic_inc(&otheru->addr->refcnt);
1334                 newu->addr = otheru->addr;
1335         }
1336         if (otheru->path.dentry) {
1337                 path_get(&otheru->path);
1338                 newu->path = otheru->path;
1339         }
1340
1341         /* Set credentials */
1342         copy_peercred(sk, other);
1343
1344         sock->state     = SS_CONNECTED;
1345         sk->sk_state    = TCP_ESTABLISHED;
             /* Reference on newsk backing sk's peer pointer. */
1346         sock_hold(newsk);
1347
1348         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1349         unix_peer(sk)   = newsk;
1350
1351         unix_state_unlock(sk);
1352
1353         /* take ten and send info to listening sock */
1354         spin_lock(&other->sk_receive_queue.lock);
1355         __skb_queue_tail(&other->sk_receive_queue, skb);
1356         spin_unlock(&other->sk_receive_queue.lock);
1357         unix_state_unlock(other);
1358         other->sk_data_ready(other);
1359         sock_put(other);
1360         return 0;
1361
1362 out_unlock:
1363         if (other)
1364                 unix_state_unlock(other);
1365
             /* Common failure exit: free whatever was set up so far. */
1366 out:
1367         kfree_skb(skb);
1368         if (newsk)
1369                 unix_release_sock(newsk, 0);
1370         if (other)
1371                 sock_put(other);
1372         return err;
1373 }
1374
1375 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1376 {
1377         struct sock *ska = socka->sk, *skb = sockb->sk;
1378
1379         /* Join our sockets back to back */
1380         sock_hold(ska);
1381         sock_hold(skb);
1382         unix_peer(ska) = skb;
1383         unix_peer(skb) = ska;
1384         init_peercred(ska);
1385         init_peercred(skb);
1386
1387         if (ska->sk_type != SOCK_DGRAM) {
1388                 ska->sk_state = TCP_ESTABLISHED;
1389                 skb->sk_state = TCP_ESTABLISHED;
1390                 socka->state  = SS_CONNECTED;
1391                 sockb->state  = SS_CONNECTED;
1392         }
1393         return 0;
1394 }
1395
1396 static void unix_sock_inherit_flags(const struct socket *old,
1397                                     struct socket *new)
1398 {
1399         if (test_bit(SOCK_PASSCRED, &old->flags))
1400                 set_bit(SOCK_PASSCRED, &new->flags);
1401         if (test_bit(SOCK_PASSSEC, &old->flags))
1402                 set_bit(SOCK_PASSSEC, &new->flags);
1403 }
1404
1405 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1406 {
1407         struct sock *sk = sock->sk;
1408         struct sock *tsk;
1409         struct sk_buff *skb;
1410         int err;
1411
1412         err = -EOPNOTSUPP;
1413         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1414                 goto out;
1415
1416         err = -EINVAL;
1417         if (sk->sk_state != TCP_LISTEN)
1418                 goto out;
1419
1420         /* If socket state is TCP_LISTEN it cannot change (for now...),
1421          * so that no locks are necessary.
1422          */
1423
1424         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1425         if (!skb) {
1426                 /* This means receive shutdown. */
1427                 if (err == 0)
1428                         err = -EINVAL;
1429                 goto out;
1430         }
1431
1432         tsk = skb->sk;
1433         skb_free_datagram(sk, skb);
1434         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1435
1436         /* attach accepted sock to socket */
1437         unix_state_lock(tsk);
1438         newsock->state = SS_CONNECTED;
1439         unix_sock_inherit_flags(sock, newsock);
1440         sock_graft(tsk, newsock);
1441         unix_state_unlock(tsk);
1442         return 0;
1443
1444 out:
1445         return err;
1446 }
1447
1448
1449 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1450 {
1451         struct sock *sk = sock->sk;
1452         struct unix_sock *u;
1453         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1454         int err = 0;
1455
1456         if (peer) {
1457                 sk = unix_peer_get(sk);
1458
1459                 err = -ENOTCONN;
1460                 if (!sk)
1461                         goto out;
1462                 err = 0;
1463         } else {
1464                 sock_hold(sk);
1465         }
1466
1467         u = unix_sk(sk);
1468         unix_state_lock(sk);
1469         if (!u->addr) {
1470                 sunaddr->sun_family = AF_UNIX;
1471                 sunaddr->sun_path[0] = 0;
1472                 *uaddr_len = sizeof(short);
1473         } else {
1474                 struct unix_address *addr = u->addr;
1475
1476                 *uaddr_len = addr->len;
1477                 memcpy(sunaddr, addr->name, *uaddr_len);
1478         }
1479         unix_state_unlock(sk);
1480         sock_put(sk);
1481 out:
1482         return err;
1483 }
1484
1485 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1486 {
1487         int i;
1488
1489         scm->fp = UNIXCB(skb).fp;
1490         UNIXCB(skb).fp = NULL;
1491
1492         for (i = scm->fp->count-1; i >= 0; i--)
1493                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1494 }
1495
1496 static void unix_destruct_scm(struct sk_buff *skb)
1497 {
1498         struct scm_cookie scm;
1499         memset(&scm, 0, sizeof(scm));
1500         scm.pid  = UNIXCB(skb).pid;
1501         if (UNIXCB(skb).fp)
1502                 unix_detach_fds(&scm, skb);
1503
1504         /* Alas, it calls VFS */
1505         /* So fscking what? fput() had been SMP-safe since the last Summer */
1506         scm_destroy(&scm);
1507         sock_wfree(skb);
1508 }
1509
1510 /*
1511  * The "user->unix_inflight" variable is protected by the garbage
1512  * collection lock, and we just read it locklessly here. If you go
1513  * over the limit, there might be a tiny race in actually noticing
1514  * it across threads. Tough.
1515  */
1516 static inline bool too_many_unix_fds(struct task_struct *p)
1517 {
1518         struct user_struct *user = current_user();
1519
1520         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1521                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1522         return false;
1523 }
1524
1525 #define MAX_RECURSION_LEVEL 4
1526
1527 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1528 {
1529         int i;
1530         unsigned char max_level = 0;
1531
1532         if (too_many_unix_fds(current))
1533                 return -ETOOMANYREFS;
1534
1535         for (i = scm->fp->count - 1; i >= 0; i--) {
1536                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1537
1538                 if (sk)
1539                         max_level = max(max_level,
1540                                         unix_sk(sk)->recursion_level);
1541         }
1542         if (unlikely(max_level > MAX_RECURSION_LEVEL))
1543                 return -ETOOMANYREFS;
1544
1545         /*
1546          * Need to duplicate file references for the sake of garbage
1547          * collection.  Otherwise a socket in the fps might become a
1548          * candidate for GC while the skb is not yet queued.
1549          */
1550         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1551         if (!UNIXCB(skb).fp)
1552                 return -ENOMEM;
1553
1554         for (i = scm->fp->count - 1; i >= 0; i--)
1555                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1556         return max_level;
1557 }
1558
1559 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1560 {
1561         int err = 0;
1562
1563         UNIXCB(skb).pid  = get_pid(scm->pid);
1564         UNIXCB(skb).uid = scm->creds.uid;
1565         UNIXCB(skb).gid = scm->creds.gid;
1566         UNIXCB(skb).fp = NULL;
1567         unix_get_secdata(scm, skb);
1568         if (scm->fp && send_fds)
1569                 err = unix_attach_fds(scm, skb);
1570
1571         skb->destructor = unix_destruct_scm;
1572         return err;
1573 }
1574
1575 static bool unix_passcred_enabled(const struct socket *sock,
1576                                   const struct sock *other)
1577 {
1578         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1579                !other->sk_socket ||
1580                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1581 }
1582
1583 /*
1584  * Some apps rely on write() giving SCM_CREDENTIALS
1585  * We include credentials if source or destination socket
1586  * asserted SOCK_PASSCRED.
1587  */
1588 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1589                             const struct sock *other)
1590 {
1591         if (UNIXCB(skb).pid)
1592                 return;
1593         if (unix_passcred_enabled(sock, other)) {
1594                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1595                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1596         }
1597 }
1598
1599 static int maybe_init_creds(struct scm_cookie *scm,
1600                             struct socket *socket,
1601                             const struct sock *other)
1602 {
1603         int err;
1604         struct msghdr msg = { .msg_controllen = 0 };
1605
1606         err = scm_send(socket, &msg, scm, false);
1607         if (err)
1608                 return err;
1609
1610         if (unix_passcred_enabled(socket, other)) {
1611                 scm->pid = get_pid(task_tgid(current));
1612                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1613         }
1614         return err;
1615 }
1616
1617 static bool unix_skb_scm_eq(struct sk_buff *skb,
1618                             struct scm_cookie *scm)
1619 {
1620         const struct unix_skb_parms *u = &UNIXCB(skb);
1621
1622         return u->pid == scm->pid &&
1623                uid_eq(u->uid, scm->creds.uid) &&
1624                gid_eq(u->gid, scm->creds.gid) &&
1625                unix_secdata_eq(scm, skb);
1626 }
1627
1628 /*
1629  *      Send AF_UNIX data.
1630  */
1631
1632 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1633                               size_t len)
1634 {
1635         struct sock *sk = sock->sk;
1636         struct net *net = sock_net(sk);
1637         struct unix_sock *u = unix_sk(sk);
1638         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1639         struct sock *other = NULL;
1640         int namelen = 0; /* fake GCC */
1641         int err;
1642         unsigned int hash;
1643         struct sk_buff *skb;
1644         long timeo;
1645         struct scm_cookie scm;
1646         int max_level;
1647         int data_len = 0;
1648         int sk_locked;
1649
1650         wait_for_unix_gc();
1651         err = scm_send(sock, msg, &scm, false);
1652         if (err < 0)
1653                 return err;
1654
1655         err = -EOPNOTSUPP;
1656         if (msg->msg_flags&MSG_OOB)
1657                 goto out;
1658
1659         if (msg->msg_namelen) {
1660                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1661                 if (err < 0)
1662                         goto out;
1663                 namelen = err;
1664         } else {
1665                 sunaddr = NULL;
1666                 err = -ENOTCONN;
1667                 other = unix_peer_get(sk);
1668                 if (!other)
1669                         goto out;
1670         }
1671
1672         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1673             && (err = unix_autobind(sock)) != 0)
1674                 goto out;
1675
1676         err = -EMSGSIZE;
1677         if (len > sk->sk_sndbuf - 32)
1678                 goto out;
1679
1680         if (len > SKB_MAX_ALLOC) {
1681                 data_len = min_t(size_t,
1682                                  len - SKB_MAX_ALLOC,
1683                                  MAX_SKB_FRAGS * PAGE_SIZE);
1684                 data_len = PAGE_ALIGN(data_len);
1685
1686                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1687         }
1688
1689         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1690                                    msg->msg_flags & MSG_DONTWAIT, &err,
1691                                    PAGE_ALLOC_COSTLY_ORDER);
1692         if (skb == NULL)
1693                 goto out;
1694
1695         err = unix_scm_to_skb(&scm, skb, true);
1696         if (err < 0)
1697                 goto out_free;
1698         max_level = err + 1;
1699
1700         skb_put(skb, len - data_len);
1701         skb->data_len = data_len;
1702         skb->len = len;
1703         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1704         if (err)
1705                 goto out_free;
1706
1707         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1708
1709 restart:
1710         if (!other) {
1711                 err = -ECONNRESET;
1712                 if (sunaddr == NULL)
1713                         goto out_free;
1714
1715                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1716                                         hash, &err);
1717                 if (other == NULL)
1718                         goto out_free;
1719         }
1720
1721         if (sk_filter(other, skb) < 0) {
1722                 /* Toss the packet but do not return any error to the sender */
1723                 err = len;
1724                 goto out_free;
1725         }
1726
1727         sk_locked = 0;
1728         unix_state_lock(other);
1729 restart_locked:
1730         err = -EPERM;
1731         if (!unix_may_send(sk, other))
1732                 goto out_unlock;
1733
1734         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1735                 /*
1736                  *      Check with 1003.1g - what should
1737                  *      datagram error
1738                  */
1739                 unix_state_unlock(other);
1740                 sock_put(other);
1741
1742                 if (!sk_locked)
1743                         unix_state_lock(sk);
1744
1745                 err = 0;
1746                 if (unix_peer(sk) == other) {
1747                         unix_peer(sk) = NULL;
1748                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1749
1750                         unix_state_unlock(sk);
1751
1752                         unix_dgram_disconnected(sk, other);
1753                         sock_put(other);
1754                         err = -ECONNREFUSED;
1755                 } else {
1756                         unix_state_unlock(sk);
1757                 }
1758
1759                 other = NULL;
1760                 if (err)
1761                         goto out_free;
1762                 goto restart;
1763         }
1764
1765         err = -EPIPE;
1766         if (other->sk_shutdown & RCV_SHUTDOWN)
1767                 goto out_unlock;
1768
1769         if (sk->sk_type != SOCK_SEQPACKET) {
1770                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1771                 if (err)
1772                         goto out_unlock;
1773         }
1774
1775         /* other == sk && unix_peer(other) != sk if
1776          * - unix_peer(sk) == NULL, destination address bound to sk
1777          * - unix_peer(sk) == sk by time of get but disconnected before lock
1778          */
1779         if (other != sk &&
1780             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1781                 if (timeo) {
1782                         timeo = unix_wait_for_peer(other, timeo);
1783
1784                         err = sock_intr_errno(timeo);
1785                         if (signal_pending(current))
1786                                 goto out_free;
1787
1788                         goto restart;
1789                 }
1790
1791                 if (!sk_locked) {
1792                         unix_state_unlock(other);
1793                         unix_state_double_lock(sk, other);
1794                 }
1795
1796                 if (unix_peer(sk) != other ||
1797                     unix_dgram_peer_wake_me(sk, other)) {
1798                         err = -EAGAIN;
1799                         sk_locked = 1;
1800                         goto out_unlock;
1801                 }
1802
1803                 if (!sk_locked) {
1804                         sk_locked = 1;
1805                         goto restart_locked;
1806                 }
1807         }
1808
1809         if (unlikely(sk_locked))
1810                 unix_state_unlock(sk);
1811
1812         if (sock_flag(other, SOCK_RCVTSTAMP))
1813                 __net_timestamp(skb);
1814         maybe_add_creds(skb, sock, other);
1815         skb_queue_tail(&other->sk_receive_queue, skb);
1816         if (max_level > unix_sk(other)->recursion_level)
1817                 unix_sk(other)->recursion_level = max_level;
1818         unix_state_unlock(other);
1819         other->sk_data_ready(other);
1820         sock_put(other);
1821         scm_destroy(&scm);
1822         return len;
1823
1824 out_unlock:
1825         if (sk_locked)
1826                 unix_state_unlock(sk);
1827         unix_state_unlock(other);
1828 out_free:
1829         kfree_skb(skb);
1830 out:
1831         if (other)
1832                 sock_put(other);
1833         scm_destroy(&scm);
1834         return err;
1835 }
1836
1837 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1838  * bytes, and a minimum of a full page.
1839  */
1840 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1841
     /*
      * Send @len bytes from @msg to the peer of the connected stream socket
      * @sock.
      *
      * The payload is copied in chunks; every chunk gets its own skb, which is
      * appended to the peer's sk_receive_queue while the peer's state lock is
      * held.  unix_scm_to_skb() is called for every skb, but it is asked to
      * send file descriptors for the first one only.
      *
      * Returns the number of bytes queued when at least one chunk was queued,
      * otherwise a negative errno: -EOPNOTSUPP for MSG_OOB, -EISCONN or
      * -EOPNOTSUPP when a destination address is supplied, -ENOTCONN when the
      * socket has no peer, -EPIPE when this side has SEND_SHUTDOWN set or the
      * peer is dead / has RCV_SHUTDOWN set, or whatever the scm, allocation
      * or copy helpers reported.  SIGPIPE is raised on the -EPIPE paths only
      * when nothing was queued and MSG_NOSIGNAL is not set.
      */
1842 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1843                                size_t len)
1844 {
1845         struct sock *sk = sock->sk;
1846         struct sock *other = NULL;
1847         int err, size;
1848         struct sk_buff *skb;
1849         int sent = 0;
1850         struct scm_cookie scm;
1851         bool fds_sent = false;
1852         int max_level;
1853         int data_len;
1854
1855         wait_for_unix_gc();
1856         err = scm_send(sock, msg, &scm, false);
1857         if (err < 0)
1858                 return err;
1859
             /* From here on every exit must go through scm_destroy(). */
1860         err = -EOPNOTSUPP;
1861         if (msg->msg_flags&MSG_OOB)
1862                 goto out_err;
1863
             /* A destination address is never accepted on this path. */
1864         if (msg->msg_namelen) {
1865                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1866                 goto out_err;
1867         } else {
1868                 err = -ENOTCONN;
1869                 other = unix_peer(sk);
1870                 if (!other)
1871                         goto out_err;
1872         }
1873
1874         if (sk->sk_shutdown & SEND_SHUTDOWN)
1875                 goto pipe_err;
1876
1877         while (sent < len) {
1878                 size = len - sent;
1879
1880                 /* Keep two messages in the pipe so it schedules better */
1881                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1882
1883                 /* allow fallback to order-0 allocations */
1884                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1885
                     /* Whatever exceeds SKB_MAX_HEAD(0) is requested as paged
                      * data (data_len); the rest (size - data_len) is the
                      * linear part.
                      */
1886                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1887
1888                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1889
1890                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1891                                            msg->msg_flags & MSG_DONTWAIT, &err,
1892                                            get_order(UNIX_SKB_FRAGS_SZ));
1893                 if (!skb)
1894                         goto out_err;
1895
1896                 /* Only send the fds in the first buffer */
1897                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1898                 if (err < 0) {
1899                         kfree_skb(skb);
1900                         goto out_err;
1901                 }
                     /* A non-negative return feeds the peer's recursion_level
                      * update below.
                      */
1902                 max_level = err + 1;
1903                 fds_sent = true;
1904
1905                 skb_put(skb, size - data_len);
1906                 skb->data_len = data_len;
1907                 skb->len = size;
1908                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1909                 if (err) {
1910                         kfree_skb(skb);
1911                         goto out_err;
1912                 }
1913
1914                 unix_state_lock(other);
1915
                     /* Checked under the peer's state lock, once per chunk. */
1916                 if (sock_flag(other, SOCK_DEAD) ||
1917                     (other->sk_shutdown & RCV_SHUTDOWN))
1918                         goto pipe_err_free;
1919
1920                 maybe_add_creds(skb, sock, other);
1921                 skb_queue_tail(&other->sk_receive_queue, skb);
1922                 if (max_level > unix_sk(other)->recursion_level)
1923                         unix_sk(other)->recursion_level = max_level;
1924                 unix_state_unlock(other);
1925                 other->sk_data_ready(other);
1926                 sent += size;
1927         }
1928
1929         scm_destroy(&scm);
1930
1931         return sent;
1932
     /* Peer went away with the state lock held and an unqueued skb in hand. */
1933 pipe_err_free:
1934         unix_state_unlock(other);
1935         kfree_skb(skb);
1936 pipe_err:
1937         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1938                 send_sig(SIGPIPE, current, 0);
1939         err = -EPIPE;
1940 out_err:
1941         scm_destroy(&scm);
             /* A partial send is reported as success; the error is dropped. */
1942         return sent ? : err;
1943 }
1944
     /*
      * Append @size bytes of @page, starting at @offset, to the receive queue
      * of the peer of stream socket @socket.
      *
      * The page is attached as a fragment either to the skb currently at the
      * tail of the peer's queue (when unix_skb_scm_eq() says it matches this
      * sender's scm data) or to a freshly allocated empty skb that is then
      * queued.  Both the peer's iolock and the peer's state lock are held
      * while the queue is inspected and modified.
      *
      * Returns @size on success or a negative errno; on the -EPIPE paths
      * SIGPIPE is raised unless MSG_NOSIGNAL is set in @flags.
      */
1945 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1946                                     int offset, size_t size, int flags)
1947 {
1948         int err;
1949         bool send_sigpipe = false;
1950         bool init_scm = true;
1951         struct scm_cookie scm;
1952         struct sock *other, *sk = socket->sk;
1953         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1954
1955         if (flags & MSG_OOB)
1956                 return -EOPNOTSUPP;
1957
1958         other = unix_peer(sk);
1959         if (!other || sk->sk_state != TCP_ESTABLISHED)
1960                 return -ENOTCONN;
1961
             /* Never entered from above: this block is reached only through
              * "goto alloc_skb", with iolock and the peer's state lock held.
              * It drops both, allocates an empty skb and then falls through
              * to re-take the locks and redo every check.
              */
1962         if (false) {
1963 alloc_skb:
1964                 unix_state_unlock(other);
1965                 mutex_unlock(&unix_sk(other)->iolock);
1966                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1967                                               &err, 0);
1968                 if (!newskb)
1969                         goto err;
1970         }
1971
1972         /* we must acquire iolock as we modify already present
1973          * skbs in the sk_receive_queue and mess with skb->len
1974          */
1975         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1976         if (err) {
1977                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1978                 goto err;
1979         }
1980
1981         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1982                 err = -EPIPE;
1983                 send_sigpipe = true;
1984                 goto err_unlock;
1985         }
1986
1987         unix_state_lock(other);
1988
1989         if (sock_flag(other, SOCK_DEAD) ||
1990             other->sk_shutdown & RCV_SHUTDOWN) {
1991                 err = -EPIPE;
1992                 send_sigpipe = true;
1993                 goto err_state_unlock;
1994         }
1995
             /* Done once even if we loop through alloc_skb; init_scm also
              * tells the error path whether scm needs scm_destroy().
              */
1996         if (init_scm) {
1997                 err = maybe_init_creds(&scm, socket, other);
1998                 if (err)
1999                         goto err_state_unlock;
2000                 init_scm = false;
2001         }
2002
             /* Pick the skb to extend.  "tail" remembers the queue tail that
              * was rejected before the locks were dropped; if it is still the
              * tail, go straight to the new skb.
              */
2003         skb = skb_peek_tail(&other->sk_receive_queue);
2004         if (tail && tail == skb) {
2005                 skb = newskb;
2006         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2007                 if (newskb) {
2008                         skb = newskb;
2009                 } else {
2010                         tail = skb;
2011                         goto alloc_skb;
2012                 }
2013         } else if (newskb) {
2014                 /* this is fast path, we don't necessarily need to
2015                  * call to kfree_skb even though with newskb == NULL
2016                  * this - does no harm
2017                  */
2018                 consume_skb(newskb);
2019                 newskb = NULL;
2020         }
2021
             /* A non-zero return means the page could not be added to this
              * skb; retry with a new one.
              */
2022         if (skb_append_pagefrags(skb, page, offset, size)) {
2023                 tail = skb;
2024                 goto alloc_skb;
2025         }
2026
2027         skb->len += size;
2028         skb->data_len += size;
2029         skb->truesize += size;
2030         atomic_add(size, &sk->sk_wmem_alloc);
2031
             /* Only a newly allocated skb still has to be given its scm data
              * and put on the queue; an existing tail skb is already there.
              */
2032         if (newskb) {
2033                 err = unix_scm_to_skb(&scm, skb, false);
2034                 if (err)
2035                         goto err_state_unlock;
2036                 spin_lock(&other->sk_receive_queue.lock);
2037                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2038                 spin_unlock(&other->sk_receive_queue.lock);
2039         }
2040
2041         unix_state_unlock(other);
2042         mutex_unlock(&unix_sk(other)->iolock);
2043
2044         other->sk_data_ready(other);
2045         scm_destroy(&scm);
2046         return size;
2047
2048 err_state_unlock:
2049         unix_state_unlock(other);
2050 err_unlock:
2051         mutex_unlock(&unix_sk(other)->iolock);
2052 err:
2053         kfree_skb(newskb);
2054         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2055                 send_sig(SIGPIPE, current, 0);
             /* scm is valid only once maybe_init_creds() has succeeded. */
2056         if (!init_scm)
2057                 scm_destroy(&scm);
2058         return err;
2059 }
2060
2061 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2062                                   size_t len)
2063 {
2064         int err;
2065         struct sock *sk = sock->sk;
2066
2067         err = sock_error(sk);
2068         if (err)
2069                 return err;
2070
2071         if (sk->sk_state != TCP_ESTABLISHED)
2072                 return -ENOTCONN;
2073
2074         if (msg->msg_namelen)
2075                 msg->msg_namelen = 0;
2076
2077         return unix_dgram_sendmsg(sock, msg, len);
2078 }
2079
2080 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2081                                   size_t size, int flags)
2082 {
2083         struct sock *sk = sock->sk;
2084
2085         if (sk->sk_state != TCP_ESTABLISHED)
2086                 return -ENOTCONN;
2087
2088         return unix_dgram_recvmsg(sock, msg, size, flags);
2089 }
2090
2091 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2092 {
2093         struct unix_sock *u = unix_sk(sk);
2094
2095         if (u->addr) {
2096                 msg->msg_namelen = u->addr->len;
2097                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
2098         }
2099 }
2100
     /*
      * Receive one datagram from @sock into @msg.
      *
      * On success returns the number of bytes copied, or, when MSG_TRUNC is
      * set in @flags, the datagram length past the peek offset.  Returns a
      * negative errno on failure (-EOPNOTSUPP for MSG_OOB), and 0 for a
      * SEQPACKET socket that would otherwise report -EAGAIN while
      * RCV_SHUTDOWN is set.
      *
      * The socket's iolock is held from the moment an skb is obtained until
      * the skb has been released at out_free.
      */
2101 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2102                               size_t size, int flags)
2103 {
2104         struct scm_cookie scm;
2105         struct sock *sk = sock->sk;
2106         struct unix_sock *u = unix_sk(sk);
2107         struct sk_buff *skb, *last;
2108         long timeo;
2109         int err;
2110         int peeked, skip;
2111
2112         err = -EOPNOTSUPP;
2113         if (flags&MSG_OOB)
2114                 goto out;
2115
2116         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2117
             /* Try to dequeue under iolock; on -EAGAIN drop the lock and wait
              * for more packets while there is timeout left.  Leaving the loop
              * through the first "break" keeps iolock held.
              */
2118         do {
2119                 mutex_lock(&u->iolock);
2120
2121                 skip = sk_peek_offset(sk, flags);
2122                 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2123                                               &err, &last);
2124                 if (skb)
2125                         break;
2126
2127                 mutex_unlock(&u->iolock);
2128
2129                 if (err != -EAGAIN)
2130                         break;
2131         } while (timeo &&
2132                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2133
2134         if (!skb) { /* implies iolock unlocked */
2135                 unix_state_lock(sk);
2136                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2137                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2138                     (sk->sk_shutdown & RCV_SHUTDOWN))
2139                         err = 0;
2140                 unix_state_unlock(sk);
2141                 goto out;
2142         }
2143
             /* Wake anyone sleeping on this socket's peer_wait queue for
              * write-side poll events.
              */
2144         if (wq_has_sleeper(&u->peer_wait))
2145                 wake_up_interruptible_sync_poll(&u->peer_wait,
2146                                                 POLLOUT | POLLWRNORM |
2147                                                 POLLWRBAND);
2148
             /* The address reported is that of the skb's owning socket. */
2149         if (msg->msg_name)
2150                 unix_copy_addr(msg, skb->sk);
2151
             /* Clamp to what the datagram holds past the peek offset; flag
              * truncation when the caller's buffer is smaller than that.
              */
2152         if (size > skb->len - skip)
2153                 size = skb->len - skip;
2154         else if (size < skb->len - skip)
2155                 msg->msg_flags |= MSG_TRUNC;
2156
2157         err = skb_copy_datagram_msg(skb, skip, msg, size);
2158         if (err)
2159                 goto out_free;
2160
2161         if (sock_flag(sk, SOCK_RCVTSTAMP))
2162                 __sock_recv_timestamp(msg, sk, skb);
2163
2164         memset(&scm, 0, sizeof(scm));
2165
2166         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2167         unix_set_secdata(&scm, skb);
2168
2169         if (!(flags & MSG_PEEK)) {
2170                 if (UNIXCB(skb).fp)
2171                         unix_detach_fds(&scm, skb);
2172
2173                 sk_peek_offset_bwd(sk, skb->len);
2174         } else {
2175                 /* It is questionable: on PEEK we could:
2176                    - do not return fds - good, but too simple 8)
2177                    - return fds, and do not return them on read (old strategy,
2178                      apparently wrong)
2179                    - clone fds (I chose it for now, it is the most universal
2180                      solution)
2181
2182                    POSIX 1003.1g does not actually define this clearly
2183                    at all. POSIX 1003.1g doesn't define a lot of things
2184                    clearly however!
2185
2186                 */
2187
2188                 sk_peek_offset_fwd(sk, size);
2189
2190                 if (UNIXCB(skb).fp)
2191                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2192         }
             /* With MSG_TRUNC the caller gets the real remaining length. */
2193         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2194
2195         scm_recv(sock, msg, &scm, flags);
2196
2197 out_free:
2198         skb_free_datagram(sk, skb);
2199         mutex_unlock(&u->iolock);
2200 out:
2201         return err;
2202 }
2203
2204 /*
2205  *      Sleep until more data has arrived. But check for races..
      *
      *      @last and @last_len describe the tail of the receive queue as the
      *      caller last saw it.  The loop ends as soon as the tail skb or its
      *      length differs from that, an error or RCV_SHUTDOWN is set on the
      *      socket, a signal is pending, the timeout has run out, or the
      *      socket is found dead after waking up.  @freezable selects
      *      freezable_schedule_timeout() over schedule_timeout().
      *
      *      Returns the remaining timeout.
2206  */
2207 static long unix_stream_data_wait(struct sock *sk, long timeo,
2208                                   struct sk_buff *last, unsigned int last_len,
2209                                   bool freezable)
2210 {
2211         struct sk_buff *tail;
2212         DEFINE_WAIT(wait);
2213
2214         unix_state_lock(sk);
2215
2216         for (;;) {
                     /* Get on the wait queue before testing the conditions. */
2217                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2218
2219                 tail = skb_peek_tail(&sk->sk_receive_queue);
2220                 if (tail != last ||
2221                     (tail && tail->len != last_len) ||
2222                     sk->sk_err ||
2223                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2224                     signal_pending(current) ||
2225                     !timeo)
2226                         break;
2227
                     /* The state lock is dropped only for the sleep itself. */
2228                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2229                 unix_state_unlock(sk);
2230                 if (freezable)
2231                         timeo = freezable_schedule_timeout(timeo);
2232                 else
2233                         timeo = schedule_timeout(timeo);
2234                 unix_state_lock(sk);
2235
                     /* Note: this exit leaves SOCKWQ_ASYNC_WAITDATA set. */
2236                 if (sock_flag(sk, SOCK_DEAD))
2237                         break;
2238
2239                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2240         }
2241
2242         finish_wait(sk_sleep(sk), &wait);
2243         unix_state_unlock(sk);
2244         return timeo;
2245 }
2246
2247 static unsigned int unix_skb_len(const struct sk_buff *skb)
2248 {
2249         return skb->len - UNIXCB(skb).consumed;
2250 }
2251
     /*
      * Arguments bundled by the stream recvmsg and splice entry points for
      * unix_stream_read_generic() and the per-skb actor it calls.
      */
2252 struct unix_stream_read_state {
             /* Invoked as recv_actor(skb, skip, chunk, state); a negative
              * return aborts the read, otherwise the value is added to the
              * running byte count.
              */
2253         int (*recv_actor)(struct sk_buff *, int, int,
2254                           struct unix_stream_read_state *);
             /* Socket being read from. */
2255         struct socket *socket;
             /* Destination message; left NULL by the splice entry point. */
2256         struct msghdr *msg;
             /* Destination pipe, used by the splice actor. */
2257         struct pipe_inode_info *pipe;
             /* Maximum number of bytes to read. */
2258         size_t size;
             /* MSG_* flags for the read. */
2259         int flags;
             /* Flags handed to skb_splice_bits() by the splice actor. */
2260         unsigned int splice_flags;
2261 };
2262
     /*
      * Receive loop shared by the stream recvmsg and splice-read entry points.
      *
      * Walks the socket's sk_receive_queue under iolock and hands each skb to
      * state->recv_actor until state->size bytes have been handled or one of
      * the break conditions in the loop is met.  When the queue is empty and
      * fewer than "target" bytes were gathered, it sleeps in
      * unix_stream_data_wait() (with iolock released) unless the timeout is
      * zero.  @freezable is passed through to unix_stream_data_wait().
      *
      * Returns "copied" when it is non-zero (this includes the -EFAULT stored
      * there when the actor fails before anything was copied), otherwise err,
      * which is 0 or a negative errno (-EINVAL if the socket is not
      * established, -EOPNOTSUPP for MSG_OOB, -ECONNRESET for a dead socket,
      * -EAGAIN when nothing is queued and the timeout is zero, ...).
      */
2263 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2264                                     bool freezable)
2265 {
2266         struct scm_cookie scm;
2267         struct socket *sock = state->socket;
2268         struct sock *sk = sock->sk;
2269         struct unix_sock *u = unix_sk(sk);
2270         int copied = 0;
2271         int flags = state->flags;
2272         int noblock = flags & MSG_DONTWAIT;
2273         bool check_creds = false;
2274         int target;
2275         int err = 0;
2276         long timeo;
2277         int skip;
2278         size_t size = state->size;
2279         unsigned int last_len;
2280
2281         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2282                 err = -EINVAL;
2283                 goto out;
2284         }
2285
2286         if (unlikely(flags & MSG_OOB)) {
2287                 err = -EOPNOTSUPP;
2288                 goto out;
2289         }
2290
             /* "target" is the byte count that lets an empty queue end the
              * read without waiting (see "copied >= target" below).
              */
2291         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2292         timeo = sock_rcvtimeo(sk, noblock);
2293
2294         memset(&scm, 0, sizeof(scm));
2295
2296         /* Lock the socket to prevent queue disordering
2297          * while sleeps in memcpy_tomsg
2298          */
2299         mutex_lock(&u->iolock);
2300
             /* The peek offset is honoured for MSG_PEEK reads only. */
2301         if (flags & MSG_PEEK)
2302                 skip = sk_peek_offset(sk, flags);
2303         else
2304                 skip = 0;
2305
2306         do {
2307                 int chunk;
2308                 bool drop_skb;
2309                 struct sk_buff *skb, *last;
2310
     /* Start over from the head of the queue; also the re-entry point after
      * sleeping.
      */
2311 redo:
2312                 unix_state_lock(sk);
2313                 if (sock_flag(sk, SOCK_DEAD)) {
2314                         err = -ECONNRESET;
2315                         goto unlock;
2316                 }
2317                 last = skb = skb_peek(&sk->sk_receive_queue);
2318                 last_len = last ? last->len : 0;
     /* Re-entered with the state lock held and skb possibly advanced to a
      * later queue entry (or NULL at the end of the queue).
      */
2319 again:
2320                 if (skb == NULL) {
2321                         unix_sk(sk)->recursion_level = 0;
2322                         if (copied >= target)
2323                                 goto unlock;
2324
2325                         /*
2326                          *      POSIX 1003.1g mandates this order.
2327                          */
2328
2329                         err = sock_error(sk);
2330                         if (err)
2331                                 goto unlock;
2332                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2333                                 goto unlock;
2334
2335                         unix_state_unlock(sk);
2336                         if (!timeo) {
2337                                 err = -EAGAIN;
2338                                 break;
2339                         }
2340
                             /* iolock is not held while sleeping. */
2341                         mutex_unlock(&u->iolock);
2342
2343                         timeo = unix_stream_data_wait(sk, timeo, last,
2344                                                       last_len, freezable);
2345
                             /* Leaves with iolock already released, so this
                              * path cleans up scm itself and skips the common
                              * exit code after the loop.
                              */
2346                         if (signal_pending(current)) {
2347                                 err = sock_intr_errno(timeo);
2348                                 scm_destroy(&scm);
2349                                 goto out;
2350                         }
2351
2352                         mutex_lock(&u->iolock);
2353                         goto redo;
     /* Common exit for the paths above that still hold the state lock. */
2354 unlock:
2355                         unix_state_unlock(sk);
2356                         break;
2357                 }
2358
                     /* Step over whole skbs that the peek offset covers. */
2359                 while (skip >= unix_skb_len(skb)) {
2360                         skip -= unix_skb_len(skb);
2361                         last = skb;
2362                         last_len = skb->len;
2363                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2364                         if (!skb)
2365                                 goto again;
2366                 }
2367
2368                 unix_state_unlock(sk);
2369
2370                 if (check_creds) {
2371                         /* Never glue messages from different writers */
2372                         if (!unix_skb_scm_eq(skb, &scm))
2373                                 break;
2374                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2375                         /* Copy credentials */
2376                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2377                         unix_set_secdata(&scm, skb);
2378                         check_creds = true;
2379                 }
2380
2381                 /* Copy address just once */
2382                 if (state->msg && state->msg->msg_name) {
2383                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2384                                          state->msg->msg_name);
2385                         unix_copy_addr(state->msg, skb->sk);
2386                         sunaddr = NULL;
2387                 }
2388
2389                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                     /* Hold our own reference across the actor call; the state
                      * lock is not held here.
                      */
2390                 skb_get(skb);
2391                 chunk = state->recv_actor(skb, skip, chunk, state);
2392                 drop_skb = !unix_skb_len(skb);
2393                 /* skb is only safe to use if !drop_skb */
2394                 consume_skb(skb);
                     /* Actor failure: report -EFAULT unless data was already
                      * copied, in which case the short count is returned.
                      */
2395                 if (chunk < 0) {
2396                         if (copied == 0)
2397                                 copied = -EFAULT;
2398                         break;
2399                 }
2400                 copied += chunk;
2401                 size -= chunk;
2402
2403                 if (drop_skb) {
2404                         /* the skb was touched by a concurrent reader;
2405                          * we should not expect anything from this skb
2406                          * anymore and assume it invalid - we can be
2407                          * sure it was dropped from the socket queue
2408                          *
2409                          * let's report a short read
2410                          */
2411                         err = 0;
2412                         break;
2413                 }
2414
2415                 /* Mark read part of skb as used */
2416                 if (!(flags & MSG_PEEK)) {
2417                         UNIXCB(skb).consumed += chunk;
2418
2419                         sk_peek_offset_bwd(sk, chunk);
2420
2421                         if (UNIXCB(skb).fp)
2422                                 unix_detach_fds(&scm, skb);
2423
                             /* skb still has unread bytes: stop here. */
2424                         if (unix_skb_len(skb))
2425                                 break;
2426
2427                         skb_unlink(skb, &sk->sk_receive_queue);
2428                         consume_skb(skb);
2429
                             /* Stop once file descriptors were picked up. */
2430                         if (scm.fp)
2431                                 break;
2432                 } else {
2433                         /* It is questionable, see note in unix_dgram_recvmsg.
2434                          */
2435                         if (UNIXCB(skb).fp)
2436                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2437
2438                         sk_peek_offset_fwd(sk, chunk);
2439
2440                         if (UNIXCB(skb).fp)
2441                                 break;
2442
                             /* The offset applied to the first peeked skb
                              * only; continue with the next one from its start.
                              */
2443                         skip = 0;
2444                         last = skb;
2445                         last_len = skb->len;
2446                         unix_state_lock(sk);
2447                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2448                         if (skb)
2449                                 goto again;
2450                         unix_state_unlock(sk);
2451                         break;
2452                 }
2453         } while (size);
2454
2455         mutex_unlock(&u->iolock);
             /* Without a msghdr there is nowhere to deliver scm data to. */
2456         if (state->msg)
2457                 scm_recv(sock, state->msg, &scm, flags);
2458         else
2459                 scm_destroy(&scm);
2460 out:
2461         return copied ? : err;
2462 }
2463
2464 static int unix_stream_read_actor(struct sk_buff *skb,
2465                                   int skip, int chunk,
2466                                   struct unix_stream_read_state *state)
2467 {
2468         int ret;
2469
2470         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2471                                     state->msg, chunk);
2472         return ret ?: chunk;
2473 }
2474
2475 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2476                                size_t size, int flags)
2477 {
2478         struct unix_stream_read_state state = {
2479                 .recv_actor = unix_stream_read_actor,
2480                 .socket = sock,
2481                 .msg = msg,
2482                 .size = size,
2483                 .flags = flags
2484         };
2485
2486         return unix_stream_read_generic(&state, true);
2487 }
2488
2489 static int unix_stream_splice_actor(struct sk_buff *skb,
2490                                     int skip, int chunk,
2491                                     struct unix_stream_read_state *state)
2492 {
2493         return skb_splice_bits(skb, state->socket->sk,
2494                                UNIXCB(skb).consumed + skip,
2495                                state->pipe, chunk, state->splice_flags);
2496 }
2497
2498 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2499                                        struct pipe_inode_info *pipe,
2500                                        size_t size, unsigned int flags)
2501 {
2502         struct unix_stream_read_state state = {
2503                 .recv_actor = unix_stream_splice_actor,
2504                 .socket = sock,
2505                 .pipe = pipe,
2506                 .size = size,
2507                 .splice_flags = flags,
2508         };
2509
2510         if (unlikely(*ppos))
2511                 return -ESPIPE;
2512
2513         if (sock->file->f_flags & O_NONBLOCK ||
2514             flags & SPLICE_F_NONBLOCK)
2515                 state.flags = MSG_DONTWAIT;
2516
2517         return unix_stream_read_generic(&state, false);
2518 }
2519
2520 static int unix_shutdown(struct socket *sock, int mode)
2521 {
2522         struct sock *sk = sock->sk;
2523         struct sock *other;
2524
2525         if (mode < SHUT_RD || mode > SHUT_RDWR)
2526                 return -EINVAL;
2527         /* This maps:
2528          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2529          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2530          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2531          */
2532         ++mode;
2533
2534         unix_state_lock(sk);
2535         sk->sk_shutdown |= mode;
2536         other = unix_peer(sk);
2537         if (other)
2538                 sock_hold(other);
2539         unix_state_unlock(sk);
2540         sk->sk_state_change(sk);
2541
2542         if (other &&
2543                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2544
2545                 int peer_mode = 0;
2546
2547                 if (mode&RCV_SHUTDOWN)
2548                         peer_mode |= SEND_SHUTDOWN;
2549                 if (mode&SEND_SHUTDOWN)
2550                         peer_mode |= RCV_SHUTDOWN;
2551                 unix_state_lock(other);
2552                 other->sk_shutdown |= peer_mode;
2553                 unix_state_unlock(other);
2554                 other->sk_state_change(other);
2555                 if (peer_mode == SHUTDOWN_MASK)
2556                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2557                 else if (peer_mode & RCV_SHUTDOWN)
2558                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2559         }
2560         if (other)
2561                 sock_put(other);
2562
2563         return 0;
2564 }
2565
2566 long unix_inq_len(struct sock *sk)
2567 {
2568         struct sk_buff *skb;
2569         long amount = 0;
2570
2571         if (sk->sk_state == TCP_LISTEN)
2572                 return -EINVAL;
2573
2574         spin_lock(&sk->sk_receive_queue.lock);
2575         if (sk->sk_type == SOCK_STREAM ||
2576             sk->sk_type == SOCK_SEQPACKET) {
2577                 skb_queue_walk(&sk->sk_receive_queue, skb)
2578                         amount += unix_skb_len(skb);
2579         } else {
2580                 skb = skb_peek(&sk->sk_receive_queue);
2581                 if (skb)
2582                         amount = skb->len;
2583         }
2584         spin_unlock(&sk->sk_receive_queue.lock);
2585
2586         return amount;
2587 }
2588 EXPORT_SYMBOL_GPL(unix_inq_len);
2589
/* Value of sk_wmem_alloc_get() for @sk, as reported by SIOCOUTQ. */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2595
2596 static int unix_open_file(struct sock *sk)
2597 {
2598         struct path path;
2599         struct file *f;
2600         int fd;
2601
2602         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2603                 return -EPERM;
2604
2605         unix_state_lock(sk);
2606         path = unix_sk(sk)->path;
2607         if (!path.dentry) {
2608                 unix_state_unlock(sk);
2609                 return -ENOENT;
2610         }
2611
2612         path_get(&path);
2613         unix_state_unlock(sk);
2614
2615         fd = get_unused_fd_flags(O_CLOEXEC);
2616         if (fd < 0)
2617                 goto out;
2618
2619         f = dentry_open(&path, O_PATH, current_cred());
2620         if (IS_ERR(f)) {
2621                 put_unused_fd(fd);
2622                 fd = PTR_ERR(f);
2623                 goto out;
2624         }
2625
2626         fd_install(fd, f);
2627 out:
2628         path_put(&path);
2629
2630         return fd;
2631 }
2632
2633 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2634 {
2635         struct sock *sk = sock->sk;
2636         long amount = 0;
2637         int err;
2638
2639         switch (cmd) {
2640         case SIOCOUTQ:
2641                 amount = unix_outq_len(sk);
2642                 err = put_user(amount, (int __user *)arg);
2643                 break;
2644         case SIOCINQ:
2645                 amount = unix_inq_len(sk);
2646                 if (amount < 0)
2647                         err = amount;
2648                 else
2649                         err = put_user(amount, (int __user *)arg);
2650                 break;
2651         case SIOCUNIXFILE:
2652                 err = unix_open_file(sk);
2653                 break;
2654         default:
2655                 err = -ENOIOCTLCMD;
2656                 break;
2657         }
2658         return err;
2659 }
2660
2661 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2662 {
2663         struct sock *sk = sock->sk;
2664         unsigned int mask;
2665
2666         sock_poll_wait(file, sk_sleep(sk), wait);
2667         mask = 0;
2668
2669         /* exceptional events? */
2670         if (sk->sk_err)
2671                 mask |= POLLERR;
2672         if (sk->sk_shutdown == SHUTDOWN_MASK)
2673                 mask |= POLLHUP;
2674         if (sk->sk_shutdown & RCV_SHUTDOWN)
2675                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2676
2677         /* readable? */
2678         if (!skb_queue_empty(&sk->sk_receive_queue))
2679                 mask |= POLLIN | POLLRDNORM;
2680
2681         /* Connection-based need to check for termination and startup */
2682         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2683             sk->sk_state == TCP_CLOSE)
2684                 mask |= POLLHUP;
2685
2686         /*
2687          * we set writable also when the other side has shut down the
2688          * connection. This prevents stuck sockets.
2689          */
2690         if (unix_writable(sk))
2691                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2692
2693         return mask;
2694 }
2695
2696 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2697                                     poll_table *wait)
2698 {
2699         struct sock *sk = sock->sk, *other;
2700         unsigned int mask, writable;
2701
2702         sock_poll_wait(file, sk_sleep(sk), wait);
2703         mask = 0;
2704
2705         /* exceptional events? */
2706         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2707                 mask |= POLLERR |
2708                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2709
2710         if (sk->sk_shutdown & RCV_SHUTDOWN)
2711                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2712         if (sk->sk_shutdown == SHUTDOWN_MASK)
2713                 mask |= POLLHUP;
2714
2715         /* readable? */
2716         if (!skb_queue_empty(&sk->sk_receive_queue))
2717                 mask |= POLLIN | POLLRDNORM;
2718
2719         /* Connection-based need to check for termination and startup */
2720         if (sk->sk_type == SOCK_SEQPACKET) {
2721                 if (sk->sk_state == TCP_CLOSE)
2722                         mask |= POLLHUP;
2723                 /* connection hasn't started yet? */
2724                 if (sk->sk_state == TCP_SYN_SENT)
2725                         return mask;
2726         }
2727
2728         /* No write status requested, avoid expensive OUT tests. */
2729         if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2730                 return mask;
2731
2732         writable = unix_writable(sk);
2733         if (writable) {
2734                 unix_state_lock(sk);
2735
2736                 other = unix_peer(sk);
2737                 if (other && unix_peer(other) != sk &&
2738                     unix_recvq_full(other) &&
2739                     unix_dgram_peer_wake_me(sk, other))
2740                         writable = 0;
2741
2742                 unix_state_unlock(sk);
2743         }
2744
2745         if (writable)
2746                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2747         else
2748                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2749
2750         return mask;
2751 }
2752
2753 #ifdef CONFIG_PROC_FS
2754
/*
 * A seq_file position for the socket listing packs two values into one
 * word: the hash bucket index lives in the bits from BUCKET_SPACE upwards
 * and the offset within that bucket lives in the bits below it.
 */
#define BUCKET_SPACE	(BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x)			((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x)			((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from bucket index @b and offset @o. */
#define set_bucket_offset(b, o)		(((b) << BUCKET_SPACE) | (o))
2760
2761 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2762 {
2763         unsigned long offset = get_offset(*pos);
2764         unsigned long bucket = get_bucket(*pos);
2765         struct sock *sk;
2766         unsigned long count = 0;
2767
2768         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2769                 if (sock_net(sk) != seq_file_net(seq))
2770                         continue;
2771                 if (++count == offset)
2772                         break;
2773         }
2774
2775         return sk;
2776 }
2777
2778 static struct sock *unix_next_socket(struct seq_file *seq,
2779                                      struct sock *sk,
2780                                      loff_t *pos)
2781 {
2782         unsigned long bucket;
2783
2784         while (sk > (struct sock *)SEQ_START_TOKEN) {
2785                 sk = sk_next(sk);
2786                 if (!sk)
2787                         goto next_bucket;
2788                 if (sock_net(sk) == seq_file_net(seq))
2789                         return sk;
2790         }
2791
2792         do {
2793                 sk = unix_from_bucket(seq, pos);
2794                 if (sk)
2795                         return sk;
2796
2797 next_bucket:
2798                 bucket = get_bucket(*pos) + 1;
2799                 *pos = set_bucket_offset(bucket, 1);
2800         } while (bucket < ARRAY_SIZE(unix_socket_table));
2801
2802         return NULL;
2803 }
2804
2805 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2806         __acquires(unix_table_lock)
2807 {
2808         spin_lock(&unix_table_lock);
2809
2810         if (!*pos)
2811                 return SEQ_START_TOKEN;
2812
2813         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2814                 return NULL;
2815
2816         return unix_next_socket(seq, NULL, pos);
2817 }
2818
2819 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2820 {
2821         ++*pos;
2822         return unix_next_socket(seq, v, pos);
2823 }
2824
2825 static void unix_seq_stop(struct seq_file *seq, void *v)
2826         __releases(unix_table_lock)
2827 {
2828         spin_unlock(&unix_table_lock);
2829 }
2830
2831 static int unix_seq_show(struct seq_file *seq, void *v)
2832 {
2833
2834         if (v == SEQ_START_TOKEN)
2835                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2836                          "Inode Path\n");
2837         else {
2838                 struct sock *s = v;
2839                 struct unix_sock *u = unix_sk(s);
2840                 unix_state_lock(s);
2841
2842                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2843                         s,
2844                         atomic_read(&s->sk_refcnt),
2845                         0,
2846                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2847                         s->sk_type,
2848                         s->sk_socket ?
2849                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2850                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2851                         sock_i_ino(s));
2852
2853                 if (u->addr) {
2854                         int i, len;
2855                         seq_putc(seq, ' ');
2856
2857                         i = 0;
2858                         len = u->addr->len - sizeof(short);
2859                         if (!UNIX_ABSTRACT(s))
2860                                 len--;
2861                         else {
2862                                 seq_putc(seq, '@');
2863                                 i++;
2864                         }
2865                         for ( ; i < len; i++)
2866                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2867                                          '@');
2868                 }
2869                 unix_state_unlock(s);
2870                 seq_putc(seq, '\n');
2871         }
2872
2873         return 0;
2874 }
2875
2876 static const struct seq_operations unix_seq_ops = {
2877         .start  = unix_seq_start,
2878         .next   = unix_seq_next,
2879         .stop   = unix_seq_stop,
2880         .show   = unix_seq_show,
2881 };
2882
2883 static int unix_seq_open(struct inode *inode, struct file *file)
2884 {
2885         return seq_open_net(inode, file, &unix_seq_ops,
2886                             sizeof(struct seq_net_private));
2887 }
2888
2889 static const struct file_operations unix_seq_fops = {
2890         .owner          = THIS_MODULE,
2891         .open           = unix_seq_open,
2892         .read           = seq_read,
2893         .llseek         = seq_lseek,
2894         .release        = seq_release_net,
2895 };
2896
2897 #endif
2898
2899 static const struct net_proto_family unix_family_ops = {
2900         .family = PF_UNIX,
2901         .create = unix_create,
2902         .owner  = THIS_MODULE,
2903 };
2904
2905
2906 static int __net_init unix_net_init(struct net *net)
2907 {
2908         int error = -ENOMEM;
2909
2910         net->unx.sysctl_max_dgram_qlen = 10;
2911         if (unix_sysctl_register(net))
2912                 goto out;
2913
2914 #ifdef CONFIG_PROC_FS
2915         if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2916                 unix_sysctl_unregister(net);
2917                 goto out;
2918         }
2919 #endif
2920         error = 0;
2921 out:
2922         return error;
2923 }
2924
2925 static void __net_exit unix_net_exit(struct net *net)
2926 {
2927         unix_sysctl_unregister(net);
2928         remove_proc_entry("unix", net->proc_net);
2929 }
2930
2931 static struct pernet_operations unix_net_ops = {
2932         .init = unix_net_init,
2933         .exit = unix_net_exit,
2934 };
2935
2936 static int __init af_unix_init(void)
2937 {
2938         int rc = -1;
2939
2940         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2941
2942         rc = proto_register(&unix_proto, 1);
2943         if (rc != 0) {
2944                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2945                 goto out;
2946         }
2947
2948         sock_register(&unix_family_ops);
2949         register_pernet_subsys(&unix_net_ops);
2950 out:
2951         return rc;
2952 }
2953
2954 static void __exit af_unix_exit(void)
2955 {
2956         sock_unregister(PF_UNIX);
2957         proto_unregister(&unix_proto);
2958         unregister_pernet_subsys(&unix_net_ops);
2959 }
2960
2961 /* Earlier than device_initcall() so that other drivers invoking
2962    request_module() don't end up in a loop when modprobe tries
2963    to use a UNIX socket. But later than subsys_initcall() because
2964    we depend on stuff initialised there */
2965 fs_initcall(af_unix_init);
2966 module_exit(af_unix_exit);
2967
2968 MODULE_LICENSE("GPL");
2969 MODULE_ALIAS_NETPROTO(PF_UNIX);