/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid huge amounts
 *					of socks hashed (this is for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina	:	Hash function optimizations
 *	     Alexey Kuznetsov	:	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie	:	Set peercred for socketpair
 *	     Michal Ostrowski	:	Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
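
/*
 * Illustrative userspace sketch (an addition for exposition, not part
 * of the original file): how the two AF_UNIX name forms described
 * above are laid out in a sockaddr_un.
 *
 *	struct sockaddr_un a;
 *
 *	// Filesystem name: NUL-terminated path in sun_path.
 *	a.sun_family = AF_UNIX;
 *	strcpy(a.sun_path, "/tmp/mysock");	// hypothetical path
 *
 *	// Abstract name: sun_path[0] == 0, followed by arbitrary bytes;
 *	// the address length passed to bind(), not a NUL, delimits it.
 *	a.sun_family = AF_UNIX;
 *	a.sun_path[0] = '\0';
 *	memcpy(a.sun_path + 1, "myname", 6);
 *	// addrlen = offsetof(struct sockaddr_un, sun_path) + 1 + 6
 */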

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 * SMP locking strategy:
 *    the hash table is protected with the spinlock unix_table_lock
 *    each socket state is protected by a separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(struct sock const *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (atomic_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- it should not be zero length.
 *		- if it starts with a non-zero byte, it must be NUL terminated
 *		  (FS object)
 *		- if it starts with zero, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
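
/*
 * A hedged sketch of what unix_mkname() above computes for the two
 * name forms (assuming sizeof(short) == sizeof(sa_family_t) == 2):
 *
 *	// Filesystem name "/tmp/x": the returned length is
 *	// strlen("/tmp/x") + 1 + sizeof(short) == 9, i.e. family
 *	// field + path + trailing NUL; *hashp is left untouched.
 *
 *	// Abstract name "\0foo" passed with len == 2 + 4 == 6: len is
 *	// returned unchanged and *hashp is a fold (unix_hash_fold())
 *	// of a checksum over the whole family + name byte string.
 */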

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			goto found;
	}
	s = NULL;
found:
	return s;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				POLLOUT | POLLWRNORM | POLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, an sk connected to a
 * peer may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this when
		 * the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What is the above comment talking about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct pid *old_pid = NULL;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
	put_pid(old_pid);
out:
	return err;
}
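
/*
 * Illustrative userspace counterpart of unix_listen() (a sketch under
 * assumptions, not kernel code); the path is hypothetical:
 *
 *	int s = socket(AF_UNIX, SOCK_STREAM, 0);
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	strcpy(a.sun_path, "/tmp/srv");
 *	bind(s, (struct sockaddr *)&a, sizeof(a));	// listen requires
 *							// a bound socket
 *	listen(s, 16);		// 16 becomes sk_max_ack_backlog
 *	int c = accept(s, NULL, NULL);
 */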

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->readlock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->readlock);

	return 0;
}


static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
};

static struct proto unix_proto = {
	.name	  = "UNIX",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
};

/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key:
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);
	lockdep_set_class(&sk->sk_receive_queue.lock,
			  &af_unix_sk_receive_queue_lock_key);

	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->readlock); /* single task reading lock */
	init_waitqueue_head(&u->peer_wait);
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW
		 *	though nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	atomic_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->readlock);
	return err;
}

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	err = mutex_lock_interruptible(&u->readlock);
	if (err)
		goto out;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		struct path path;
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			unix_release_addr(addr);
			goto out_up;
		}
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->readlock);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
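
/*
 * Illustrative userspace sketch of the AF_UNSPEC branch above (the
 * 1003.1g disconnect); dgram_fd is hypothetical, error handling elided:
 *
 *	struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *	connect(dgram_fd, &sa, sizeof(sa));	// drops the current peer
 */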

static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we do this after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. Connect-to-self and simultaneous connect attempts are
	   eliminated by checking socket state. other is TCP_LISTEN; if sk
	   is TCP_LISTEN we check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take the new sock and send info to the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
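
/*
 * Userspace view of unix_socketpair(), as a minimal sketch:
 *
 *	int sv[2];
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0) {
 *		// sv[0] and sv[1] are now mutual peers, both in
 *		// TCP_ESTABLISHED, each holding a ref on the other.
 *	}
 */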

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	u = unix_sk(sk);
	unix_state_lock(sk);
	if (!u->addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		*uaddr_len = sizeof(short);
	} else {
		struct unix_address *addr = u->addr;

		*uaddr_len = addr->len;
		memcpy(sunaddr, addr->name, *uaddr_len);
	}
	unix_state_unlock(sk);
	sock_put(sk);
out:
	return err;
}

static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;

	scm->fp = UNIXCB(skb).fp;
	UNIXCB(skb).fp = NULL;

	for (i = scm->fp->count-1; i >= 0; i--)
		unix_notinflight(scm->fp->fp[i]);
}

static void unix_destruct_scm(struct sk_buff *skb)
{
	struct scm_cookie scm;
	memset(&scm, 0, sizeof(scm));
	scm.pid = UNIXCB(skb).pid;
	if (UNIXCB(skb).fp)
		unix_detach_fds(&scm, skb);

	/* Alas, it calls VFS */
	/* So fscking what? fput() had been SMP-safe since the last Summer */
	scm_destroy(&scm);
	sock_wfree(skb);
}

#define MAX_RECURSION_LEVEL 4

static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	int i;
	unsigned char max_level = 0;
	int unix_sock_count = 0;

	for (i = scm->fp->count - 1; i >= 0; i--) {
		struct sock *sk = unix_get_socket(scm->fp->fp[i]);

		if (sk) {
			unix_sock_count++;
			max_level = max(max_level,
					unix_sk(sk)->recursion_level);
		}
	}
	if (unlikely(max_level > MAX_RECURSION_LEVEL))
		return -ETOOMANYREFS;

	/*
	 * Need to duplicate file references for the sake of garbage
	 * collection. Otherwise a socket in the fps might become a
	 * candidate for GC while the skb is not yet queued.
	 */
	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
	if (!UNIXCB(skb).fp)
		return -ENOMEM;

	if (unix_sock_count) {
		for (i = scm->fp->count - 1; i >= 0; i--)
			unix_inflight(scm->fp->fp[i]);
	}
	return max_level;
}
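
/*
 * For reference, a hedged userspace sketch of the SCM_RIGHTS fd
 * passing that unix_attach_fds() accounts for on the send side;
 * sock_fd and fd_to_pass are hypothetical:
 *
 *	char data = 'x';
 *	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&mh);
 *	cm->cmsg_level = SOL_SOCKET;
 *	cm->cmsg_type  = SCM_RIGHTS;
 *	cm->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cm), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &mh, 0);
 */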

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS
 * We include credentials if source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
	    !other->sk_socket ||
	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
		UNIXCB(skb).pid = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}
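
/*
 * Hedged userspace sketch of the receiving side that maybe_add_creds()
 * serves: asserting SOCK_PASSCRED so credentials arrive as
 * SCM_CREDENTIALS control messages (fd is hypothetical):
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	// subsequent recvmsg() calls yield SCM_CREDENTIALS cmsgs
 */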

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int max_level;
	int data_len = 0;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;
	max_level = err + 1;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	unix_state_lock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (sock_flag(other, SOCK_DEAD)) {
		/*
		 *	Check with 1003.1g - what should a
		 *	datagram error return?
		 */
		unix_state_unlock(other);
		sock_put(other);

		err = 0;
		unix_state_lock(sk);
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	if (unix_peer(other) != sk && unix_recvq_full(other)) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	skb_queue_tail(&other->sk_receive_queue, skb);
	if (max_level > unix_sk(other)->recursion_level)
		unix_sk(other)->recursion_level = max_level;
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))

static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int max_level;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		max_level = err + 1;
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		skb_queue_tail(&other->sk_receive_queue, skb);
		if (max_level > unix_sk(other)->recursion_level)
			unix_sk(other)->recursion_level = max_level;
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	return sent ? : err;
}

static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err = 0;
	bool send_sigpipe = true;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->readlock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			return err;
	}

	/* we must acquire readlock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		send_sigpipe = false;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		goto err_state_unlock;
	}

	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb) {
		if (newskb)
			skb = newskb;
		else
			goto alloc_skb;
	} else if (newskb) {
		/* this is the fast path: we do not necessarily need to
		 * call kfree_skb() here; even with newskb == NULL,
		 * consume_skb() does no harm
		 */
		consume_skb(newskb);
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	atomic_add(size, &sk->sk_wmem_alloc);

	if (newskb)
		__skb_queue_tail(&other->sk_receive_queue, newskb);

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->readlock);

	other->sk_data_ready(other);

	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->readlock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	return err;
}

static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	int err;
	struct sock *sk = sock->sk;

	err = sock_error(sk);
	if (err)
		return err;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}

static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	struct sock *sk = sock->sk;

	if (sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	return unix_dgram_recvmsg(sock, msg, size, flags);
}

static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	if (u->addr) {
		msg->msg_namelen = u->addr->len;
		memcpy(msg->msg_name, u->addr->name, u->addr->len);
	}
}

static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int err;
	int peeked, skip;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	err = mutex_lock_interruptible(&u->readlock);
	if (unlikely(err)) {
		/* recvmsg() in non blocking mode is supposed to return -EAGAIN
		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
		 */
		err = noblock ? -EAGAIN : -ERESTARTSYS;
		goto out;
	}

	skip = sk_peek_offset(sk, flags);

	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
	if (!skb) {
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	wake_up_interruptible_sync_poll(&u->peer_wait,
					POLLOUT | POLLWRNORM | POLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
out_unlock:
	mutex_unlock(&u->readlock);
out:
	return err;
}

/*
 *	Sleep until more data has arrived. But check for races..
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
		unix_state_unlock(sk);
		timeo = freezable_schedule_timeout(timeo);
		unix_state_lock(sk);

		if (sock_flag(sk, SOCK_DEAD))
			break;

		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}

static unsigned int unix_skb_len(const struct sk_buff *skb)
{
	return skb->len - UNIXCB(skb).consumed;
}

struct unix_stream_read_state {
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;
	struct pipe_inode_info *pipe;
	size_t size;
	int flags;
	unsigned int splice_flags;
};

static int unix_stream_read_generic(struct unix_stream_read_state *state)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	err = -EINVAL;
	if (sk->sk_state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags & MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_to_msg()
	 */
2059 err = mutex_lock_interruptible(&u->readlock);
2060 if (unlikely(err)) {
2061 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
2062 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2063 */
2064 err = noblock ? -EAGAIN : -ERESTARTSYS;
2065 goto out;
2066 }
2067
2068 if (flags & MSG_PEEK)
2069 skip = sk_peek_offset(sk, flags);
2070 else
2071 skip = 0;
2072
2073 do {
2074 int chunk;
2075 struct sk_buff *skb, *last;
2076
2077 unix_state_lock(sk);
2078 if (sock_flag(sk, SOCK_DEAD)) {
2079 err = -ECONNRESET;
2080 goto unlock;
2081 }
2082 last = skb = skb_peek(&sk->sk_receive_queue);
2083 last_len = last ? last->len : 0;
2084 again:
2085 if (skb == NULL) {
2086 unix_sk(sk)->recursion_level = 0;
2087 if (copied >= target)
2088 goto unlock;
2089
2090 /*
2091 * POSIX 1003.1g mandates this order.
2092 */
2093
2094 err = sock_error(sk);
2095 if (err)
2096 goto unlock;
2097 if (sk->sk_shutdown & RCV_SHUTDOWN)
2098 goto unlock;
2099
2100 unix_state_unlock(sk);
2101 err = -EAGAIN;
2102 if (!timeo)
2103 break;
2104 mutex_unlock(&u->readlock);
2105
2106 timeo = unix_stream_data_wait(sk, timeo, last,
2107 last_len);
2108
2109 if (signal_pending(current) ||
2110 mutex_lock_interruptible(&u->readlock)) {
2111 err = sock_intr_errno(timeo);
2112 goto out;
2113 }
2114
2115 continue;
2116 unlock:
2117 unix_state_unlock(sk);
2118 break;
2119 }
2120
2121 while (skip >= unix_skb_len(skb)) {
2122 skip -= unix_skb_len(skb);
2123 last = skb;
2124 last_len = skb->len;
2125 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2126 if (!skb)
2127 goto again;
2128 }
2129
2130 unix_state_unlock(sk);
2131
2132 if (check_creds) {
2133 /* Never glue messages from different writers */
2134 if ((UNIXCB(skb).pid != scm.pid) ||
2135 !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
2136 !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
2137 !unix_secdata_eq(&scm, skb))
2138 break;
2139 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2140 /* Copy credentials */
2141 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2142 unix_set_secdata(&scm, skb);
2143 check_creds = true;
2144 }
2145
2146 /* Copy address just once */
2147 if (state->msg && state->msg->msg_name) {
2148 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2149 state->msg->msg_name);
2150 unix_copy_addr(state->msg, skb->sk);
2151 sunaddr = NULL;
2152 }
2153
2154 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2155 chunk = state->recv_actor(skb, skip, chunk, state);
2156 if (chunk < 0) {
2157 if (copied == 0)
2158 copied = -EFAULT;
2159 break;
2160 }
2161 copied += chunk;
2162 size -= chunk;
2163
2164 /* Mark read part of skb as used */
2165 if (!(flags & MSG_PEEK)) {
2166 UNIXCB(skb).consumed += chunk;
2167
2168 sk_peek_offset_bwd(sk, chunk);
2169
2170 if (UNIXCB(skb).fp)
2171 unix_detach_fds(&scm, skb);
2172
2173 if (unix_skb_len(skb))
2174 break;
2175
2176 skb_unlink(skb, &sk->sk_receive_queue);
2177 consume_skb(skb);
2178
2179 if (scm.fp)
2180 break;
2181 } else {
2182 /* This is questionable; see the note in unix_dgram_recvmsg().
2183 */
2184 if (UNIXCB(skb).fp)
2185 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2186
2187 sk_peek_offset_fwd(sk, chunk);
2188
2189 if (UNIXCB(skb).fp)
2190 break;
2191
2192 skip = 0;
2193 last = skb;
2194 last_len = skb->len;
2195 unix_state_lock(sk);
2196 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2197 if (skb)
2198 goto again;
2199 unix_state_unlock(sk);
2200 break;
2201 }
2202 } while (size);
2203
2204 mutex_unlock(&u->readlock);
2205 if (state->msg)
2206 scm_recv(sock, state->msg, &scm, flags);
2207 else
2208 scm_destroy(&scm);
2209 out:
2210 return copied ? : err;
2211 }
2212
2213 static int unix_stream_read_actor(struct sk_buff *skb,
2214 int skip, int chunk,
2215 struct unix_stream_read_state *state)
2216 {
2217 int ret;
2218
2219 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2220 state->msg, chunk);
2221 return ret ?: chunk;
2222 }
2223
2224 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2225 size_t size, int flags)
2226 {
2227 struct unix_stream_read_state state = {
2228 .recv_actor = unix_stream_read_actor,
2229 .socket = sock,
2230 .msg = msg,
2231 .size = size,
2232 .flags = flags
2233 };
2234
2235 return unix_stream_read_generic(&state);
2236 }
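/*
 * Userspace view (hypothetical sketch, not kernel code): this function
 * services recv(2)/recvmsg(2) on a connected SOCK_STREAM AF_UNIX socket;
 * fd below is assumed to be such a descriptor:
 *
 *	char buf[256];
 *	ssize_t n = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
 *	// with nothing queued, n == -1 and errno == EAGAIN, matching
 *	// the -EAGAIN paths in unix_stream_read_generic()
 */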
2237
2238 static ssize_t skb_unix_socket_splice(struct sock *sk,
2239 struct pipe_inode_info *pipe,
2240 struct splice_pipe_desc *spd)
2241 {
2242 int ret;
2243 struct unix_sock *u = unix_sk(sk);
2244
2245 mutex_unlock(&u->readlock);
2246 ret = splice_to_pipe(pipe, spd);
2247 mutex_lock(&u->readlock);
2248
2249 return ret;
2250 }
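/*
 * Note: u->readlock is dropped around splice_to_pipe() because that call
 * may block waiting for pipe space; sleeping there while holding the
 * receive-side mutex would stall every other reader of this socket.
 */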
2251
2252 static int unix_stream_splice_actor(struct sk_buff *skb,
2253 int skip, int chunk,
2254 struct unix_stream_read_state *state)
2255 {
2256 return skb_splice_bits(skb, state->socket->sk,
2257 UNIXCB(skb).consumed + skip,
2258 state->pipe, chunk, state->splice_flags,
2259 skb_unix_socket_splice);
2260 }
2261
2262 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2263 struct pipe_inode_info *pipe,
2264 size_t size, unsigned int flags)
2265 {
2266 struct unix_stream_read_state state = {
2267 .recv_actor = unix_stream_splice_actor,
2268 .socket = sock,
2269 .pipe = pipe,
2270 .size = size,
2271 .splice_flags = flags,
2272 };
2273
2274 if (unlikely(*ppos))
2275 return -ESPIPE;
2276
2277 if (sock->file->f_flags & O_NONBLOCK ||
2278 flags & SPLICE_F_NONBLOCK)
2279 state.flags = MSG_DONTWAIT;
2280
2281 return unix_stream_read_generic(&state);
2282 }
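/*
 * Illustrative userspace sketch (not part of this file): move queued
 * stream data into a pipe without a round trip through user memory;
 * sock_fd is assumed to be a connected AF_UNIX stream socket:
 *
 *	int pfd[2];
 *	pipe(pfd);
 *	ssize_t n = splice(sock_fd, NULL, pfd[1], NULL, 4096,
 *			   SPLICE_F_NONBLOCK);
 *	// SPLICE_F_NONBLOCK (or O_NONBLOCK on the socket) becomes
 *	// MSG_DONTWAIT in state.flags above
 */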
2283
2284 static int unix_shutdown(struct socket *sock, int mode)
2285 {
2286 struct sock *sk = sock->sk;
2287 struct sock *other;
2288
2289 if (mode < SHUT_RD || mode > SHUT_RDWR)
2290 return -EINVAL;
2291 /* This maps:
2292 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2293 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2294 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2295 */
2296 ++mode;
2297
2298 unix_state_lock(sk);
2299 sk->sk_shutdown |= mode;
2300 other = unix_peer(sk);
2301 if (other)
2302 sock_hold(other);
2303 unix_state_unlock(sk);
2304 sk->sk_state_change(sk);
2305
2306 if (other &&
2307 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2308
2309 int peer_mode = 0;
2310
2311 if (mode&RCV_SHUTDOWN)
2312 peer_mode |= SEND_SHUTDOWN;
2313 if (mode&SEND_SHUTDOWN)
2314 peer_mode |= RCV_SHUTDOWN;
2315 unix_state_lock(other);
2316 other->sk_shutdown |= peer_mode;
2317 unix_state_unlock(other);
2318 other->sk_state_change(other);
2319 if (peer_mode == SHUTDOWN_MASK)
2320 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2321 else if (peer_mode & RCV_SHUTDOWN)
2322 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2323 }
2324 if (other)
2325 sock_put(other);
2326
2327 return 0;
2328 }
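/*
 * Hypothetical userspace sketch: a half-close on one end is mirrored onto
 * the peer, so the peer sees end-of-stream rather than an error:
 *
 *	shutdown(fd, SHUT_WR);	// SEND_SHUTDOWN here, RCV_SHUTDOWN on peer
 *	// peer: read() returns 0 (EOF) once its queue drains, and
 *	// poll() reports POLLIN | POLLRDHUP
 */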
2329
2330 long unix_inq_len(struct sock *sk)
2331 {
2332 struct sk_buff *skb;
2333 long amount = 0;
2334
2335 if (sk->sk_state == TCP_LISTEN)
2336 return -EINVAL;
2337
2338 spin_lock(&sk->sk_receive_queue.lock);
2339 if (sk->sk_type == SOCK_STREAM ||
2340 sk->sk_type == SOCK_SEQPACKET) {
2341 skb_queue_walk(&sk->sk_receive_queue, skb)
2342 amount += unix_skb_len(skb);
2343 } else {
2344 skb = skb_peek(&sk->sk_receive_queue);
2345 if (skb)
2346 amount = skb->len;
2347 }
2348 spin_unlock(&sk->sk_receive_queue.lock);
2349
2350 return amount;
2351 }
2352 EXPORT_SYMBOL_GPL(unix_inq_len);
2353
2354 long unix_outq_len(struct sock *sk)
2355 {
2356 return sk_wmem_alloc_get(sk);
2357 }
2358 EXPORT_SYMBOL_GPL(unix_outq_len);
2359
2360 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2361 {
2362 struct sock *sk = sock->sk;
2363 long amount = 0;
2364 int err;
2365
2366 switch (cmd) {
2367 case SIOCOUTQ:
2368 amount = unix_outq_len(sk);
2369 err = put_user(amount, (int __user *)arg);
2370 break;
2371 case SIOCINQ:
2372 amount = unix_inq_len(sk);
2373 if (amount < 0)
2374 err = amount;
2375 else
2376 err = put_user(amount, (int __user *)arg);
2377 break;
2378 default:
2379 err = -ENOIOCTLCMD;
2380 break;
2381 }
2382 return err;
2383 }
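/*
 * Illustrative use of the ioctls handled above (userspace sketch; needs
 * <sys/ioctl.h> and <linux/sockios.h>):
 *
 *	int pending;
 *	ioctl(fd, SIOCINQ, &pending);	// unread bytes queued on fd
 *	ioctl(fd, SIOCOUTQ, &pending);	// bytes written but not yet read
 */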
2384
2385 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2386 {
2387 struct sock *sk = sock->sk;
2388 unsigned int mask;
2389
2390 sock_poll_wait(file, sk_sleep(sk), wait);
2391 mask = 0;
2392
2393 /* exceptional events? */
2394 if (sk->sk_err)
2395 mask |= POLLERR;
2396 if (sk->sk_shutdown == SHUTDOWN_MASK)
2397 mask |= POLLHUP;
2398 if (sk->sk_shutdown & RCV_SHUTDOWN)
2399 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2400
2401 /* readable? */
2402 if (!skb_queue_empty(&sk->sk_receive_queue))
2403 mask |= POLLIN | POLLRDNORM;
2404
2405 /* Connection-based sockets need to check for termination and startup */
2406 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2407 sk->sk_state == TCP_CLOSE)
2408 mask |= POLLHUP;
2409
2410 /*
2411  * We also set writable when the other side has shut down the
2412  * connection. This prevents stuck sockets.
2413 */
2414 if (unix_writable(sk))
2415 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2416
2417 return mask;
2418 }
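/*
 * Userspace sketch (illustrative): the mask assembled above is what
 * poll(2) reports for a connected stream socket:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *	poll(&pfd, 1, -1);
 *	// pfd.revents gains POLLHUP once both directions are shut
 *	// down (sk_shutdown == SHUTDOWN_MASK) or the connection closed
 */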
2419
2420 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2421 poll_table *wait)
2422 {
2423 struct sock *sk = sock->sk, *other;
2424 unsigned int mask, writable;
2425
2426 sock_poll_wait(file, sk_sleep(sk), wait);
2427 mask = 0;
2428
2429 /* exceptional events? */
2430 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2431 mask |= POLLERR |
2432 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2433
2434 if (sk->sk_shutdown & RCV_SHUTDOWN)
2435 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2436 if (sk->sk_shutdown == SHUTDOWN_MASK)
2437 mask |= POLLHUP;
2438
2439 /* readable? */
2440 if (!skb_queue_empty(&sk->sk_receive_queue))
2441 mask |= POLLIN | POLLRDNORM;
2442
2443 /* Connection-based sockets need to check for termination and startup */
2444 if (sk->sk_type == SOCK_SEQPACKET) {
2445 if (sk->sk_state == TCP_CLOSE)
2446 mask |= POLLHUP;
2447 /* connection hasn't started yet? */
2448 if (sk->sk_state == TCP_SYN_SENT)
2449 return mask;
2450 }
2451
2452 /* No write status requested, avoid expensive OUT tests. */
2453 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2454 return mask;
2455
2456 writable = unix_writable(sk);
2457 other = unix_peer_get(sk);
2458 if (other) {
2459 if (unix_peer(other) != sk) {
2460 sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2461 if (unix_recvq_full(other))
2462 writable = 0;
2463 }
2464 sock_put(other);
2465 }
2466
2467 if (writable)
2468 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2469 else
2470 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2471
2472 return mask;
2473 }
2474
2475 #ifdef CONFIG_PROC_FS
2476
2477 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2478
2479 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2480 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2481 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
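/*
 * Example encoding (assuming a 64-bit long and UNIX_HASH_BITS == 8):
 * BUCKET_SPACE == 64 - 9 - 1 == 54, so set_bucket_offset(3, 7) packs
 * bucket 3 into the top bits of *pos and offset 7 into the low 54 bits;
 * get_bucket() and get_offset() invert the packing.
 */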
2482
2483 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2484 {
2485 unsigned long offset = get_offset(*pos);
2486 unsigned long bucket = get_bucket(*pos);
2487 struct sock *sk;
2488 unsigned long count = 0;
2489
2490 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2491 if (sock_net(sk) != seq_file_net(seq))
2492 continue;
2493 if (++count == offset)
2494 break;
2495 }
2496
2497 return sk;
2498 }
2499
2500 static struct sock *unix_next_socket(struct seq_file *seq,
2501 struct sock *sk,
2502 loff_t *pos)
2503 {
2504 unsigned long bucket;
2505
2506 while (sk > (struct sock *)SEQ_START_TOKEN) {
2507 sk = sk_next(sk);
2508 if (!sk)
2509 goto next_bucket;
2510 if (sock_net(sk) == seq_file_net(seq))
2511 return sk;
2512 }
2513
2514 do {
2515 sk = unix_from_bucket(seq, pos);
2516 if (sk)
2517 return sk;
2518
2519 next_bucket:
2520 bucket = get_bucket(*pos) + 1;
2521 *pos = set_bucket_offset(bucket, 1);
2522 } while (bucket < ARRAY_SIZE(unix_socket_table));
2523
2524 return NULL;
2525 }
2526
2527 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2528 __acquires(unix_table_lock)
2529 {
2530 spin_lock(&unix_table_lock);
2531
2532 if (!*pos)
2533 return SEQ_START_TOKEN;
2534
2535 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2536 return NULL;
2537
2538 return unix_next_socket(seq, NULL, pos);
2539 }
2540
2541 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2542 {
2543 ++*pos;
2544 return unix_next_socket(seq, v, pos);
2545 }
2546
2547 static void unix_seq_stop(struct seq_file *seq, void *v)
2548 __releases(unix_table_lock)
2549 {
2550 spin_unlock(&unix_table_lock);
2551 }
2552
2553 static int unix_seq_show(struct seq_file *seq, void *v)
2554 {
2555
2556 if (v == SEQ_START_TOKEN)
2557 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2558 "Inode Path\n");
2559 else {
2560 struct sock *s = v;
2561 struct unix_sock *u = unix_sk(s);
2562 unix_state_lock(s);
2563
2564 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2565 s,
2566 atomic_read(&s->sk_refcnt),
2567 0,
2568 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2569 s->sk_type,
2570 s->sk_socket ?
2571 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2572 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2573 sock_i_ino(s));
2574
2575 if (u->addr) {
2576 int i, len;
2577 seq_putc(seq, ' ');
2578
2579 i = 0;
2580 len = u->addr->len - sizeof(short);
2581 if (!UNIX_ABSTRACT(s))
2582 len--;
2583 else {
2584 seq_putc(seq, '@');
2585 i++;
2586 }
2587 for ( ; i < len; i++)
2588 seq_putc(seq, u->addr->name->sun_path[i]);
2589 }
2590 unix_state_unlock(s);
2591 seq_putc(seq, '\n');
2592 }
2593
2594 return 0;
2595 }
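/*
 * Example of a resulting /proc/net/unix line (illustrative values for a
 * listening pathname socket; abstract names would show a leading '@'):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff8800b8d7a000: 00000002 00000000 00010000 0001 01 12345 /run/demo.sock
 */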
2596
2597 static const struct seq_operations unix_seq_ops = {
2598 .start = unix_seq_start,
2599 .next = unix_seq_next,
2600 .stop = unix_seq_stop,
2601 .show = unix_seq_show,
2602 };
2603
2604 static int unix_seq_open(struct inode *inode, struct file *file)
2605 {
2606 return seq_open_net(inode, file, &unix_seq_ops,
2607 sizeof(struct seq_net_private));
2608 }
2609
2610 static const struct file_operations unix_seq_fops = {
2611 .owner = THIS_MODULE,
2612 .open = unix_seq_open,
2613 .read = seq_read,
2614 .llseek = seq_lseek,
2615 .release = seq_release_net,
2616 };
2617
2618 #endif
2619
2620 static const struct net_proto_family unix_family_ops = {
2621 .family = PF_UNIX,
2622 .create = unix_create,
2623 .owner = THIS_MODULE,
2624 };
2625
2626
2627 static int __net_init unix_net_init(struct net *net)
2628 {
2629 int error = -ENOMEM;
2630
2631 net->unx.sysctl_max_dgram_qlen = 10;
2632 if (unix_sysctl_register(net))
2633 goto out;
2634
2635 #ifdef CONFIG_PROC_FS
2636 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2637 unix_sysctl_unregister(net);
2638 goto out;
2639 }
2640 #endif
2641 error = 0;
2642 out:
2643 return error;
2644 }
2645
2646 static void __net_exit unix_net_exit(struct net *net)
2647 {
2648 unix_sysctl_unregister(net);
2649 remove_proc_entry("unix", net->proc_net);
2650 }
2651
2652 static struct pernet_operations unix_net_ops = {
2653 .init = unix_net_init,
2654 .exit = unix_net_exit,
2655 };
2656
2657 static int __init af_unix_init(void)
2658 {
2659 int rc = -1;
2660
2661 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2662
2663 rc = proto_register(&unix_proto, 1);
2664 if (rc != 0) {
2665 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2666 goto out;
2667 }
2668
2669 sock_register(&unix_family_ops);
2670 register_pernet_subsys(&unix_net_ops);
2671 out:
2672 return rc;
2673 }
2674
2675 static void __exit af_unix_exit(void)
2676 {
2677 sock_unregister(PF_UNIX);
2678 proto_unregister(&unix_proto);
2679 unregister_pernet_subsys(&unix_net_ops);
2680 }
2681
2682 /* Earlier than device_initcall() so that other drivers invoking
2683 request_module() don't end up in a loop when modprobe tries
2684 to use a UNIX socket. But later than subsys_initcall() because
2685    we depend on infrastructure initialised there. */
2686 fs_initcall(af_unix_init);
2687 module_exit(af_unix_exit);
2688
2689 MODULE_LICENSE("GPL");
2690 MODULE_ALIAS_NETPROTO(PF_UNIX);