// SPDX-License-Identifier: GPL-2.0-only
/*
 * common code for virtio vsock
 *
 * Copyright (C) 2013-2015 Red Hat, Inc.
 * Author: Asias He <asias@redhat.com>
 *         Stefan Hajnoczi <stefanha@redhat.com>
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>

#include <net/sock.h>
#include <net/af_vsock.h>

#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>
/* How long to wait for graceful shutdown of a connection */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)

/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN  128
static const struct virtio_transport *
virtio_transport_get_ops(struct vsock_sock *vsk)
{
	const struct vsock_transport *t = vsock_core_get_transport(vsk);

	return container_of(t, struct virtio_transport, transport);
}
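
/* Helper for the TX path: allocate a packet and fill the wire header from
 * the packet info.  Header fields are stored little-endian; for data
 * packets the payload is copied out of the caller's message iterator.
 */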
static struct virtio_vsock_pkt *
virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
			   size_t len,
			   u32 src_cid,
			   u32 src_port,
			   u32 dst_cid,
			   u32 dst_port)
{
	struct virtio_vsock_pkt *pkt;
	int err;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return NULL;

	pkt->hdr.type		= cpu_to_le16(info->type);
	pkt->hdr.op		= cpu_to_le16(info->op);
	pkt->hdr.src_cid	= cpu_to_le64(src_cid);
	pkt->hdr.dst_cid	= cpu_to_le64(dst_cid);
	pkt->hdr.src_port	= cpu_to_le32(src_port);
	pkt->hdr.dst_port	= cpu_to_le32(dst_port);
	pkt->hdr.flags		= cpu_to_le32(info->flags);
	pkt->len		= len;
	pkt->hdr.len		= cpu_to_le32(len);
	pkt->reply		= info->reply;

	if (info->msg && len > 0) {
		pkt->buf = kmalloc(len, GFP_KERNEL);
		if (!pkt->buf)
			goto out_pkt;

		pkt->buf_len = len;

		err = memcpy_from_msg(pkt->buf, info->msg, len);
		if (err)
			goto out;

		if (msg_data_left(info->msg) == 0 &&
		    info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
			pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);

			if (info->msg->msg_flags & MSG_EOR)
				pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
		}
	}

	trace_virtio_transport_alloc_pkt(src_cid, src_port,
					 dst_cid, dst_port,
					 len,
					 info->type,
					 info->op,
					 info->flags);

	return pkt;

out:
	kfree(pkt->buf);
out_pkt:
	kfree(pkt);
	return NULL;
}
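
/* Packet capture (vsockmon) support: build an skb that carries the vsockmon
 * header, the raw virtio header and the payload, so that
 * virtio_transport_deliver_tap_pkt() can hand it to vsock tap devices.
 */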
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
	struct virtio_vsock_pkt *pkt = opaque;
	struct af_vsockmon_hdr *hdr;
	struct sk_buff *skb;
	size_t payload_len;
	void *payload_buf;

	/* A packet could be split to fit the RX buffer, so we can retrieve
	 * the payload length from the header and the buffer pointer taking
	 * care of the offset in the original packet.
	 */
	payload_len = le32_to_cpu(pkt->hdr.len);
	payload_buf = pkt->buf + pkt->off;

	skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
			GFP_ATOMIC);
	if (!skb)
		return NULL;

	hdr = skb_put(skb, sizeof(*hdr));

	/* pkt->hdr is little-endian so no need to byteswap here */
	hdr->src_cid = pkt->hdr.src_cid;
	hdr->src_port = pkt->hdr.src_port;
	hdr->dst_cid = pkt->hdr.dst_cid;
	hdr->dst_port = pkt->hdr.dst_port;

	hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
	hdr->len = cpu_to_le16(sizeof(pkt->hdr));
	memset(hdr->reserved, 0, sizeof(hdr->reserved));

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_REQUEST:
	case VIRTIO_VSOCK_OP_RESPONSE:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
		break;
	case VIRTIO_VSOCK_OP_RST:
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
		break;
	case VIRTIO_VSOCK_OP_RW:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
		break;
	default:
		hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
		break;
	}

	skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));

	if (payload_len)
		skb_put_data(skb, payload_buf, payload_len);

	return skb;
}
void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
{
	if (pkt->tap_delivered)
		return;

	vsock_deliver_tap(virtio_transport_build_skb, pkt);
	pkt->tap_delivered = true;
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
static u16 virtio_transport_get_type(struct sock *sk)
{
	if (sk->sk_type == SOCK_STREAM)
		return VIRTIO_VSOCK_TYPE_STREAM;
	else
		return VIRTIO_VSOCK_TYPE_SEQPACKET;
}
/* This function can only be used on connecting/connected sockets,
 * since a socket assigned to a transport is required.
 *
 * Do not use on listener sockets!
 */
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
					  struct virtio_vsock_pkt_info *info)
{
	u32 src_cid, src_port, dst_cid, dst_port;
	const struct virtio_transport *t_ops;
	struct virtio_vsock_sock *vvs;
	struct virtio_vsock_pkt *pkt;
	u32 pkt_len = info->pkt_len;

	info->type = virtio_transport_get_type(sk_vsock(vsk));

	t_ops = virtio_transport_get_ops(vsk);
	if (unlikely(!t_ops))
		return -EFAULT;

	src_cid = t_ops->transport.get_local_cid();
	src_port = vsk->local_addr.svm_port;
	if (!info->remote_cid) {
		dst_cid	= vsk->remote_addr.svm_cid;
		dst_port = vsk->remote_addr.svm_port;
	} else {
		dst_cid = info->remote_cid;
		dst_port = info->remote_port;
	}

	vvs = vsk->trans;

	/* we can send less than pkt_len bytes */
	if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;

	/* virtio_transport_get_credit might return less than pkt_len credit */
	pkt_len = virtio_transport_get_credit(vvs, pkt_len);

	/* Do not send zero length OP_RW pkt */
	if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
		return pkt_len;

	pkt = virtio_transport_alloc_pkt(info, pkt_len,
					 src_cid, src_port,
					 dst_cid, dst_port);
	if (!pkt) {
		virtio_transport_put_credit(vvs, pkt_len);
		return -ENOMEM;
	}

	virtio_transport_inc_tx_pkt(vvs, pkt);

	return t_ops->send_pkt(pkt);
}
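
/* Receive-side accounting: rx_bytes is the amount of our advertised buffer
 * (buf_alloc) currently occupied by queued packets, while fwd_cnt counts
 * bytes already delivered to the application.  Both values are echoed back
 * to the peer in every transmitted header (see virtio_transport_inc_tx_pkt()).
 */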
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
		return false;

	vvs->rx_bytes += pkt->len;
	return true;
}

static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	vvs->rx_bytes -= pkt->len;
	vvs->fwd_cnt += pkt->len;
}
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
{
	spin_lock_bh(&vvs->rx_lock);
	vvs->last_fwd_cnt = vvs->fwd_cnt;
	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
	spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
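
/* Credit-based flow control on the transmit side: we may have at most
 * peer_buf_alloc - (tx_cnt - peer_fwd_cnt) bytes in flight, i.e. the peer's
 * advertised buffer minus whatever it has not yet forwarded to its
 * application.
 */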
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	u32 ret;

	spin_lock_bh(&vvs->tx_lock);
	ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (ret > credit)
		ret = credit;
	vvs->tx_cnt += ret;
	spin_unlock_bh(&vvs->tx_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);
void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	spin_lock_bh(&vvs->tx_lock);
	vvs->tx_cnt -= credit;
	spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
static ssize_t
virtio_transport_stream_do_peek(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	size_t bytes, total = 0, off;
	int err = -EFAULT;

	spin_lock_bh(&vvs->rx_lock);

	list_for_each_entry(pkt, &vvs->rx_queue, list) {
		off = pkt->off;

		if (total == len)
			break;

		while (total < len && off < pkt->len) {
			bytes = len - total;
			if (bytes > pkt->len - off)
				bytes = pkt->len - off;

			/* sk_lock is held by caller so no one else can dequeue.
			 * Unlock rx_lock since memcpy_to_msg() may sleep.
			 */
			spin_unlock_bh(&vvs->rx_lock);

			err = memcpy_to_msg(msg, pkt->buf + off, bytes);
			if (err)
				goto out;

			spin_lock_bh(&vvs->rx_lock);

			total += bytes;
			off += bytes;
		}
	}

	spin_unlock_bh(&vvs->rx_lock);

	return total;

out:
	if (total)
		err = total;
	return err;
}
static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	size_t bytes, total = 0;
	u32 free_space;
	int err = -EFAULT;

	spin_lock_bh(&vvs->rx_lock);
	while (total < len && !list_empty(&vvs->rx_queue)) {
		pkt = list_first_entry(&vvs->rx_queue,
				       struct virtio_vsock_pkt, list);

		bytes = len - total;
		if (bytes > pkt->len - pkt->off)
			bytes = pkt->len - pkt->off;

		/* sk_lock is held by caller so no one else can dequeue.
		 * Unlock rx_lock since memcpy_to_msg() may sleep.
		 */
		spin_unlock_bh(&vvs->rx_lock);

		err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
		if (err)
			goto out;

		spin_lock_bh(&vvs->rx_lock);

		total += bytes;
		pkt->off += bytes;
		if (pkt->off == pkt->len) {
			virtio_transport_dec_rx_pkt(vvs, pkt);
			list_del(&pkt->list);
			virtio_transport_free_pkt(pkt);
		}
	}

	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);

	spin_unlock_bh(&vvs->rx_lock);

	/* To reduce the number of credit update messages,
	 * don't update credits as long as lots of space is available.
	 * Note: the limit chosen here is arbitrary. Setting the limit
	 * too high causes extra messages. Too low causes transmitter
	 * stalls. As stalls are in theory more expensive than extra
	 * messages, we set the limit to a high value. TODO: experiment
	 * with different values.
	 */
	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		virtio_transport_send_credit_update(vsk);

	return total;

out:
	if (total)
		err = total;
	return err;
}
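
/* SEQPACKET receive path: fragments of the current message are copied until
 * a packet carrying VIRTIO_VSOCK_SEQ_EOM is dequeued; MSG_EOR is reported to
 * the caller if that final fragment also has VIRTIO_VSOCK_SEQ_EOR set.
 */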
static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
						 struct msghdr *msg,
						 int flags)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt;
	int dequeued_len = 0;
	size_t user_buf_len = msg_data_left(msg);
	bool msg_ready = false;

	spin_lock_bh(&vvs->rx_lock);

	if (vvs->msg_count == 0) {
		spin_unlock_bh(&vvs->rx_lock);
		return 0;
	}

	while (!msg_ready) {
		pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);

		if (dequeued_len >= 0) {
			size_t pkt_len;
			size_t bytes_to_copy;

			pkt_len = (size_t)le32_to_cpu(pkt->hdr.len);
			bytes_to_copy = min(user_buf_len, pkt_len);

			if (bytes_to_copy) {
				int err;

				/* sk_lock is held by caller so no one else can dequeue.
				 * Unlock rx_lock since memcpy_to_msg() may sleep.
				 */
				spin_unlock_bh(&vvs->rx_lock);

				err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
				if (err) {
					/* Copy of message failed. Rest of
					 * fragments will be freed without copy.
					 */
					dequeued_len = err;
				} else {
					user_buf_len -= bytes_to_copy;
				}

				spin_lock_bh(&vvs->rx_lock);
			}

			if (dequeued_len >= 0)
				dequeued_len += pkt_len;
		}

		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
			msg_ready = true;
			vvs->msg_count--;

			if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
				msg->msg_flags |= MSG_EOR;
		}

		virtio_transport_dec_rx_pkt(vvs, pkt);
		list_del(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}

	spin_unlock_bh(&vvs->rx_lock);

	virtio_transport_send_credit_update(vsk);

	return dequeued_len;
}
ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len, int flags)
{
	if (flags & MSG_PEEK)
		return virtio_transport_stream_do_peek(vsk, msg, len);
	else
		return virtio_transport_stream_do_dequeue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
ssize_t
virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   int flags)
{
	if (flags & MSG_PEEK)
		return -EOPNOTSUPP;

	return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
int
virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
				   struct msghdr *msg,
				   size_t len)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	spin_lock_bh(&vvs->tx_lock);

	if (len > vvs->peer_buf_alloc) {
		spin_unlock_bh(&vvs->tx_lock);
		return -EMSGSIZE;
	}

	spin_unlock_bh(&vvs->tx_lock);

	return virtio_transport_stream_enqueue(vsk, msg, len);
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);
int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
			       struct msghdr *msg,
			       size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->rx_lock);
	bytes = vvs->rx_bytes;
	spin_unlock_bh(&vvs->rx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	u32 msg_count;

	spin_lock_bh(&vvs->rx_lock);
	msg_count = vvs->msg_count;
	spin_unlock_bh(&vvs->rx_lock);

	return msg_count;
}
EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (bytes < 0)
		bytes = 0;

	return bytes;
}
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 bytes;

	spin_lock_bh(&vvs->tx_lock);
	bytes = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);

	return bytes;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
				    struct vsock_sock *psk)
{
	struct virtio_vsock_sock *vvs;

	vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
	if (!vvs)
		return -ENOMEM;

	vsk->trans = vvs;
	vvs->vsk = vsk;
	if (psk && psk->trans) {
		struct virtio_vsock_sock *ptrans = psk->trans;

		vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
	}

	if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
		vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = vsk->buffer_size;

	spin_lock_init(&vvs->rx_lock);
	spin_lock_init(&vvs->tx_lock);
	INIT_LIST_HEAD(&vvs->rx_queue);

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
/* sk_lock held by the caller */
void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
		*val = VIRTIO_VSOCK_MAX_BUF_SIZE;

	vvs->buf_alloc = *val;

	virtio_transport_send_credit_update(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
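
/* The notify hooks below need no transport-specific bookkeeping: the poll
 * helpers only report whether data or space is currently available, and the
 * remaining recv/send callbacks are no-ops.
 */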
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
				size_t target,
				bool *data_ready_now)
{
	if (vsock_stream_has_data(vsk))
		*data_ready_now = true;
	else
		*data_ready_now = false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);

int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
				 size_t target,
				 bool *space_avail_now)
{
	s64 free_space;

	free_space = vsock_stream_has_space(vsk);
	if (free_space > 0)
		*space_avail_now = true;
	else if (free_space == 0)
		*space_avail_now = false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);
int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);

int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);

int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
	size_t target, struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);

int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
	size_t target, ssize_t copied, bool data_read,
	struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);

int virtio_transport_notify_send_init(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);

int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);

int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);

int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
	ssize_t written, struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);
u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
	return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);

bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);

bool virtio_transport_stream_allow(u32 cid, u32 port)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
int virtio_transport_dgram_bind(struct vsock_sock *vsk,
				struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);

bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
	return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
int virtio_transport_connect(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_REQUEST,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);
int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_SHUTDOWN,
		.flags = (mode & RCV_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
			 (mode & SEND_SHUTDOWN ?
			  VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);
int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
			       struct sockaddr_vm *remote_addr,
			       struct msghdr *msg,
			       size_t dgram_len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RW,
		.msg = msg,
		.pkt_len = len,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);
void virtio_transport_destruct(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	kfree(vvs);
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
static int virtio_transport_reset(struct vsock_sock *vsk,
				  struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.reply = !!pkt,
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	return virtio_transport_send_pkt_info(vsk, &info);
}
/* Normally packets are associated with a socket.  There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 */
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
					  struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt *reply;
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(pkt->hdr.type),
		.reply = true,
	};

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	reply = virtio_transport_alloc_pkt(&info, 0,
					   le64_to_cpu(pkt->hdr.dst_cid),
					   le32_to_cpu(pkt->hdr.dst_port),
					   le64_to_cpu(pkt->hdr.src_cid),
					   le32_to_cpu(pkt->hdr.src_port));
	if (!reply)
		return -ENOMEM;

	if (!t) {
		virtio_transport_free_pkt(reply);
		return -ENOTCONN;
	}

	return t->send_pkt(reply);
}
/* This function should be called with sk_lock held and SOCK_DONE set */
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	struct virtio_vsock_pkt *pkt, *tmp;

	/* We don't need to take rx_lock, as the socket is closing and we are
	 * removing it.
	 */
	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
		list_del(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}

	vsock_remove_sock(vsk);
}
static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
	if (timeout) {
		DEFINE_WAIT_FUNC(wait, woken_wake_function);

		add_wait_queue(sk_sleep(sk), &wait);

		do {
			if (sk_wait_event(sk, &timeout,
					  sock_flag(sk, SOCK_DONE), &wait))
				break;
		} while (!signal_pending(current) && timeout);

		remove_wait_queue(sk_sleep(sk), &wait);
	}
}
static void virtio_transport_do_close(struct vsock_sock *vsk,
				      bool cancel_timeout)
{
	struct sock *sk = sk_vsock(vsk);

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	if (vsock_stream_has_data(vsk) <= 0)
		sk->sk_state = TCP_CLOSING;
	sk->sk_state_change(sk);

	if (vsk->close_work_scheduled &&
	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
		vsk->close_work_scheduled = false;

		virtio_transport_remove_sock(vsk);

		/* Release refcnt obtained when we scheduled the timeout */
		sock_put(sk);
	}
}
static void virtio_transport_close_timeout(struct work_struct *work)
{
	struct vsock_sock *vsk =
		container_of(work, struct vsock_sock, close_work.work);
	struct sock *sk = sk_vsock(vsk);

	sock_hold(sk);
	lock_sock(sk);

	if (!sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset(vsk, NULL);

		virtio_transport_do_close(vsk, false);
	}

	vsk->close_work_scheduled = false;

	release_sock(sk);
	sock_put(sk);
}
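
/* Connection teardown: virtio_transport_close() sends SHUTDOWN, optionally
 * lingers, and then arms close_work so that a peer which never answers is
 * reset by virtio_transport_close_timeout() after VSOCK_CLOSE_TIMEOUT.
 */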
/* User context, vsk->sk is locked */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;

	if (!(sk->sk_state == TCP_ESTABLISHED ||
	      sk->sk_state == TCP_CLOSING))
		return true;

	/* Already received SHUTDOWN from peer, reply with RST */
	if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
		(void)virtio_transport_reset(vsk, NULL);
		return true;
	}

	if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);

	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
		virtio_transport_wait_close(sk, sk->sk_lingertime);

	if (sock_flag(sk, SOCK_DONE)) {
		return true;
	}

	sock_hold(sk);
	INIT_DELAYED_WORK(&vsk->close_work,
			  virtio_transport_close_timeout);
	vsk->close_work_scheduled = true;
	schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
	return false;
}
void virtio_transport_release(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;
	bool remove_sock = true;

	if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
		remove_sock = virtio_transport_close(vsk);

	if (remove_sock) {
		sock_set_flag(sk, SOCK_DONE);
		virtio_transport_remove_sock(vsk);
	}
}
EXPORT_SYMBOL_GPL(virtio_transport_release);
static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int err;
	int skerr;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		sk->sk_state = TCP_ESTABLISHED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_INVALID:
		break;
	case VIRTIO_VSOCK_OP_RST:
		skerr = ECONNRESET;
		err = 0;
		goto destroy;
	default:
		skerr = EPROTO;
		err = -EINVAL;
		goto destroy;
	}

	return 0;

destroy:
	virtio_transport_reset(vsk, pkt);
	sk->sk_state = TCP_CLOSE;
	sk->sk_err = skerr;
	sk_error_report(sk);
	return err;
}
static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
			      struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool can_enqueue, free_pkt = false;

	pkt->len = le32_to_cpu(pkt->hdr.len);
	pkt->off = 0;

	spin_lock_bh(&vvs->rx_lock);

	can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
	if (!can_enqueue) {
		free_pkt = true;
		goto out;
	}

	if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
		vvs->msg_count++;

	/* Try to copy small packets into the buffer of last packet queued,
	 * to avoid wasting memory queueing the entire buffer with a small
	 * payload.
	 */
	if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
		struct virtio_vsock_pkt *last_pkt;

		last_pkt = list_last_entry(&vvs->rx_queue,
					   struct virtio_vsock_pkt, list);

		/* If there is space in the last packet queued, we copy the
		 * new packet in its buffer. We avoid this if the last packet
		 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
		 * delimiter of SEQPACKET message, so 'pkt' is the first packet
		 * of a new message.
		 */
		if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
		    !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
			       pkt->len);
			last_pkt->len += pkt->len;
			free_pkt = true;
			last_pkt->hdr.flags |= pkt->hdr.flags;
			goto out;
		}
	}

	list_add_tail(&pkt->list, &vvs->rx_queue);

out:
	spin_unlock_bh(&vvs->rx_lock);
	if (free_pkt)
		virtio_transport_free_pkt(pkt);
}
static int
virtio_transport_recv_connected(struct sock *sk,
				struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int err = 0;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RW:
		virtio_transport_recv_enqueue(vsk, pkt);
		sk->sk_data_ready(sk);
		return err;
	case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
		virtio_transport_send_credit_update(vsk);
		break;
	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		sk->sk_write_space(sk);
		break;
	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;
		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
		    vsock_stream_has_data(vsk) <= 0 &&
		    !sock_flag(sk, SOCK_DONE)) {
			(void)virtio_transport_reset(vsk, NULL);
			virtio_transport_do_close(vsk, true);
		}
		if (le32_to_cpu(pkt->hdr.flags))
			sk->sk_state_change(sk);
		break;
	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;
	default:
		err = -EINVAL;
		break;
	}

	virtio_transport_free_pkt(pkt);
	return err;
}
static void
virtio_transport_recv_disconnecting(struct sock *sk,
				    struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		virtio_transport_do_close(vsk, true);
}
static int
virtio_transport_send_response(struct vsock_sock *vsk,
			       struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RESPONSE,
		.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
		.remote_port = le32_to_cpu(pkt->hdr.src_port),
		.reply = true,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
static bool virtio_transport_space_update(struct sock *sk,
					  struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool space_available;

	/* Listener sockets are not associated with any transport, so we are
	 * not able to take the state to see if there is space available in the
	 * remote peer, but since they are only used to receive requests, we
	 * can assume that there is always space available in the other peer.
	 */
	if (!vvs)
		return true;

	/* buf_alloc and fwd_cnt are always included in the hdr */
	spin_lock_bh(&vvs->tx_lock);
	vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
	space_available = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);
	return space_available;
}
/* Handle server socket */
static int
virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
			     struct virtio_transport *t)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;
	int ret;

	if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset_no_sock(t, pkt);
		return -EINVAL;
	}

	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset_no_sock(t, pkt);
		return -ENOMEM;
	}

	child = vsock_create_connected(sk);
	if (!child) {
		virtio_transport_reset_no_sock(t, pkt);
		return -ENOMEM;
	}

	sk_acceptq_added(sk);

	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = TCP_ESTABLISHED;

	vchild = vsock_sk(child);
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));

	ret = vsock_assign_transport(vchild, vsk);
	/* Transport assigned (looking at remote_addr) must be the same
	 * where we received the request.
	 */
	if (ret || vchild->transport != &t->transport) {
		release_sock(child);
		virtio_transport_reset_no_sock(t, pkt);
		sock_put(child);
		return ret;
	}

	if (virtio_transport_space_update(child, pkt))
		child->sk_write_space(child);

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, pkt);

	release_sock(child);

	sk->sk_data_ready(sk);
	return 0;
}
static bool virtio_transport_valid_type(u16 type)
{
	return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
	       (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
}
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 */
void virtio_transport_recv_pkt(struct virtio_transport *t,
			       struct virtio_vsock_pkt *pkt)
{
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));
	vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(pkt->hdr.len),
					le16_to_cpu(pkt->hdr.type),
					le16_to_cpu(pkt->hdr.op),
					le32_to_cpu(pkt->hdr.flags),
					le32_to_cpu(pkt->hdr.buf_alloc),
					le32_to_cpu(pkt->hdr.fwd_cnt));

	if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
		(void)virtio_transport_reset_no_sock(t, pkt);
		goto free_pkt;
	}

	/* The socket must be in connected or bound table
	 * otherwise send reset back
	 */
	sk = vsock_find_connected_socket(&src, &dst);
	if (!sk) {
		sk = vsock_find_bound_socket(&dst);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(t, pkt);
			goto free_pkt;
		}
	}

	if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) {
		(void)virtio_transport_reset_no_sock(t, pkt);
		sock_put(sk);
		goto free_pkt;
	}

	vsk = vsock_sk(sk);

	lock_sock(sk);

	/* Check if sk has been closed before lock_sock */
	if (sock_flag(sk, SOCK_DONE)) {
		(void)virtio_transport_reset_no_sock(t, pkt);
		release_sock(sk);
		sock_put(sk);
		goto free_pkt;
	}

	space_available = virtio_transport_space_update(sk, pkt);

	/* Update CID in case it has changed after a transport reset event */
	vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	switch (sk->sk_state) {
	case TCP_LISTEN:
		virtio_transport_recv_listen(sk, pkt, t);
		virtio_transport_free_pkt(pkt);
		break;
	case TCP_SYN_SENT:
		virtio_transport_recv_connecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	case TCP_ESTABLISHED:
		virtio_transport_recv_connected(sk, pkt);
		break;
	case TCP_CLOSING:
		virtio_transport_recv_disconnecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	default:
		(void)virtio_transport_reset_no_sock(t, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	}

	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	virtio_transport_free_pkt(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
	kfree(pkt->buf);
	kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");