/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation version 2 and no later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/cred.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>
#include <linux/poll.h>
#include <linux/skbuff.h>
#include <linux/smp.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <net/sock.h>

#include "af_vsock.h"
#include "vmci_transport_notify.h"

static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg);
static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg);
static void vmci_transport_peer_attach_cb(u32 sub_id,
                                          const struct vmci_event_data *ed,
                                          void *client_data);
static void vmci_transport_peer_detach_cb(u32 sub_id,
                                          const struct vmci_event_data *ed,
                                          void *client_data);
static void vmci_transport_recv_pkt_work(struct work_struct *work);
static int vmci_transport_recv_listen(struct sock *sk,
                                      struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_server(
                                        struct sock *sk,
                                        struct sock *pending,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client(
                                        struct sock *sk,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client_negotiate(
                                        struct sock *sk,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connecting_client_invalid(
                                        struct sock *sk,
                                        struct vmci_transport_packet *pkt);
static int vmci_transport_recv_connected(struct sock *sk,
                                         struct vmci_transport_packet *pkt);
static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
static u16 vmci_transport_new_proto_supported_versions(void);
static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
                                                  bool old_pkt_proto);

struct vmci_transport_recv_pkt_info {
        struct work_struct work;
        struct sock *sk;
        struct vmci_transport_packet pkt;
};

static struct vmci_handle vmci_transport_stream_handle = { VMCI_INVALID_ID,
                                                           VMCI_INVALID_ID };
static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;

static int PROTOCOL_OVERRIDE = -1;

#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN   128
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE       262144
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX   262144

/* The default peer timeout indicates how long we will wait for a peer response
 * to a control message.
 */
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)

#define SS_LISTEN 255

/* Helper function to convert from a VMCI error code to a VSock error code. */

static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
{
        int err;

        switch (vmci_error) {
        case VMCI_ERROR_NO_MEM:
                err = ENOMEM;
                break;
        case VMCI_ERROR_DUPLICATE_ENTRY:
        case VMCI_ERROR_ALREADY_EXISTS:
                err = EADDRINUSE;
                break;
        case VMCI_ERROR_NO_ACCESS:
                err = EPERM;
                break;
        case VMCI_ERROR_NO_RESOURCES:
                err = ENOBUFS;
                break;
        case VMCI_ERROR_INVALID_RESOURCE:
                err = EHOSTUNREACH;
                break;
        case VMCI_ERROR_INVALID_ARGS:
        default:
                err = EINVAL;
        }

        return err > 0 ? -err : err;
}

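/* Pick the VMCI resource ID that control packets to a given peer context are
 * addressed to. The hypervisor context listens on its own dedicated RID;
 * every other peer uses the common vsock packet RID.
 */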
static u32 vmci_transport_peer_rid(u32 peer_cid)
{
        if (VMADDR_CID_HYPERVISOR == peer_cid)
                return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;

        return VMCI_TRANSPORT_PACKET_RID;
}

static inline void
vmci_transport_packet_init(struct vmci_transport_packet *pkt,
                           struct sockaddr_vm *src,
                           struct sockaddr_vm *dst,
                           u8 type,
                           u64 size,
                           u64 mode,
                           struct vmci_transport_waiting_info *wait,
                           u16 proto,
                           struct vmci_handle handle)
{
        /* We register the stream control handler as an any cid handle so we
         * must always send from a source address of VMADDR_CID_ANY
         */
        pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
                                       VMCI_TRANSPORT_PACKET_RID);
        pkt->dg.dst = vmci_make_handle(dst->svm_cid,
                                       vmci_transport_peer_rid(dst->svm_cid));
        pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
        pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
        pkt->type = type;
        pkt->src_port = src->svm_port;
        pkt->dst_port = dst->svm_port;
        memset(&pkt->proto, 0, sizeof(pkt->proto));
        memset(&pkt->_reserved2, 0, sizeof(pkt->_reserved2));

        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
                pkt->u.size = 0;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST:
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
                pkt->u.size = size;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
                pkt->u.handle = handle;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_WROTE:
        case VMCI_TRANSPORT_PACKET_TYPE_READ:
        case VMCI_TRANSPORT_PACKET_TYPE_RST:
                pkt->u.size = 0;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
                pkt->u.mode = mode;
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ:
        case VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE:
                memcpy(&pkt->u.wait, wait, sizeof(pkt->u.wait));
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_REQUEST2:
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
                pkt->u.size = size;
                pkt->proto = proto;
                break;
        }
}

static inline void
vmci_transport_packet_get_addresses(struct vmci_transport_packet *pkt,
                                    struct sockaddr_vm *local,
                                    struct sockaddr_vm *remote)
{
        vsock_addr_init(local, pkt->dg.dst.context, pkt->dst_port);
        vsock_addr_init(remote, pkt->dg.src.context, pkt->src_port);
}

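/* Build and send one control packet. The caller supplies the packet storage,
 * which lets callers in atomic context avoid allocation (see the _bh variant
 * below, which uses static storage). When convert_error is true, a VMCI
 * error result is mapped to a vsock (negative errno) error before returning.
 */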
static int
__vmci_transport_send_control_pkt(struct vmci_transport_packet *pkt,
                                  struct sockaddr_vm *src,
                                  struct sockaddr_vm *dst,
                                  enum vmci_transport_packet_type type,
                                  u64 size,
                                  u64 mode,
                                  struct vmci_transport_waiting_info *wait,
                                  u16 proto,
                                  struct vmci_handle handle,
                                  bool convert_error)
{
        int err;

        vmci_transport_packet_init(pkt, src, dst, type, size, mode, wait,
                                   proto, handle);
        err = vmci_datagram_send(&pkt->dg);
        if (convert_error && (err < 0))
                return vmci_transport_error_to_vsock_error(err);

        return err;
}

static int
vmci_transport_reply_control_pkt_fast(struct vmci_transport_packet *pkt,
                                      enum vmci_transport_packet_type type,
                                      u64 size,
                                      u64 mode,
                                      struct vmci_transport_waiting_info *wait,
                                      struct vmci_handle handle)
{
        struct vmci_transport_packet reply;
        struct sockaddr_vm src, dst;

        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST) {
                return 0;
        } else {
                vmci_transport_packet_get_addresses(pkt, &src, &dst);
                return __vmci_transport_send_control_pkt(&reply, &src, &dst,
                                                         type,
                                                         size, mode, wait,
                                                         VSOCK_PROTO_INVALID,
                                                         handle, true);
        }
}

static int
vmci_transport_send_control_pkt_bh(struct sockaddr_vm *src,
                                   struct sockaddr_vm *dst,
                                   enum vmci_transport_packet_type type,
                                   u64 size,
                                   u64 mode,
                                   struct vmci_transport_waiting_info *wait,
                                   struct vmci_handle handle)
{
        /* Note that it is safe to use a single packet across all CPUs since
         * two tasklets of the same type are guaranteed to not ever run
         * simultaneously. If that ever changes, or VMCI stops using tasklets,
         * we can use per-cpu packets.
         */
        static struct vmci_transport_packet pkt;

        return __vmci_transport_send_control_pkt(&pkt, src, dst, type,
                                                 size, mode, wait,
                                                 VSOCK_PROTO_INVALID, handle,
                                                 false);
}

static int
vmci_transport_send_control_pkt(struct sock *sk,
                                enum vmci_transport_packet_type type,
                                u64 size,
                                u64 mode,
                                struct vmci_transport_waiting_info *wait,
                                u16 proto,
                                struct vmci_handle handle)
{
        struct vmci_transport_packet *pkt;
        struct vsock_sock *vsk;
        int err;

        vsk = vsock_sk(sk);

        if (!vsock_addr_bound(&vsk->local_addr))
                return -EINVAL;

        if (!vsock_addr_bound(&vsk->remote_addr))
                return -EINVAL;

        pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
        if (!pkt)
                return -ENOMEM;

        err = __vmci_transport_send_control_pkt(pkt, &vsk->local_addr,
                                                &vsk->remote_addr, type, size,
                                                mode, wait, proto, handle,
                                                true);
        kfree(pkt);

        return err;
}

static int vmci_transport_send_reset_bh(struct sockaddr_vm *dst,
                                        struct sockaddr_vm *src,
                                        struct vmci_transport_packet *pkt)
{
        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
                return 0;
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_RST, 0,
                                        0, NULL, VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_reset(struct sock *sk,
                                     struct vmci_transport_packet *pkt)
{
        if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST)
                return 0;
        return vmci_transport_send_control_pkt(sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_RST,
                                        0, 0, NULL, VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_negotiate(struct sock *sk, size_t size)
{
        return vmci_transport_send_control_pkt(
                                        sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE,
                                        size, 0, NULL,
                                        VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_negotiate2(struct sock *sk, size_t size,
                                          u16 version)
{
        return vmci_transport_send_control_pkt(
                                        sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2,
                                        size, 0, NULL, version,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_qp_offer(struct sock *sk,
                                        struct vmci_handle handle)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_OFFER, 0,
                                        0, NULL,
                                        VSOCK_PROTO_INVALID, handle);
}

static int vmci_transport_send_attach(struct sock *sk,
                                      struct vmci_handle handle)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_ATTACH,
                                        0, 0, NULL, VSOCK_PROTO_INVALID,
                                        handle);
}

static int vmci_transport_reply_reset(struct vmci_transport_packet *pkt)
{
        return vmci_transport_reply_control_pkt_fast(
                                                pkt,
                                                VMCI_TRANSPORT_PACKET_TYPE_RST,
                                                0, 0, NULL,
                                                VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_invalid_bh(struct sockaddr_vm *dst,
                                          struct sockaddr_vm *src)
{
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_INVALID,
                                        0, 0, NULL, VMCI_INVALID_HANDLE);
}

int vmci_transport_send_wrote_bh(struct sockaddr_vm *dst,
                                 struct sockaddr_vm *src)
{
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
                                        0, NULL, VMCI_INVALID_HANDLE);
}

int vmci_transport_send_read_bh(struct sockaddr_vm *dst,
                                struct sockaddr_vm *src)
{
        return vmci_transport_send_control_pkt_bh(
                                        dst, src,
                                        VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
                                        0, NULL, VMCI_INVALID_HANDLE);
}

int vmci_transport_send_wrote(struct sock *sk)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_WROTE, 0,
                                        0, NULL, VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

int vmci_transport_send_read(struct sock *sk)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_READ, 0,
                                        0, NULL, VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

int vmci_transport_send_waiting_write(struct sock *sk,
                                      struct vmci_transport_waiting_info *wait)
{
        return vmci_transport_send_control_pkt(
                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_WRITE,
                                0, 0, wait, VSOCK_PROTO_INVALID,
                                VMCI_INVALID_HANDLE);
}

int vmci_transport_send_waiting_read(struct sock *sk,
                                     struct vmci_transport_waiting_info *wait)
{
        return vmci_transport_send_control_pkt(
                                sk, VMCI_TRANSPORT_PACKET_TYPE_WAITING_READ,
                                0, 0, wait, VSOCK_PROTO_INVALID,
                                VMCI_INVALID_HANDLE);
}

static int vmci_transport_shutdown(struct vsock_sock *vsk, int mode)
{
        return vmci_transport_send_control_pkt(
                                        &vsk->sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN,
                                        0, mode, NULL,
                                        VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_conn_request(struct sock *sk, size_t size)
{
        return vmci_transport_send_control_pkt(sk,
                                        VMCI_TRANSPORT_PACKET_TYPE_REQUEST,
                                        size, 0, NULL,
                                        VSOCK_PROTO_INVALID,
                                        VMCI_INVALID_HANDLE);
}

static int vmci_transport_send_conn_request2(struct sock *sk, size_t size,
                                             u16 version)
{
        return vmci_transport_send_control_pkt(
                                        sk, VMCI_TRANSPORT_PACKET_TYPE_REQUEST2,
                                        size, 0, NULL, version,
                                        VMCI_INVALID_HANDLE);
}

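/* Find the pending (not yet accepted) connection on the given listening
 * socket that matches the source address and destination port of an incoming
 * packet. On success the pending socket is returned with an extra reference
 * held; the caller drops it via vmci_transport_release_pending().
 */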
static struct sock *vmci_transport_get_pending(
                                        struct sock *listener,
                                        struct vmci_transport_packet *pkt)
{
        struct vsock_sock *vlistener;
        struct vsock_sock *vpending;
        struct sock *pending;
        struct sockaddr_vm src;

        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);

        vlistener = vsock_sk(listener);

        list_for_each_entry(vpending, &vlistener->pending_links,
                            pending_links) {
                if (vsock_addr_equals_addr(&src, &vpending->remote_addr) &&
                    pkt->dst_port == vpending->local_addr.svm_port) {
                        pending = sk_vsock(vpending);
                        sock_hold(pending);
                        goto found;
                }
        }

        pending = NULL;
found:
        return pending;

}

static void vmci_transport_release_pending(struct sock *pending)
{
        sock_put(pending);
}

/* We allow two kinds of sockets to communicate with a restricted VM: 1)
 * trusted sockets 2) sockets from applications running as the same user as the
 * VM (this is only true for the host side and only when using hosted products)
 */

static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
{
        return vsock->trusted ||
               vmci_is_context_owner(peer_cid, vsock->owner->uid);
}

/* We allow sending datagrams to and receiving datagrams from a restricted VM
 * only if it is trusted as described in vmci_transport_is_trusted.
 */

static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
{
        if (VMADDR_CID_HYPERVISOR == peer_cid)
                return true;

        if (vsock->cached_peer != peer_cid) {
                vsock->cached_peer = peer_cid;
                if (!vmci_transport_is_trusted(vsock, peer_cid) &&
                    (vmci_context_get_priv_flags(peer_cid) &
                     VMCI_PRIVILEGE_FLAG_RESTRICTED)) {
                        vsock->cached_peer_allow_dgram = false;
                } else {
                        vsock->cached_peer_allow_dgram = true;
                }
        }

        return vsock->cached_peer_allow_dgram;
}

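/* Allocate (or attach to) the VMCI queue pair backing a stream socket. For
 * trusted endpoints we first try a privileged allocation, which only
 * succeeds when vsock runs in the host, and fall back to an unprivileged
 * allocation otherwise.
 */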
static int
vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
                                struct vmci_handle *handle,
                                u64 produce_size,
                                u64 consume_size,
                                u32 peer, u32 flags, bool trusted)
{
        int err = 0;

        if (trusted) {
                /* Try to allocate our queue pair as trusted. This will only
                 * work if vsock is running in the host.
                 */

                err = vmci_qpair_alloc(qpair, handle, produce_size,
                                       consume_size,
                                       peer, flags,
                                       VMCI_PRIVILEGE_FLAG_TRUSTED);
                if (err != VMCI_ERROR_NO_ACCESS)
                        goto out;

        }

        err = vmci_qpair_alloc(qpair, handle, produce_size, consume_size,
                               peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out:
        if (err < 0) {
                pr_err("Could not attach to queue pair with %d\n",
                       err);
                err = vmci_transport_error_to_vsock_error(err);
        }

        return err;
}

static int
vmci_transport_datagram_create_hnd(u32 resource_id,
                                   u32 flags,
                                   vmci_datagram_recv_cb recv_cb,
                                   void *client_data,
                                   struct vmci_handle *out_handle)
{
        int err = 0;

        /* Try to allocate our datagram handler as trusted. This will only work
         * if vsock is running in the host.
         */

        err = vmci_datagram_create_handle_priv(resource_id, flags,
                                               VMCI_PRIVILEGE_FLAG_TRUSTED,
                                               recv_cb,
                                               client_data, out_handle);

        if (err == VMCI_ERROR_NO_ACCESS)
                err = vmci_datagram_create_handle(resource_id, flags,
                                                  recv_cb, client_data,
                                                  out_handle);

        return err;
}

/* This is invoked as part of a tasklet that's scheduled when the VMCI
 * interrupt fires. This is run in bottom-half context and if it ever needs to
 * sleep it should defer that work to a work queue.
 */

static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
{
        struct sock *sk;
        size_t size;
        struct sk_buff *skb;
        struct vsock_sock *vsk;

        sk = (struct sock *)data;

        /* This handler is privileged when this module is running on the host.
         * We will get datagrams from all endpoints (even VMs that are in a
         * restricted context). If we get one from a restricted context then
         * the destination socket must be trusted.
         *
         * NOTE: We access the socket struct without holding the lock here.
         * This is ok because the field we are interested in is never modified
         * outside of the create and destruct socket functions.
         */
        vsk = vsock_sk(sk);
        if (!vmci_transport_allow_dgram(vsk, dg->src.context))
                return VMCI_ERROR_NO_ACCESS;

        size = VMCI_DG_SIZE(dg);

        /* Attach the packet to the socket's receive queue as an sk_buff. */
        skb = alloc_skb(size, GFP_ATOMIC);
        if (!skb)
                return VMCI_ERROR_NO_MEM;

        /* sk_receive_skb() will do a sock_put(), so hold here. */
        sock_hold(sk);
        skb_put(skb, size);
        memcpy(skb->data, dg, size);
        sk_receive_skb(sk, skb, 0);

        return VMCI_SUCCESS;
}

static bool vmci_transport_stream_allow(u32 cid, u32 port)
{
        static const u32 non_socket_contexts[] = {
                VMADDR_CID_RESERVED,
        };
        int i;

        BUILD_BUG_ON(sizeof(cid) != sizeof(*non_socket_contexts));

        for (i = 0; i < ARRAY_SIZE(non_socket_contexts); i++) {
                if (cid == non_socket_contexts[i])
                        return false;
        }

        return true;
}

/* This is invoked as part of a tasklet that's scheduled when the VMCI
 * interrupt fires. This is run in bottom-half context but it defers most of
 * its work to the packet handling work queue.
 */

static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
{
        struct sock *sk;
        struct sockaddr_vm dst;
        struct sockaddr_vm src;
        struct vmci_transport_packet *pkt;
        struct vsock_sock *vsk;
        bool bh_process_pkt;
        int err;

        sk = NULL;
        err = VMCI_SUCCESS;
        bh_process_pkt = false;

        /* Ignore incoming packets from contexts without sockets, or resources
         * that aren't vsock implementations.
         */

        if (!vmci_transport_stream_allow(dg->src.context, -1)
            || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
                return VMCI_ERROR_NO_ACCESS;

        if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
                /* Drop datagrams that do not contain full VSock packets. */
                return VMCI_ERROR_INVALID_ARGS;

        pkt = (struct vmci_transport_packet *)dg;

        /* Find the socket that should handle this packet. First we look for a
         * connected socket and if there is none we look for a socket bound to
         * the destination address.
         */
        vsock_addr_init(&src, pkt->dg.src.context, pkt->src_port);
        vsock_addr_init(&dst, pkt->dg.dst.context, pkt->dst_port);

        sk = vsock_find_connected_socket(&src, &dst);
        if (!sk) {
                sk = vsock_find_bound_socket(&dst);
                if (!sk) {
                        /* We could not find a socket for this specified
                         * address. If this packet is a RST, we just drop it.
                         * If it is another packet, we send a RST. Note that
                         * we do not send a RST reply to RSTs so that we do not
                         * continually send RSTs between two endpoints.
                         *
                         * Note that since this is a reply, dst is src and src
                         * is dst.
                         */
                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
                                pr_err("unable to send reset\n");

                        err = VMCI_ERROR_NOT_FOUND;
                        goto out;
                }
        }

        /* If the received packet type is beyond all types known to this
         * implementation, reply with an invalid message. Hopefully this will
         * help when implementing backwards compatibility in the future.
         */
        if (pkt->type >= VMCI_TRANSPORT_PACKET_TYPE_MAX) {
                vmci_transport_send_invalid_bh(&dst, &src);
                err = VMCI_ERROR_INVALID_ARGS;
                goto out;
        }

        /* This handler is privileged when this module is running on the host.
         * We will get datagram connect requests from all endpoints (even VMs
         * that are in a restricted context). If we get one from a restricted
         * context then the destination socket must be trusted.
         *
         * NOTE: We access the socket struct without holding the lock here.
         * This is ok because the field we are interested in is never modified
         * outside of the create and destruct socket functions.
         */
        vsk = vsock_sk(sk);
        if (!vmci_transport_allow_dgram(vsk, pkt->dg.src.context)) {
                err = VMCI_ERROR_NO_ACCESS;
                goto out;
        }

        /* We do most everything in a work queue, but let's fast path the
         * notification of reads and writes to help data transfer performance.
         * We can only do this if there is no process context code executing
         * for this socket since that may change the state.
         */
        bh_lock_sock(sk);

        if (!sock_owned_by_user(sk)) {
                /* The local context ID may be out of date, update it. */
                vsk->local_addr.svm_cid = dst.svm_cid;

                if (sk->sk_state == SS_CONNECTED)
                        vmci_trans(vsk)->notify_ops->handle_notify_pkt(
                                        sk, pkt, true, &dst, &src,
                                        &bh_process_pkt);
        }

        bh_unlock_sock(sk);

        if (!bh_process_pkt) {
                struct vmci_transport_recv_pkt_info *recv_pkt_info;

                recv_pkt_info = kmalloc(sizeof(*recv_pkt_info), GFP_ATOMIC);
                if (!recv_pkt_info) {
                        if (vmci_transport_send_reset_bh(&dst, &src, pkt) < 0)
                                pr_err("unable to send reset\n");

                        err = VMCI_ERROR_NO_MEM;
                        goto out;
                }

                recv_pkt_info->sk = sk;
                memcpy(&recv_pkt_info->pkt, pkt, sizeof(recv_pkt_info->pkt));
                INIT_WORK(&recv_pkt_info->work, vmci_transport_recv_pkt_work);

                schedule_work(&recv_pkt_info->work);
                /* Clear sk so that the reference count incremented by one of
                 * the Find functions above is not decremented below. We need
                 * that reference count for the packet handler we've scheduled
                 * to run.
                 */
                sk = NULL;
        }

out:
        if (sk)
                sock_put(sk);

        return err;
}

static void vmci_transport_peer_attach_cb(u32 sub_id,
                                          const struct vmci_event_data *e_data,
                                          void *client_data)
{
        struct sock *sk = client_data;
        const struct vmci_event_payload_qp *e_payload;
        struct vsock_sock *vsk;

        e_payload = vmci_event_data_const_payload(e_data);

        vsk = vsock_sk(sk);

        /* We don't ask for delayed CBs when we subscribe to this event (we
         * pass 0 as flags to vmci_event_subscribe()). VMCI makes no
         * guarantees in that case about what context we might be running in,
         * so it could be BH or process, blockable or non-blockable. So we
         * need to account for all possible contexts here.
         */
        local_bh_disable();
        bh_lock_sock(sk);

        /* XXX This is lame, we should provide a way to lookup sockets by
         * qp_handle.
         */
        if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
                                 e_payload->handle)) {
                /* XXX This doesn't do anything, but in the future we may want
                 * to set a flag here to verify the attach really did occur and
                 * we weren't just sent a datagram claiming it was.
                 */
                goto out;
        }

out:
        bh_unlock_sock(sk);
        local_bh_enable();
}

static void vmci_transport_handle_detach(struct sock *sk)
{
        struct vsock_sock *vsk;

        vsk = vsock_sk(sk);
        if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
                sock_set_flag(sk, SOCK_DONE);

                /* On a detach the peer will not be sending or receiving
                 * anymore.
                 */
                vsk->peer_shutdown = SHUTDOWN_MASK;

                /* We should not be sending anymore since the peer won't be
                 * there to receive, but we can still receive if there is data
                 * left in our consume queue.
                 */
                if (vsock_stream_has_data(vsk) <= 0) {
                        if (sk->sk_state == SS_CONNECTING) {
                                /* The peer may detach from a queue pair while
                                 * we are still in the connecting state, i.e.,
                                 * if the peer VM is killed after attaching to
                                 * a queue pair, but before we complete the
                                 * handshake. In that case, we treat the detach
                                 * event like a reset.
                                 */

                                sk->sk_state = SS_UNCONNECTED;
                                sk->sk_err = ECONNRESET;
                                sk->sk_error_report(sk);
                                return;
                        }
                        sk->sk_state = SS_UNCONNECTED;
                }
                sk->sk_state_change(sk);
        }
}

static void vmci_transport_peer_detach_cb(u32 sub_id,
                                          const struct vmci_event_data *e_data,
                                          void *client_data)
{
        struct sock *sk = client_data;
        const struct vmci_event_payload_qp *e_payload;
        struct vsock_sock *vsk;

        e_payload = vmci_event_data_const_payload(e_data);
        vsk = vsock_sk(sk);
        if (vmci_handle_is_invalid(e_payload->handle))
                return;

        /* Same rules for locking as for peer_attach_cb(). */
        local_bh_disable();
        bh_lock_sock(sk);

        /* XXX This is lame, we should provide a way to lookup sockets by
         * qp_handle.
         */
        if (vmci_handle_is_equal(vmci_trans(vsk)->qp_handle,
                                 e_payload->handle))
                vmci_transport_handle_detach(sk);

        bh_unlock_sock(sk);
        local_bh_enable();
}

static void vmci_transport_qp_resumed_cb(u32 sub_id,
                                         const struct vmci_event_data *e_data,
                                         void *client_data)
{
        vsock_for_each_connected_socket(vmci_transport_handle_detach);
}

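/* Process-context handler for control packets queued by
 * vmci_transport_recv_stream_cb(). Takes the socket lock and dispatches on
 * the socket state; unexpected states get a reset so the peer does not hang
 * in connect().
 */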
static void vmci_transport_recv_pkt_work(struct work_struct *work)
{
        struct vmci_transport_recv_pkt_info *recv_pkt_info;
        struct vmci_transport_packet *pkt;
        struct sock *sk;

        recv_pkt_info =
                container_of(work, struct vmci_transport_recv_pkt_info, work);
        sk = recv_pkt_info->sk;
        pkt = &recv_pkt_info->pkt;

        lock_sock(sk);

        /* The local context ID may be out of date. */
        vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;

        switch (sk->sk_state) {
        case SS_LISTEN:
                vmci_transport_recv_listen(sk, pkt);
                break;
        case SS_CONNECTING:
                /* Processing of pending connections for servers goes through
                 * the listening socket, so see vmci_transport_recv_listen()
                 * for that path.
                 */
                vmci_transport_recv_connecting_client(sk, pkt);
                break;
        case SS_CONNECTED:
                vmci_transport_recv_connected(sk, pkt);
                break;
        default:
                /* Because this function does not run in the same context as
                 * vmci_transport_recv_stream_cb it is possible that the
                 * socket has closed. We need to let the other side know or it
                 * could be sitting in a connect and hang forever. Send a
                 * reset to prevent that.
                 */
                vmci_transport_send_reset(sk, pkt);
                goto out;
        }

out:
        release_sock(sk);
        kfree(recv_pkt_info);
        /* Release reference obtained in the stream callback when we fetched
         * this socket out of the bound or connected list.
         */
        sock_put(sk);
}

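/* Handle a control packet received on a listening socket. Either route it to
 * an existing pending connection, or service a new connection request by
 * creating a child socket and replying with a negotiation packet.
 */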
static int vmci_transport_recv_listen(struct sock *sk,
                                      struct vmci_transport_packet *pkt)
{
        struct sock *pending;
        struct vsock_sock *vpending;
        int err;
        u64 qp_size;
        bool old_request = false;
        bool old_pkt_proto = false;

        err = 0;

        /* Because we are in the listen state, we could be receiving a packet
         * for ourselves or any previous connection requests that we received.
         * If it's the latter, we try to find a socket in our list of pending
         * connections and, if we do, call the appropriate handler for the
         * state that that socket is in. Otherwise we try to service the
         * connection request.
         */
        pending = vmci_transport_get_pending(sk, pkt);
        if (pending) {
                lock_sock(pending);

                /* The local context ID may be out of date. */
                vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;

                switch (pending->sk_state) {
                case SS_CONNECTING:
                        err = vmci_transport_recv_connecting_server(sk,
                                                                    pending,
                                                                    pkt);
                        break;
                default:
                        vmci_transport_send_reset(pending, pkt);
                        err = -EINVAL;
                }

                if (err < 0)
                        vsock_remove_pending(sk, pending);

                release_sock(pending);
                vmci_transport_release_pending(pending);

                return err;
        }

        /* The listen state only accepts connection requests. Reply with a
         * reset unless we received a reset.
         */

        if (!(pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST ||
              pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)) {
                vmci_transport_reply_reset(pkt);
                return -EINVAL;
        }

        if (pkt->u.size == 0) {
                vmci_transport_reply_reset(pkt);
                return -EINVAL;
        }

        /* If this socket can't accommodate this connection request, we send a
         * reset. Otherwise we create and initialize a child socket and reply
         * with a connection negotiation.
         */
        if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) {
                vmci_transport_reply_reset(pkt);
                return -ECONNREFUSED;
        }

        pending = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
                                 sk->sk_type);
        if (!pending) {
                vmci_transport_send_reset(sk, pkt);
                return -ENOMEM;
        }

        vpending = vsock_sk(pending);

        vsock_addr_init(&vpending->local_addr, pkt->dg.dst.context,
                        pkt->dst_port);
        vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
                        pkt->src_port);

        /* If the proposed size fits within our min/max, accept it. Otherwise
         * propose our own size.
         */
        if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
            pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
                qp_size = pkt->u.size;
        } else {
                qp_size = vmci_trans(vpending)->queue_pair_size;
        }

        /* Figure out if we are using old or new requests based on the
         * overrides pkt types sent by our peer.
         */
        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
                old_request = old_pkt_proto;
        } else {
                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST)
                        old_request = true;
                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_REQUEST2)
                        old_request = false;

        }

        if (old_request) {
                /* Handle a REQUEST (or override) */
                u16 version = VSOCK_PROTO_INVALID;
                if (vmci_transport_proto_to_notify_struct(
                        pending, &version, true))
                        err = vmci_transport_send_negotiate(pending, qp_size);
                else
                        err = -EINVAL;

        } else {
                /* Handle a REQUEST2 (or override) */
                int proto_int = pkt->proto;
                int pos;
                u16 active_proto_version = 0;

                /* The list of possible protocols is the intersection of all
                 * protocols the client supports ... plus all the protocols we
                 * support.
                 */
                proto_int &= vmci_transport_new_proto_supported_versions();

                /* We choose the highest possible protocol version and use that
                 * one.
                 */
                pos = fls(proto_int);
                if (pos) {
                        active_proto_version = (1 << (pos - 1));
                        if (vmci_transport_proto_to_notify_struct(
                                pending, &active_proto_version, false))
                                err = vmci_transport_send_negotiate2(pending,
                                                        qp_size,
                                                        active_proto_version);
                        else
                                err = -EINVAL;

                } else {
                        err = -EINVAL;
                }
        }

        if (err < 0) {
                vmci_transport_send_reset(sk, pkt);
                sock_put(pending);
                err = vmci_transport_error_to_vsock_error(err);
                goto out;
        }

        vsock_add_pending(sk, pending);
        sk->sk_ack_backlog++;

        pending->sk_state = SS_CONNECTING;
        vmci_trans(vpending)->produce_size =
                vmci_trans(vpending)->consume_size = qp_size;
        vmci_trans(vpending)->queue_pair_size = qp_size;

        vmci_trans(vpending)->notify_ops->process_request(pending);

        /* We might never receive another message for this socket and it's not
         * connected to any process, so we have to ensure it gets cleaned up
         * ourselves. Our delayed work function will take care of that. Note
         * that we do not ever cancel this function since we have few
         * guarantees about its state when calling cancel_delayed_work().
         * Instead we hold a reference on the socket for that function and make
         * it capable of handling cases where it needs to do nothing but
         * release that reference.
         */
        vpending->listener = sk;
        sock_hold(sk);
        sock_hold(pending);
        INIT_DELAYED_WORK(&vpending->dwork, vsock_pending_work);
        schedule_delayed_work(&vpending->dwork, HZ);

out:
        return err;
}

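/* Server side of the connect handshake: the client has replied to our
 * NEGOTIATE with an OFFER carrying the queue pair handle it created. Attach
 * to that queue pair, subscribe to the detach event, and send an ATTACH back
 * to complete the connection.
 */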
static int
vmci_transport_recv_connecting_server(struct sock *listener,
                                      struct sock *pending,
                                      struct vmci_transport_packet *pkt)
{
        struct vsock_sock *vpending;
        struct vmci_handle handle;
        struct vmci_qp *qpair;
        bool is_local;
        u32 flags;
        u32 detach_sub_id;
        int err;
        int skerr;

        vpending = vsock_sk(pending);
        detach_sub_id = VMCI_INVALID_ID;

        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_OFFER:
                if (vmci_handle_is_invalid(pkt->u.handle)) {
                        vmci_transport_send_reset(pending, pkt);
                        skerr = EPROTO;
                        err = -EINVAL;
                        goto destroy;
                }
                break;
        default:
                /* Close and cleanup the connection. */
                vmci_transport_send_reset(pending, pkt);
                skerr = EPROTO;
                err = pkt->type == VMCI_TRANSPORT_PACKET_TYPE_RST ? 0 : -EINVAL;
                goto destroy;
        }

        /* In order to complete the connection we need to attach to the offered
         * queue pair and send an attach notification. We also subscribe to the
         * detach event so we know when our peer goes away, and we do that
         * before attaching so we don't miss an event. If all this succeeds,
         * we update our state and wakeup anything waiting in accept() for a
         * connection.
         */

        /* We don't care about attach since we ensure the other side has
         * attached by specifying the ATTACH_ONLY flag below.
         */
        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
                                   vmci_transport_peer_detach_cb,
                                   pending, &detach_sub_id);
        if (err < VMCI_SUCCESS) {
                vmci_transport_send_reset(pending, pkt);
                err = vmci_transport_error_to_vsock_error(err);
                skerr = -err;
                goto destroy;
        }

        vmci_trans(vpending)->detach_sub_id = detach_sub_id;

        /* Now attach to the queue pair the client created. */
        handle = pkt->u.handle;

        /* vpending->local_addr always has a context id so we do not need to
         * worry about VMADDR_CID_ANY in this case.
         */
        is_local =
            vpending->remote_addr.svm_cid == vpending->local_addr.svm_cid;
        flags = VMCI_QPFLAG_ATTACH_ONLY;
        flags |= is_local ? VMCI_QPFLAG_LOCAL : 0;

        err = vmci_transport_queue_pair_alloc(
                                        &qpair,
                                        &handle,
                                        vmci_trans(vpending)->produce_size,
                                        vmci_trans(vpending)->consume_size,
                                        pkt->dg.src.context,
                                        flags,
                                        vmci_transport_is_trusted(
                                                vpending,
                                                vpending->remote_addr.svm_cid));
        if (err < 0) {
                vmci_transport_send_reset(pending, pkt);
                skerr = -err;
                goto destroy;
        }

        vmci_trans(vpending)->qp_handle = handle;
        vmci_trans(vpending)->qpair = qpair;

        /* When we send the attach message, we must be ready to handle incoming
         * control messages on the newly connected socket. So we move the
         * pending socket to the connected state before sending the attach
         * message. Otherwise, an incoming packet triggered by the attach being
         * received by the peer may be processed concurrently with what happens
         * below after sending the attach message, and that incoming packet
         * will find the listening socket instead of the (currently) pending
         * socket. Note that enqueueing the socket increments the reference
         * count, so even if a reset comes before the connection is accepted,
         * the socket will be valid until it is removed from the queue.
         *
         * If we fail sending the attach below, we remove the socket from the
         * connected list and move the socket to SS_UNCONNECTED before
         * releasing the lock, so a pending slow path processing of an incoming
         * packet will not see the socket in the connected state in that case.
         */
        pending->sk_state = SS_CONNECTED;

        vsock_insert_connected(vpending);

        /* Notify our peer of our attach. */
        err = vmci_transport_send_attach(pending, handle);
        if (err < 0) {
                vsock_remove_connected(vpending);
                pr_err("Could not send attach\n");
                vmci_transport_send_reset(pending, pkt);
                err = vmci_transport_error_to_vsock_error(err);
                skerr = -err;
                goto destroy;
        }

        /* We have a connection. Move the now connected socket from the
         * listener's pending list to the accept queue so callers of accept()
         * can find it.
         */
        vsock_remove_pending(listener, pending);
        vsock_enqueue_accept(listener, pending);

        /* Callers of accept() will be waiting on the listening socket, not
         * the pending socket.
         */
        listener->sk_state_change(listener);

        return 0;

destroy:
        pending->sk_err = skerr;
        pending->sk_state = SS_UNCONNECTED;
        /* As long as we drop our reference, all necessary cleanup will happen
         * when the cleanup function drops its reference and our destruct
         * implementation is called. Note that since the listen handler will
         * remove pending from the pending list upon our failure, the cleanup
         * function won't drop the additional reference, which is why we do it
         * here.
         */
        sock_put(pending);

        return err;
}

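/* Client side of the connect handshake: dispatch on the packet type while in
 * SS_CONNECTING. An ATTACH completes the connection; NEGOTIATE/NEGOTIATE2
 * carries the server's queue pair size proposal; anything unexpected tears
 * the connection down.
 */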
static int
vmci_transport_recv_connecting_client(struct sock *sk,
                                      struct vmci_transport_packet *pkt)
{
        struct vsock_sock *vsk;
        int err;
        int skerr;

        vsk = vsock_sk(sk);

        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_ATTACH:
                if (vmci_handle_is_invalid(pkt->u.handle) ||
                    !vmci_handle_is_equal(pkt->u.handle,
                                          vmci_trans(vsk)->qp_handle)) {
                        skerr = EPROTO;
                        err = -EINVAL;
                        goto destroy;
                }

                /* Signify the socket is connected and wakeup the waiter in
                 * connect(). Also place the socket in the connected table for
                 * accounting (it can already be found since it's in the bound
                 * table).
                 */
                sk->sk_state = SS_CONNECTED;
                sk->sk_socket->state = SS_CONNECTED;
                vsock_insert_connected(vsk);
                sk->sk_state_change(sk);

                break;
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE:
        case VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2:
                if (pkt->u.size == 0
                    || pkt->dg.src.context != vsk->remote_addr.svm_cid
                    || pkt->src_port != vsk->remote_addr.svm_port
                    || !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)
                    || vmci_trans(vsk)->qpair
                    || vmci_trans(vsk)->produce_size != 0
                    || vmci_trans(vsk)->consume_size != 0
                    || vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID
                    || vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
                        skerr = EPROTO;
                        err = -EINVAL;

                        goto destroy;
                }

                err = vmci_transport_recv_connecting_client_negotiate(sk, pkt);
                if (err) {
                        skerr = -err;
                        goto destroy;
                }

                break;
        case VMCI_TRANSPORT_PACKET_TYPE_INVALID:
                err = vmci_transport_recv_connecting_client_invalid(sk, pkt);
                if (err) {
                        skerr = -err;
                        goto destroy;
                }

                break;
        case VMCI_TRANSPORT_PACKET_TYPE_RST:
                /* Older versions of the linux code (WS 6.5 / ESX 4.0) used to
                 * continue processing here after they sent an INVALID packet.
                 * This meant that we got a RST after the INVALID. We ignore a
                 * RST after an INVALID. The common code doesn't send the RST
                 * ... so we can hang if an old version of the common code
                 * fails between getting a REQUEST and sending an OFFER back.
                 * Not much we can do about it... except hope that it doesn't
                 * happen.
                 */
                if (vsk->ignore_connecting_rst) {
                        vsk->ignore_connecting_rst = false;
                } else {
                        skerr = ECONNRESET;
                        err = 0;
                        goto destroy;
                }

                break;
        default:
                /* Close and cleanup the connection. */
                skerr = EPROTO;
                err = -EINVAL;
                goto destroy;
        }

        return 0;

destroy:
        vmci_transport_send_reset(sk, pkt);

        sk->sk_state = SS_UNCONNECTED;
        sk->sk_err = skerr;
        sk->sk_error_report(sk);
        return err;
}

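/* Respond to the server's NEGOTIATE/NEGOTIATE2: pick the notify protocol
 * version, subscribe to attach/detach events, allocate the queue pair at the
 * negotiated size, and offer its handle back to the server.
 */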
static int vmci_transport_recv_connecting_client_negotiate(
                                        struct sock *sk,
                                        struct vmci_transport_packet *pkt)
{
        int err;
        struct vsock_sock *vsk;
        struct vmci_handle handle;
        struct vmci_qp *qpair;
        u32 attach_sub_id;
        u32 detach_sub_id;
        bool is_local;
        u32 flags;
        bool old_proto = true;
        bool old_pkt_proto;
        u16 version;

        vsk = vsock_sk(sk);
        handle = VMCI_INVALID_HANDLE;
        attach_sub_id = VMCI_INVALID_ID;
        detach_sub_id = VMCI_INVALID_ID;

        /* If we have gotten here then we should be past the point where old
         * linux vsock could have sent the bogus rst.
         */
        vsk->sent_request = false;
        vsk->ignore_connecting_rst = false;

        /* Verify that we're OK with the proposed queue pair size */
        if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
            pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
                err = -EINVAL;
                goto destroy;
        }

        /* At this point we know the CID the peer is using to talk to us. */

        if (vsk->local_addr.svm_cid == VMADDR_CID_ANY)
                vsk->local_addr.svm_cid = pkt->dg.dst.context;

        /* Setup the notify ops to be the highest supported version that both
         * the server and the client support.
         */

        if (vmci_transport_old_proto_override(&old_pkt_proto)) {
                old_proto = old_pkt_proto;
        } else {
                if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE)
                        old_proto = true;
                else if (pkt->type == VMCI_TRANSPORT_PACKET_TYPE_NEGOTIATE2)
                        old_proto = false;

        }

        if (old_proto)
                version = VSOCK_PROTO_INVALID;
        else
                version = pkt->proto;

        if (!vmci_transport_proto_to_notify_struct(sk, &version, old_proto)) {
                err = -EINVAL;
                goto destroy;
        }

        /* Subscribe to attach and detach events first.
         *
         * XXX We attach once for each queue pair created for now so it is easy
         * to find the socket (it's provided), but later we should only
         * subscribe once and add a way to lookup sockets by queue pair handle.
         */
        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_ATTACH,
                                   vmci_transport_peer_attach_cb,
                                   sk, &attach_sub_id);
        if (err < VMCI_SUCCESS) {
                err = vmci_transport_error_to_vsock_error(err);
                goto destroy;
        }

        err = vmci_event_subscribe(VMCI_EVENT_QP_PEER_DETACH,
                                   vmci_transport_peer_detach_cb,
                                   sk, &detach_sub_id);
        if (err < VMCI_SUCCESS) {
                err = vmci_transport_error_to_vsock_error(err);
                goto destroy;
        }

        /* Make VMCI select the handle for us. */
        handle = VMCI_INVALID_HANDLE;
        is_local = vsk->remote_addr.svm_cid == vsk->local_addr.svm_cid;
        flags = is_local ? VMCI_QPFLAG_LOCAL : 0;

        err = vmci_transport_queue_pair_alloc(&qpair,
                                              &handle,
                                              pkt->u.size,
                                              pkt->u.size,
                                              vsk->remote_addr.svm_cid,
                                              flags,
                                              vmci_transport_is_trusted(
                                                  vsk,
                                                  vsk->remote_addr.svm_cid));
        if (err < 0)
                goto destroy;

        err = vmci_transport_send_qp_offer(sk, handle);
        if (err < 0) {
                err = vmci_transport_error_to_vsock_error(err);
                goto destroy;
        }

        vmci_trans(vsk)->qp_handle = handle;
        vmci_trans(vsk)->qpair = qpair;

        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size =
                pkt->u.size;

        vmci_trans(vsk)->attach_sub_id = attach_sub_id;
        vmci_trans(vsk)->detach_sub_id = detach_sub_id;

        vmci_trans(vsk)->notify_ops->process_negotiate(sk);

        return 0;

destroy:
        if (attach_sub_id != VMCI_INVALID_ID)
                vmci_event_unsubscribe(attach_sub_id);

        if (detach_sub_id != VMCI_INVALID_ID)
                vmci_event_unsubscribe(detach_sub_id);

        if (!vmci_handle_is_invalid(handle))
                vmci_qpair_detach(&qpair);

        return err;
}

static int
vmci_transport_recv_connecting_client_invalid(struct sock *sk,
                                              struct vmci_transport_packet *pkt)
{
        int err = 0;
        struct vsock_sock *vsk = vsock_sk(sk);

        if (vsk->sent_request) {
                vsk->sent_request = false;
                vsk->ignore_connecting_rst = true;

                err = vmci_transport_send_conn_request(
                        sk, vmci_trans(vsk)->queue_pair_size);
                if (err < 0)
                        err = vmci_transport_error_to_vsock_error(err);
                else
                        err = 0;

        }

        return err;
}

static int vmci_transport_recv_connected(struct sock *sk,
                                         struct vmci_transport_packet *pkt)
{
        struct vsock_sock *vsk;
        bool pkt_processed = false;

        /* In cases where we are closing the connection, it's sufficient to
         * mark the state change (and maybe error) and wake up any waiting
         * threads. Since this is a connected socket, it's owned by a user
         * process and will be cleaned up when the failure is passed back on
         * the current or next system call. Our system call implementations
         * must therefore check for error and state changes on entry and when
         * being awoken.
         */
        switch (pkt->type) {
        case VMCI_TRANSPORT_PACKET_TYPE_SHUTDOWN:
                if (pkt->u.mode) {
                        vsk = vsock_sk(sk);

                        vsk->peer_shutdown |= pkt->u.mode;
                        sk->sk_state_change(sk);
                }
                break;

        case VMCI_TRANSPORT_PACKET_TYPE_RST:
                vsk = vsock_sk(sk);
                /* It is possible that we sent our peer a message (e.g. a
                 * WAITING_READ) right before we got notified that the peer had
                 * detached. If that happens then we can get a RST pkt back
                 * from our peer even though there is data available for us to
                 * read. In that case, don't shutdown the socket completely but
                 * instead allow the local client to finish reading data off
                 * the queuepair. Always treat a RST pkt in connected mode like
                 * a clean shutdown.
                 */
                sock_set_flag(sk, SOCK_DONE);
                vsk->peer_shutdown = SHUTDOWN_MASK;
                if (vsock_stream_has_data(vsk) <= 0)
                        sk->sk_state = SS_DISCONNECTING;

                sk->sk_state_change(sk);
                break;

        default:
                vsk = vsock_sk(sk);
                vmci_trans(vsk)->notify_ops->handle_notify_pkt(
                                sk, pkt, false, NULL, NULL,
                                &pkt_processed);
                if (!pkt_processed)
                        return -EINVAL;

                break;
        }

        return 0;
}

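/* Transport hooks for socket lifetime: allocate the per-socket transport
 * state, inheriting queue pair size limits from the parent socket when one
 * exists (i.e. for sockets created by accept()).
 */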
static int vmci_transport_socket_init(struct vsock_sock *vsk,
                                      struct vsock_sock *psk)
{
        vsk->trans = kmalloc(sizeof(struct vmci_transport), GFP_KERNEL);
        if (!vsk->trans)
                return -ENOMEM;

        vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
        vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
        vmci_trans(vsk)->qpair = NULL;
        vmci_trans(vsk)->produce_size = vmci_trans(vsk)->consume_size = 0;
        vmci_trans(vsk)->attach_sub_id = vmci_trans(vsk)->detach_sub_id =
                VMCI_INVALID_ID;
        vmci_trans(vsk)->notify_ops = NULL;
        if (psk) {
                vmci_trans(vsk)->queue_pair_size =
                        vmci_trans(psk)->queue_pair_size;
                vmci_trans(vsk)->queue_pair_min_size =
                        vmci_trans(psk)->queue_pair_min_size;
                vmci_trans(vsk)->queue_pair_max_size =
                        vmci_trans(psk)->queue_pair_max_size;
        } else {
                vmci_trans(vsk)->queue_pair_size =
                        VMCI_TRANSPORT_DEFAULT_QP_SIZE;
                vmci_trans(vsk)->queue_pair_min_size =
                        VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
                vmci_trans(vsk)->queue_pair_max_size =
                        VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
        }

        return 0;
}

static void vmci_transport_destruct(struct vsock_sock *vsk)
{
        if (vmci_trans(vsk)->attach_sub_id != VMCI_INVALID_ID) {
                vmci_event_unsubscribe(vmci_trans(vsk)->attach_sub_id);
                vmci_trans(vsk)->attach_sub_id = VMCI_INVALID_ID;
        }

        if (vmci_trans(vsk)->detach_sub_id != VMCI_INVALID_ID) {
                vmci_event_unsubscribe(vmci_trans(vsk)->detach_sub_id);
                vmci_trans(vsk)->detach_sub_id = VMCI_INVALID_ID;
        }

        if (!vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle)) {
                vmci_qpair_detach(&vmci_trans(vsk)->qpair);
                vmci_trans(vsk)->qp_handle = VMCI_INVALID_HANDLE;
                vmci_trans(vsk)->produce_size = 0;
                vmci_trans(vsk)->consume_size = 0;
        }

        if (vmci_trans(vsk)->notify_ops)
                vmci_trans(vsk)->notify_ops->socket_destruct(vsk);

        kfree(vsk->trans);
        vsk->trans = NULL;
}

static void vmci_transport_release(struct vsock_sock *vsk)
{
        if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
                vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
                vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;
        }
}

static int vmci_transport_dgram_bind(struct vsock_sock *vsk,
                                     struct sockaddr_vm *addr)
{
        u32 port;
        u32 flags;
        int err;

        /* VMCI will select a resource ID for us if we provide
         * VMCI_INVALID_ID.
         */
        port = addr->svm_port == VMADDR_PORT_ANY ?
                        VMCI_INVALID_ID : addr->svm_port;

        if (port <= LAST_RESERVED_PORT && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;

        flags = addr->svm_cid == VMADDR_CID_ANY ?
                VMCI_FLAG_ANYCID_DG_HND : 0;

        err = vmci_transport_datagram_create_hnd(port, flags,
                                                 vmci_transport_recv_dgram_cb,
                                                 &vsk->sk,
                                                 &vmci_trans(vsk)->dg_handle);
        if (err < VMCI_SUCCESS)
                return vmci_transport_error_to_vsock_error(err);
        vsock_addr_init(&vsk->local_addr, addr->svm_cid,
                        vmci_trans(vsk)->dg_handle.resource);

        return 0;
}

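/* Send a datagram to a remote vsock address: the user payload is copied in
 * behind a VMCI datagram header and handed to vmci_datagram_send(). On
 * success the return value is the payload size, i.e. the VMCI result minus
 * the header size.
 */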
static int vmci_transport_dgram_enqueue(
        struct vsock_sock *vsk,
        struct sockaddr_vm *remote_addr,
        struct iovec *iov,
        size_t len)
{
        int err;
        struct vmci_datagram *dg;

        if (len > VMCI_MAX_DG_PAYLOAD_SIZE)
                return -EMSGSIZE;

        if (!vmci_transport_allow_dgram(vsk, remote_addr->svm_cid))
                return -EPERM;

        /* Allocate a buffer for the user's message and our packet header. */
        dg = kmalloc(len + sizeof(*dg), GFP_KERNEL);
        if (!dg)
                return -ENOMEM;

        memcpy_fromiovec(VMCI_DG_PAYLOAD(dg), iov, len);

        dg->dst = vmci_make_handle(remote_addr->svm_cid,
                                   remote_addr->svm_port);
        dg->src = vmci_make_handle(vsk->local_addr.svm_cid,
                                   vsk->local_addr.svm_port);
        dg->payload_size = len;

        err = vmci_datagram_send(dg);
        kfree(dg);
        if (err < 0)
                return vmci_transport_error_to_vsock_error(err);

        return err - sizeof(*dg);
}

static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
                                        struct vsock_sock *vsk,
                                        struct msghdr *msg, size_t len,
                                        int flags)
{
        int err;
        int noblock;
        struct vmci_datagram *dg;
        size_t payload_len;
        struct sk_buff *skb;

        noblock = flags & MSG_DONTWAIT;

        if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
                return -EOPNOTSUPP;

        msg->msg_namelen = 0;

        /* Retrieve the head sk_buff from the socket's receive queue. */
        err = 0;
        skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
        if (err)
                return err;

        if (!skb)
                return -EAGAIN;

        dg = (struct vmci_datagram *)skb->data;
        if (!dg)
                /* err is 0, meaning we read zero bytes. */
                goto out;

        payload_len = dg->payload_size;
        /* Ensure the sk_buff matches the payload size claimed in the packet. */
        if (payload_len != skb->len - sizeof(*dg)) {
                err = -EINVAL;
                goto out;
        }

        if (payload_len > len) {
                payload_len = len;
                msg->msg_flags |= MSG_TRUNC;
        }

        /* Place the datagram payload in the user's iovec. */
        err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
                                      payload_len);
        if (err)
                goto out;

1784 if (msg->msg_name) {
1785 struct sockaddr_vm *vm_addr;
1786
1787 /* Provide the address of the sender. */
1788 vm_addr = (struct sockaddr_vm *)msg->msg_name;
1789 vsock_addr_init(vm_addr, dg->src.context, dg->src.resource);
1790 msg->msg_namelen = sizeof(*vm_addr);
1791 }
1792 err = payload_len;
1793
1794out:
1795 skb_free_datagram(&vsk->sk, skb);
1796 return err;
1797}
1798
1799static bool vmci_transport_dgram_allow(u32 cid, u32 port)
1800{
1801 if (cid == VMADDR_CID_HYPERVISOR) {
1802 /* Registrations of PBRPC Servers do not modify VMX/Hypervisor
1803 * state and are allowed.
1804 */
1805 return port == VMCI_UNITY_PBRPC_REGISTER;
1806 }
1807
1808 return true;
1809}
1810
1811static int vmci_transport_connect(struct vsock_sock *vsk)
1812{
1813 int err;
1814 bool old_pkt_proto = false;
1815 struct sock *sk = &vsk->sk;
1816
1817 if (vmci_transport_old_proto_override(&old_pkt_proto) &&
1818 old_pkt_proto) {
1819 err = vmci_transport_send_conn_request(
1820 sk, vmci_trans(vsk)->queue_pair_size);
1821 if (err < 0) {
1822 sk->sk_state = SS_UNCONNECTED;
1823 return err;
1824 }
1825 } else {
1826 int supported_proto_versions =
1827 vmci_transport_new_proto_supported_versions();
1828 err = vmci_transport_send_conn_request2(
1829 sk, vmci_trans(vsk)->queue_pair_size,
1830 supported_proto_versions);
1831 if (err < 0) {
1832 sk->sk_state = SS_UNCONNECTED;
1833 return err;
1834 }
1835
1836 vsk->sent_request = true;
1837 }
1838
1839 return err;
1840}
1841
1842static ssize_t vmci_transport_stream_dequeue(
1843 struct vsock_sock *vsk,
1844 struct iovec *iov,
1845 size_t len,
1846 int flags)
1847{
1848 if (flags & MSG_PEEK)
1849 return vmci_qpair_peekv(vmci_trans(vsk)->qpair, iov, len, 0);
1850 else
1851 return vmci_qpair_dequev(vmci_trans(vsk)->qpair, iov, len, 0);
1852}
1853
1854static ssize_t vmci_transport_stream_enqueue(
1855 struct vsock_sock *vsk,
1856 struct iovec *iov,
1857 size_t len)
1858{
1859 return vmci_qpair_enquev(vmci_trans(vsk)->qpair, iov, len, 0);
1860}
1861
1862static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk)
1863{
1864 return vmci_qpair_consume_buf_ready(vmci_trans(vsk)->qpair);
1865}
1866
1867static s64 vmci_transport_stream_has_space(struct vsock_sock *vsk)
1868{
1869 return vmci_qpair_produce_free_space(vmci_trans(vsk)->qpair);
1870}
1871
1872static u64 vmci_transport_stream_rcvhiwat(struct vsock_sock *vsk)
1873{
1874 return vmci_trans(vsk)->consume_size;
1875}
1876
1877static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
1878{
1879 return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
1880}
1881
1882static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
1883{
1884 return vmci_trans(vsk)->queue_pair_size;
1885}
1886
1887static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
1888{
1889 return vmci_trans(vsk)->queue_pair_min_size;
1890}
1891
1892static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
1893{
1894 return vmci_trans(vsk)->queue_pair_max_size;
1895}
1896
static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
	if (val < vmci_trans(vsk)->queue_pair_min_size)
		vmci_trans(vsk)->queue_pair_min_size = val;
	if (val > vmci_trans(vsk)->queue_pair_max_size)
		vmci_trans(vsk)->queue_pair_max_size = val;
	vmci_trans(vsk)->queue_pair_size = val;
}

static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
					       u64 val)
{
	if (val > vmci_trans(vsk)->queue_pair_size)
		vmci_trans(vsk)->queue_pair_size = val;
	vmci_trans(vsk)->queue_pair_min_size = val;
}

static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
					       u64 val)
{
	if (val < vmci_trans(vsk)->queue_pair_size)
		vmci_trans(vsk)->queue_pair_size = val;
	vmci_trans(vsk)->queue_pair_max_size = val;
}

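/* The notify_* callbacks below forward to the pluggable notify_ops table
 * selected during protocol negotiation, casting the core's opaque notify
 * data blobs to this transport's private notify structures.
 */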
static int vmci_transport_notify_poll_in(
	struct vsock_sock *vsk,
	size_t target,
	bool *data_ready_now)
{
	return vmci_trans(vsk)->notify_ops->poll_in(
		&vsk->sk, target, data_ready_now);
}

static int vmci_transport_notify_poll_out(
	struct vsock_sock *vsk,
	size_t target,
	bool *space_available_now)
{
	return vmci_trans(vsk)->notify_ops->poll_out(
		&vsk->sk, target, space_available_now);
}

static int vmci_transport_notify_recv_init(
	struct vsock_sock *vsk,
	size_t target,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_init(
		&vsk->sk, target,
		(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_recv_pre_block(
	struct vsock_sock *vsk,
	size_t target,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_pre_block(
		&vsk->sk, target,
		(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_recv_pre_dequeue(
	struct vsock_sock *vsk,
	size_t target,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_pre_dequeue(
		&vsk->sk, target,
		(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_recv_post_dequeue(
	struct vsock_sock *vsk,
	size_t target,
	ssize_t copied,
	bool data_read,
	struct vsock_transport_recv_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->recv_post_dequeue(
		&vsk->sk, target, copied, data_read,
		(struct vmci_transport_recv_notify_data *)data);
}

static int vmci_transport_notify_send_init(
	struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_init(
		&vsk->sk,
		(struct vmci_transport_send_notify_data *)data);
}

static int vmci_transport_notify_send_pre_block(
	struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_pre_block(
		&vsk->sk,
		(struct vmci_transport_send_notify_data *)data);
}

static int vmci_transport_notify_send_pre_enqueue(
	struct vsock_sock *vsk,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_pre_enqueue(
		&vsk->sk,
		(struct vmci_transport_send_notify_data *)data);
}

static int vmci_transport_notify_send_post_enqueue(
	struct vsock_sock *vsk,
	ssize_t written,
	struct vsock_transport_send_notify_data *data)
{
	return vmci_trans(vsk)->notify_ops->send_post_enqueue(
		&vsk->sk, written,
		(struct vmci_transport_send_notify_data *)data);
}

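/* PROTOCOL_OVERRIDE is a module-level knob: -1 (the default) means no
 * override, 0 forces the old packet protocol, and any other value forces
 * the new protocol.  Returns true when an override is in effect.
 */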
static bool vmci_transport_old_proto_override(bool *old_pkt_proto)
{
	if (PROTOCOL_OVERRIDE != -1) {
		if (PROTOCOL_OVERRIDE == 0)
			*old_pkt_proto = true;
		else
			*old_pkt_proto = false;

		pr_info("Proto override in use\n");
		return true;
	}

	return false;
}

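/* Bind the negotiated protocol version to a concrete notify_ops table: the
 * old packet protocol maps to vmci_transport_notify_pkt_ops, while
 * VSOCK_PROTO_PKT_ON_NOTIFY maps to the queue-state variant.  Requesting
 * the old protocol together with a new protocol version is rejected.
 */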
static bool vmci_transport_proto_to_notify_struct(struct sock *sk,
						  u16 *proto,
						  bool old_pkt_proto)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (old_pkt_proto) {
		if (*proto != VSOCK_PROTO_INVALID) {
			pr_err("Can't set both an old and new protocol\n");
			return false;
		}
		vmci_trans(vsk)->notify_ops = &vmci_transport_notify_pkt_ops;
		goto exit;
	}

	switch (*proto) {
	case VSOCK_PROTO_PKT_ON_NOTIFY:
		vmci_trans(vsk)->notify_ops =
			&vmci_transport_notify_pkt_q_state_ops;
		break;
	default:
		pr_err("Unknown notify protocol version\n");
		return false;
	}

exit:
	vmci_trans(vsk)->notify_ops->socket_init(sk);
	return true;
}

static u16 vmci_transport_new_proto_supported_versions(void)
{
	if (PROTOCOL_OVERRIDE != -1)
		return PROTOCOL_OVERRIDE;

	return VSOCK_PROTO_ALL_SUPPORTED;
}

static u32 vmci_transport_get_local_cid(void)
{
	return vmci_get_context_id();
}

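/* The callback table handed to the AF_VSOCK core; it wires each
 * address-family operation to the VMCI-backed implementation above.
 */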
static struct vsock_transport vmci_transport = {
	.init = vmci_transport_socket_init,
	.destruct = vmci_transport_destruct,
	.release = vmci_transport_release,
	.connect = vmci_transport_connect,
	.dgram_bind = vmci_transport_dgram_bind,
	.dgram_dequeue = vmci_transport_dgram_dequeue,
	.dgram_enqueue = vmci_transport_dgram_enqueue,
	.dgram_allow = vmci_transport_dgram_allow,
	.stream_dequeue = vmci_transport_stream_dequeue,
	.stream_enqueue = vmci_transport_stream_enqueue,
	.stream_has_data = vmci_transport_stream_has_data,
	.stream_has_space = vmci_transport_stream_has_space,
	.stream_rcvhiwat = vmci_transport_stream_rcvhiwat,
	.stream_is_active = vmci_transport_stream_is_active,
	.stream_allow = vmci_transport_stream_allow,
	.notify_poll_in = vmci_transport_notify_poll_in,
	.notify_poll_out = vmci_transport_notify_poll_out,
	.notify_recv_init = vmci_transport_notify_recv_init,
	.notify_recv_pre_block = vmci_transport_notify_recv_pre_block,
	.notify_recv_pre_dequeue = vmci_transport_notify_recv_pre_dequeue,
	.notify_recv_post_dequeue = vmci_transport_notify_recv_post_dequeue,
	.notify_send_init = vmci_transport_notify_send_init,
	.notify_send_pre_block = vmci_transport_notify_send_pre_block,
	.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
	.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
	.shutdown = vmci_transport_shutdown,
	.set_buffer_size = vmci_transport_set_buffer_size,
	.set_min_buffer_size = vmci_transport_set_min_buffer_size,
	.set_max_buffer_size = vmci_transport_set_max_buffer_size,
	.get_buffer_size = vmci_transport_get_buffer_size,
	.get_min_buffer_size = vmci_transport_get_min_buffer_size,
	.get_max_buffer_size = vmci_transport_get_max_buffer_size,
	.get_local_cid = vmci_transport_get_local_cid,
};

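/* Module init: create the well-known control datagram handle, subscribe to
 * VMCI_EVENT_QP_RESUMED notifications, then register the transport with the
 * vsock core.  Each failure path unwinds the steps already completed.
 */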
static int __init vmci_transport_init(void)
{
	int err;

	/* Create the datagram handle that we will use to send and receive all
	 * VSocket control messages for this context.
	 */
	err = vmci_transport_datagram_create_hnd(VMCI_TRANSPORT_PACKET_RID,
						 VMCI_FLAG_ANYCID_DG_HND,
						 vmci_transport_recv_stream_cb,
						 NULL,
						 &vmci_transport_stream_handle);
	if (err < VMCI_SUCCESS) {
		pr_err("Unable to create datagram handle. (%d)\n", err);
		return vmci_transport_error_to_vsock_error(err);
	}

	err = vmci_event_subscribe(VMCI_EVENT_QP_RESUMED,
				   vmci_transport_qp_resumed_cb,
				   NULL, &vmci_transport_qp_resumed_sub_id);
	if (err < VMCI_SUCCESS) {
		pr_err("Unable to subscribe to resumed event. (%d)\n", err);
		err = vmci_transport_error_to_vsock_error(err);
		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
		goto err_destroy_stream_handle;
	}

	err = vsock_core_init(&vmci_transport);
	if (err < 0)
		goto err_unsubscribe;

	return 0;

err_unsubscribe:
	vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
err_destroy_stream_handle:
	vmci_datagram_destroy_handle(vmci_transport_stream_handle);
	return err;
}
module_init(vmci_transport_init);

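/* Module exit: destroy the control datagram handle, drop the resumed-event
 * subscription, then unregister from the vsock core.
 */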
static void __exit vmci_transport_exit(void)
{
	if (!vmci_handle_is_invalid(vmci_transport_stream_handle)) {
		if (vmci_datagram_destroy_handle(
			vmci_transport_stream_handle) != VMCI_SUCCESS)
			pr_err("Couldn't destroy datagram handle\n");
		vmci_transport_stream_handle = VMCI_INVALID_HANDLE;
	}

	if (vmci_transport_qp_resumed_sub_id != VMCI_INVALID_ID) {
		vmci_event_unsubscribe(vmci_transport_qp_resumed_sub_id);
		vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
	}

	vsock_core_exit();
}
module_exit(vmci_transport_exit);

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMCI transport for Virtual Sockets");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("vmware_vsock");
MODULE_ALIAS_NETPROTO(PF_VSOCK);
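
/* For orientation only: a minimal, hypothetical userspace sketch of a guest
 * opening a stream socket to the host through this transport.  The service
 * port (9999) and the function name are illustrative assumptions, not
 * anything defined by this module; error handling is elided.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <linux/vm_sockets.h>
 *
 *	int vsock_connect_to_host(void)
 *	{
 *		struct sockaddr_vm addr;
 *		int fd = socket(AF_VSOCK, SOCK_STREAM, 0);
 *
 *		memset(&addr, 0, sizeof(addr));
 *		addr.svm_family = AF_VSOCK;
 *		addr.svm_cid = VMADDR_CID_HOST;	// CID 2 is the host
 *		addr.svm_port = 9999;		// hypothetical service port
 *		connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *		return fd;
 *	}
 */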