/*
 * vhost-vdpa.c
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "hw/virtio/vhost.h"

/* TODO: add multiqueue support here */
typedef struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    VHostNetState *vhost_net;

    /* Control commands shadow buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;

    /* The device always has SVQ enabled */
    bool always_svq;
    bool started;
} VhostVDPAState;

const int vdpa_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_NET_F_CTRL_RX,
    VIRTIO_NET_F_CTRL_RX_EXTRA,
    VIRTIO_NET_F_CTRL_VLAN,
    VIRTIO_NET_F_CTRL_MAC_ADDR,
    VIRTIO_NET_F_MQ,
    VIRTIO_NET_F_CTRL_VQ,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_ANNOUNCE,
    VIRTIO_NET_F_STATUS,
    VHOST_INVALID_FEATURE_BIT
};

/** Supported device specific feature bits with SVQ */
static const uint64_t vdpa_svq_device_features =
    BIT_ULL(VIRTIO_NET_F_CSUM) |
    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
    BIT_ULL(VIRTIO_NET_F_MTU) |
    BIT_ULL(VIRTIO_NET_F_MAC) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
    BIT_ULL(VIRTIO_NET_F_STATUS) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
    BIT_ULL(VIRTIO_NET_F_MQ) |
    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
    BIT_ULL(VIRTIO_NET_F_STANDBY);

VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return s->vhost_net;
}

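/*
 * Check whether SVQ can shadow the device features: device-specific bits
 * must be in vdpa_svq_device_features; transport bits are validated
 * separately by vhost_svq_valid_features().
 */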
static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
{
    uint64_t invalid_dev_features =
        features & ~vdpa_svq_device_features &
        /* Transport features are all accepted at this point */
        ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
                         VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);

    if (invalid_dev_features) {
        error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
                   invalid_dev_features);
        return false;
    }

    return vhost_svq_valid_features(features, errp);
}

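/* Check that the device behind the vhost-vDPA backend is a network device */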
static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
{
    uint32_t device_id;
    int ret;
    struct vhost_dev *hdev;

    hdev = (struct vhost_dev *)&net->dev;
    ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
    if (unlikely(ret < 0)) {
        /* device_id is not initialized if the ioctl failed */
        return ret;
    }
    if (device_id != VIRTIO_ID_NET) {
        return -ENOTSUP;
    }
    return 0;
}

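/*
 * Initialize the vhost_net backend for one queue pair and verify that it
 * drives a network device.
 */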
static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;

err_check:
    vhost_net_cleanup(net);
    g_free(net);
err_init:
    return -1;
}

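/*
 * Free the CVQ shadow buffers, the shared IOVA tree (on the last queue),
 * the vhost_net backend and the device fd.
 */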
static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_dev *dev = &s->vhost_net->dev;

    qemu_vfree(s->cvq_cmd_out_buffer);
    qemu_vfree(s->status);
    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
        /* The last queue owns the IOVA tree shared by all the queues */
        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
    }
    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.device_fd >= 0) {
        qemu_close(s->vhost_vdpa.device_fd);
        s->vhost_vdpa.device_fd = -1;
    }
}

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    return true;
}

static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    uint64_t features = 0;
    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
    features = vhost_net_get_features(s->vhost_net, features);
    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
}

static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

/** Dummy receive in case QEMU falls back to userland tap networking */
static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    return size;
}

static NetClientInfo net_vhost_vdpa_info = {
        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
        .size = sizeof(VhostVDPAState),
        .receive = vhost_vdpa_receive,
        .cleanup = vhost_vdpa_cleanup,
        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
        .has_ufo = vhost_vdpa_has_ufo,
        .check_peer_type = vhost_vdpa_check_peer_type,
};

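/* Unmap a shadow CVQ buffer from the device and drop it from the IOVA tree */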
static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
    VhostIOVATree *tree = v->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        return;
    }

    r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
    if (unlikely(r != 0)) {
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, *map);
}

static size_t vhost_vdpa_net_cvq_cmd_len(void)
{
    /*
     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
     * The in buffer is always 1 byte, so it always fits here.
     */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}

static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
}

/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                  bool write)
{
    DMAMap map = {};
    int r;

    map.translated_addr = (hwaddr)(uintptr_t)buf;
    map.size = size - 1;
    map.perm = write ? IOMMU_RW : IOMMU_RO;
    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
    if (unlikely(r != IOVA_OK)) {
        error_report("Cannot map injected element");
        return r;
    }

    r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
                           vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
    if (unlikely(r < 0)) {
        goto dma_map_err;
    }

    return 0;

dma_map_err:
    vhost_iova_tree_remove(v->iova_tree, map);
    return r;
}

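/* Map the shadow CVQ buffers into the device; only needed when SVQ is on */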
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    if (!s->vhost_vdpa.shadow_vqs_enabled) {
        return 0;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                               vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(r < 0)) {
        return r;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
                               vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(r < 0)) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
    }

    return r;
}

static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
    }
}

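/*
 * Send a command to the device through the shadow CVQ and poll for its
 * completion: returns the length written by the device or a negative errno.
 */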
static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
                                      size_t in_len)
{
    /* Buffers for the device */
    const struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
        .iov_len = out_len,
    };
    const struct iovec in = {
        .iov_base = s->status,
        .iov_len = sizeof(virtio_net_ctrl_ack),
    };
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int r;

    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
    if (unlikely(r != 0)) {
        if (unlikely(r == -ENOSPC)) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                          __func__);
        }
        return r;
    }

    /*
     * We can poll here since we've held the BQL from the time we sent the
     * descriptor. Also, we need to take the answer before SVQ pulls by
     * itself, when the BQL is released.
     */
    return vhost_svq_poll(svq);
}

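/* Compose a control command in the shadow out buffer and send it */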
static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
                                       uint8_t cmd, const void *data,
                                       size_t data_size)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = class,
        .cmd = cmd,
    };

    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));

    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);

    return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
                                  sizeof(virtio_net_ctrl_ack));
}

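/* Restore the MAC address in the device, if the guest acked CTRL_MAC_ADDR */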
static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
{
    uint64_t features = n->parent_obj.guest_features;
    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                                  n->mac, sizeof(n->mac));
        if (unlikely(dev_written < 0)) {
            return dev_written;
        }

        return *s->status != VIRTIO_NET_OK;
    }

    return 0;
}

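/* Restore the number of active queue pairs, if the guest acked MQ */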
static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n)
{
    struct virtio_net_ctrl_mq mq;
    uint64_t features = n->parent_obj.guest_features;
    ssize_t dev_written;

    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
        return 0;
    }

    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
                                          sizeof(mq));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}

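/*
 * NetClientInfo .load callback: replay the virtio-net state through the
 * shadow CVQ when the device starts with SVQ enabled.
 */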
static int vhost_vdpa_net_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    const VirtIONet *n;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    n = VIRTIO_NET(v->dev->vdev);
    r = vhost_vdpa_net_load_mac(s, n);
    if (unlikely(r < 0)) {
        return r;
    }
    r = vhost_vdpa_net_load_mq(s, n);
    if (unlikely(r)) {
        return r;
    }

    return 0;
}

static NetClientInfo net_vhost_vdpa_cvq_info = {
        .type = NET_CLIENT_DRIVER_VHOST_VDPA,
        .size = sizeof(VhostVDPAState),
        .receive = vhost_vdpa_receive,
        .start = vhost_vdpa_net_cvq_start,
        .load = vhost_vdpa_net_load,
        .stop = vhost_vdpa_net_cvq_stop,
        .cleanup = vhost_vdpa_cleanup,
        .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
        .has_ufo = vhost_vdpa_has_ufo,
        .check_peer_type = vhost_vdpa_check_peer_type,
};

/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs.
 */
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                            VirtQueueElement *elem,
                                            void *opaque)
{
    VhostVDPAState *s = opaque;
    size_t in_len;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    /* Out buffer sent to both the vdpa device and the device model */
    struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
    };
    /* In buffer used for the device model */
    const struct iovec in = {
        .iov_base = &status,
        .iov_len = sizeof(status),
    };
    ssize_t dev_written = -EINVAL;

    out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                             s->cvq_cmd_out_buffer,
                             vhost_vdpa_net_cvq_cmd_len());
    dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
    if (unlikely(dev_written < 0)) {
        goto out;
    }

    if (unlikely(dev_written < sizeof(status))) {
        error_report("Insufficient written data (%zd)", dev_written);
        goto out;
    }

    if (*s->status != VIRTIO_NET_OK) {
        /*
         * Push the VIRTIO_NET_ERR ack to the guest through the out path so
         * the element is returned and freed in every case.
         */
        goto out;
    }

    status = VIRTIO_NET_ERR;
    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
    if (status != VIRTIO_NET_OK) {
        error_report("Bad CVQ processing in model");
    }

out:
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    if (unlikely(in_len < sizeof(status))) {
        error_report("Bad device CVQ written length");
    }
    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
    g_free(elem);
    return dev_written < 0 ? dev_written : 0;
}

static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
};

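/* Create the NetClientState for one data queue pair or for the control VQ */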
static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                           const char *device,
                                           const char *name,
                                           int vdpa_device_fd,
                                           int queue_pair_index,
                                           int nvqs,
                                           bool is_datapath,
                                           bool svq,
                                           struct vhost_vdpa_iova_range iova_range,
                                           VhostIOVATree *iova_tree)
{
    NetClientState *nc = NULL;
    VhostVDPAState *s;
    int ret = 0;
    assert(name);
    if (is_datapath) {
        nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                 name);
    } else {
        nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                         device, name);
    }
    qemu_set_info_str(nc, TYPE_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    s->vhost_vdpa.device_fd = vdpa_device_fd;
    s->vhost_vdpa.index = queue_pair_index;
    s->always_svq = svq;
    s->vhost_vdpa.shadow_vqs_enabled = svq;
    s->vhost_vdpa.iova_range = iova_range;
    s->vhost_vdpa.iova_tree = iova_tree;
    if (!is_datapath) {
        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
                                              vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
        s->status = qemu_memalign(qemu_real_host_page_size(),
                                  vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());

        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;
    }
    ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
    if (ret) {
        qemu_del_net_client(nc);
        return NULL;
    }
    return nc;
}

static int vhost_vdpa_get_iova_range(int fd,
                                     struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
{
    int ret = ioctl(fd, VHOST_GET_FEATURES, features);
    if (unlikely(ret < 0)) {
        error_setg_errno(errp, errno,
                         "Failed to query features from vhost-vDPA device");
    }
    return ret;
}

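/*
 * Report whether the device offers a control virtqueue and read its
 * max_virtqueue_pairs config field; defaults to 1 queue pair without MQ.
 */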
static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
                                          int *has_cvq, Error **errp)
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config = NULL;
    __virtio16 *max_queue_pairs;
    int ret;

    if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
        *has_cvq = 1;
    } else {
        *has_cvq = 0;
    }

    if (features & (1 << VIRTIO_NET_F_MQ)) {
        config = g_malloc0(config_size + sizeof(*max_queue_pairs));
        config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
        config->len = sizeof(*max_queue_pairs);

        ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
        if (ret) {
            /* ioctl() returns -1 and sets errno on failure */
            ret = -errno;
            error_setg(errp, "Failed to get config from vhost-vDPA device");
            return ret;
        }

        max_queue_pairs = (__virtio16 *)&config->buf;

        return lduw_le_p(max_queue_pairs);
    }

    return 1;
}

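/*
 * Entry point of the vhost-vdpa netdev: create one NetClientState per data
 * queue pair, plus one for the control virtqueue if the device offers it.
 */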
int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    const NetdevVhostVDPAOptions *opts;
    uint64_t features;
    int vdpa_device_fd;
    g_autofree NetClientState **ncs = NULL;
    g_autoptr(VhostIOVATree) iova_tree = NULL;
    struct vhost_vdpa_iova_range iova_range;
    NetClientState *nc;
    int queue_pairs, r, i = 0, has_cvq = 0;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    opts = &netdev->u.vhost_vdpa;
    if (!opts->vhostdev && !opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
        return -1;
    }

    if (opts->vhostdev && opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
        return -1;
    }

    if (opts->vhostdev) {
        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
        if (vdpa_device_fd == -1) {
            return -errno;
        }
    } else {
        /* has_vhostfd */
        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
        if (vdpa_device_fd == -1) {
            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
            return -1;
        }
    }

    r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
    if (unlikely(r < 0)) {
        goto err;
    }

    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
                                                 &has_cvq, errp);
    if (queue_pairs < 0) {
        qemu_close(vdpa_device_fd);
        return queue_pairs;
    }

    vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
    if (opts->x_svq) {
        if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
            goto err_svq;
        }

        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
    }

    ncs = g_malloc0(sizeof(*ncs) * queue_pairs);

    for (i = 0; i < queue_pairs; i++) {
        ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                     vdpa_device_fd, i, 2, true, opts->x_svq,
                                     iova_range, iova_tree);
        if (!ncs[i]) {
            goto err;
        }
    }

    if (has_cvq) {
        nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                 vdpa_device_fd, i, 1, false,
                                 opts->x_svq, iova_range, iova_tree);
        if (!nc) {
            goto err;
        }
    }

    /* iova_tree ownership belongs to the last NetClientState */
    g_steal_pointer(&iova_tree);
    return 0;

err:
    if (i) {
        for (i--; i >= 0; i--) {
            qemu_del_net_client(ncs[i]);
        }
    }

err_svq:
    qemu_close(vdpa_device_fd);

    return -1;
}