/*
 * vhost-vdpa.c
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "migration/migration.h"
#include "migration/misc.h"
#include "hw/virtio/vhost.h"

/* Todo: need to add the multiqueue support here */
typedef struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    Notifier migration_state;
    VHostNetState *vhost_net;

    /* Control commands shadow buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;

    /* The device always has SVQ enabled */
    bool always_svq;

    /* The device can isolate CVQ in its own ASID */
    bool cvq_isolated;

    bool started;
} VhostVDPAState;

const int vdpa_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_NET_F_CTRL_RX,
    VIRTIO_NET_F_CTRL_RX_EXTRA,
    VIRTIO_NET_F_CTRL_VLAN,
    VIRTIO_NET_F_CTRL_MAC_ADDR,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_MQ,
    VIRTIO_NET_F_CTRL_VQ,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_STATUS,
    VHOST_INVALID_FEATURE_BIT
};

/** Supported device specific feature bits with SVQ */
static const uint64_t vdpa_svq_device_features =
    BIT_ULL(VIRTIO_NET_F_CSUM) |
    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
    BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
    BIT_ULL(VIRTIO_NET_F_MTU) |
    BIT_ULL(VIRTIO_NET_F_MAC) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
    BIT_ULL(VIRTIO_NET_F_STATUS) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
    BIT_ULL(VIRTIO_NET_F_MQ) |
    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
    /* VHOST_F_LOG_ALL is exposed by SVQ */
    BIT_ULL(VHOST_F_LOG_ALL) |
    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
    BIT_ULL(VIRTIO_NET_F_STANDBY) |
    BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);

#define VHOST_VDPA_NET_CVQ_ASID 1

VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return s->vhost_net;
}

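/*
 * Size of the shadow CVQ command buffers; they are mapped into the device
 * with the page-rounded length below, so each buffer occupies whole host
 * pages.
 */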
static size_t vhost_vdpa_net_cvq_cmd_len(void)
{
    /*
     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
     * The in buffer is always 1 byte, so it should fit here.
     */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}

static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
}

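/*
 * Check whether SVQ can shadow a device offering @features, reporting the
 * offending feature bits in @errp otherwise.
 */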
static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
{
    uint64_t invalid_dev_features =
        features & ~vdpa_svq_device_features &
        /* Transport features are all accepted at this point */
        ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
                         VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);

    if (invalid_dev_features) {
        error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
                   invalid_dev_features);
        return false;
    }

    return vhost_svq_valid_features(features, errp);
}

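/* Verify that the vhost-vdpa backend really is a network device */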
static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
{
    uint32_t device_id;
    int ret;
    struct vhost_dev *hdev;

    hdev = (struct vhost_dev *)&net->dev;
    ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
    if (device_id != VIRTIO_ID_NET) {
        return -ENOTSUP;
    }
    return ret;
}

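/*
 * Create the vhost_net state for @nvqs virtqueues of this backend, bind it
 * to the net client and sanity-check the device id.
 */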
static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;
err_check:
    vhost_net_cleanup(net);
    g_free(net);
err_init:
    return -1;
}

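/* Release the CVQ shadow buffers, the vhost_net state and the device fd */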
static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
    munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.device_fd >= 0) {
        qemu_close(s->vhost_vdpa.device_fd);
        s->vhost_vdpa.device_fd = -1;
    }
}

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    return true;
}

static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    uint64_t features = 0;
    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
    features = vhost_net_get_features(s->vhost_net, features);
    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
}

static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

/** Dummy receive in case qemu falls back to userland tap networking */
static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    return size;
}

/** From any vdpa net client, get the netclient of the first queue pair */
static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
{
    NICState *nic = qemu_get_nic(s->nc.peer);
    NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);

    return DO_UPCAST(VhostVDPAState, nc, nc0);
}

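/*
 * Enable or disable SVQ for the data virtqueues by restarting the device;
 * vhost_net_start() rechecks the migration state to decide whether the
 * virtqueues are shadowed (and therefore dirty-page logged) again.
 */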
static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;
    VirtIONet *n;
    VirtIODevice *vdev;
    int data_queue_pairs, cvq, r;

    /* We are only called on the first data vqs and only if x-svq is not set */
    if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
        return;
    }

    vdev = v->dev->vdev;
    n = VIRTIO_NET(vdev);
    if (!n->vhost_started) {
        return;
    }

    data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
    cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
                                  n->max_ncs - n->max_queue_pairs : 0;
    /*
     * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
     * in the future and resume the device if read-only operations between
     * suspend and reset go wrong.
     */
    vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);

    /* Start will check migration setup_or_active to configure or not SVQ */
    r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
    if (unlikely(r < 0)) {
        error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
    }
}

static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
{
    MigrationState *migration = data;
    VhostVDPAState *s = container_of(notifier, VhostVDPAState,
                                     migration_state);

    if (migration_in_setup(migration)) {
        vhost_vdpa_net_log_global_enable(s, true);
    } else if (migration_has_failed(migration)) {
        vhost_vdpa_net_log_global_enable(s, false);
    }
}

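/*
 * Setup done only by the first data queue pair: register the migration state
 * notifier and, when shadowing virtqueues, allocate the IOVA tree shared by
 * all queues of the device.
 */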
static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
{
    struct vhost_vdpa *v = &s->vhost_vdpa;

    add_migration_state_change_notifier(&s->migration_state);
    if (v->shadow_vqs_enabled) {
        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
                                           v->iova_range.last);
    }
}

static int vhost_vdpa_net_data_start(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->always_svq ||
        migration_is_setup_or_active(migrate_get_current()->state)) {
        v->shadow_vqs_enabled = true;
        v->shadow_data = true;
    } else {
        v->shadow_vqs_enabled = false;
        v->shadow_data = false;
    }

    if (v->index == 0) {
        vhost_vdpa_net_data_start_first(s);
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
        v->iova_tree = s0->vhost_vdpa.iova_tree;
    }

    return 0;
}

static void vhost_vdpa_net_client_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_dev *dev;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.index == 0) {
        remove_migration_state_change_notifier(&s->migration_state);
    }

    dev = s->vhost_vdpa.dev;
    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
    }
}

static NetClientInfo net_vhost_vdpa_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_data_start,
    .stop = vhost_vdpa_net_client_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

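/*
 * Ask the device which group the given virtqueue belongs to.
 *
 * Returns the group number, or a negative errno and sets @errp on failure.
 */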
static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
                                          Error **errp)
{
    struct vhost_vring_state state = {
        .index = vq_index,
    };
    int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);

    if (unlikely(r < 0)) {
        r = -errno;
        error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
        return r;
    }

    return state.num;
}

static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
                                           unsigned vq_group,
                                           unsigned asid_num)
{
    struct vhost_vring_state asid = {
        .index = vq_group,
        .num = asid_num,
    };
    int r;

    r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
    if (unlikely(r < 0)) {
        error_report("Can't set vq group %u asid %u, errno=%d (%s)",
                     asid.index, asid.num, errno, g_strerror(errno));
    }
    return r;
}

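/* Unmap a CVQ buffer previously mapped with vhost_vdpa_cvq_map_buf() */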
static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
    VhostIOVATree *tree = v->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        return;
    }

    r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
    if (unlikely(r != 0)) {
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, *map);
}

/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                  bool write)
{
    DMAMap map = {};
    int r;

    map.translated_addr = (hwaddr)(uintptr_t)buf;
    map.size = size - 1;
    map.perm = write ? IOMMU_RW : IOMMU_RO;
    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
    if (unlikely(r != IOVA_OK)) {
        error_report("Cannot map injected element");
        return r;
    }

    r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
                           vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
    if (unlikely(r < 0)) {
        goto dma_map_err;
    }

    return 0;

dma_map_err:
    vhost_iova_tree_remove(v->iova_tree, map);
    return r;
}

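/*
 * Start of the CVQ net client: decide whether CVQ must be shadowed, move it
 * to its own ASID when the device can isolate it, and map the shadow command
 * and status buffers into the device.
 */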
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s, *s0;
    struct vhost_vdpa *v;
    int64_t cvq_group;
    int r;
    Error *err = NULL;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    v = &s->vhost_vdpa;

    s0 = vhost_vdpa_net_first_nc_vdpa(s);
    v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
    v->shadow_vqs_enabled = s->always_svq;
    s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;

    if (s->vhost_vdpa.shadow_data) {
        /* SVQ is already configured for all virtqueues */
        goto out;
    }

    /*
     * If we return early in these cases, SVQ will not be enabled. Migration
     * will be blocked as long as vhost-vdpa backends do not offer _F_LOG.
     */
    if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
        return 0;
    }

    if (!s->cvq_isolated) {
        return 0;
    }

    cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
                                           v->dev->vq_index_end - 1,
                                           &err);
    if (unlikely(cvq_group < 0)) {
        error_report_err(err);
        return cvq_group;
    }

    r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
    if (unlikely(r < 0)) {
        return r;
    }

    v->shadow_vqs_enabled = true;
    s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;

out:
    if (!s->vhost_vdpa.shadow_vqs_enabled) {
        return 0;
    }

    if (s0->vhost_vdpa.iova_tree) {
        /*
         * SVQ is already configured for all virtqueues. Reuse the IOVA tree
         * for simplicity, whether CVQ shares ASID with the guest or not,
         * because:
         * - The memory listener needs access to guest's memory addresses
         *   allocated in the IOVA tree.
         * - There should be plenty of IOVA address space for both ASIDs not
         *   to worry about collisions between them. Guest's translations are
         *   still validated with virtio virtqueue_pop so there is no risk for
         *   the guest to access memory that it shouldn't.
         *
         * To allocate an IOVA tree per ASID is doable but it complicates the
         * code and it is not worth it for the moment.
         */
        v->iova_tree = s0->vhost_vdpa.iova_tree;
    } else {
        v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
                                           v->iova_range.last);
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                               vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(r < 0)) {
        return r;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
                               vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(r < 0)) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
    }

    return r;
}

static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
    }

    vhost_vdpa_net_client_stop(nc);
}

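/*
 * Send the command currently held in the shadow buffers to the device and
 * poll for its completion.
 *
 * Returns the length written by the device, or a negative error code.
 */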
static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
                                      size_t in_len)
{
    /* Buffers for the device */
    const struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
        .iov_len = out_len,
    };
    const struct iovec in = {
        .iov_base = s->status,
        .iov_len = sizeof(virtio_net_ctrl_ack),
    };
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int r;

    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
    if (unlikely(r != 0)) {
        if (unlikely(r == -ENOSPC)) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                          __func__);
        }
        return r;
    }

    /*
     * We can poll here since we've had BQL from the time we sent the
     * descriptor. Also, we need to take the answer before SVQ pulls by itself,
     * when BQL is released
     */
    return vhost_svq_poll(svq);
}

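/*
 * Compose a control command in the shadow out buffer and submit it to the
 * device.
 */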
static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
                                       uint8_t cmd, const void *data,
                                       size_t data_size)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = class,
        .cmd = cmd,
    };

    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));

    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);

    return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
                                  sizeof(virtio_net_ctrl_ack));
}

static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
{
    if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                                  n->mac, sizeof(n->mac));
        if (unlikely(dev_written < 0)) {
            return dev_written;
        }

        return *s->status != VIRTIO_NET_OK;
    }

    return 0;
}

static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n)
{
    struct virtio_net_ctrl_mq mq;
    ssize_t dev_written;

    if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
        return 0;
    }

    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
                                          sizeof(mq));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}

static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
                                        const VirtIONet *n)
{
    uint64_t offloads;
    ssize_t dev_written;

    if (!virtio_vdev_has_feature(&n->parent_obj,
                                 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return 0;
    }

    if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
        /*
         * According to VirtIO standard, "Upon feature negotiation
         * corresponding offload gets enabled to preserve
         * backward compatibility.".
         *
         * Therefore, there is no need to send this CVQ command if the
         * driver also enables all supported offloads, which aligns with
         * the device's defaults.
         *
         * Note that the device's defaults can mismatch the driver's
         * configuration only at live migration.
         */
        return 0;
    }

    offloads = cpu_to_le64(n->curr_guest_offloads);
    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
                                          VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
                                          &offloads, sizeof(offloads));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}

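/*
 * NetClientInfo .load callback: when SVQ is in use, replay the virtio-net
 * state (MAC, MQ, offloads) through CVQ so the device configuration matches
 * the frontend, e.g. at the destination of a live migration.
 */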
static int vhost_vdpa_net_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    const VirtIONet *n;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    n = VIRTIO_NET(v->dev->vdev);
    r = vhost_vdpa_net_load_mac(s, n);
    if (unlikely(r < 0)) {
        return r;
    }
    r = vhost_vdpa_net_load_mq(s, n);
    if (unlikely(r)) {
        return r;
    }
    r = vhost_vdpa_net_load_offloads(s, n);
    if (unlikely(r)) {
        return r;
    }

    return 0;
}

static NetClientInfo net_vhost_vdpa_cvq_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_cvq_start,
    .load = vhost_vdpa_net_load,
    .stop = vhost_vdpa_net_cvq_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs.
 */
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                            VirtQueueElement *elem,
                                            void *opaque)
{
    VhostVDPAState *s = opaque;
    size_t in_len;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    /* Out buffer sent to both the vdpa device and the device model */
    struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
    };
    /* in buffer used for device model */
    const struct iovec in = {
        .iov_base = &status,
        .iov_len = sizeof(status),
    };
    ssize_t dev_written = -EINVAL;

    out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                             s->cvq_cmd_out_buffer,
                             vhost_vdpa_net_cvq_cmd_len());
    if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
        /*
         * Guest announce capability is emulated by qemu, so don't forward to
         * the device.
         */
        dev_written = sizeof(status);
        *s->status = VIRTIO_NET_OK;
    } else {
        dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
        if (unlikely(dev_written < 0)) {
            goto out;
        }
    }

    if (unlikely(dev_written < sizeof(status))) {
        error_report("Insufficient written data (%zu)", dev_written);
        goto out;
    }

    if (*s->status != VIRTIO_NET_OK) {
        return VIRTIO_NET_ERR;
    }

    status = VIRTIO_NET_ERR;
    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
    if (status != VIRTIO_NET_OK) {
        error_report("Bad CVQ processing in model");
    }

out:
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    if (unlikely(in_len < sizeof(status))) {
        error_report("Bad device CVQ written length");
    }
    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
    g_free(elem);
    return dev_written < 0 ? dev_written : 0;
}

static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
};

/**
 * Probe if CVQ is isolated
 *
 * @device_fd  The vdpa device fd
 * @features   Features offered by the device.
 * @cvq_index  The control vq pair index
 *
 * Returns <0 in case of failure, 0 if false and 1 if true.
 */
static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
                                          int cvq_index, Error **errp)
{
    uint64_t backend_features;
    int64_t cvq_group;
    uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
                     VIRTIO_CONFIG_S_DRIVER |
                     VIRTIO_CONFIG_S_FEATURES_OK;
    int r;

    ERRP_GUARD();

    r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
    if (unlikely(r < 0)) {
        error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
        return r;
    }

    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
        return 0;
    }

    r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
    if (unlikely(r)) {
        error_setg_errno(errp, errno, "Cannot set features");
    }

    r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set device status");
        goto out;
    }

    cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
    if (unlikely(cvq_group < 0)) {
        if (cvq_group != -ENOTSUP) {
            r = cvq_group;
            goto out;
        }

        /*
         * The kernel reports VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
         * supports ASID even if the parent driver does not. The CVQ cannot be
         * isolated in this case.
         */
        error_free(*errp);
        *errp = NULL;
        r = 0;
        goto out;
    }

    for (int i = 0; i < cvq_index; ++i) {
        int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
        if (unlikely(group < 0)) {
            r = group;
            goto out;
        }

        if (group == (int64_t)cvq_group) {
            r = 0;
            goto out;
        }
    }

    r = 1;

out:
    status = 0;
    ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
    return r;
}

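/*
 * Create one vhost-vdpa net client, either for a data queue pair or for the
 * control virtqueue, and set up its shadow virtqueue state.
 */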
static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                           const char *device,
                                           const char *name,
                                           int vdpa_device_fd,
                                           int queue_pair_index,
                                           int nvqs,
                                           bool is_datapath,
                                           bool svq,
                                           struct vhost_vdpa_iova_range iova_range,
                                           uint64_t features,
                                           Error **errp)
{
    NetClientState *nc = NULL;
    VhostVDPAState *s;
    int ret = 0;
    assert(name);
    int cvq_isolated;

    if (is_datapath) {
        nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                 name);
    } else {
        cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
                                                      queue_pair_index * 2,
                                                      errp);
        if (unlikely(cvq_isolated < 0)) {
            return NULL;
        }

        nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                         device, name);
    }
    qemu_set_info_str(nc, TYPE_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    s->vhost_vdpa.device_fd = vdpa_device_fd;
    s->vhost_vdpa.index = queue_pair_index;
    s->always_svq = svq;
    s->migration_state.notify = vdpa_net_migration_state_notifier;
    s->vhost_vdpa.shadow_vqs_enabled = svq;
    s->vhost_vdpa.iova_range = iova_range;
    s->vhost_vdpa.shadow_data = svq;
    if (queue_pair_index == 0) {
        vhost_vdpa_net_valid_svq_features(features,
                                          &s->vhost_vdpa.migration_blocker);
    } else if (!is_datapath) {
        s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
                                     PROT_READ | PROT_WRITE,
                                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
                         PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
                         -1, 0);

        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;
        s->cvq_isolated = cvq_isolated;

        /*
         * TODO: We cannot migrate devices with CVQ and no x-svq enabled as
         * there is no way to set the device state (MAC, MQ, etc) before
         * starting the datapath.
         *
         * Migration blocker ownership now belongs to s->vhost_vdpa.
         */
        if (!svq) {
            error_setg(&s->vhost_vdpa.migration_blocker,
                       "net vdpa cannot migrate with CVQ feature");
        }
    }
    ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
    if (ret) {
        qemu_del_net_client(nc);
        return NULL;
    }
    return nc;
}

static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
{
    int ret = ioctl(fd, VHOST_GET_FEATURES, features);
    if (unlikely(ret < 0)) {
        error_setg_errno(errp, errno,
                         "Fail to query features from vhost-vDPA device");
    }
    return ret;
}

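/*
 * Read max_virtqueue_pairs from the device config space and report whether
 * the device offers a control virtqueue.
 */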
static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
                                          int *has_cvq, Error **errp)
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config = NULL;
    __virtio16 *max_queue_pairs;
    int ret;

    if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
        *has_cvq = 1;
    } else {
        *has_cvq = 0;
    }

    if (features & (1 << VIRTIO_NET_F_MQ)) {
        config = g_malloc0(config_size + sizeof(*max_queue_pairs));
        config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
        config->len = sizeof(*max_queue_pairs);

        ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
        if (ret) {
            error_setg(errp, "Fail to get config from vhost-vDPA device");
            return -ret;
        }

        max_queue_pairs = (__virtio16 *)&config->buf;

        return lduw_le_p(max_queue_pairs);
    }

    return 1;
}

int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    const NetdevVhostVDPAOptions *opts;
    uint64_t features;
    int vdpa_device_fd;
    g_autofree NetClientState **ncs = NULL;
    struct vhost_vdpa_iova_range iova_range;
    NetClientState *nc;
    int queue_pairs, r, i = 0, has_cvq = 0;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    opts = &netdev->u.vhost_vdpa;
    if (!opts->vhostdev && !opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
        return -1;
    }

    if (opts->vhostdev && opts->vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
        return -1;
    }

    if (opts->vhostdev) {
        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
        if (vdpa_device_fd == -1) {
            return -errno;
        }
    } else {
        /* has_vhostfd */
        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
        if (vdpa_device_fd == -1) {
            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
            return -1;
        }
    }

    r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
    if (unlikely(r < 0)) {
        goto err;
    }

    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
                                                 &has_cvq, errp);
    if (queue_pairs < 0) {
        qemu_close(vdpa_device_fd);
        return queue_pairs;
    }

    r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
    if (unlikely(r < 0)) {
        error_setg(errp, "vhost-vdpa: get iova range failed: %s",
                   strerror(-r));
        goto err;
    }

    if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
        goto err;
    }

    ncs = g_malloc0(sizeof(*ncs) * queue_pairs);

    for (i = 0; i < queue_pairs; i++) {
        ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                     vdpa_device_fd, i, 2, true, opts->x_svq,
                                     iova_range, features, errp);
        if (!ncs[i])
            goto err;
    }

    if (has_cvq) {
        nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                 vdpa_device_fd, i, 1, false,
                                 opts->x_svq, iova_range, features, errp);
        if (!nc)
            goto err;
    }

    return 0;

err:
    if (i) {
        for (i--; i >= 0; i--) {
            qemu_del_net_client(ncs[i]);
        }
    }

    qemu_close(vdpa_device_fd);

    return -1;
}