1e0a84ea
CL
1/*
2 * vhost-vdpa.c
3 *
4 * Copyright(c) 2017-2018 Intel Corporation.
5 * Copyright(c) 2020 Red Hat, Inc.
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
8 * See the COPYING file in the top-level directory.
9 *
10 */
11
12#include "qemu/osdep.h"
13#include "clients.h"
bd907ae4 14#include "hw/virtio/virtio-net.h"
1e0a84ea
CL
15#include "net/vhost_net.h"
16#include "net/vhost-vdpa.h"
17#include "hw/virtio/vhost-vdpa.h"
18#include "qemu/config-file.h"
19#include "qemu/error-report.h"
bd907ae4
EP
20#include "qemu/log.h"
21#include "qemu/memalign.h"
1e0a84ea
CL
22#include "qemu/option.h"
23#include "qapi/error.h"
40237840 24#include <linux/vhost.h>
1e0a84ea
CL
25#include <sys/ioctl.h>
26#include <err.h>
27#include "standard-headers/linux/virtio_net.h"
28#include "monitor/monitor.h"
69498430
EP
29#include "migration/migration.h"
30#include "migration/misc.h"
1e0a84ea
CL
31#include "hw/virtio/vhost.h"
32
33/* TODO: need to add the multiqueue support here */
34typedef struct VhostVDPAState {
35 NetClientState nc;
36 struct vhost_vdpa vhost_vdpa;
69498430 37 Notifier migration_state;
1e0a84ea 38 VHostNetState *vhost_net;
2df4dd31
EP
39
40 /* Shadow buffers for control commands */
17fb889f
EP
41 void *cvq_cmd_out_buffer;
42 virtio_net_ctrl_ack *status;
43
7f211a28
EP
44 /* The device always has SVQ enabled */
45 bool always_svq;
152128d6
EP
46
47 /* The device can isolate CVQ in its own ASID */
48 bool cvq_isolated;
49
1e0a84ea
CL
50 bool started;
51} VhostVDPAState;
52
2875a0ca
HJ
53/*
54 * The array is sorted alphabetically in ascending order,
55 * with the exception of VHOST_INVALID_FEATURE_BIT,
56 * which should always be the last entry.
57 */
1e0a84ea 58const int vdpa_feature_bits[] = {
1e0a84ea 59 VIRTIO_F_ANY_LAYOUT,
2875a0ca
HJ
60 VIRTIO_F_IOMMU_PLATFORM,
61 VIRTIO_F_NOTIFY_ON_EMPTY,
62 VIRTIO_F_RING_PACKED,
63 VIRTIO_F_RING_RESET,
1e0a84ea
CL
64 VIRTIO_F_VERSION_1,
65 VIRTIO_NET_F_CSUM,
51e84244 66 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
2875a0ca
HJ
67 VIRTIO_NET_F_CTRL_MAC_ADDR,
68 VIRTIO_NET_F_CTRL_RX,
69 VIRTIO_NET_F_CTRL_RX_EXTRA,
70 VIRTIO_NET_F_CTRL_VLAN,
71 VIRTIO_NET_F_CTRL_VQ,
1e0a84ea 72 VIRTIO_NET_F_GSO,
2875a0ca
HJ
73 VIRTIO_NET_F_GUEST_CSUM,
74 VIRTIO_NET_F_GUEST_ECN,
1e0a84ea
CL
75 VIRTIO_NET_F_GUEST_TSO4,
76 VIRTIO_NET_F_GUEST_TSO6,
1e0a84ea 77 VIRTIO_NET_F_GUEST_UFO,
2875a0ca
HJ
78 VIRTIO_NET_F_HASH_REPORT,
79 VIRTIO_NET_F_HOST_ECN,
1e0a84ea
CL
80 VIRTIO_NET_F_HOST_TSO4,
81 VIRTIO_NET_F_HOST_TSO6,
1e0a84ea 82 VIRTIO_NET_F_HOST_UFO,
2875a0ca 83 VIRTIO_NET_F_MQ,
1e0a84ea
CL
84 VIRTIO_NET_F_MRG_RXBUF,
85 VIRTIO_NET_F_MTU,
0145c393 86 VIRTIO_NET_F_RSS,
9aa47edd 87 VIRTIO_NET_F_STATUS,
2875a0ca
HJ
88 VIRTIO_RING_F_EVENT_IDX,
89 VIRTIO_RING_F_INDIRECT_DESC,
90
91 /* VHOST_INVALID_FEATURE_BIT should always be the last entry */
1e0a84ea
CL
92 VHOST_INVALID_FEATURE_BIT
93};
94
1576dbb5
EP
95/** Supported device-specific feature bits with SVQ */
96static const uint64_t vdpa_svq_device_features =
97 BIT_ULL(VIRTIO_NET_F_CSUM) |
98 BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
4b4a1378 99 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) |
1576dbb5
EP
100 BIT_ULL(VIRTIO_NET_F_MTU) |
101 BIT_ULL(VIRTIO_NET_F_MAC) |
102 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
103 BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
104 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
105 BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
106 BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
107 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
108 BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
109 BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
110 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
111 BIT_ULL(VIRTIO_NET_F_STATUS) |
112 BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
72b99a87 113 BIT_ULL(VIRTIO_NET_F_MQ) |
1576dbb5
EP
114 BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
115 BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
609ab4c3
EP
116 /* VHOST_F_LOG_ALL is exposed by SVQ */
117 BIT_ULL(VHOST_F_LOG_ALL) |
1576dbb5 118 BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
0d74e2b7
EP
119 BIT_ULL(VIRTIO_NET_F_STANDBY) |
120 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX);
1576dbb5 121
c1a10086
EP
122#define VHOST_VDPA_NET_CVQ_ASID 1
123
1e0a84ea
CL
124VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
125{
126 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
127 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
128 return s->vhost_net;
129}
130
915bf6cc
EP
131static size_t vhost_vdpa_net_cvq_cmd_len(void)
132{
133 /*
134 * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
135 * The in buffer is always 1 byte, so it always fits here.
136 */
137 return sizeof(struct virtio_net_ctrl_hdr) +
138 2 * sizeof(struct virtio_net_ctrl_mac) +
139 MAC_TABLE_ENTRIES * ETH_ALEN;
140}
141
142static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
143{
144 return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
145}
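/*
 * For reference: assuming the usual definitions (MAC_TABLE_ENTRIES = 64,
 * ETH_ALEN = 6, a 2-byte virtio_net_ctrl_hdr and a 4-byte entries count in
 * virtio_net_ctrl_mac), the command length above works out to
 * 2 + 2 * 4 + 64 * 6 = 394 bytes, which is then rounded up to a full host
 * page for the shadow buffers.
 */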
146
36e46472
EP
147static bool vhost_vdpa_net_valid_svq_features(uint64_t features, Error **errp)
148{
149 uint64_t invalid_dev_features =
150 features & ~vdpa_svq_device_features &
151 /* Transport features are all accepted at this point */
152 ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
153 VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
154
155 if (invalid_dev_features) {
156 error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
157 invalid_dev_features);
258a0394 158 return false;
36e46472
EP
159 }
160
258a0394 161 return vhost_svq_valid_features(features, errp);
36e46472
EP
162}
163
1e0a84ea
CL
164static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
165{
166 uint32_t device_id;
167 int ret;
168 struct vhost_dev *hdev;
169
170 hdev = (struct vhost_dev *)&net->dev;
171 ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
172 if (device_id != VIRTIO_ID_NET) {
173 return -ENOTSUP;
174 }
175 return ret;
176}
177
40237840
JW
178static int vhost_vdpa_add(NetClientState *ncs, void *be,
179 int queue_pair_index, int nvqs)
1e0a84ea
CL
180{
181 VhostNetOptions options;
182 struct vhost_net *net = NULL;
183 VhostVDPAState *s;
184 int ret;
185
186 options.backend_type = VHOST_BACKEND_TYPE_VDPA;
187 assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
188 s = DO_UPCAST(VhostVDPAState, nc, ncs);
189 options.net_backend = ncs;
190 options.opaque = be;
191 options.busyloop_timeout = 0;
40237840 192 options.nvqs = nvqs;
1e0a84ea
CL
193
194 net = vhost_net_init(&options);
195 if (!net) {
196 error_report("failed to init vhost_net for queue");
a97ef87a 197 goto err_init;
1e0a84ea 198 }
1e0a84ea
CL
199 s->vhost_net = net;
200 ret = vhost_vdpa_net_check_device_id(net);
201 if (ret) {
a97ef87a 202 goto err_check;
1e0a84ea
CL
203 }
204 return 0;
a97ef87a
JW
205err_check:
206 vhost_net_cleanup(net);
207 g_free(net);
208err_init:
1e0a84ea
CL
209 return -1;
210}
211
212static void vhost_vdpa_cleanup(NetClientState *nc)
213{
214 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
215
a0d7215e
AS
216 /*
217 * If a peer NIC is attached, do not clean up anything.
218 * Cleanup will happen as a part of qemu_cleanup() -> net_cleanup()
219 * when the guest is shutting down.
220 */
221 if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_NIC) {
222 return;
223 }
babf8b87
EP
224 munmap(s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len());
225 munmap(s->status, vhost_vdpa_net_cvq_cmd_page_len());
1e0a84ea
CL
226 if (s->vhost_net) {
227 vhost_net_cleanup(s->vhost_net);
228 g_free(s->vhost_net);
229 s->vhost_net = NULL;
230 }
57b3a7d8
CL
231 if (s->vhost_vdpa.device_fd >= 0) {
232 qemu_close(s->vhost_vdpa.device_fd);
233 s->vhost_vdpa.device_fd = -1;
234 }
1e0a84ea
CL
235}
236
237static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
238{
239 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
240
241 return true;
242}
243
244static bool vhost_vdpa_has_ufo(NetClientState *nc)
245{
246 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
247 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
248 uint64_t features = 0;
249 features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
250 features = vhost_net_get_features(s->vhost_net, features);
251 return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
252
253}
254
ee8a1c63
KW
255static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
256 Error **errp)
257{
258 const char *driver = object_class_get_name(oc);
259
260 if (!g_str_has_prefix(driver, "virtio-net-")) {
261 error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
262 return false;
263 }
264
265 return true;
266}
267
846a1e85
EP
268/** Dummy receive in case qemu falls back to userland tap networking */
269static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
270 size_t size)
271{
bc5add1d 272 return size;
846a1e85
EP
273}
274
00ef422e
EP
275/** From any vdpa net client, get the netclient of the first queue pair */
276static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s)
277{
278 NICState *nic = qemu_get_nic(s->nc.peer);
279 NetClientState *nc0 = qemu_get_peer(nic->ncs, 0);
280
281 return DO_UPCAST(VhostVDPAState, nc, nc0);
282}
283
69498430
EP
284static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable)
285{
286 struct vhost_vdpa *v = &s->vhost_vdpa;
287 VirtIONet *n;
288 VirtIODevice *vdev;
289 int data_queue_pairs, cvq, r;
290
291 /* We are only called on the first data vq and only if x-svq is not set */
292 if (s->vhost_vdpa.shadow_vqs_enabled == enable) {
293 return;
294 }
295
296 vdev = v->dev->vdev;
297 n = VIRTIO_NET(vdev);
298 if (!n->vhost_started) {
299 return;
300 }
301
302 data_queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
303 cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
304 n->max_ncs - n->max_queue_pairs : 0;
305 /*
306 * TODO: vhost_net_stop does suspend, get_base and reset. We can be smarter
307 * in the future and resume the device if read-only operations between
308 * suspend and reset go wrong.
309 */
310 vhost_net_stop(vdev, n->nic->ncs, data_queue_pairs, cvq);
311
312 /* Start will check migration setup_or_active to decide whether to configure SVQ */
313 r = vhost_net_start(vdev, n->nic->ncs, data_queue_pairs, cvq);
314 if (unlikely(r < 0)) {
315 error_report("unable to start vhost net: %s(%d)", g_strerror(-r), -r);
316 }
317}
318
319static void vdpa_net_migration_state_notifier(Notifier *notifier, void *data)
320{
321 MigrationState *migration = data;
322 VhostVDPAState *s = container_of(notifier, VhostVDPAState,
323 migration_state);
324
325 if (migration_in_setup(migration)) {
326 vhost_vdpa_net_log_global_enable(s, true);
327 } else if (migration_has_failed(migration)) {
328 vhost_vdpa_net_log_global_enable(s, false);
329 }
330}
331
00ef422e
EP
332static void vhost_vdpa_net_data_start_first(VhostVDPAState *s)
333{
334 struct vhost_vdpa *v = &s->vhost_vdpa;
335
69498430 336 add_migration_state_change_notifier(&s->migration_state);
00ef422e
EP
337 if (v->shadow_vqs_enabled) {
338 v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
339 v->iova_range.last);
340 }
341}
342
343static int vhost_vdpa_net_data_start(NetClientState *nc)
344{
345 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
346 struct vhost_vdpa *v = &s->vhost_vdpa;
347
348 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
349
69498430
EP
350 if (s->always_svq ||
351 migration_is_setup_or_active(migrate_get_current()->state)) {
352 v->shadow_vqs_enabled = true;
353 v->shadow_data = true;
354 } else {
355 v->shadow_vqs_enabled = false;
356 v->shadow_data = false;
357 }
358
00ef422e
EP
359 if (v->index == 0) {
360 vhost_vdpa_net_data_start_first(s);
361 return 0;
362 }
363
364 if (v->shadow_vqs_enabled) {
365 VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s);
366 v->iova_tree = s0->vhost_vdpa.iova_tree;
367 }
368
369 return 0;
370}
371
372static void vhost_vdpa_net_client_stop(NetClientState *nc)
373{
374 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
375 struct vhost_dev *dev;
376
377 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
378
69498430
EP
379 if (s->vhost_vdpa.index == 0) {
380 remove_migration_state_change_notifier(&s->migration_state);
381 }
382
00ef422e
EP
383 dev = s->vhost_vdpa.dev;
384 if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
385 g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
386 }
387}
388
1e0a84ea
CL
389static NetClientInfo net_vhost_vdpa_info = {
390 .type = NET_CLIENT_DRIVER_VHOST_VDPA,
391 .size = sizeof(VhostVDPAState),
846a1e85 392 .receive = vhost_vdpa_receive,
00ef422e
EP
393 .start = vhost_vdpa_net_data_start,
394 .stop = vhost_vdpa_net_client_stop,
1e0a84ea
CL
395 .cleanup = vhost_vdpa_cleanup,
396 .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
397 .has_ufo = vhost_vdpa_has_ufo,
ee8a1c63 398 .check_peer_type = vhost_vdpa_check_peer_type,
1e0a84ea
CL
399};
400
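/*
 * Return the group that virtqueue @vq_index belongs to, or a negative errno
 * on failure.
 */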
152128d6
EP
401static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index,
402 Error **errp)
c1a10086
EP
403{
404 struct vhost_vring_state state = {
405 .index = vq_index,
406 };
407 int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, &state);
408
409 if (unlikely(r < 0)) {
0f2bb0bf 410 r = -errno;
152128d6 411 error_setg_errno(errp, errno, "Cannot get VQ %u group", vq_index);
c1a10086
EP
412 return r;
413 }
414
415 return state.num;
416}
417
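/* Bind virtqueue group @vq_group to address space @asid_num */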
418static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
419 unsigned vq_group,
420 unsigned asid_num)
421{
422 struct vhost_vring_state asid = {
423 .index = vq_group,
424 .num = asid_num,
425 };
426 int r;
427
428 r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);
429 if (unlikely(r < 0)) {
430 error_report("Can't set vq group %u asid %u, errno=%d (%s)",
431 asid.index, asid.num, errno, g_strerror(errno));
432 }
433 return r;
434}
435
2df4dd31
EP
436static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
437{
438 VhostIOVATree *tree = v->iova_tree;
439 DMAMap needle = {
440 /*
441 * No need to specify size or to look for more translations since
442 * this contiguous chunk was allocated by us.
443 */
444 .translated_addr = (hwaddr)(uintptr_t)addr,
445 };
446 const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
447 int r;
448
449 if (unlikely(!map)) {
450 error_report("Cannot locate expected map");
451 return;
452 }
453
cd831ed5 454 r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1);
2df4dd31
EP
455 if (unlikely(r != 0)) {
456 error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
457 }
458
69292a8e 459 vhost_iova_tree_remove(tree, *map);
2df4dd31
EP
460}
461
7a7f87e9
EP
462/** Map CVQ buffer. */
463static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
464 bool write)
2df4dd31
EP
465{
466 DMAMap map = {};
467 int r;
468
2df4dd31 469 map.translated_addr = (hwaddr)(uintptr_t)buf;
7a7f87e9 470 map.size = size - 1;
2df4dd31
EP
471 map.perm = write ? IOMMU_RW : IOMMU_RO;
472 r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
473 if (unlikely(r != IOVA_OK)) {
474 error_report("Cannot map injected element");
7a7f87e9 475 return r;
2df4dd31
EP
476 }
477
cd831ed5
EP
478 r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova,
479 vhost_vdpa_net_cvq_cmd_page_len(), buf, !write);
2df4dd31
EP
480 if (unlikely(r < 0)) {
481 goto dma_map_err;
482 }
483
7a7f87e9 484 return 0;
2df4dd31
EP
485
486dma_map_err:
69292a8e 487 vhost_iova_tree_remove(v->iova_tree, map);
7a7f87e9 488 return r;
2df4dd31
EP
489}
490
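/*
 * Prepare the control virtqueue for operation: decide whether it runs in
 * shadow mode, move it to its own address space when the device can isolate
 * it, and map the shadow command/status buffers.
 */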
7a7f87e9 491static int vhost_vdpa_net_cvq_start(NetClientState *nc)
2df4dd31 492{
00ef422e 493 VhostVDPAState *s, *s0;
c1a10086 494 struct vhost_vdpa *v;
c1a10086 495 int64_t cvq_group;
152128d6
EP
496 int r;
497 Error *err = NULL;
2df4dd31 498
7a7f87e9
EP
499 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
500
501 s = DO_UPCAST(VhostVDPAState, nc, nc);
c1a10086
EP
502 v = &s->vhost_vdpa;
503
69498430
EP
504 s0 = vhost_vdpa_net_first_nc_vdpa(s);
505 v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled;
c1a10086
EP
506 v->shadow_vqs_enabled = s->always_svq;
507 s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
508
69498430 509 if (s->vhost_vdpa.shadow_data) {
c1a10086
EP
510 /* SVQ is already configured for all virtqueues */
511 goto out;
512 }
513
514 /*
515 * If we return early in these cases, SVQ will not be enabled. Migration
516 * will be blocked as long as the vhost-vdpa backend does not offer _F_LOG.
c1a10086 517 */
152128d6
EP
518 if (!vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
519 return 0;
c1a10086 520 }
152128d6
EP
521
522 if (!s->cvq_isolated) {
c1a10086
EP
523 return 0;
524 }
525
152128d6
EP
526 cvq_group = vhost_vdpa_get_vring_group(v->device_fd,
527 v->dev->vq_index_end - 1,
528 &err);
c1a10086 529 if (unlikely(cvq_group < 0)) {
152128d6 530 error_report_err(err);
c1a10086
EP
531 return cvq_group;
532 }
c1a10086
EP
533
534 r = vhost_vdpa_set_address_space_id(v, cvq_group, VHOST_VDPA_NET_CVQ_ASID);
535 if (unlikely(r < 0)) {
536 return r;
537 }
538
c1a10086
EP
539 v->shadow_vqs_enabled = true;
540 s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
541
542out:
7a7f87e9
EP
543 if (!s->vhost_vdpa.shadow_vqs_enabled) {
544 return 0;
2df4dd31
EP
545 }
546
00ef422e
EP
547 if (s0->vhost_vdpa.iova_tree) {
548 /*
549 * SVQ is already configured for all virtqueues. Reuse IOVA tree for
550 * simplicity, whether CVQ shares ASID with guest or not, because:
551 * - The memory listener needs access to guest's memory addresses allocated
552 * in the IOVA tree.
553 * - There should be plenty of IOVA address space for both ASIDs, so there
554 * is no need to worry about collisions between them. Guest's translations
555 * are still validated with virtio virtqueue_pop so there is no risk for
556 * the guest to access memory that it shouldn't.
557 *
558 * Allocating an IOVA tree per ASID is doable, but it complicates the
559 * code and it is not worth it for the moment.
560 */
561 v->iova_tree = s0->vhost_vdpa.iova_tree;
562 } else {
563 v->iova_tree = vhost_iova_tree_new(v->iova_range.first,
564 v->iova_range.last);
565 }
566
7a7f87e9
EP
567 r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
568 vhost_vdpa_net_cvq_cmd_page_len(), false);
569 if (unlikely(r < 0)) {
570 return r;
571 }
572
17fb889f 573 r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
7a7f87e9
EP
574 vhost_vdpa_net_cvq_cmd_page_len(), true);
575 if (unlikely(r < 0)) {
2df4dd31 576 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
2df4dd31
EP
577 }
578
7a7f87e9
EP
579 return r;
580}
581
582static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
583{
584 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
585
586 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
587
588 if (s->vhost_vdpa.shadow_vqs_enabled) {
589 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
17fb889f 590 vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
7a7f87e9 591 }
00ef422e
EP
592
593 vhost_vdpa_net_client_stop(nc);
2df4dd31
EP
594}
595
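/*
 * Send a control command that has already been copied into the shadow
 * buffers to the device and wait for its completion.
 *
 * Returns the number of bytes the device wrote into the in buffer (the
 * status ack) or a negative errno on failure.
 */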
be4278b6
EP
596static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
597 size_t in_len)
598{
599 /* Buffers for the device */
600 const struct iovec out = {
601 .iov_base = s->cvq_cmd_out_buffer,
602 .iov_len = out_len,
603 };
604 const struct iovec in = {
17fb889f 605 .iov_base = s->status,
be4278b6
EP
606 .iov_len = sizeof(virtio_net_ctrl_ack),
607 };
608 VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
609 int r;
610
611 r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
612 if (unlikely(r != 0)) {
613 if (unlikely(r == -ENOSPC)) {
614 qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
615 __func__);
616 }
617 return r;
618 }
619
620 /*
621 * We can poll here since we've held the BQL from the time we sent the
622 * descriptor. Also, we need to take the answer before SVQ pulls it by
623 * itself, when the BQL is released.
624 */
625 return vhost_svq_poll(svq);
626}
627
f73c0c43
EP
628static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
629 uint8_t cmd, const void *data,
630 size_t data_size)
631{
632 const struct virtio_net_ctrl_hdr ctrl = {
633 .class = class,
634 .cmd = cmd,
635 };
636
637 assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));
638
639 memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
640 memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);
641
642 return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
643 sizeof(virtio_net_ctrl_ack));
644}
645
646static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
647{
02d3bf09 648 if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
f73c0c43
EP
649 ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
650 VIRTIO_NET_CTRL_MAC_ADDR_SET,
651 n->mac, sizeof(n->mac));
652 if (unlikely(dev_written < 0)) {
653 return dev_written;
654 }
b479bc3c
HJ
655 if (*s->status != VIRTIO_NET_OK) {
656 return -EIO;
657 }
f73c0c43
EP
658 }
659
660 return 0;
661}
662
f64c7cda
EP
663static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
664 const VirtIONet *n)
665{
666 struct virtio_net_ctrl_mq mq;
f64c7cda
EP
667 ssize_t dev_written;
668
02d3bf09 669 if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_MQ)) {
f64c7cda
EP
670 return 0;
671 }
672
673 mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
674 dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
675 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
676 sizeof(mq));
677 if (unlikely(dev_written < 0)) {
678 return dev_written;
679 }
680
681 return *s->status != VIRTIO_NET_OK;
682}
683
0b58d368
HJ
684static int vhost_vdpa_net_load_offloads(VhostVDPAState *s,
685 const VirtIONet *n)
686{
687 uint64_t offloads;
688 ssize_t dev_written;
689
690 if (!virtio_vdev_has_feature(&n->parent_obj,
691 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
692 return 0;
693 }
694
695 if (n->curr_guest_offloads == virtio_net_supported_guest_offloads(n)) {
696 /*
697 * According to VirtIO standard, "Upon feature negotiation
698 * corresponding offload gets enabled to preserve
699 * backward compatibility.".
700 *
701 * Therefore, there is no need to send this CVQ command if the
702 * driver also enables all supported offloads, which aligns with
703 * the device's defaults.
704 *
705 * Note that the device's defaults can mismatch the driver's
706 * configuration only at live migration.
707 */
708 return 0;
709 }
710
711 offloads = cpu_to_le64(n->curr_guest_offloads);
712 dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
713 VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET,
714 &offloads, sizeof(offloads));
715 if (unlikely(dev_written < 0)) {
716 return dev_written;
717 }
718
719 return *s->status != VIRTIO_NET_OK;
720}
721
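/*
 * Replay the guest-visible control state (MAC address, number of queue
 * pairs, offloads) through the shadow CVQ once the device has been started,
 * so that a freshly started or migrated device matches the frontend's
 * configuration.
 */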
dd036d8d
EP
722static int vhost_vdpa_net_load(NetClientState *nc)
723{
724 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
f73c0c43 725 struct vhost_vdpa *v = &s->vhost_vdpa;
dd036d8d 726 const VirtIONet *n;
f73c0c43 727 int r;
dd036d8d
EP
728
729 assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
730
731 if (!v->shadow_vqs_enabled) {
732 return 0;
733 }
734
735 n = VIRTIO_NET(v->dev->vdev);
f73c0c43
EP
736 r = vhost_vdpa_net_load_mac(s, n);
737 if (unlikely(r < 0)) {
738 return r;
dd036d8d 739 }
f64c7cda
EP
740 r = vhost_vdpa_net_load_mq(s, n);
741 if (unlikely(r)) {
742 return r;
743 }
0b58d368
HJ
744 r = vhost_vdpa_net_load_offloads(s, n);
745 if (unlikely(r)) {
746 return r;
747 }
dd036d8d
EP
748
749 return 0;
750}
751
f8972b56
EP
752static NetClientInfo net_vhost_vdpa_cvq_info = {
753 .type = NET_CLIENT_DRIVER_VHOST_VDPA,
754 .size = sizeof(VhostVDPAState),
755 .receive = vhost_vdpa_receive,
7a7f87e9 756 .start = vhost_vdpa_net_cvq_start,
dd036d8d 757 .load = vhost_vdpa_net_load,
7a7f87e9 758 .stop = vhost_vdpa_net_cvq_stop,
f8972b56
EP
759 .cleanup = vhost_vdpa_cleanup,
760 .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
761 .has_ufo = vhost_vdpa_has_ufo,
762 .check_peer_type = vhost_vdpa_check_peer_type,
763};
764
2df4dd31
EP
765/**
766 * Validate and copy control virtqueue commands.
767 *
768 * Following QEMU guidelines, we offer a copy of the buffers to the device to
769 * prevent TOCTOU bugs.
bd907ae4
EP
770 */
771static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
772 VirtQueueElement *elem,
773 void *opaque)
774{
2df4dd31 775 VhostVDPAState *s = opaque;
be4278b6 776 size_t in_len;
bd907ae4 777 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
7a7f87e9
EP
778 /* Out buffer sent to both the vdpa device and the device model */
779 struct iovec out = {
780 .iov_base = s->cvq_cmd_out_buffer,
781 };
2df4dd31
EP
782 /* in buffer used for device model */
783 const struct iovec in = {
784 .iov_base = &status,
785 .iov_len = sizeof(status),
786 };
be4278b6 787 ssize_t dev_written = -EINVAL;
2df4dd31 788
7a7f87e9
EP
789 out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
790 s->cvq_cmd_out_buffer,
791 vhost_vdpa_net_cvq_cmd_len());
3f9a3eeb
EP
792 if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
793 /*
794 * Guest announce capability is emulated by qemu, so don't forward to
795 * the device.
796 */
797 dev_written = sizeof(status);
798 *s->status = VIRTIO_NET_OK;
799 } else {
800 dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
801 if (unlikely(dev_written < 0)) {
802 goto out;
803 }
bd907ae4
EP
804 }
805
bd907ae4
EP
806 if (unlikely(dev_written < sizeof(status))) {
807 error_report("Insufficient written data (%zu)", dev_written);
2df4dd31
EP
808 goto out;
809 }
810
17fb889f 811 if (*s->status != VIRTIO_NET_OK) {
d45243bc 812 goto out;
2df4dd31
EP
813 }
814
815 status = VIRTIO_NET_ERR;
7a7f87e9 816 virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
2df4dd31
EP
817 if (status != VIRTIO_NET_OK) {
818 error_report("Bad CVQ processing in model");
bd907ae4
EP
819 }
820
821out:
822 in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
823 sizeof(status));
824 if (unlikely(in_len < sizeof(status))) {
825 error_report("Bad device CVQ written length");
826 }
827 vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
828 g_free(elem);
be4278b6 829 return dev_written < 0 ? dev_written : 0;
bd907ae4
EP
830}
831
832static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
833 .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
834};
835
152128d6
EP
836/**
837 * Probe if CVQ is isolated
838 *
839 * @device_fd The vdpa device fd
840 * @features Features offered by the device.
841 * @cvq_index The control vq pair index
842 *
843 * Returns <0 on failure, 0 if the CVQ is not isolated and 1 if it is.
844 */
845static int vhost_vdpa_probe_cvq_isolation(int device_fd, uint64_t features,
846 int cvq_index, Error **errp)
847{
848 uint64_t backend_features;
849 int64_t cvq_group;
850 uint8_t status = VIRTIO_CONFIG_S_ACKNOWLEDGE |
851 VIRTIO_CONFIG_S_DRIVER |
852 VIRTIO_CONFIG_S_FEATURES_OK;
853 int r;
854
855 ERRP_GUARD();
856
857 r = ioctl(device_fd, VHOST_GET_BACKEND_FEATURES, &backend_features);
858 if (unlikely(r < 0)) {
859 error_setg_errno(errp, errno, "Cannot get vdpa backend_features");
860 return r;
861 }
862
863 if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
864 return 0;
865 }
866
867 r = ioctl(device_fd, VHOST_SET_FEATURES, &features);
868 if (unlikely(r)) {
869 error_setg_errno(errp, errno, "Cannot set features");
870 }
871
872 r = ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
873 if (unlikely(r)) {
874 error_setg_errno(errp, -r, "Cannot set device features");
875 goto out;
876 }
877
878 cvq_group = vhost_vdpa_get_vring_group(device_fd, cvq_index, errp);
879 if (unlikely(cvq_group < 0)) {
880 if (cvq_group != -ENOTSUP) {
881 r = cvq_group;
882 goto out;
883 }
884
885 /*
886 * The kernel reports VHOST_BACKEND_F_IOTLB_ASID if the vdpa frontend
887 * supports ASID even if the parent driver does not. The CVQ cannot be
888 * isolated in this case.
889 */
890 error_free(*errp);
891 *errp = NULL;
892 r = 0;
893 goto out;
894 }
895
896 for (int i = 0; i < cvq_index; ++i) {
897 int64_t group = vhost_vdpa_get_vring_group(device_fd, i, errp);
898 if (unlikely(group < 0)) {
899 r = group;
900 goto out;
901 }
902
903 if (group == (int64_t)cvq_group) {
904 r = 0;
905 goto out;
906 }
907 }
908
909 r = 1;
910
911out:
912 status = 0;
913 ioctl(device_fd, VHOST_VDPA_SET_STATUS, &status);
914 return r;
915}
916
654790b6 917static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
a585fad2
EP
918 const char *device,
919 const char *name,
920 int vdpa_device_fd,
921 int queue_pair_index,
922 int nvqs,
923 bool is_datapath,
924 bool svq,
5c1ebd4c 925 struct vhost_vdpa_iova_range iova_range,
152128d6
EP
926 uint64_t features,
927 Error **errp)
1e0a84ea
CL
928{
929 NetClientState *nc = NULL;
930 VhostVDPAState *s;
1e0a84ea
CL
931 int ret = 0;
932 assert(name);
152128d6
EP
933 int cvq_isolated;
934
40237840
JW
935 if (is_datapath) {
936 nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
937 name);
938 } else {
152128d6
EP
939 cvq_isolated = vhost_vdpa_probe_cvq_isolation(vdpa_device_fd, features,
940 queue_pair_index * 2,
941 errp);
942 if (unlikely(cvq_isolated < 0)) {
943 return NULL;
944 }
945
f8972b56 946 nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
40237840
JW
947 device, name);
948 }
53b85d95 949 qemu_set_info_str(nc, TYPE_VHOST_VDPA);
1e0a84ea 950 s = DO_UPCAST(VhostVDPAState, nc, nc);
7327813d 951
1e0a84ea 952 s->vhost_vdpa.device_fd = vdpa_device_fd;
40237840 953 s->vhost_vdpa.index = queue_pair_index;
7f211a28 954 s->always_svq = svq;
69498430 955 s->migration_state.notify = vdpa_net_migration_state_notifier;
1576dbb5 956 s->vhost_vdpa.shadow_vqs_enabled = svq;
a585fad2 957 s->vhost_vdpa.iova_range = iova_range;
6188d78a 958 s->vhost_vdpa.shadow_data = svq;
5c1ebd4c
EP
959 if (queue_pair_index == 0) {
960 vhost_vdpa_net_valid_svq_features(features,
961 &s->vhost_vdpa.migration_blocker);
962 } else if (!is_datapath) {
babf8b87
EP
963 s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
964 PROT_READ | PROT_WRITE,
965 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
966 s->status = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(),
967 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
968 -1, 0);
2df4dd31 969
bd907ae4
EP
970 s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
971 s->vhost_vdpa.shadow_vq_ops_opaque = s;
152128d6 972 s->cvq_isolated = cvq_isolated;
9c363cf6
EP
973
974 /*
8bc0049e
EP
975 * TODO: We cannot migrate devices with CVQ and no x-svq enabled as
976 * there is no way to set the device state (MAC, MQ, etc) before
977 * starting the datapath.
9c363cf6
EP
978 *
979 * Migration blocker ownership now belongs to s->vhost_vdpa.
980 */
8bc0049e
EP
981 if (!svq) {
982 error_setg(&s->vhost_vdpa.migration_blocker,
983 "net vdpa cannot migrate with CVQ feature");
984 }
bd907ae4 985 }
40237840 986 ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
74af5eec 987 if (ret) {
74af5eec 988 qemu_del_net_client(nc);
654790b6 989 return NULL;
74af5eec 990 }
654790b6 991 return nc;
1e0a84ea
CL
992}
993
8170ab3f
EP
994static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
995{
996 int ret = ioctl(fd, VHOST_GET_FEATURES, features);
997 if (unlikely(ret < 0)) {
998 error_setg_errno(errp, errno,
999 "Fail to query features from vhost-vDPA device");
1000 }
1001 return ret;
1002}
1003
1004static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
1005 int *has_cvq, Error **errp)
40237840
JW
1006{
1007 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
cd523a41 1008 g_autofree struct vhost_vdpa_config *config = NULL;
40237840 1009 __virtio16 *max_queue_pairs;
40237840
JW
1010 int ret;
1011
40237840
JW
1012 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
1013 *has_cvq = 1;
1014 } else {
1015 *has_cvq = 0;
1016 }
1017
1018 if (features & (1 << VIRTIO_NET_F_MQ)) {
1019 config = g_malloc0(config_size + sizeof(*max_queue_pairs));
1020 config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
1021 config->len = sizeof(*max_queue_pairs);
1022
1023 ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
1024 if (ret) {
1025 error_setg(errp, "Fail to get config from vhost-vDPA device");
1026 return -ret;
1027 }
1028
1029 max_queue_pairs = (__virtio16 *)&config->buf;
1030
1031 return lduw_le_p(max_queue_pairs);
1032 }
1033
1034 return 1;
1035}
1036
1e0a84ea
CL
1037int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
1038 NetClientState *peer, Error **errp)
1039{
1040 const NetdevVhostVDPAOptions *opts;
8170ab3f 1041 uint64_t features;
654790b6 1042 int vdpa_device_fd;
eb3cb751 1043 g_autofree NetClientState **ncs = NULL;
a585fad2 1044 struct vhost_vdpa_iova_range iova_range;
eb3cb751 1045 NetClientState *nc;
aed5da45 1046 int queue_pairs, r, i = 0, has_cvq = 0;
1e0a84ea
CL
1047
1048 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
1049 opts = &netdev->u.vhost_vdpa;
7480874a 1050 if (!opts->vhostdev && !opts->vhostfd) {
8801ccd0
SWL
1051 error_setg(errp,
1052 "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
c8295404
EP
1053 return -1;
1054 }
7327813d 1055
7480874a 1056 if (opts->vhostdev && opts->vhostfd) {
8801ccd0
SWL
1057 error_setg(errp,
1058 "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
1059 return -1;
1060 }
1061
7480874a 1062 if (opts->vhostdev) {
8801ccd0
SWL
1063 vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
1064 if (vdpa_device_fd == -1) {
1065 return -errno;
1066 }
5107fd3e
PM
1067 } else {
1068 /* has_vhostfd */
8801ccd0
SWL
1069 vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
1070 if (vdpa_device_fd == -1) {
1071 error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
1072 return -1;
1073 }
7327813d
JW
1074 }
1075
8170ab3f
EP
1076 r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
1077 if (unlikely(r < 0)) {
aed5da45 1078 goto err;
8170ab3f
EP
1079 }
1080
1081 queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
40237840
JW
1082 &has_cvq, errp);
1083 if (queue_pairs < 0) {
7327813d 1084 qemu_close(vdpa_device_fd);
40237840
JW
1085 return queue_pairs;
1086 }
1087
bf7a2ad8
LM
1088 r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
1089 if (unlikely(r < 0)) {
1090 error_setg(errp, "vhost-vdpa: get iova range failed: %s",
1091 strerror(-r));
1092 goto err;
1093 }
1094
00ef422e
EP
1095 if (opts->x_svq && !vhost_vdpa_net_valid_svq_features(features, errp)) {
1096 goto err;
1576dbb5
EP
1097 }
1098
40237840
JW
1099 ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
1100
1101 for (i = 0; i < queue_pairs; i++) {
1102 ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
1576dbb5 1103 vdpa_device_fd, i, 2, true, opts->x_svq,
152128d6 1104 iova_range, features, errp);
40237840
JW
1105 if (!ncs[i])
1106 goto err;
7327813d
JW
1107 }
1108
40237840
JW
1109 if (has_cvq) {
1110 nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
1576dbb5 1111 vdpa_device_fd, i, 1, false,
152128d6 1112 opts->x_svq, iova_range, features, errp);
40237840
JW
1113 if (!nc)
1114 goto err;
1115 }
1116
654790b6 1117 return 0;
40237840
JW
1118
1119err:
1120 if (i) {
9bd05507
SWL
1121 for (i--; i >= 0; i--) {
1122 qemu_del_net_client(ncs[i]);
1123 }
40237840 1124 }
1576dbb5 1125
40237840 1126 qemu_close(vdpa_device_fd);
40237840
JW
1127
1128 return -1;
1e0a84ea 1129}
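/*
 * Illustrative usage (device node path and device model are examples only):
 * a vDPA device is bound to a virtio-net frontend with something like
 *
 *   -netdev type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vdpa0
 *   -device virtio-net-pci,netdev=vdpa0
 *
 * The x-svq=on option forces shadow virtqueues from startup instead of
 * enabling them only while a migration is in progress.
 */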