]>
Commit | Line | Data |
---|---|---|
108a6481 CL |
1 | /* |
2 | * vhost-vdpa | |
3 | * | |
4 | * Copyright(c) 2017-2018 Intel Corporation. | |
5 | * Copyright(c) 2020 Red Hat, Inc. | |
6 | * | |
7 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
8 | * See the COPYING file in the top-level directory. | |
9 | * | |
10 | */ | |
11 | ||
12 | #include "qemu/osdep.h" | |
13 | #include <linux/vhost.h> | |
14 | #include <linux/vfio.h> | |
15 | #include <sys/eventfd.h> | |
16 | #include <sys/ioctl.h> | |
17 | #include "hw/virtio/vhost.h" | |
18 | #include "hw/virtio/vhost-backend.h" | |
19 | #include "hw/virtio/virtio-net.h" | |
dff4426f | 20 | #include "hw/virtio/vhost-shadow-virtqueue.h" |
108a6481 | 21 | #include "hw/virtio/vhost-vdpa.h" |
df77d45a | 22 | #include "exec/address-spaces.h" |
c156d5bf | 23 | #include "migration/blocker.h" |
415b7327 | 24 | #include "qemu/cutils.h" |
108a6481 | 25 | #include "qemu/main-loop.h" |
4dc5acc0 | 26 | #include "cpu.h" |
778e67de | 27 | #include "trace.h" |
dff4426f | 28 | #include "qapi/error.h" |
108a6481 | 29 | |
032e4d68 EP |
30 | /* |
31 | * Return one past the end of the end of section. Be careful with uint64_t | |
32 | * conversions! | |
33 | */ | |
34 | static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section) | |
35 | { | |
36 | Int128 llend = int128_make64(section->offset_within_address_space); | |
37 | llend = int128_add(llend, section->size); | |
38 | llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK)); | |
39 | ||
40 | return llend; | |
41 | } | |
42 | ||
013108b6 EP |
/*
 * Decide whether the memory listener must ignore @section.
 *
 * Sections are skipped when they are not plain RAM or IOMMU-backed
 * (vhost-vDPA cannot map MMIO), when they are protected, or when they fall
 * outside the device's usable IOVA window [@iova_min, @iova_max].
 *
 * Returns true if the section must be skipped.
 */
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max)
{
    Int128 llend;

    if ((!memory_region_is_ram(section->mr) &&
         !memory_region_is_iommu(section->mr)) ||
        memory_region_is_protected(section->mr) ||
        /* vhost-vDPA doesn't allow MMIO to be mapped  */
        memory_region_is_ram_device(section->mr)) {
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }

    /*
     * NOTE(review): llend is one past the end of the section, so a section
     * ending exactly at iova_max (inclusive last usable IOVA) makes
     * llend == iova_max + 1 and is rejected here — confirm whether that
     * boundary case is intentional.
     */
    llend = vhost_vdpa_section_end(section);
    if (int128_gt(llend, int128_make64(iova_max))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     iova_max, int128_get64(llend));
        return true;
    }

    return false;
}
74 | ||
cd831ed5 EP |
75 | /* |
76 | * The caller must set asid = 0 if the device does not support asid. | |
77 | * This is not an ABI break since it is set to 0 by the initializer anyway. | |
78 | */ | |
79 | int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, | |
80 | hwaddr size, void *vaddr, bool readonly) | |
108a6481 | 81 | { |
386494f2 | 82 | struct vhost_msg_v2 msg = {}; |
108a6481 CL |
83 | int fd = v->device_fd; |
84 | int ret = 0; | |
85 | ||
86 | msg.type = v->msg_type; | |
cd831ed5 | 87 | msg.asid = asid; |
108a6481 CL |
88 | msg.iotlb.iova = iova; |
89 | msg.iotlb.size = size; | |
90 | msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr; | |
91 | msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; | |
92 | msg.iotlb.type = VHOST_IOTLB_UPDATE; | |
93 | ||
cd831ed5 EP |
94 | trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, |
95 | msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, | |
96 | msg.iotlb.type); | |
778e67de | 97 | |
108a6481 CL |
98 | if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { |
99 | error_report("failed to write, fd=%d, errno=%d (%s)", | |
100 | fd, errno, strerror(errno)); | |
101 | return -EIO ; | |
102 | } | |
103 | ||
104 | return ret; | |
105 | } | |
106 | ||
cd831ed5 EP |
107 | /* |
108 | * The caller must set asid = 0 if the device does not support asid. | |
109 | * This is not an ABI break since it is set to 0 by the initializer anyway. | |
110 | */ | |
111 | int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, | |
112 | hwaddr size) | |
108a6481 | 113 | { |
386494f2 | 114 | struct vhost_msg_v2 msg = {}; |
108a6481 CL |
115 | int fd = v->device_fd; |
116 | int ret = 0; | |
117 | ||
118 | msg.type = v->msg_type; | |
cd831ed5 | 119 | msg.asid = asid; |
108a6481 CL |
120 | msg.iotlb.iova = iova; |
121 | msg.iotlb.size = size; | |
122 | msg.iotlb.type = VHOST_IOTLB_INVALIDATE; | |
123 | ||
cd831ed5 | 124 | trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, |
778e67de LV |
125 | msg.iotlb.size, msg.iotlb.type); |
126 | ||
108a6481 CL |
127 | if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { |
128 | error_report("failed to write, fd=%d, errno=%d (%s)", | |
129 | fd, errno, strerror(errno)); | |
130 | return -EIO ; | |
131 | } | |
132 | ||
133 | return ret; | |
134 | } | |
135 | ||
e6db5df7 | 136 | static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) |
a5bd0580 | 137 | { |
a5bd0580 | 138 | int fd = v->device_fd; |
e6db5df7 EP |
139 | struct vhost_msg_v2 msg = { |
140 | .type = v->msg_type, | |
141 | .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, | |
142 | }; | |
a5bd0580 | 143 | |
5580b9f0 | 144 | trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type); |
a5bd0580 JW |
145 | if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { |
146 | error_report("failed to write, fd=%d, errno=%d (%s)", | |
147 | fd, errno, strerror(errno)); | |
148 | } | |
149 | } | |
150 | ||
e6db5df7 EP |
151 | static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) |
152 | { | |
153 | if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && | |
154 | !v->iotlb_batch_begin_sent) { | |
155 | vhost_vdpa_listener_begin_batch(v); | |
156 | } | |
157 | ||
158 | v->iotlb_batch_begin_sent = true; | |
159 | } | |
160 | ||
a5bd0580 JW |
161 | static void vhost_vdpa_listener_commit(MemoryListener *listener) |
162 | { | |
163 | struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); | |
164 | struct vhost_dev *dev = v->dev; | |
8acb3218 | 165 | struct vhost_msg_v2 msg = {}; |
a5bd0580 JW |
166 | int fd = v->device_fd; |
167 | ||
168 | if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { | |
169 | return; | |
170 | } | |
171 | ||
e6db5df7 EP |
172 | if (!v->iotlb_batch_begin_sent) { |
173 | return; | |
174 | } | |
175 | ||
a5bd0580 JW |
176 | msg.type = v->msg_type; |
177 | msg.iotlb.type = VHOST_IOTLB_BATCH_END; | |
178 | ||
5580b9f0 | 179 | trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type); |
a5bd0580 JW |
180 | if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { |
181 | error_report("failed to write, fd=%d, errno=%d (%s)", | |
182 | fd, errno, strerror(errno)); | |
183 | } | |
e6db5df7 EP |
184 | |
185 | v->iotlb_batch_begin_sent = false; | |
a5bd0580 JW |
186 | } |
187 | ||
108a6481 CL |
188 | static void vhost_vdpa_listener_region_add(MemoryListener *listener, |
189 | MemoryRegionSection *section) | |
190 | { | |
7dab70be | 191 | DMAMap mem_region = {}; |
108a6481 CL |
192 | struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); |
193 | hwaddr iova; | |
194 | Int128 llend, llsize; | |
195 | void *vaddr; | |
196 | int ret; | |
197 | ||
013108b6 EP |
198 | if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, |
199 | v->iova_range.last)) { | |
108a6481 CL |
200 | return; |
201 | } | |
202 | ||
203 | if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != | |
204 | (section->offset_within_region & ~TARGET_PAGE_MASK))) { | |
205 | error_report("%s received unaligned region", __func__); | |
206 | return; | |
207 | } | |
208 | ||
209 | iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); | |
032e4d68 | 210 | llend = vhost_vdpa_section_end(section); |
108a6481 CL |
211 | if (int128_ge(int128_make64(iova), llend)) { |
212 | return; | |
213 | } | |
214 | ||
215 | memory_region_ref(section->mr); | |
216 | ||
217 | /* Here we assume that memory_region_is_ram(section->mr)==true */ | |
218 | ||
219 | vaddr = memory_region_get_ram_ptr(section->mr) + | |
220 | section->offset_within_region + | |
221 | (iova - section->offset_within_address_space); | |
222 | ||
778e67de LV |
223 | trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend), |
224 | vaddr, section->readonly); | |
225 | ||
108a6481 | 226 | llsize = int128_sub(llend, int128_make64(iova)); |
6188d78a | 227 | if (v->shadow_data) { |
7dab70be | 228 | int r; |
34e3c94e | 229 | |
7dab70be EP |
230 | mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, |
231 | mem_region.size = int128_get64(llsize) - 1, | |
232 | mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), | |
233 | ||
234 | r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); | |
34e3c94e EP |
235 | if (unlikely(r != IOVA_OK)) { |
236 | error_report("Can't allocate a mapping (%d)", r); | |
237 | goto fail; | |
238 | } | |
239 | ||
240 | iova = mem_region.iova; | |
241 | } | |
108a6481 | 242 | |
e6db5df7 | 243 | vhost_vdpa_iotlb_batch_begin_once(v); |
cd831ed5 EP |
244 | ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, |
245 | int128_get64(llsize), vaddr, section->readonly); | |
108a6481 CL |
246 | if (ret) { |
247 | error_report("vhost vdpa map fail!"); | |
7dab70be | 248 | goto fail_map; |
108a6481 CL |
249 | } |
250 | ||
251 | return; | |
252 | ||
7dab70be | 253 | fail_map: |
6188d78a | 254 | if (v->shadow_data) { |
69292a8e | 255 | vhost_iova_tree_remove(v->iova_tree, mem_region); |
7dab70be EP |
256 | } |
257 | ||
108a6481 | 258 | fail: |
108a6481 CL |
259 | /* |
260 | * On the initfn path, store the first error in the container so we | |
261 | * can gracefully fail. Runtime, there's not much we can do other | |
262 | * than throw a hardware error. | |
263 | */ | |
264 | error_report("vhost-vdpa: DMA mapping failed, unable to continue"); | |
265 | return; | |
266 | ||
267 | } | |
268 | ||
/*
 * Memory listener region_del hook: tear down the DMA mapping of a removed
 * RAM section.
 *
 * With shadow data translation the IOVA was allocated from the IOVA tree,
 * so it is looked up by host vaddr first and removed from the tree before
 * the device unmap is issued.
 */
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;

    if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first,
                                            v->iova_range.last)) {
        return;
    }

    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
        error_report("%s received unaligned region", __func__);
        return;
    }

    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
    llend = vhost_vdpa_section_end(section);

    trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend));

    if (int128_ge(int128_make64(iova), llend)) {
        /* Section shrank to nothing after page alignment. */
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (v->shadow_data) {
        const DMAMap *result;
        /* Recompute the host vaddr the same way region_add did. */
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
        if (!result) {
            /* The memory listener map wasn't mapped */
            return;
        }
        iova = result->iova;
        vhost_iova_tree_remove(v->iova_tree, *result);
    }
    vhost_vdpa_iotlb_batch_begin_once(v);
    ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova,
                               int128_get64(llsize));
    if (ret) {
        error_report("vhost_vdpa dma unmap error!");
    }

    memory_region_unref(section->mr);
}
/*
 * IOTLB API is used by vhost-vdpa which requires incremental updating
 * of the mapping. So we can not use generic vhost memory listener which
 * depends on the addnop().
 *
 * .commit closes the IOTLB batch opened lazily by the add/del hooks.
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};
337 | ||
338 | static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, | |
339 | void *arg) | |
340 | { | |
341 | struct vhost_vdpa *v = dev->opaque; | |
342 | int fd = v->device_fd; | |
f2a6e6c4 | 343 | int ret; |
108a6481 CL |
344 | |
345 | assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); | |
346 | ||
f2a6e6c4 KW |
347 | ret = ioctl(fd, request, arg); |
348 | return ret < 0 ? -errno : ret; | |
108a6481 CL |
349 | } |
350 | ||
3631151b | 351 | static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) |
108a6481 CL |
352 | { |
353 | uint8_t s; | |
3631151b | 354 | int ret; |
108a6481 | 355 | |
778e67de | 356 | trace_vhost_vdpa_add_status(dev, status); |
3631151b RK |
357 | ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s); |
358 | if (ret < 0) { | |
359 | return ret; | |
108a6481 CL |
360 | } |
361 | ||
362 | s |= status; | |
363 | ||
3631151b RK |
364 | ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s); |
365 | if (ret < 0) { | |
366 | return ret; | |
367 | } | |
368 | ||
369 | ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s); | |
370 | if (ret < 0) { | |
371 | return ret; | |
372 | } | |
373 | ||
374 | if (!(s & status)) { | |
375 | return -EIO; | |
376 | } | |
377 | ||
378 | return 0; | |
108a6481 CL |
379 | } |
380 | ||
c672f348 LM |
381 | int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range) |
382 | { | |
383 | int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); | |
384 | ||
385 | return ret < 0 ? -errno : 0; | |
386 | } | |
387 | ||
d71b0609 SWL |
388 | /* |
389 | * The use of this function is for requests that only need to be | |
390 | * applied once. Typically such request occurs at the beginning | |
391 | * of operation, and before setting up queues. It should not be | |
392 | * used for request that performs operation until all queues are | |
393 | * set, which would need to check dev->vq_index_end instead. | |
394 | */ | |
395 | static bool vhost_vdpa_first_dev(struct vhost_dev *dev) | |
4d191cfd JW |
396 | { |
397 | struct vhost_vdpa *v = dev->opaque; | |
398 | ||
d71b0609 | 399 | return v->index == 0; |
4d191cfd JW |
400 | } |
401 | ||
12a195fa EP |
402 | static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, |
403 | uint64_t *features) | |
404 | { | |
405 | int ret; | |
406 | ||
407 | ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features); | |
408 | trace_vhost_vdpa_get_features(dev, *features); | |
409 | return ret; | |
410 | } | |
411 | ||
258a0394 | 412 | static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) |
dff4426f EP |
413 | { |
414 | g_autoptr(GPtrArray) shadow_vqs = NULL; | |
4725a418 | 415 | |
dff4426f EP |
416 | shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); |
417 | for (unsigned n = 0; n < hdev->nvqs; ++n) { | |
3cfb4d06 | 418 | VhostShadowVirtqueue *svq; |
dff4426f | 419 | |
5fde952b | 420 | svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque); |
3cfb4d06 | 421 | g_ptr_array_add(shadow_vqs, svq); |
dff4426f EP |
422 | } |
423 | ||
424 | v->shadow_vqs = g_steal_pointer(&shadow_vqs); | |
dff4426f EP |
425 | } |
426 | ||
/*
 * Backend init: wire up the memory listener, IOTLB message type and shadow
 * virtqueues for one vhost-vdpa device.
 *
 * Only the first device of the group (virtqueue index 0) performs the
 * group-wide setup: migration-blocker validation, disabling RAM discard
 * and setting the initial driver status bits.
 *
 * Returns 0 on success, a negative errno on failure.
 */
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, opaque);
    int ret;

    v = opaque;
    v->dev = dev;
    dev->opaque = opaque ;
    v->listener = vhost_vdpa_memory_listener;
    v->msg_type = VHOST_IOTLB_MSG_V2;
    vhost_vdpa_init_svq(dev, v);

    /* Hand any pre-set blocker over to the generic vhost layer. */
    error_propagate(&dev->migration_blocker, v->migration_blocker);
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    /*
     * If dev->shadow_vqs_enabled at initialization that means the device has
     * been started with x-svq=on, so don't block migration
     */
    if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) {
        /* We don't have dev->features yet */
        uint64_t features;
        ret = vhost_vdpa_get_dev_features(dev, &features);
        if (unlikely(ret)) {
            error_setg_errno(errp, -ret, "Could not get device features");
            return ret;
        }
        vhost_svq_valid_features(features, &dev->migration_blocker);
    }

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot set discarding of RAM broken");
        return ret;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}
476 | ||
d0416d48 JW |
477 | static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev, |
478 | int queue_index) | |
479 | { | |
8e3b0cbb | 480 | size_t page_size = qemu_real_host_page_size(); |
d0416d48 JW |
481 | struct vhost_vdpa *v = dev->opaque; |
482 | VirtIODevice *vdev = dev->vdev; | |
483 | VhostVDPAHostNotifier *n; | |
484 | ||
485 | n = &v->notifier[queue_index]; | |
486 | ||
487 | if (n->addr) { | |
488 | virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false); | |
489 | object_unparent(OBJECT(&n->mr)); | |
490 | munmap(n->addr, page_size); | |
491 | n->addr = NULL; | |
492 | } | |
493 | } | |
494 | ||
d0416d48 JW |
495 | static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) |
496 | { | |
8e3b0cbb | 497 | size_t page_size = qemu_real_host_page_size(); |
d0416d48 JW |
498 | struct vhost_vdpa *v = dev->opaque; |
499 | VirtIODevice *vdev = dev->vdev; | |
500 | VhostVDPAHostNotifier *n; | |
501 | int fd = v->device_fd; | |
502 | void *addr; | |
503 | char *name; | |
504 | ||
505 | vhost_vdpa_host_notifier_uninit(dev, queue_index); | |
506 | ||
507 | n = &v->notifier[queue_index]; | |
508 | ||
509 | addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd, | |
510 | queue_index * page_size); | |
511 | if (addr == MAP_FAILED) { | |
512 | goto err; | |
513 | } | |
514 | ||
515 | name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]", | |
516 | v, queue_index); | |
517 | memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name, | |
518 | page_size, addr); | |
519 | g_free(name); | |
520 | ||
521 | if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) { | |
98f7607e | 522 | object_unparent(OBJECT(&n->mr)); |
d0416d48 JW |
523 | munmap(addr, page_size); |
524 | goto err; | |
525 | } | |
526 | n->addr = addr; | |
527 | ||
528 | return 0; | |
529 | ||
530 | err: | |
531 | return -1; | |
532 | } | |
533 | ||
b1f030a0 LV |
534 | static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n) |
535 | { | |
536 | int i; | |
537 | ||
e66f2311 LM |
538 | /* |
539 | * Pack all the changes to the memory regions in a single | |
540 | * transaction to avoid a few updating of the address space | |
541 | * topology. | |
542 | */ | |
543 | memory_region_transaction_begin(); | |
544 | ||
b1f030a0 LV |
545 | for (i = dev->vq_index; i < dev->vq_index + n; i++) { |
546 | vhost_vdpa_host_notifier_uninit(dev, i); | |
547 | } | |
e66f2311 LM |
548 | |
549 | memory_region_transaction_commit(); | |
b1f030a0 LV |
550 | } |
551 | ||
d0416d48 JW |
552 | static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev) |
553 | { | |
dff4426f | 554 | struct vhost_vdpa *v = dev->opaque; |
d0416d48 JW |
555 | int i; |
556 | ||
dff4426f EP |
557 | if (v->shadow_vqs_enabled) { |
558 | /* FIXME SVQ is not compatible with host notifiers mr */ | |
559 | return; | |
560 | } | |
561 | ||
e66f2311 LM |
562 | /* |
563 | * Pack all the changes to the memory regions in a single | |
564 | * transaction to avoid a few updating of the address space | |
565 | * topology. | |
566 | */ | |
567 | memory_region_transaction_begin(); | |
568 | ||
d0416d48 JW |
569 | for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) { |
570 | if (vhost_vdpa_host_notifier_init(dev, i)) { | |
e66f2311 LM |
571 | vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index); |
572 | break; | |
d0416d48 JW |
573 | } |
574 | } | |
575 | ||
e66f2311 | 576 | memory_region_transaction_commit(); |
d0416d48 JW |
577 | } |
578 | ||
dff4426f EP |
579 | static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev) |
580 | { | |
581 | struct vhost_vdpa *v = dev->opaque; | |
582 | size_t idx; | |
583 | ||
dff4426f EP |
584 | for (idx = 0; idx < v->shadow_vqs->len; ++idx) { |
585 | vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx)); | |
586 | } | |
587 | g_ptr_array_free(v->shadow_vqs, true); | |
588 | } | |
589 | ||
108a6481 CL |
590 | static int vhost_vdpa_cleanup(struct vhost_dev *dev) |
591 | { | |
592 | struct vhost_vdpa *v; | |
593 | assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); | |
594 | v = dev->opaque; | |
778e67de | 595 | trace_vhost_vdpa_cleanup(dev, v); |
a230c471 EP |
596 | if (vhost_vdpa_first_dev(dev)) { |
597 | ram_block_discard_disable(false); | |
598 | } | |
599 | ||
d0416d48 | 600 | vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); |
108a6481 | 601 | memory_listener_unregister(&v->listener); |
dff4426f | 602 | vhost_vdpa_svq_cleanup(dev); |
108a6481 CL |
603 | |
604 | dev->opaque = NULL; | |
e1c1915b | 605 | |
108a6481 CL |
606 | return 0; |
607 | } | |
608 | ||
/* vDPA imposes no memslot limit; advertise the maximum. */
static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}
614 | ||
615 | static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, | |
616 | struct vhost_memory *mem) | |
617 | { | |
d71b0609 | 618 | if (!vhost_vdpa_first_dev(dev)) { |
4d191cfd JW |
619 | return 0; |
620 | } | |
621 | ||
778e67de LV |
622 | trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding); |
623 | if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) && | |
624 | trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) { | |
625 | int i; | |
626 | for (i = 0; i < mem->nregions; i++) { | |
627 | trace_vhost_vdpa_dump_regions(dev, i, | |
628 | mem->regions[i].guest_phys_addr, | |
629 | mem->regions[i].memory_size, | |
630 | mem->regions[i].userspace_addr, | |
631 | mem->regions[i].flags_padding); | |
632 | } | |
633 | } | |
108a6481 | 634 | if (mem->padding) { |
3631151b | 635 | return -EINVAL; |
108a6481 CL |
636 | } |
637 | ||
638 | return 0; | |
639 | } | |
640 | ||
/*
 * Forward the acked feature set to the device (first device of the group
 * only) and latch VIRTIO_CONFIG_S_FEATURES_OK.
 *
 * With shadow virtqueues, VHOST_F_LOG_ALL is handled by SVQ and is never
 * acked to the device.
 */
static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so no need to forward this.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}
675 | ||
a5bd0580 JW |
676 | static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) |
677 | { | |
678 | uint64_t features; | |
679 | uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | | |
c1a10086 | 680 | 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | |
d83b4945 EP |
681 | 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | |
682 | 0x1ULL << VHOST_BACKEND_F_SUSPEND; | |
a5bd0580 JW |
683 | int r; |
684 | ||
685 | if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { | |
2a83e97e | 686 | return -EFAULT; |
a5bd0580 JW |
687 | } |
688 | ||
689 | features &= f; | |
4d191cfd | 690 | |
d71b0609 | 691 | if (vhost_vdpa_first_dev(dev)) { |
4d191cfd JW |
692 | r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); |
693 | if (r) { | |
694 | return -EFAULT; | |
695 | } | |
a5bd0580 JW |
696 | } |
697 | ||
698 | dev->backend_cap = features; | |
699 | ||
700 | return 0; | |
701 | } | |
702 | ||
c232b8f4 ZY |
703 | static int vhost_vdpa_get_device_id(struct vhost_dev *dev, |
704 | uint32_t *device_id) | |
108a6481 | 705 | { |
778e67de LV |
706 | int ret; |
707 | ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id); | |
708 | trace_vhost_vdpa_get_device_id(dev, *device_id); | |
709 | return ret; | |
108a6481 CL |
710 | } |
711 | ||
712 | static int vhost_vdpa_reset_device(struct vhost_dev *dev) | |
713 | { | |
0bb302a9 | 714 | struct vhost_vdpa *v = dev->opaque; |
778e67de | 715 | int ret; |
108a6481 CL |
716 | uint8_t status = 0; |
717 | ||
778e67de LV |
718 | ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); |
719 | trace_vhost_vdpa_reset_device(dev, status); | |
0bb302a9 | 720 | v->suspended = false; |
778e67de | 721 | return ret; |
108a6481 CL |
722 | } |
723 | ||
/* vDPA uses a flat virtqueue index space: identity mapping. */
static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}
731 | ||
732 | static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev) | |
733 | { | |
734 | int i; | |
778e67de | 735 | trace_vhost_vdpa_set_vring_ready(dev); |
108a6481 CL |
736 | for (i = 0; i < dev->nvqs; ++i) { |
737 | struct vhost_vring_state state = { | |
738 | .index = dev->vq_index + i, | |
739 | .num = 1, | |
740 | }; | |
741 | vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state); | |
742 | } | |
743 | return 0; | |
744 | } | |
745 | ||
259f3acc CL |
/* Register @fd as the eventfd signalled on config-space changes. */
static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                       int fd)
{
    trace_vhost_vdpa_set_config_call(dev, fd);
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
}
752 | ||
778e67de LV |
753 | static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config, |
754 | uint32_t config_len) | |
755 | { | |
756 | int b, len; | |
757 | char line[QEMU_HEXDUMP_LINE_LEN]; | |
758 | ||
759 | for (b = 0; b < config_len; b += 16) { | |
760 | len = config_len - b; | |
761 | qemu_hexdump_line(line, b, config, len, false); | |
762 | trace_vhost_vdpa_dump_config(dev, line); | |
763 | } | |
764 | } | |
765 | ||
108a6481 CL |
766 | static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data, |
767 | uint32_t offset, uint32_t size, | |
768 | uint32_t flags) | |
769 | { | |
770 | struct vhost_vdpa_config *config; | |
771 | int ret; | |
772 | unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); | |
986d4f78 | 773 | |
778e67de | 774 | trace_vhost_vdpa_set_config(dev, offset, size, flags); |
108a6481 | 775 | config = g_malloc(size + config_size); |
108a6481 CL |
776 | config->off = offset; |
777 | config->len = size; | |
778 | memcpy(config->buf, data, size); | |
778e67de LV |
779 | if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) && |
780 | trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { | |
781 | vhost_vdpa_dump_config(dev, data, size); | |
782 | } | |
108a6481 CL |
783 | ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config); |
784 | g_free(config); | |
785 | return ret; | |
786 | } | |
787 | ||
788 | static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config, | |
50de5138 | 789 | uint32_t config_len, Error **errp) |
108a6481 CL |
790 | { |
791 | struct vhost_vdpa_config *v_config; | |
792 | unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); | |
793 | int ret; | |
794 | ||
778e67de | 795 | trace_vhost_vdpa_get_config(dev, config, config_len); |
108a6481 | 796 | v_config = g_malloc(config_len + config_size); |
108a6481 CL |
797 | v_config->len = config_len; |
798 | v_config->off = 0; | |
799 | ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config); | |
800 | memcpy(config, v_config->buf, config_len); | |
801 | g_free(v_config); | |
778e67de LV |
802 | if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) && |
803 | trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) { | |
804 | vhost_vdpa_dump_config(dev, config, config_len); | |
805 | } | |
108a6481 CL |
806 | return ret; |
807 | } | |
808 | ||
d96be4c8 EP |
809 | static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev, |
810 | struct vhost_vring_state *ring) | |
811 | { | |
812 | trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num); | |
813 | return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring); | |
814 | } | |
815 | ||
dff4426f EP |
816 | static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev, |
817 | struct vhost_vring_file *file) | |
818 | { | |
819 | trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd); | |
820 | return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file); | |
821 | } | |
822 | ||
a8ac8858 EP |
823 | static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev, |
824 | struct vhost_vring_file *file) | |
825 | { | |
826 | trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd); | |
827 | return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file); | |
828 | } | |
829 | ||
d96be4c8 EP |
830 | static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev, |
831 | struct vhost_vring_addr *addr) | |
832 | { | |
833 | trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags, | |
834 | addr->desc_user_addr, addr->used_user_addr, | |
835 | addr->avail_user_addr, | |
836 | addr->log_guest_addr); | |
837 | ||
838 | return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr); | |
839 | ||
840 | } | |
841 | ||
dff4426f EP |
842 | /** |
843 | * Set the shadow virtqueue descriptors to the device | |
844 | * | |
845 | * @dev: The vhost device model | |
846 | * @svq: The shadow virtqueue | |
847 | * @idx: The index of the virtqueue in the vhost device | |
848 | * @errp: Error | |
a8ac8858 EP |
849 | * |
850 | * Note that this function does not rewind kick file descriptor if cannot set | |
851 | * call one. | |
dff4426f | 852 | */ |
100890f7 EP |
853 | static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev, |
854 | VhostShadowVirtqueue *svq, unsigned idx, | |
855 | Error **errp) | |
dff4426f EP |
856 | { |
857 | struct vhost_vring_file file = { | |
858 | .index = dev->vq_index + idx, | |
859 | }; | |
860 | const EventNotifier *event_notifier = &svq->hdev_kick; | |
861 | int r; | |
862 | ||
3cfb4d06 EP |
863 | r = event_notifier_init(&svq->hdev_kick, 0); |
864 | if (r != 0) { | |
865 | error_setg_errno(errp, -r, "Couldn't create kick event notifier"); | |
866 | goto err_init_hdev_kick; | |
867 | } | |
868 | ||
869 | r = event_notifier_init(&svq->hdev_call, 0); | |
870 | if (r != 0) { | |
871 | error_setg_errno(errp, -r, "Couldn't create call event notifier"); | |
872 | goto err_init_hdev_call; | |
873 | } | |
874 | ||
dff4426f EP |
875 | file.fd = event_notifier_get_fd(event_notifier); |
876 | r = vhost_vdpa_set_vring_dev_kick(dev, &file); | |
877 | if (unlikely(r != 0)) { | |
878 | error_setg_errno(errp, -r, "Can't set device kick fd"); | |
3cfb4d06 | 879 | goto err_init_set_dev_fd; |
a8ac8858 EP |
880 | } |
881 | ||
882 | event_notifier = &svq->hdev_call; | |
883 | file.fd = event_notifier_get_fd(event_notifier); | |
884 | r = vhost_vdpa_set_vring_dev_call(dev, &file); | |
885 | if (unlikely(r != 0)) { | |
886 | error_setg_errno(errp, -r, "Can't set device call fd"); | |
3cfb4d06 | 887 | goto err_init_set_dev_fd; |
dff4426f EP |
888 | } |
889 | ||
3cfb4d06 EP |
890 | return 0; |
891 | ||
892 | err_init_set_dev_fd: | |
893 | event_notifier_set_handler(&svq->hdev_call, NULL); | |
894 | ||
895 | err_init_hdev_call: | |
896 | event_notifier_cleanup(&svq->hdev_kick); | |
897 | ||
898 | err_init_hdev_kick: | |
100890f7 EP |
899 | return r; |
900 | } | |
901 | ||
/**
 * Unmap a SVQ area in the device
 *
 * Looks up the IOVA mapping whose translated (host) address is @addr,
 * unmaps it from the device and removes it from the IOVA tree.  Errors are
 * reported but not propagated: the mapping is left in place on failure.
 */
static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
{
    /* Search by translated address only; the iova field is the result */
    const DMAMap needle = {
        .translated_addr = addr,
    };
    const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return;
    }

    /* Unmap in whole host pages; the stored size may not be page aligned */
    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size);
    if (unlikely(r < 0)) {
        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
        return;
    }

    /* Only drop the tree entry once the device unmap succeeded */
    vhost_iova_tree_remove(v->iova_tree, *result);
}
928 | ||
5b590f51 | 929 | static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, |
100890f7 EP |
930 | const VhostShadowVirtqueue *svq) |
931 | { | |
932 | struct vhost_vdpa *v = dev->opaque; | |
933 | struct vhost_vring_addr svq_addr; | |
100890f7 EP |
934 | |
935 | vhost_svq_get_vring_addr(svq, &svq_addr); | |
936 | ||
8b6d6119 | 937 | vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr); |
100890f7 | 938 | |
8b6d6119 | 939 | vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr); |
34e3c94e EP |
940 | } |
941 | ||
/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to search iova
 * @errp: Error pointer
 *
 * Allocates an IOVA range for @needle in the tree and maps it in the
 * device.  Note that needle->size is stored as (region size - 1) by the
 * callers, hence the "+ 1" when mapping.  On DMA failure the IOVA
 * allocation is rolled back so the tree stays consistent.
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    /* Fills needle->iova on success */
    r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova,
                           needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        /* Undo the allocation made above */
        vhost_iova_tree_remove(v->iova_tree, *needle);
    }

    return r == 0;
}
971 | ||
972 | /** | |
973 | * Map the shadow virtqueue rings in the device | |
974 | * | |
975 | * @dev: The vhost device | |
976 | * @svq: The shadow virtqueue | |
977 | * @addr: Assigned IOVA addresses | |
978 | * @errp: Error pointer | |
979 | */ | |
980 | static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, | |
981 | const VhostShadowVirtqueue *svq, | |
982 | struct vhost_vring_addr *addr, | |
983 | Error **errp) | |
984 | { | |
05e385d2 | 985 | ERRP_GUARD(); |
34e3c94e EP |
986 | DMAMap device_region, driver_region; |
987 | struct vhost_vring_addr svq_addr; | |
100890f7 EP |
988 | struct vhost_vdpa *v = dev->opaque; |
989 | size_t device_size = vhost_svq_device_area_size(svq); | |
990 | size_t driver_size = vhost_svq_driver_area_size(svq); | |
34e3c94e EP |
991 | size_t avail_offset; |
992 | bool ok; | |
100890f7 | 993 | |
34e3c94e | 994 | vhost_svq_get_vring_addr(svq, &svq_addr); |
100890f7 | 995 | |
34e3c94e EP |
996 | driver_region = (DMAMap) { |
997 | .translated_addr = svq_addr.desc_user_addr, | |
998 | .size = driver_size - 1, | |
999 | .perm = IOMMU_RO, | |
1000 | }; | |
1001 | ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); | |
1002 | if (unlikely(!ok)) { | |
1003 | error_prepend(errp, "Cannot create vq driver region: "); | |
100890f7 EP |
1004 | return false; |
1005 | } | |
34e3c94e EP |
1006 | addr->desc_user_addr = driver_region.iova; |
1007 | avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr; | |
1008 | addr->avail_user_addr = driver_region.iova + avail_offset; | |
100890f7 | 1009 | |
34e3c94e EP |
1010 | device_region = (DMAMap) { |
1011 | .translated_addr = svq_addr.used_user_addr, | |
1012 | .size = device_size - 1, | |
1013 | .perm = IOMMU_RW, | |
1014 | }; | |
1015 | ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); | |
1016 | if (unlikely(!ok)) { | |
1017 | error_prepend(errp, "Cannot create vq device region: "); | |
8b6d6119 | 1018 | vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); |
100890f7 | 1019 | } |
34e3c94e | 1020 | addr->used_user_addr = device_region.iova; |
100890f7 | 1021 | |
34e3c94e | 1022 | return ok; |
100890f7 EP |
1023 | } |
1024 | ||
1025 | static bool vhost_vdpa_svq_setup(struct vhost_dev *dev, | |
1026 | VhostShadowVirtqueue *svq, unsigned idx, | |
1027 | Error **errp) | |
1028 | { | |
1029 | uint16_t vq_index = dev->vq_index + idx; | |
1030 | struct vhost_vring_state s = { | |
1031 | .index = vq_index, | |
1032 | }; | |
1033 | int r; | |
1034 | ||
1035 | r = vhost_vdpa_set_dev_vring_base(dev, &s); | |
1036 | if (unlikely(r)) { | |
1037 | error_setg_errno(errp, -r, "Cannot set vring base"); | |
1038 | return false; | |
1039 | } | |
1040 | ||
1041 | r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp); | |
dff4426f EP |
1042 | return r == 0; |
1043 | } | |
1044 | ||
/*
 * Start all shadow virtqueues of this vhost device.
 *
 * For each queue: set up fds/base, start the SVQ, map its rings in the
 * device IOVA space, and point the device at the mapped rings.  On any
 * failure, the partially-started queue and all previously started ones
 * are torn down and false is returned (the error is reported, not
 * propagated).  No-op returning true when SVQ is disabled.
 */
static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs_enabled) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = dev->vq_index + i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq, v->iova_tree);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

/* Unwind queue i (fall-through is intentional), then all queues before it */
err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}
1099 | ||
5b590f51 | 1100 | static void vhost_vdpa_svqs_stop(struct vhost_dev *dev) |
100890f7 EP |
1101 | { |
1102 | struct vhost_vdpa *v = dev->opaque; | |
1103 | ||
712c1a31 | 1104 | if (!v->shadow_vqs_enabled) { |
5b590f51 | 1105 | return; |
100890f7 EP |
1106 | } |
1107 | ||
1108 | for (unsigned i = 0; i < v->shadow_vqs->len; ++i) { | |
1109 | VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); | |
2e1a9de9 EP |
1110 | |
1111 | vhost_svq_stop(svq); | |
5b590f51 | 1112 | vhost_vdpa_svq_unmap_rings(dev, svq); |
3cfb4d06 EP |
1113 | |
1114 | event_notifier_cleanup(&svq->hdev_kick); | |
1115 | event_notifier_cleanup(&svq->hdev_call); | |
dff4426f | 1116 | } |
dff4426f EP |
1117 | } |
1118 | ||
0bb302a9 EP |
1119 | static void vhost_vdpa_suspend(struct vhost_dev *dev) |
1120 | { | |
1121 | struct vhost_vdpa *v = dev->opaque; | |
1122 | int r; | |
1123 | ||
1124 | if (!vhost_vdpa_first_dev(dev)) { | |
1125 | return; | |
1126 | } | |
1127 | ||
1128 | if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) { | |
1129 | trace_vhost_vdpa_suspend(dev); | |
1130 | r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND); | |
1131 | if (unlikely(r)) { | |
1132 | error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno); | |
1133 | } else { | |
1134 | v->suspended = true; | |
1135 | return; | |
1136 | } | |
1137 | } | |
1138 | ||
1139 | vhost_vdpa_reset_device(dev); | |
1140 | } | |
1141 | ||
108a6481 CL |
/*
 * Start or stop the vhost-vdpa device.
 *
 * On start: init host notifiers, start SVQs (if enabled) and mark vrings
 * ready.  On stop: suspend (or reset) the device, stop SVQs and uninit the
 * notifiers.  The memory listener registration and DRIVER_OK status bit are
 * only handled by the last vhost device of the guest device (the one whose
 * vq range ends at vq_index_end).
 *
 * Returns 0 on success, -1 if SVQ start fails, or the add_status result.
 */
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
        vhost_vdpa_set_vring_ready(dev);
    } else {
        /* Suspend before tearing down SVQs so in-flight state settles */
        vhost_vdpa_suspend(dev);
        vhost_vdpa_svqs_stop(dev);
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    /* Only the last vhost device of the guest device continues */
    if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
        return 0;
    }

    if (started) {
        memory_listener_register(&v->listener, &address_space_memory);
        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    }

    return 0;
}
1172 | ||
1173 | static void vhost_vdpa_reset_status(struct vhost_dev *dev) | |
1174 | { | |
1175 | struct vhost_vdpa *v = dev->opaque; | |
1176 | ||
1177 | if (dev->vq_index + dev->nvqs != dev->vq_index_end) { | |
1178 | return; | |
108a6481 | 1179 | } |
c3716f26 EP |
1180 | |
1181 | vhost_vdpa_reset_device(dev); | |
1182 | vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | | |
1183 | VIRTIO_CONFIG_S_DRIVER); | |
1184 | memory_listener_unregister(&v->listener); | |
108a6481 CL |
1185 | } |
1186 | ||
1187 | static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, | |
1188 | struct vhost_log *log) | |
1189 | { | |
773ebc95 | 1190 | struct vhost_vdpa *v = dev->opaque; |
d71b0609 | 1191 | if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { |
4d191cfd JW |
1192 | return 0; |
1193 | } | |
1194 | ||
778e67de LV |
1195 | trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd, |
1196 | log->log); | |
108a6481 CL |
1197 | return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base); |
1198 | } | |
1199 | ||
1200 | static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev, | |
1201 | struct vhost_vring_addr *addr) | |
1202 | { | |
d96be4c8 EP |
1203 | struct vhost_vdpa *v = dev->opaque; |
1204 | ||
1205 | if (v->shadow_vqs_enabled) { | |
1206 | /* | |
1207 | * Device vring addr was set at device start. SVQ base is handled by | |
1208 | * VirtQueue code. | |
1209 | */ | |
1210 | return 0; | |
1211 | } | |
1212 | ||
1213 | return vhost_vdpa_set_vring_dev_addr(dev, addr); | |
108a6481 CL |
1214 | } |
1215 | ||
1216 | static int vhost_vdpa_set_vring_num(struct vhost_dev *dev, | |
1217 | struct vhost_vring_state *ring) | |
1218 | { | |
778e67de | 1219 | trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num); |
108a6481 CL |
1220 | return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring); |
1221 | } | |
1222 | ||
1223 | static int vhost_vdpa_set_vring_base(struct vhost_dev *dev, | |
1224 | struct vhost_vring_state *ring) | |
1225 | { | |
d96be4c8 EP |
1226 | struct vhost_vdpa *v = dev->opaque; |
1227 | ||
1228 | if (v->shadow_vqs_enabled) { | |
1229 | /* | |
1230 | * Device vring base was set at device start. SVQ base is handled by | |
1231 | * VirtQueue code. | |
1232 | */ | |
1233 | return 0; | |
1234 | } | |
1235 | ||
1236 | return vhost_vdpa_set_dev_vring_base(dev, ring); | |
108a6481 CL |
1237 | } |
1238 | ||
/*
 * Get the last available index of a vring.
 *
 * With SVQ enabled the value is taken from the VirtQueue, not the device.
 * Without SVQ, the device's answer is only trusted after a successful
 * suspend (v->suspended); otherwise -1 is returned so vhost recovers the
 * used idx from the guest instead.
 */
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
        return 0;
    }

    if (!v->suspended) {
        /*
         * Cannot trust in value returned by device, let vhost recover used
         * idx from guest.
         */
        return -1;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
    return ret;
}
1262 | ||
1263 | static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev, | |
1264 | struct vhost_vring_file *file) | |
1265 | { | |
dff4426f EP |
1266 | struct vhost_vdpa *v = dev->opaque; |
1267 | int vdpa_idx = file->index - dev->vq_index; | |
1268 | ||
1269 | if (v->shadow_vqs_enabled) { | |
1270 | VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); | |
1271 | vhost_svq_set_svq_kick_fd(svq, file->fd); | |
1272 | return 0; | |
1273 | } else { | |
1274 | return vhost_vdpa_set_vring_dev_kick(dev, file); | |
1275 | } | |
108a6481 CL |
1276 | } |
1277 | ||
1278 | static int vhost_vdpa_set_vring_call(struct vhost_dev *dev, | |
1279 | struct vhost_vring_file *file) | |
1280 | { | |
a8ac8858 | 1281 | struct vhost_vdpa *v = dev->opaque; |
b2765243 EP |
1282 | int vdpa_idx = file->index - dev->vq_index; |
1283 | VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx); | |
a8ac8858 | 1284 | |
b2765243 EP |
1285 | /* Remember last call fd because we can switch to SVQ anytime. */ |
1286 | vhost_svq_set_svq_call_fd(svq, file->fd); | |
a8ac8858 | 1287 | if (v->shadow_vqs_enabled) { |
a8ac8858 | 1288 | return 0; |
a8ac8858 | 1289 | } |
b2765243 EP |
1290 | |
1291 | return vhost_vdpa_set_vring_dev_call(dev, file); | |
108a6481 CL |
1292 | } |
1293 | ||
1294 | static int vhost_vdpa_get_features(struct vhost_dev *dev, | |
1295 | uint64_t *features) | |
1296 | { | |
12a195fa EP |
1297 | struct vhost_vdpa *v = dev->opaque; |
1298 | int ret = vhost_vdpa_get_dev_features(dev, features); | |
1299 | ||
1300 | if (ret == 0 && v->shadow_vqs_enabled) { | |
1301 | /* Add SVQ logging capabilities */ | |
1302 | *features |= BIT_ULL(VHOST_F_LOG_ALL); | |
1303 | } | |
778e67de | 1304 | |
778e67de | 1305 | return ret; |
108a6481 CL |
1306 | } |
1307 | ||
1308 | static int vhost_vdpa_set_owner(struct vhost_dev *dev) | |
1309 | { | |
d71b0609 | 1310 | if (!vhost_vdpa_first_dev(dev)) { |
4d191cfd JW |
1311 | return 0; |
1312 | } | |
1313 | ||
778e67de | 1314 | trace_vhost_vdpa_set_owner(dev); |
108a6481 CL |
1315 | return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); |
1316 | } | |
1317 | ||
/*
 * Fill @addr with the vring addresses of @vq.  For vDPA the *_user_addr
 * fields carry the guest physical addresses (*_phys), not userspace VAs.
 */
static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
                       struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}
1329 | ||
/* vhost-vdpa always requires the IOMMU-backed memory API path. */
static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}
1334 | ||
/* vhost backend operations table for the vDPA backend */
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        /* lifecycle */
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        /* dirty logging */
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        /* vring configuration and eventfds */
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        /* feature negotiation */
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_get_config  = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        /* unimplemented hooks are explicitly NULL */
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_backend_can_merge = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
        .vhost_set_config_call = vhost_vdpa_set_config_call,
        .vhost_reset_status = vhost_vdpa_reset_status,
};