/*
 * vhost-vdpa
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <linux/vhost.h>
#include <linux/vfio.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include "exec/target_page.h"
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "hw/virtio/virtio-net.h"
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost-vdpa.h"
#include "exec/address-spaces.h"
#include "migration/blocker.h"
#include "qemu/cutils.h"
#include "qemu/main-loop.h"
#include "trace.h"
#include "qapi/error.h"

/*
 * Return one past the end of the section. Be careful with uint64_t
 * conversions!
 */
static Int128 vhost_vdpa_section_end(const MemoryRegionSection *section,
                                     int page_mask)
{
    Int128 llend = int128_make64(section->offset_within_address_space);
    llend = int128_add(llend, section->size);
    llend = int128_and(llend, int128_exts64(page_mask));

    return llend;
}

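/*
 * Filter out sections the vhost-vdpa memory listener cannot handle: only
 * guest RAM and vIOMMU regions can be mapped (MMIO/ram_device and protected
 * memory cannot), and plain RAM sections must fit inside the device's
 * usable [iova_min, iova_max] window.
 */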
static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
                                                uint64_t iova_min,
                                                uint64_t iova_max,
                                                int page_mask)
{
    Int128 llend;
    bool is_ram = memory_region_is_ram(section->mr);
    bool is_iommu = memory_region_is_iommu(section->mr);
    bool is_protected = memory_region_is_protected(section->mr);

    /* vhost-vDPA doesn't allow MMIO to be mapped */
    bool is_ram_device = memory_region_is_ram_device(section->mr);

    if ((!is_ram && !is_iommu) || is_protected || is_ram_device) {
        trace_vhost_vdpa_skipped_memory_section(is_ram, is_iommu, is_protected,
                                                is_ram_device, iova_min,
                                                iova_max, page_mask);
        return true;
    }

    if (section->offset_within_address_space < iova_min) {
        error_report("RAM section out of device range (min=0x%" PRIx64
                     ", addr=0x%" HWADDR_PRIx ")",
                     iova_min, section->offset_within_address_space);
        return true;
    }
    /*
     * While using a vIOMMU, the section can be larger than iova_max, but the
     * memory that is actually mapped may be smaller, so the check is moved to
     * vhost_vdpa_iommu_map_notify(), which uses the actual size mapped to the
     * kernel.
     */

    if (!is_iommu) {
        llend = vhost_vdpa_section_end(section, page_mask);
        if (int128_gt(llend, int128_make64(iova_max))) {
            error_report("RAM section out of device range (max=0x%" PRIx64
                         ", end addr=0x%" PRIx64 ")",
                         iova_max, int128_get64(llend));
            return true;
        }
    }

    return false;
}

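/*
 * IOTLB updates travel to the kernel as struct vhost_msg_v2 written to the
 * vhost-vdpa device fd: msg.type selects the v2 message format, and
 * msg.iotlb.type selects the operation (VHOST_IOTLB_UPDATE to map,
 * VHOST_IOTLB_INVALIDATE to unmap, VHOST_IOTLB_BATCH_BEGIN/END to group
 * several updates into one transaction).
 */
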
/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
                       hwaddr size, void *vaddr, bool readonly)
{
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;
    int ret = 0;

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.uaddr = (uint64_t)(uintptr_t)vaddr;
    msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW;
    msg.iotlb.type = VHOST_IOTLB_UPDATE;

    trace_vhost_vdpa_dma_map(s, fd, msg.type, msg.asid, msg.iotlb.iova,
                             msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm,
                             msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

/*
 * The caller must set asid = 0 if the device does not support asid.
 * This is not an ABI break since it is set to 0 by the initializer anyway.
 */
int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova,
                         hwaddr size)
{
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;
    int ret = 0;

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.asid = asid;
    msg.iotlb.iova = iova;
    msg.iotlb.size = size;
    msg.iotlb.type = VHOST_IOTLB_INVALIDATE;

    trace_vhost_vdpa_dma_unmap(s, fd, msg.type, msg.asid, msg.iotlb.iova,
                               msg.iotlb.size, msg.iotlb.type);

    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
        return -EIO;
    }

    return ret;
}

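/*
 * Example (hypothetical values; "s" and "host_ptr" stand in for a real
 * VhostVDPAShared and host buffer): mapping one 4 KiB page read/write at
 * IOVA 0x1000 in the guest-PA address space, then removing it:
 *
 *     vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, 0x1000, 0x1000,
 *                        host_ptr, false);
 *     ...
 *     vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, 0x1000, 0x1000);
 */
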
static void vhost_vdpa_listener_begin_batch(VhostVDPAShared *s)
{
    int fd = s->device_fd;
    struct vhost_msg_v2 msg = {
        .type = VHOST_IOTLB_MSG_V2,
        .iotlb.type = VHOST_IOTLB_BATCH_BEGIN,
    };

    trace_vhost_vdpa_listener_begin_batch(s, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }
}

static void vhost_vdpa_iotlb_batch_begin_once(VhostVDPAShared *s)
{
    if (s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) &&
        !s->iotlb_batch_begin_sent) {
        vhost_vdpa_listener_begin_batch(s);
    }

    s->iotlb_batch_begin_sent = true;
}

static void vhost_vdpa_listener_commit(MemoryListener *listener)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    struct vhost_msg_v2 msg = {};
    int fd = s->device_fd;

    if (!(s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) {
        return;
    }

    if (!s->iotlb_batch_begin_sent) {
        return;
    }

    msg.type = VHOST_IOTLB_MSG_V2;
    msg.iotlb.type = VHOST_IOTLB_BATCH_END;

    trace_vhost_vdpa_listener_commit(s, fd, msg.type, msg.iotlb.type);
    if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) {
        error_report("failed to write, fd=%d, errno=%d (%s)",
                     fd, errno, strerror(errno));
    }

    s->iotlb_batch_begin_sent = false;
}

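/*
 * Batching lifecycle: the first map/unmap in a memory transaction emits
 * VHOST_IOTLB_BATCH_BEGIN (via vhost_vdpa_iotlb_batch_begin_once), further
 * updates ride in the same batch, and the listener commit callback above
 * closes it with VHOST_IOTLB_BATCH_END so the kernel applies everything in
 * one go.
 */
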
static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
{
    struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);

    hwaddr iova = iotlb->iova + iommu->iommu_offset;
    VhostVDPAShared *s = iommu->dev_shared;
    void *vaddr;
    int ret;
    Int128 llend;

    if (iotlb->target_as != &address_space_memory) {
        error_report("Wrong target AS \"%s\", only system memory is allowed",
                     iotlb->target_as->name ? iotlb->target_as->name : "none");
        return;
    }
    RCU_READ_LOCK_GUARD();
    /* check if RAM section out of device range */
    llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova));
    if (int128_gt(llend, int128_make64(s->iova_range.last))) {
        error_report("RAM section out of device range (max=0x%" PRIx64
                     ", end addr=0x%" PRIx64 ")",
                     s->iova_range.last, int128_get64(llend));
        return;
    }

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        bool read_only;

        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) {
            return;
        }
        ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                 iotlb->addr_mask + 1, vaddr, read_only);
        if (ret) {
            error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ", %p) = %d (%m)",
                         s, iova, iotlb->addr_mask + 1, vaddr, ret);
        }
    } else {
        ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   iotlb->addr_mask + 1);
        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         s, iova, iotlb->addr_mask + 1, ret);
        }
    }
}

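/*
 * Register an IOMMU notifier for a vIOMMU-backed section: map/unmap events
 * are delivered to vhost_vdpa_iommu_map_notify(), and existing mappings are
 * replayed so the device starts with a complete view of the region.
 */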
static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);

    struct vdpa_iommu *iommu;
    Int128 end;
    int iommu_idx;
    IOMMUMemoryRegion *iommu_mr;
    int ret;

    iommu_mr = IOMMU_MEMORY_REGION(section->mr);

    iommu = g_malloc0(sizeof(*iommu));
    end = int128_add(int128_make64(section->offset_within_region),
                     section->size);
    end = int128_sub(end, int128_one());
    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
                                                   MEMTXATTRS_UNSPECIFIED);
    iommu->iommu_mr = iommu_mr;
    iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify,
                        IOMMU_NOTIFIER_IOTLB_EVENTS,
                        section->offset_within_region,
                        int128_get64(end),
                        iommu_idx);
    iommu->iommu_offset = section->offset_within_address_space -
                          section->offset_within_region;
    iommu->dev_shared = s;

    ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
    if (ret) {
        g_free(iommu);
        return;
    }

    QLIST_INSERT_HEAD(&s->iommu_list, iommu, iommu_next);
    memory_region_iommu_replay(iommu->iommu_mr, &iommu->n);

    return;
}

static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
                                        MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);

    struct vdpa_iommu *iommu;

    QLIST_FOREACH(iommu, &s->iommu_list, iommu_next)
    {
        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
            iommu->n.start == section->offset_within_region) {
            memory_region_unregister_iommu_notifier(section->mr, &iommu->n);
            QLIST_REMOVE(iommu, iommu_next);
            g_free(iommu);
            break;
        }
    }
}

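/*
 * Map a newly added RAM section into the device. With shadow data (SVQ)
 * enabled, an IOVA is first allocated from the IOVA tree, so the device
 * sees SVQ-managed addresses rather than guest physical addresses.
 */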
static void vhost_vdpa_listener_region_add(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    DMAMap mem_region = {};
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    hwaddr iova;
    Int128 llend, llsize;
    void *vaddr;
    int ret;
    int page_size = qemu_target_page_size();
    int page_mask = -page_size;

    if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first,
                                            s->iova_range.last, page_mask)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_add(listener, section);
        return;
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) !=
                 (section->offset_within_region & ~page_mask))) {
        trace_vhost_vdpa_listener_region_add_unaligned(s, section->mr->name,
                       section->offset_within_address_space & ~page_mask,
                       section->offset_within_region & ~page_mask);
        return;
    }

    iova = ROUND_UP(section->offset_within_address_space, page_size);
    llend = vhost_vdpa_section_end(section, page_mask);
    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    memory_region_ref(section->mr);

    /* Here we assume that memory_region_is_ram(section->mr)==true */

    vaddr = memory_region_get_ram_ptr(section->mr) +
            section->offset_within_region +
            (iova - section->offset_within_address_space);

    trace_vhost_vdpa_listener_region_add(s, iova, int128_get64(llend),
                                         vaddr, section->readonly);

    llsize = int128_sub(llend, int128_make64(iova));
    if (s->shadow_data) {
        int r;

        mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr;
        mem_region.size = int128_get64(llsize) - 1;
        mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly);

        r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region);
        if (unlikely(r != IOVA_OK)) {
            error_report("Can't allocate a mapping (%d)", r);
            goto fail;
        }

        iova = mem_region.iova;
    }

    vhost_vdpa_iotlb_batch_begin_once(s);
    ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                             int128_get64(llsize), vaddr, section->readonly);
    if (ret) {
        error_report("vhost-vdpa: map failed!");
        goto fail_map;
    }

    return;

fail_map:
    if (s->shadow_data) {
        vhost_iova_tree_remove(s->iova_tree, mem_region);
    }

fail:
    /*
     * On the initfn path, store the first error in the container so we
     * can gracefully fail. At runtime, there's not much we can do other
     * than throw a hardware error.
     */
    error_report("vhost-vdpa: DMA mapping failed, unable to continue");
    return;
}

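/*
 * Remove a RAM section from the device. A full 64-bit range cannot be
 * expressed in a single unmap message, so it is split in two below.
 */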
static void vhost_vdpa_listener_region_del(MemoryListener *listener,
                                           MemoryRegionSection *section)
{
    VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener);
    hwaddr iova;
    Int128 llend, llsize;
    int ret;
    int page_size = qemu_target_page_size();
    int page_mask = -page_size;

    if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first,
                                            s->iova_range.last, page_mask)) {
        return;
    }
    if (memory_region_is_iommu(section->mr)) {
        vhost_vdpa_iommu_region_del(listener, section);
    }

    if (unlikely((section->offset_within_address_space & ~page_mask) !=
                 (section->offset_within_region & ~page_mask))) {
        trace_vhost_vdpa_listener_region_del_unaligned(s, section->mr->name,
                       section->offset_within_address_space & ~page_mask,
                       section->offset_within_region & ~page_mask);
        return;
    }

    iova = ROUND_UP(section->offset_within_address_space, page_size);
    llend = vhost_vdpa_section_end(section, page_mask);

    trace_vhost_vdpa_listener_region_del(s, iova,
        int128_get64(int128_sub(llend, int128_one())));

    if (int128_ge(int128_make64(iova), llend)) {
        return;
    }

    llsize = int128_sub(llend, int128_make64(iova));

    if (s->shadow_data) {
        const DMAMap *result;
        const void *vaddr = memory_region_get_ram_ptr(section->mr) +
                            section->offset_within_region +
                            (iova - section->offset_within_address_space);
        DMAMap mem_region = {
            .translated_addr = (hwaddr)(uintptr_t)vaddr,
            .size = int128_get64(llsize) - 1,
        };

        result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region);
        if (!result) {
            /* The memory listener map wasn't mapped */
            return;
        }
        iova = result->iova;
        vhost_iova_tree_remove(s->iova_tree, *result);
    }
    vhost_vdpa_iotlb_batch_begin_once(s);
    /*
     * The unmap ioctl doesn't accept a full 64-bit span, so split such a
     * range into two unmap calls.
     */
    if (int128_eq(llsize, int128_2_64())) {
        llsize = int128_rshift(llsize, 1);
        ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                                   int128_get64(llsize));

        if (ret) {
            error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                         "0x%" HWADDR_PRIx ") = %d (%m)",
                         s, iova, int128_get64(llsize), ret);
        }
        iova += int128_get64(llsize);
    }
    ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova,
                               int128_get64(llsize));

    if (ret) {
        error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
                     "0x%" HWADDR_PRIx ") = %d (%m)",
                     s, iova, int128_get64(llsize), ret);
    }

    memory_region_unref(section->mr);
}
/*
 * The IOTLB API is used by vhost-vdpa, which requires incremental updating
 * of the mapping. So we cannot use the generic vhost memory listener, which
 * depends on addnop().
 */
static const MemoryListener vhost_vdpa_memory_listener = {
    .name = "vhost-vdpa",
    .commit = vhost_vdpa_listener_commit,
    .region_add = vhost_vdpa_listener_region_add,
    .region_del = vhost_vdpa_listener_region_del,
};

static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request,
                           void *arg)
{
    struct vhost_vdpa *v = dev->opaque;
    int fd = v->shared->device_fd;
    int ret;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);

    ret = ioctl(fd, request, arg);
    return ret < 0 ? -errno : ret;
}

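/*
 * Read-modify-write of the device status byte: bits already set are left
 * alone, the new bits are OR-ed in, and the status is read back to verify
 * the device accepted them (-EIO if it did not).
 */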
static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status)
{
    uint8_t s;
    int ret;

    trace_vhost_vdpa_add_status(dev, status);
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }
    if ((s & status) == status) {
        /* Don't set bits already set */
        return 0;
    }

    s |= status;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, &s);
    if (ret < 0) {
        return ret;
    }

    if (!(s & status)) {
        return -EIO;
    }

    return 0;
}

int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

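/*
 * Example (hypothetical "device_fd"): querying the usable IOVA window of a
 * vhost-vdpa device before creating mappings:
 *
 *     struct vhost_vdpa_iova_range range;
 *     if (vhost_vdpa_get_iova_range(device_fd, &range) == 0) {
 *         // mappings must fall inside [range.first, range.last]
 *     }
 */
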
/*
 * The use of this function is for requests that only need to be
 * applied once. Typically such a request occurs at the beginning
 * of operation, and before setting up queues. It should not be
 * used for requests that perform operations until all queues are
 * set, which would need to check dev->vq_index_end instead.
 */
static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    return v->index == 0;
}

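/*
 * True when this vhost_dev covers the device's final virtqueues; the
 * counterpart of vhost_vdpa_first_dev() for operations that must run after
 * all queues are set up.
 */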
static bool vhost_vdpa_last_dev(struct vhost_dev *dev)
{
    return dev->vq_index + dev->nvqs == dev->vq_index_end;
}

static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
                                       uint64_t *features)
{
    int ret;

    ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
    trace_vhost_vdpa_get_features(dev, *features);
    return ret;
}

static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v)
{
    g_autoptr(GPtrArray) shadow_vqs = NULL;

    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
    for (unsigned n = 0; n < hdev->nvqs; ++n) {
        VhostShadowVirtqueue *svq;

        svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
        g_ptr_array_add(shadow_vqs, svq);
    }

    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
}

static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
{
    struct vhost_vdpa *v = opaque;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    trace_vhost_vdpa_init(dev, v->shared, opaque);
    int ret;

    v->dev = dev;
    dev->opaque = opaque;
    v->shared->listener = vhost_vdpa_memory_listener;
    vhost_vdpa_init_svq(dev, v);

    error_propagate(&dev->migration_blocker, v->migration_blocker);
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    /*
     * If dev->shadow_vqs_enabled is set at initialization, the device has
     * been started with x-svq=on, so don't block migration.
     */
    if (dev->migration_blocker == NULL && !v->shadow_vqs_enabled) {
        /* We don't have dev->features yet */
        uint64_t features;
        ret = vhost_vdpa_get_dev_features(dev, &features);
        if (unlikely(ret)) {
            error_setg_errno(errp, -ret, "Could not get device features");
            return ret;
        }
        vhost_svq_valid_features(features, &dev->migration_blocker);
    }

    /*
     * Similar to VFIO, we end up pinning all guest memory and have to
     * disable discarding of RAM.
     */
    ret = ram_block_discard_disable(true);
    if (ret) {
        error_report("Cannot disable discarding of RAM");
        return ret;
    }

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);

    return 0;
}

static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
                                            int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;

    n = &v->notifier[queue_index];

    if (n->addr) {
        virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, false);
        object_unparent(OBJECT(&n->mr));
        munmap(n->addr, page_size);
        n->addr = NULL;
    }
}

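/*
 * Expose the device's doorbell for a queue directly to the guest: page
 * queue_index of the vhost-vdpa fd is mmap-ed and installed as a host
 * notifier memory region, so a guest kick reaches the hardware doorbell
 * without bouncing through QEMU or an eventfd.
 */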
static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index)
{
    size_t page_size = qemu_real_host_page_size();
    struct vhost_vdpa *v = dev->opaque;
    VirtIODevice *vdev = dev->vdev;
    VhostVDPAHostNotifier *n;
    int fd = v->shared->device_fd;
    void *addr;
    char *name;

    vhost_vdpa_host_notifier_uninit(dev, queue_index);

    n = &v->notifier[queue_index];

    addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                queue_index * page_size);
    if (addr == MAP_FAILED) {
        goto err;
    }

    name = g_strdup_printf("vhost-vdpa/host-notifier@%p mmaps[%d]",
                           v, queue_index);
    memory_region_init_ram_device_ptr(&n->mr, OBJECT(vdev), name,
                                      page_size, addr);
    g_free(name);

    if (virtio_queue_set_host_notifier_mr(vdev, queue_index, &n->mr, true)) {
        object_unparent(OBJECT(&n->mr));
        munmap(addr, page_size);
        goto err;
    }
    n->addr = addr;

    return 0;

err:
    return -1;
}

static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
{
    int i;

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid repeated updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + n; i++) {
        vhost_vdpa_host_notifier_uninit(dev, i);
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int i;

    if (v->shadow_vqs_enabled) {
        /* FIXME SVQ is not compatible with host notifiers mr */
        return;
    }

    /*
     * Pack all the changes to the memory regions in a single
     * transaction to avoid repeated updates of the address space
     * topology.
     */
    memory_region_transaction_begin();

    for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
        if (vhost_vdpa_host_notifier_init(dev, i)) {
            vhost_vdpa_host_notifiers_uninit(dev, i - dev->vq_index);
            break;
        }
    }

    memory_region_transaction_commit();
}

static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    size_t idx;

    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
    }
    g_ptr_array_free(v->shadow_vqs, true);
}

static int vhost_vdpa_cleanup(struct vhost_dev *dev)
{
    struct vhost_vdpa *v;
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    v = dev->opaque;
    trace_vhost_vdpa_cleanup(dev, v);
    if (vhost_vdpa_first_dev(dev)) {
        ram_block_discard_disable(false);
        memory_listener_unregister(&v->shared->listener);
    }

    vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    vhost_vdpa_svq_cleanup(dev);

    dev->opaque = NULL;

    return 0;
}

static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
{
    trace_vhost_vdpa_memslots_limit(dev, INT_MAX);
    return INT_MAX;
}

static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_mem_table(dev, mem->nregions, mem->padding);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_MEM_TABLE) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_REGIONS)) {
        int i;
        for (i = 0; i < mem->nregions; i++) {
            trace_vhost_vdpa_dump_regions(dev, i,
                                          mem->regions[i].guest_phys_addr,
                                          mem->regions[i].memory_size,
                                          mem->regions[i].userspace_addr,
                                          mem->regions[i].flags_padding);
        }
    }
    if (mem->padding) {
        return -EINVAL;
    }

    return 0;
}

static int vhost_vdpa_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    if (v->shadow_vqs_enabled) {
        if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
            /*
             * QEMU is just trying to enable or disable logging. SVQ handles
             * this separately, so no need to forward this.
             */
            v->acked_features = features;
            return 0;
        }

        v->acked_features = features;

        /* We must not ack _F_LOG if SVQ is enabled */
        features &= ~BIT_ULL(VHOST_F_LOG_ALL);
    }

    trace_vhost_vdpa_set_features(dev, features);
    ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
    if (ret) {
        return ret;
    }

    return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
}

static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    uint64_t features;
    uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID |
        0x1ULL << VHOST_BACKEND_F_SUSPEND;
    int r;

    if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
        return -EFAULT;
    }

    features &= f;

    if (vhost_vdpa_first_dev(dev)) {
        r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
        if (r) {
            return -EFAULT;
        }
    }

    dev->backend_cap = features;
    v->shared->backend_cap = features;

    return 0;
}

static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
                                    uint32_t *device_id)
{
    int ret;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_DEVICE_ID, device_id);
    trace_vhost_vdpa_get_device_id(dev, *device_id);
    return ret;
}

static int vhost_vdpa_reset_device(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;
    uint8_t status = 0;

    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
    trace_vhost_vdpa_reset_device(dev);
    v->suspended = false;
    return ret;
}

static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    trace_vhost_vdpa_get_vq_index(dev, idx, idx);
    return idx;
}

static int vhost_vdpa_set_vring_enable_one(struct vhost_vdpa *v, unsigned idx,
                                           int enable)
{
    struct vhost_dev *dev = v->dev;
    struct vhost_vring_state state = {
        .index = idx,
        .num = enable,
    };
    int r = vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);

    trace_vhost_vdpa_set_vring_enable_one(dev, idx, enable, r);
    return r;
}

static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
{
    struct vhost_vdpa *v = dev->opaque;
    unsigned int i;
    int ret;

    for (i = 0; i < dev->nvqs; ++i) {
        ret = vhost_vdpa_set_vring_enable_one(v, i, enable);
        if (ret < 0) {
            return ret;
        }
    }

    return 0;
}

int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
{
    return vhost_vdpa_set_vring_enable_one(v, idx, 1);
}

static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                      int fd)
{
    trace_vhost_vdpa_set_config_call(dev, fd);
    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
}

static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                   uint32_t config_len)
{
    int b, len;
    char line[QEMU_HEXDUMP_LINE_LEN];

    for (b = 0; b < config_len; b += 16) {
        len = config_len - b;
        qemu_hexdump_line(line, b, config, len, false);
        trace_vhost_vdpa_dump_config(dev, line);
    }
}

static int vhost_vdpa_set_config(struct vhost_dev *dev, const uint8_t *data,
                                 uint32_t offset, uint32_t size,
                                 uint32_t flags)
{
    struct vhost_vdpa_config *config;
    int ret;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);

    trace_vhost_vdpa_set_config(dev, offset, size, flags);
    config = g_malloc(size + config_size);
    config->off = offset;
    config->len = size;
    memcpy(config->buf, data, size);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_SET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, data, size);
    }
    ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG, config);
    g_free(config);
    return ret;
}

static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
                                 uint32_t config_len, Error **errp)
{
    struct vhost_vdpa_config *v_config;
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    int ret;

    trace_vhost_vdpa_get_config(dev, config, config_len);
    v_config = g_malloc(config_len + config_size);
    v_config->len = config_len;
    v_config->off = 0;
    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_CONFIG, v_config);
    memcpy(config, v_config->buf, config_len);
    g_free(v_config);
    if (trace_event_get_state_backends(TRACE_VHOST_VDPA_GET_CONFIG) &&
        trace_event_get_state_backends(TRACE_VHOST_VDPA_DUMP_CONFIG)) {
        vhost_vdpa_dump_config(dev, config, config_len);
    }
    return ret;
}

static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
                                         struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    trace_vhost_vdpa_set_dev_vring_base(dev, ring->index, ring->num,
                                        v->shadow_vqs_enabled);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
}

static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
}

static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
                                         struct vhost_vring_file *file)
{
    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
}

static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
                                         struct vhost_vring_addr *addr)
{
    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
                                    addr->desc_user_addr, addr->used_user_addr,
                                    addr->avail_user_addr,
                                    addr->log_guest_addr);

    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
}

/**
 * Set the shadow virtqueue descriptors to the device
 *
 * @dev: The vhost device model
 * @svq: The shadow virtqueue
 * @idx: The index of the virtqueue in the vhost device
 * @errp: Error
 *
 * Note that this function does not rewind the kick file descriptor if it
 * cannot set the call one.
 */
static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
                                  Error **errp)
{
    struct vhost_vring_file file = {
        .index = dev->vq_index + idx,
    };
    const EventNotifier *event_notifier = &svq->hdev_kick;
    int r;

    r = event_notifier_init(&svq->hdev_kick, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create kick event notifier");
        goto err_init_hdev_kick;
    }

    r = event_notifier_init(&svq->hdev_call, 0);
    if (r != 0) {
        error_setg_errno(errp, -r, "Couldn't create call event notifier");
        goto err_init_hdev_call;
    }

    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device kick fd");
        goto err_init_set_dev_fd;
    }

    event_notifier = &svq->hdev_call;
    file.fd = event_notifier_get_fd(event_notifier);
    r = vhost_vdpa_set_vring_dev_call(dev, &file);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Can't set device call fd");
        goto err_init_set_dev_fd;
    }

    return 0;

err_init_set_dev_fd:
    event_notifier_set_handler(&svq->hdev_call, NULL);

err_init_hdev_call:
    event_notifier_cleanup(&svq->hdev_kick);

err_init_hdev_kick:
    return r;
}

/**
 * Unmap a SVQ area in the device
 */
static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr)
{
    const DMAMap needle = {
        .translated_addr = addr,
    };
    const DMAMap *result = vhost_iova_tree_find_iova(v->shared->iova_tree,
                                                     &needle);
    hwaddr size;
    int r;

    if (unlikely(!result)) {
        error_report("Unable to find SVQ address to unmap");
        return;
    }

    size = ROUND_UP(result->size, qemu_real_host_page_size());
    r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova,
                             size);
    if (unlikely(r < 0)) {
        error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r);
        return;
    }

    vhost_iova_tree_remove(v->shared->iova_tree, *result);
}

static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
                                       const VhostShadowVirtqueue *svq)
{
    struct vhost_vdpa *v = dev->opaque;
    struct vhost_vring_addr svq_addr;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr);

    vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr);
}

/**
 * Map the SVQ area in the device
 *
 * @v: Vhost-vdpa device
 * @needle: The area to search iova
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
                                    Error **errp)
{
    int r;

    r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle);
    if (unlikely(r != IOVA_OK)) {
        error_setg(errp, "Cannot allocate iova (%d)", r);
        return false;
    }

    r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova,
                           needle->size + 1,
                           (void *)(uintptr_t)needle->translated_addr,
                           needle->perm == IOMMU_RO);
    if (unlikely(r != 0)) {
        error_setg_errno(errp, -r, "Cannot map region to device");
        vhost_iova_tree_remove(v->shared->iova_tree, *needle);
    }

    return r == 0;
}

/**
 * Map the shadow virtqueue rings in the device
 *
 * @dev: The vhost device
 * @svq: The shadow virtqueue
 * @addr: Assigned IOVA addresses
 * @errp: Error pointer
 */
static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
                                     const VhostShadowVirtqueue *svq,
                                     struct vhost_vring_addr *addr,
                                     Error **errp)
{
    ERRP_GUARD();
    DMAMap device_region, driver_region;
    struct vhost_vring_addr svq_addr;
    struct vhost_vdpa *v = dev->opaque;
    size_t device_size = vhost_svq_device_area_size(svq);
    size_t driver_size = vhost_svq_driver_area_size(svq);
    size_t avail_offset;
    bool ok;

    vhost_svq_get_vring_addr(svq, &svq_addr);

    driver_region = (DMAMap) {
        .translated_addr = svq_addr.desc_user_addr,
        .size = driver_size - 1,
        .perm = IOMMU_RO,
    };
    ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq driver region: ");
        return false;
    }
    addr->desc_user_addr = driver_region.iova;
    avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
    addr->avail_user_addr = driver_region.iova + avail_offset;

    device_region = (DMAMap) {
        .translated_addr = svq_addr.used_user_addr,
        .size = device_size - 1,
        .perm = IOMMU_RW,
    };
    ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
    if (unlikely(!ok)) {
        error_prepend(errp, "Cannot create vq device region: ");
        vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr);
    }
    addr->used_user_addr = device_region.iova;

    return ok;
}

static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                 VhostShadowVirtqueue *svq, unsigned idx,
                                 Error **errp)
{
    uint16_t vq_index = dev->vq_index + idx;
    struct vhost_vring_state s = {
        .index = vq_index,
    };
    int r;

    r = vhost_vdpa_set_dev_vring_base(dev, &s);
    if (unlikely(r)) {
        error_setg_errno(errp, -r, "Cannot set vring base");
        return false;
    }

    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
    return r == 0;
}

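/*
 * Bring up all shadow virtqueues: for each queue, set the device vring base
 * and kick/call fds, start the SVQ, map its rings into the device, and
 * override the vring address the vhost subsystem had set with the SVQ one.
 */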
static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    Error *err = NULL;
    unsigned i;

    if (!v->shadow_vqs_enabled) {
        return true;
    }

    for (i = 0; i < v->shadow_vqs->len; ++i) {
        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
        struct vhost_vring_addr addr = {
            .index = dev->vq_index + i,
        };
        int r;
        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
        if (unlikely(!ok)) {
            goto err;
        }

        vhost_svq_start(svq, dev->vdev, vq, v->shared->iova_tree);
        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
        if (unlikely(!ok)) {
            goto err_map;
        }

        /* Override vring GPA set by vhost subsystem */
        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
        if (unlikely(r != 0)) {
            error_setg_errno(&err, -r, "Cannot set device address");
            goto err_set_addr;
        }
    }

    return true;

err_set_addr:
    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));

err_map:
    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));

err:
    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
    for (unsigned j = 0; j < i; ++j) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
        vhost_vdpa_svq_unmap_rings(dev, svq);
        vhost_svq_stop(svq);
    }

    return false;
}

static void vhost_vdpa_svqs_stop(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!v->shadow_vqs_enabled) {
        return;
    }

    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);

        vhost_svq_stop(svq);
        vhost_vdpa_svq_unmap_rings(dev, svq);

        event_notifier_cleanup(&svq->hdev_kick);
        event_notifier_cleanup(&svq->hdev_call);
    }
}

static void vhost_vdpa_suspend(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;
    int r;

    if (!vhost_vdpa_first_dev(dev)) {
        return;
    }

    if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) {
        trace_vhost_vdpa_suspend(dev);
        r = ioctl(v->shared->device_fd, VHOST_VDPA_SUSPEND);
        if (unlikely(r)) {
            error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno);
        } else {
            v->suspended = true;
            return;
        }
    }

    vhost_vdpa_reset_device(dev);
}

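/*
 * Start/stop sequencing: host notifiers and SVQs are handled per vhost_dev,
 * but the memory listener registration and the DRIVER_OK status bit are
 * deferred until the last device, once every virtqueue is configured.
 */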
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
{
    struct vhost_vdpa *v = dev->opaque;
    bool ok;
    trace_vhost_vdpa_dev_start(dev, started);

    if (started) {
        vhost_vdpa_host_notifiers_init(dev);
        ok = vhost_vdpa_svqs_start(dev);
        if (unlikely(!ok)) {
            return -1;
        }
    } else {
        vhost_vdpa_suspend(dev);
        vhost_vdpa_svqs_stop(dev);
        vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
    }

    if (!vhost_vdpa_last_dev(dev)) {
        return 0;
    }

    if (started) {
        if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) {
            error_report("SVQ can not work while IOMMU is enabled, please "
                         "disable IOMMU and try again");
            return -1;
        }
        memory_listener_register(&v->shared->listener, dev->vdev->dma_as);

        return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    }

    return 0;
}

static void vhost_vdpa_reset_status(struct vhost_dev *dev)
{
    struct vhost_vdpa *v = dev->opaque;

    if (!vhost_vdpa_last_dev(dev)) {
        return;
    }

    vhost_vdpa_reset_device(dev);
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                               VIRTIO_CONFIG_S_DRIVER);
    memory_listener_unregister(&v->shared->listener);
}

static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
{
    struct vhost_vdpa *v = dev->opaque;
    if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_log_base(dev, base, log->size, log->refcnt, log->fd,
                                  log->log);
    return vhost_vdpa_call(dev, VHOST_SET_LOG_BASE, &base);
}

static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring addr was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_vring_dev_addr(dev, addr);
}

static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    trace_vhost_vdpa_set_vring_num(dev, ring->index, ring->num);
    return vhost_vdpa_call(dev, VHOST_SET_VRING_NUM, ring);
}

static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;

    if (v->shadow_vqs_enabled) {
        /*
         * Device vring base was set at device start. SVQ base is handled by
         * VirtQueue code.
         */
        return 0;
    }

    return vhost_vdpa_set_dev_vring_base(dev, ring);
}

static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    struct vhost_vdpa *v = dev->opaque;
    int ret;

    if (v->shadow_vqs_enabled) {
        ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
        trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num, true);
        return 0;
    }

    if (!v->suspended) {
        /*
         * Cannot trust the value returned by the device; let vhost recover
         * the used index from the guest.
         */
        return -1;
    }

    ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
    trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num, false);
    return ret;
}

static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;

    if (v->shadow_vqs_enabled) {
        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
        vhost_svq_set_svq_kick_fd(svq, file->fd);
        return 0;
    } else {
        return vhost_vdpa_set_vring_dev_kick(dev, file);
    }
}

static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    struct vhost_vdpa *v = dev->opaque;
    int vdpa_idx = file->index - dev->vq_index;
    VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);

    /* Remember last call fd because we can switch to SVQ anytime. */
    vhost_svq_set_svq_call_fd(svq, file->fd);
    /*
     * When SVQ is transitioning to off, shadow_vqs_enabled has
     * not been set back to false yet, but the underlying call fd
     * will have to switch back to the guest notifier to signal the
     * passthrough virtqueues. In other situations, SVQ's own call
     * fd shall be used to signal the device model.
     */
    if (v->shadow_vqs_enabled &&
        v->shared->svq_switching != SVQ_TSTATE_DISABLING) {
        return 0;
    }

    return vhost_vdpa_set_vring_dev_call(dev, file);
}

static int vhost_vdpa_get_features(struct vhost_dev *dev,
                                   uint64_t *features)
{
    int ret = vhost_vdpa_get_dev_features(dev, features);

    if (ret == 0) {
        /* Add SVQ logging capabilities */
        *features |= BIT_ULL(VHOST_F_LOG_ALL);
    }

    return ret;
}

static int vhost_vdpa_set_owner(struct vhost_dev *dev)
{
    if (!vhost_vdpa_first_dev(dev)) {
        return 0;
    }

    trace_vhost_vdpa_set_owner(dev);
    return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL);
}

static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev,
    struct vhost_vring_addr *addr, struct vhost_virtqueue *vq)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA);
    addr->desc_user_addr = (uint64_t)(unsigned long)vq->desc_phys;
    addr->avail_user_addr = (uint64_t)(unsigned long)vq->avail_phys;
    addr->used_user_addr = (uint64_t)(unsigned long)vq->used_phys;
    trace_vhost_vdpa_vq_get_addr(dev, vq, addr->desc_user_addr,
                                 addr->avail_user_addr, addr->used_user_addr);
    return 0;
}

static bool vhost_vdpa_force_iommu(struct vhost_dev *dev)
{
    return true;
}

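/* Backend operations exposed to the generic vhost layer */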
const VhostOps vdpa_ops = {
        .backend_type = VHOST_BACKEND_TYPE_VDPA,
        .vhost_backend_init = vhost_vdpa_init,
        .vhost_backend_cleanup = vhost_vdpa_cleanup,
        .vhost_set_log_base = vhost_vdpa_set_log_base,
        .vhost_set_vring_addr = vhost_vdpa_set_vring_addr,
        .vhost_set_vring_num = vhost_vdpa_set_vring_num,
        .vhost_set_vring_base = vhost_vdpa_set_vring_base,
        .vhost_get_vring_base = vhost_vdpa_get_vring_base,
        .vhost_set_vring_kick = vhost_vdpa_set_vring_kick,
        .vhost_set_vring_call = vhost_vdpa_set_vring_call,
        .vhost_get_features = vhost_vdpa_get_features,
        .vhost_set_backend_cap = vhost_vdpa_set_backend_cap,
        .vhost_set_owner = vhost_vdpa_set_owner,
        .vhost_set_vring_endian = NULL,
        .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit,
        .vhost_set_mem_table = vhost_vdpa_set_mem_table,
        .vhost_set_features = vhost_vdpa_set_features,
        .vhost_reset_device = vhost_vdpa_reset_device,
        .vhost_get_vq_index = vhost_vdpa_get_vq_index,
        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
        .vhost_get_config = vhost_vdpa_get_config,
        .vhost_set_config = vhost_vdpa_set_config,
        .vhost_requires_shm_log = NULL,
        .vhost_migration_done = NULL,
        .vhost_net_set_mtu = NULL,
        .vhost_set_iotlb_callback = NULL,
        .vhost_send_device_iotlb_msg = NULL,
        .vhost_dev_start = vhost_vdpa_dev_start,
        .vhost_get_device_id = vhost_vdpa_get_device_id,
        .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
        .vhost_force_iommu = vhost_vdpa_force_iommu,
        .vhost_set_config_call = vhost_vdpa_set_config_call,
        .vhost_reset_status = vhost_vdpa_reset_status,
};