]>
Commit | Line | Data |
---|---|---|
919ae3dd YS |
1 | /* |
2 | * QEMU paravirtual RDMA | |
3 | * | |
4 | * Copyright (C) 2018 Oracle | |
5 | * Copyright (C) 2018 Red Hat Inc | |
6 | * | |
7 | * Authors: | |
8 | * Yuval Shaia <yuval.shaia@oracle.com> | |
9 | * Marcel Apfelbaum <marcel@redhat.com> | |
10 | * | |
11 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
12 | * See the COPYING file in the top-level directory. | |
13 | * | |
14 | */ | |
15 | ||
0efc9511 MT |
16 | #include "qemu/osdep.h" |
17 | #include "qapi/error.h" | |
0b8fa32f | 18 | #include "qemu/module.h" |
0efc9511 MT |
19 | #include "hw/pci/pci.h" |
20 | #include "hw/pci/pci_ids.h" | |
21 | #include "hw/pci/msi.h" | |
22 | #include "hw/pci/msix.h" | |
0efc9511 | 23 | #include "hw/qdev-properties.h" |
ce35e229 | 24 | #include "hw/qdev-properties-system.h" |
0efc9511 | 25 | #include "cpu.h" |
919ae3dd | 26 | #include "trace.h" |
f4b2c02a YS |
27 | #include "monitor/monitor.h" |
28 | #include "hw/rdma/rdma.h" | |
919ae3dd YS |
29 | |
30 | #include "../rdma_rm.h" | |
31 | #include "../rdma_backend.h" | |
32 | #include "../rdma_utils.h" | |
33 | ||
34 | #include <infiniband/verbs.h> | |
35 | #include "pvrdma.h" | |
0efc9511 | 36 | #include "standard-headers/rdma/vmw_pvrdma-abi.h" |
54d31236 | 37 | #include "sysemu/runstate.h" |
0efc9511 | 38 | #include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" |
919ae3dd YS |
39 | #include "pvrdma_qp_ops.h" |
40 | ||
41 | static Property pvrdma_dev_properties[] = { | |
2b05705d YS |
42 | DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name), |
43 | DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name), | |
44 | DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1), | |
919ae3dd YS |
45 | DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size, |
46 | MAX_MR_SIZE), | |
47 | DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP), | |
919ae3dd YS |
48 | DEFINE_PROP_INT32("dev-caps-max-cq", PVRDMADev, dev_attr.max_cq, MAX_CQ), |
49 | DEFINE_PROP_INT32("dev-caps-max-mr", PVRDMADev, dev_attr.max_mr, MAX_MR), | |
50 | DEFINE_PROP_INT32("dev-caps-max-pd", PVRDMADev, dev_attr.max_pd, MAX_PD), | |
51 | DEFINE_PROP_INT32("dev-caps-qp-rd-atom", PVRDMADev, dev_attr.max_qp_rd_atom, | |
52 | MAX_QP_RD_ATOM), | |
53 | DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, | |
54 | dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), | |
55 | DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), | |
355b7cf3 | 56 | DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ), |
605ec166 | 57 | DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), |
919ae3dd YS |
58 | DEFINE_PROP_END_OF_LIST(), |
59 | }; | |
60 | ||
8dbbca5c | 61 | static void pvrdma_format_statistics(RdmaProvider *obj, GString *buf) |
f4b2c02a YS |
62 | { |
63 | PVRDMADev *dev = PVRDMA_DEV(obj); | |
64 | PCIDevice *pdev = PCI_DEVICE(dev); | |
65 | ||
8dbbca5c DB |
66 | g_string_append_printf(buf, "%s, %x.%x\n", |
67 | pdev->name, PCI_SLOT(pdev->devfn), | |
68 | PCI_FUNC(pdev->devfn)); | |
69 | g_string_append_printf(buf, "\tcommands : %" PRId64 "\n", | |
70 | dev->stats.commands); | |
71 | g_string_append_printf(buf, "\tregs_reads : %" PRId64 "\n", | |
72 | dev->stats.regs_reads); | |
73 | g_string_append_printf(buf, "\tregs_writes : %" PRId64 "\n", | |
74 | dev->stats.regs_writes); | |
75 | g_string_append_printf(buf, "\tuar_writes : %" PRId64 "\n", | |
76 | dev->stats.uar_writes); | |
77 | g_string_append_printf(buf, "\tinterrupts : %" PRId64 "\n", | |
78 | dev->stats.interrupts); | |
79 | rdma_format_device_counters(&dev->rdma_dev_res, buf); | |
f4b2c02a YS |
80 | } |
81 | ||
919ae3dd YS |
82 | static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring, |
83 | void *ring_state) | |
84 | { | |
85 | pvrdma_ring_free(ring); | |
86 | rdma_pci_dma_unmap(pci_dev, ring_state, TARGET_PAGE_SIZE); | |
87 | } | |
88 | ||
3aa1b7af | 89 | static int init_dev_ring(PvrdmaRing *ring, PvrdmaRingState **ring_state, |
919ae3dd YS |
90 | const char *name, PCIDevice *pci_dev, |
91 | dma_addr_t dir_addr, uint32_t num_pages) | |
92 | { | |
93 | uint64_t *dir, *tbl; | |
85fc35af | 94 | int max_pages, rc = 0; |
919ae3dd | 95 | |
32e5703c MA |
96 | if (!num_pages) { |
97 | rdma_error_report("Ring pages count must be strictly positive"); | |
98 | return -EINVAL; | |
99 | } | |
100 | ||
85fc35af YS |
101 | /* |
102 | * Make sure we can satisfy the requested number of pages in a single | |
103 | * TARGET_PAGE_SIZE sized page table (taking into account that first entry | |
104 | * is reserved for ring-state) | |
105 | */ | |
106 | max_pages = TARGET_PAGE_SIZE / sizeof(dma_addr_t) - 1; | |
107 | if (num_pages > max_pages) { | |
108 | rdma_error_report("Maximum pages on a single directory must not exceed %d\n", | |
109 | max_pages); | |
110 | return -EINVAL; | |
111 | } | |
112 | ||
919ae3dd YS |
113 | dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE); |
114 | if (!dir) { | |
4d71b38a | 115 | rdma_error_report("Failed to map to page directory (ring %s)", name); |
919ae3dd YS |
116 | rc = -ENOMEM; |
117 | goto out; | |
118 | } | |
85fc35af YS |
119 | |
120 | /* We support only one page table for a ring */ | |
919ae3dd YS |
121 | tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); |
122 | if (!tbl) { | |
4d71b38a | 123 | rdma_error_report("Failed to map to page table (ring %s)", name); |
919ae3dd YS |
124 | rc = -ENOMEM; |
125 | goto out_free_dir; | |
126 | } | |
127 | ||
128 | *ring_state = rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); | |
129 | if (!*ring_state) { | |
4d71b38a | 130 | rdma_error_report("Failed to map to ring state (ring %s)", name); |
919ae3dd YS |
131 | rc = -ENOMEM; |
132 | goto out_free_tbl; | |
133 | } | |
134 | /* RX ring is the second */ | |
197053e2 | 135 | (*ring_state)++; |
919ae3dd | 136 | rc = pvrdma_ring_init(ring, name, pci_dev, |
3aa1b7af | 137 | (PvrdmaRingState *)*ring_state, |
919ae3dd YS |
138 | (num_pages - 1) * TARGET_PAGE_SIZE / |
139 | sizeof(struct pvrdma_cqne), | |
140 | sizeof(struct pvrdma_cqne), | |
141 | (dma_addr_t *)&tbl[1], (dma_addr_t)num_pages - 1); | |
142 | if (rc) { | |
919ae3dd YS |
143 | rc = -ENOMEM; |
144 | goto out_free_ring_state; | |
145 | } | |
146 | ||
147 | goto out_free_tbl; | |
148 | ||
149 | out_free_ring_state: | |
150 | rdma_pci_dma_unmap(pci_dev, *ring_state, TARGET_PAGE_SIZE); | |
151 | ||
152 | out_free_tbl: | |
153 | rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); | |
154 | ||
155 | out_free_dir: | |
156 | rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); | |
157 | ||
158 | out: | |
159 | return rc; | |
160 | } | |
161 | ||
162 | static void free_dsr(PVRDMADev *dev) | |
163 | { | |
164 | PCIDevice *pci_dev = PCI_DEVICE(dev); | |
165 | ||
166 | if (!dev->dsr_info.dsr) { | |
167 | return; | |
168 | } | |
169 | ||
170 | free_dev_ring(pci_dev, &dev->dsr_info.async, | |
171 | dev->dsr_info.async_ring_state); | |
172 | ||
173 | free_dev_ring(pci_dev, &dev->dsr_info.cq, dev->dsr_info.cq_ring_state); | |
174 | ||
175 | rdma_pci_dma_unmap(pci_dev, dev->dsr_info.req, | |
46c1ea01 | 176 | sizeof(union pvrdma_cmd_req)); |
919ae3dd YS |
177 | |
178 | rdma_pci_dma_unmap(pci_dev, dev->dsr_info.rsp, | |
46c1ea01 | 179 | sizeof(union pvrdma_cmd_resp)); |
919ae3dd YS |
180 | |
181 | rdma_pci_dma_unmap(pci_dev, dev->dsr_info.dsr, | |
46c1ea01 | 182 | sizeof(struct pvrdma_device_shared_region)); |
919ae3dd YS |
183 | |
184 | dev->dsr_info.dsr = NULL; | |
185 | } | |
186 | ||
187 | static int load_dsr(PVRDMADev *dev) | |
188 | { | |
189 | int rc = 0; | |
190 | PCIDevice *pci_dev = PCI_DEVICE(dev); | |
191 | DSRInfo *dsr_info; | |
192 | struct pvrdma_device_shared_region *dsr; | |
193 | ||
194 | free_dsr(dev); | |
195 | ||
196 | /* Map to DSR */ | |
919ae3dd YS |
197 | dev->dsr_info.dsr = rdma_pci_dma_map(pci_dev, dev->dsr_info.dma, |
198 | sizeof(struct pvrdma_device_shared_region)); | |
199 | if (!dev->dsr_info.dsr) { | |
4d71b38a | 200 | rdma_error_report("Failed to map to DSR"); |
919ae3dd YS |
201 | rc = -ENOMEM; |
202 | goto out; | |
203 | } | |
204 | ||
205 | /* Shortcuts */ | |
206 | dsr_info = &dev->dsr_info; | |
207 | dsr = dsr_info->dsr; | |
208 | ||
209 | /* Map to command slot */ | |
919ae3dd YS |
210 | dsr_info->req = rdma_pci_dma_map(pci_dev, dsr->cmd_slot_dma, |
211 | sizeof(union pvrdma_cmd_req)); | |
212 | if (!dsr_info->req) { | |
4d71b38a | 213 | rdma_error_report("Failed to map to command slot address"); |
919ae3dd YS |
214 | rc = -ENOMEM; |
215 | goto out_free_dsr; | |
216 | } | |
217 | ||
218 | /* Map to response slot */ | |
919ae3dd YS |
219 | dsr_info->rsp = rdma_pci_dma_map(pci_dev, dsr->resp_slot_dma, |
220 | sizeof(union pvrdma_cmd_resp)); | |
221 | if (!dsr_info->rsp) { | |
4d71b38a | 222 | rdma_error_report("Failed to map to response slot address"); |
919ae3dd YS |
223 | rc = -ENOMEM; |
224 | goto out_free_req; | |
225 | } | |
226 | ||
227 | /* Map to CQ notification ring */ | |
228 | rc = init_dev_ring(&dsr_info->cq, &dsr_info->cq_ring_state, "dev_cq", | |
229 | pci_dev, dsr->cq_ring_pages.pdir_dma, | |
230 | dsr->cq_ring_pages.num_pages); | |
231 | if (rc) { | |
919ae3dd YS |
232 | rc = -ENOMEM; |
233 | goto out_free_rsp; | |
234 | } | |
235 | ||
236 | /* Map to event notification ring */ | |
237 | rc = init_dev_ring(&dsr_info->async, &dsr_info->async_ring_state, | |
238 | "dev_async", pci_dev, dsr->async_ring_pages.pdir_dma, | |
239 | dsr->async_ring_pages.num_pages); | |
240 | if (rc) { | |
919ae3dd YS |
241 | rc = -ENOMEM; |
242 | goto out_free_rsp; | |
243 | } | |
244 | ||
245 | goto out; | |
246 | ||
247 | out_free_rsp: | |
248 | rdma_pci_dma_unmap(pci_dev, dsr_info->rsp, sizeof(union pvrdma_cmd_resp)); | |
249 | ||
250 | out_free_req: | |
251 | rdma_pci_dma_unmap(pci_dev, dsr_info->req, sizeof(union pvrdma_cmd_req)); | |
252 | ||
253 | out_free_dsr: | |
254 | rdma_pci_dma_unmap(pci_dev, dsr_info->dsr, | |
255 | sizeof(struct pvrdma_device_shared_region)); | |
256 | dsr_info->dsr = NULL; | |
257 | ||
258 | out: | |
259 | return rc; | |
260 | } | |
261 | ||
262 | static void init_dsr_dev_caps(PVRDMADev *dev) | |
263 | { | |
264 | struct pvrdma_device_shared_region *dsr; | |
265 | ||
46c1ea01 YS |
266 | if (!dev->dsr_info.dsr) { |
267 | /* Buggy or malicious guest driver */ | |
4d71b38a | 268 | rdma_error_report("Can't initialized DSR"); |
919ae3dd YS |
269 | return; |
270 | } | |
271 | ||
272 | dsr = dev->dsr_info.dsr; | |
919ae3dd | 273 | dsr->caps.fw_ver = PVRDMA_FW_VERSION; |
919ae3dd | 274 | dsr->caps.mode = PVRDMA_DEVICE_MODE_ROCE; |
919ae3dd | 275 | dsr->caps.gid_types |= PVRDMA_GID_TYPE_FLAG_ROCE_V1; |
919ae3dd | 276 | dsr->caps.max_uar = RDMA_BAR2_UAR_SIZE; |
919ae3dd YS |
277 | dsr->caps.max_mr_size = dev->dev_attr.max_mr_size; |
278 | dsr->caps.max_qp = dev->dev_attr.max_qp; | |
279 | dsr->caps.max_qp_wr = dev->dev_attr.max_qp_wr; | |
280 | dsr->caps.max_sge = dev->dev_attr.max_sge; | |
281 | dsr->caps.max_cq = dev->dev_attr.max_cq; | |
282 | dsr->caps.max_cqe = dev->dev_attr.max_cqe; | |
283 | dsr->caps.max_mr = dev->dev_attr.max_mr; | |
284 | dsr->caps.max_pd = dev->dev_attr.max_pd; | |
285 | dsr->caps.max_ah = dev->dev_attr.max_ah; | |
355b7cf3 KH |
286 | dsr->caps.max_srq = dev->dev_attr.max_srq; |
287 | dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr; | |
288 | dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge; | |
919ae3dd | 289 | dsr->caps.gid_tbl_len = MAX_GIDS; |
919ae3dd | 290 | dsr->caps.sys_image_guid = 0; |
028c3f93 | 291 | dsr->caps.node_guid = dev->node_guid; |
919ae3dd | 292 | dsr->caps.phys_port_cnt = MAX_PORTS; |
919ae3dd | 293 | dsr->caps.max_pkeys = MAX_PKEYS; |
919ae3dd YS |
294 | } |
295 | ||
75152227 YS |
296 | static void uninit_msix(PCIDevice *pdev, int used_vectors) |
297 | { | |
298 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
299 | int i; | |
300 | ||
301 | for (i = 0; i < used_vectors; i++) { | |
302 | msix_vector_unuse(pdev, i); | |
303 | } | |
304 | ||
305 | msix_uninit(pdev, &dev->msix, &dev->msix); | |
306 | } | |
307 | ||
4d71b38a | 308 | static int init_msix(PCIDevice *pdev) |
75152227 YS |
309 | { |
310 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
311 | int i; | |
312 | int rc; | |
313 | ||
314 | rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, | |
315 | RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, | |
316 | RDMA_MSIX_PBA, 0, NULL); | |
317 | ||
318 | if (rc < 0) { | |
4d71b38a | 319 | rdma_error_report("Failed to initialize MSI-X"); |
75152227 YS |
320 | return rc; |
321 | } | |
322 | ||
323 | for (i = 0; i < RDMA_MAX_INTRS; i++) { | |
15377f6e | 324 | msix_vector_use(PCI_DEVICE(dev), i); |
75152227 YS |
325 | } |
326 | ||
327 | return 0; | |
328 | } | |
329 | ||
330 | static void pvrdma_fini(PCIDevice *pdev) | |
331 | { | |
332 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
333 | ||
b556c3ce YS |
334 | notifier_remove(&dev->shutdown_notifier); |
335 | ||
75152227 YS |
336 | pvrdma_qp_ops_fini(); |
337 | ||
ff30a446 YS |
338 | rdma_backend_stop(&dev->backend_dev); |
339 | ||
2b05705d YS |
340 | rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, |
341 | dev->backend_eth_device_name); | |
75152227 YS |
342 | |
343 | rdma_backend_fini(&dev->backend_dev); | |
344 | ||
345 | free_dsr(dev); | |
346 | ||
347 | if (msix_enabled(pdev)) { | |
348 | uninit_msix(pdev, RDMA_MAX_INTRS); | |
349 | } | |
ffa65d97 | 350 | |
4d71b38a YS |
351 | rdma_info_report("Device %s %x.%x is down", pdev->name, |
352 | PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); | |
75152227 YS |
353 | } |
354 | ||
355 | static void pvrdma_stop(PVRDMADev *dev) | |
356 | { | |
357 | rdma_backend_stop(&dev->backend_dev); | |
358 | } | |
359 | ||
360 | static void pvrdma_start(PVRDMADev *dev) | |
361 | { | |
362 | rdma_backend_start(&dev->backend_dev); | |
363 | } | |
364 | ||
919ae3dd YS |
365 | static void activate_device(PVRDMADev *dev) |
366 | { | |
75152227 | 367 | pvrdma_start(dev); |
919ae3dd | 368 | set_reg_val(dev, PVRDMA_REG_ERR, 0); |
919ae3dd YS |
369 | } |
370 | ||
371 | static int unquiesce_device(PVRDMADev *dev) | |
372 | { | |
919ae3dd YS |
373 | return 0; |
374 | } | |
375 | ||
f00c48ca | 376 | static void reset_device(PVRDMADev *dev) |
919ae3dd | 377 | { |
75152227 | 378 | pvrdma_stop(dev); |
919ae3dd YS |
379 | } |
380 | ||
4d71b38a | 381 | static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size) |
919ae3dd YS |
382 | { |
383 | PVRDMADev *dev = opaque; | |
384 | uint32_t val; | |
385 | ||
c2dd117b YS |
386 | dev->stats.regs_reads++; |
387 | ||
919ae3dd | 388 | if (get_reg_val(dev, addr, &val)) { |
4d71b38a YS |
389 | rdma_error_report("Failed to read REG value from address 0x%x", |
390 | (uint32_t)addr); | |
919ae3dd YS |
391 | return -EINVAL; |
392 | } | |
393 | ||
394 | trace_pvrdma_regs_read(addr, val); | |
395 | ||
396 | return val; | |
397 | } | |
398 | ||
4d71b38a YS |
399 | static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val, |
400 | unsigned size) | |
919ae3dd YS |
401 | { |
402 | PVRDMADev *dev = opaque; | |
403 | ||
c2dd117b YS |
404 | dev->stats.regs_writes++; |
405 | ||
919ae3dd | 406 | if (set_reg_val(dev, addr, val)) { |
4d71b38a YS |
407 | rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64, |
408 | addr, val); | |
919ae3dd YS |
409 | return; |
410 | } | |
411 | ||
919ae3dd YS |
412 | switch (addr) { |
413 | case PVRDMA_REG_DSRLOW: | |
4d71b38a | 414 | trace_pvrdma_regs_write(addr, val, "DSRLOW", ""); |
919ae3dd YS |
415 | dev->dsr_info.dma = val; |
416 | break; | |
417 | case PVRDMA_REG_DSRHIGH: | |
4d71b38a | 418 | trace_pvrdma_regs_write(addr, val, "DSRHIGH", ""); |
919ae3dd YS |
419 | dev->dsr_info.dma |= val << 32; |
420 | load_dsr(dev); | |
421 | init_dsr_dev_caps(dev); | |
422 | break; | |
423 | case PVRDMA_REG_CTL: | |
424 | switch (val) { | |
425 | case PVRDMA_DEVICE_CTL_ACTIVATE: | |
4d71b38a | 426 | trace_pvrdma_regs_write(addr, val, "CTL", "ACTIVATE"); |
919ae3dd YS |
427 | activate_device(dev); |
428 | break; | |
429 | case PVRDMA_DEVICE_CTL_UNQUIESCE: | |
4d71b38a | 430 | trace_pvrdma_regs_write(addr, val, "CTL", "UNQUIESCE"); |
919ae3dd YS |
431 | unquiesce_device(dev); |
432 | break; | |
433 | case PVRDMA_DEVICE_CTL_RESET: | |
4d71b38a | 434 | trace_pvrdma_regs_write(addr, val, "CTL", "URESET"); |
919ae3dd YS |
435 | reset_device(dev); |
436 | break; | |
437 | } | |
67b32fe2 | 438 | break; |
919ae3dd | 439 | case PVRDMA_REG_IMR: |
4d71b38a | 440 | trace_pvrdma_regs_write(addr, val, "INTR_MASK", ""); |
919ae3dd YS |
441 | dev->interrupt_mask = val; |
442 | break; | |
443 | case PVRDMA_REG_REQUEST: | |
444 | if (val == 0) { | |
4d71b38a YS |
445 | trace_pvrdma_regs_write(addr, val, "REQUEST", ""); |
446 | pvrdma_exec_cmd(dev); | |
919ae3dd | 447 | } |
67b32fe2 | 448 | break; |
919ae3dd YS |
449 | default: |
450 | break; | |
451 | } | |
452 | } | |
453 | ||
454 | static const MemoryRegionOps regs_ops = { | |
4d71b38a YS |
455 | .read = pvrdma_regs_read, |
456 | .write = pvrdma_regs_write, | |
919ae3dd YS |
457 | .endianness = DEVICE_LITTLE_ENDIAN, |
458 | .impl = { | |
459 | .min_access_size = sizeof(uint32_t), | |
460 | .max_access_size = sizeof(uint32_t), | |
461 | }, | |
462 | }; | |
463 | ||
4d71b38a | 464 | static uint64_t pvrdma_uar_read(void *opaque, hwaddr addr, unsigned size) |
2aa86456 PP |
465 | { |
466 | return 0xffffffff; | |
467 | } | |
468 | ||
4d71b38a YS |
469 | static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, |
470 | unsigned size) | |
919ae3dd YS |
471 | { |
472 | PVRDMADev *dev = opaque; | |
473 | ||
c2dd117b YS |
474 | dev->stats.uar_writes++; |
475 | ||
919ae3dd YS |
476 | switch (addr & 0xFFF) { /* Mask with 0xFFF as each UC gets page */ |
477 | case PVRDMA_UAR_QP_OFFSET: | |
919ae3dd | 478 | if (val & PVRDMA_UAR_QP_SEND) { |
4d71b38a YS |
479 | trace_pvrdma_uar_write(addr, val, "QP", "SEND", |
480 | val & PVRDMA_UAR_HANDLE_MASK, 0); | |
919ae3dd YS |
481 | pvrdma_qp_send(dev, val & PVRDMA_UAR_HANDLE_MASK); |
482 | } | |
483 | if (val & PVRDMA_UAR_QP_RECV) { | |
4d71b38a YS |
484 | trace_pvrdma_uar_write(addr, val, "QP", "RECV", |
485 | val & PVRDMA_UAR_HANDLE_MASK, 0); | |
919ae3dd YS |
486 | pvrdma_qp_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); |
487 | } | |
488 | break; | |
489 | case PVRDMA_UAR_CQ_OFFSET: | |
919ae3dd | 490 | if (val & PVRDMA_UAR_CQ_ARM) { |
4d71b38a YS |
491 | trace_pvrdma_uar_write(addr, val, "CQ", "ARM", |
492 | val & PVRDMA_UAR_HANDLE_MASK, | |
493 | !!(val & PVRDMA_UAR_CQ_ARM_SOL)); | |
919ae3dd YS |
494 | rdma_rm_req_notify_cq(&dev->rdma_dev_res, |
495 | val & PVRDMA_UAR_HANDLE_MASK, | |
496 | !!(val & PVRDMA_UAR_CQ_ARM_SOL)); | |
497 | } | |
498 | if (val & PVRDMA_UAR_CQ_ARM_SOL) { | |
4d71b38a YS |
499 | trace_pvrdma_uar_write(addr, val, "CQ", "ARMSOL - not supported", 0, |
500 | 0); | |
919ae3dd YS |
501 | } |
502 | if (val & PVRDMA_UAR_CQ_POLL) { | |
4d71b38a YS |
503 | trace_pvrdma_uar_write(addr, val, "CQ", "POLL", |
504 | val & PVRDMA_UAR_HANDLE_MASK, 0); | |
919ae3dd YS |
505 | pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); |
506 | } | |
507 | break; | |
355b7cf3 KH |
508 | case PVRDMA_UAR_SRQ_OFFSET: |
509 | if (val & PVRDMA_UAR_SRQ_RECV) { | |
510 | trace_pvrdma_uar_write(addr, val, "QP", "SRQ", | |
511 | val & PVRDMA_UAR_HANDLE_MASK, 0); | |
512 | pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); | |
513 | } | |
514 | break; | |
919ae3dd | 515 | default: |
4d71b38a YS |
516 | rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, |
517 | addr, val); | |
919ae3dd YS |
518 | break; |
519 | } | |
520 | } | |
521 | ||
522 | static const MemoryRegionOps uar_ops = { | |
4d71b38a YS |
523 | .read = pvrdma_uar_read, |
524 | .write = pvrdma_uar_write, | |
919ae3dd YS |
525 | .endianness = DEVICE_LITTLE_ENDIAN, |
526 | .impl = { | |
527 | .min_access_size = sizeof(uint32_t), | |
528 | .max_access_size = sizeof(uint32_t), | |
529 | }, | |
530 | }; | |
531 | ||
532 | static void init_pci_config(PCIDevice *pdev) | |
533 | { | |
534 | pdev->config[PCI_INTERRUPT_PIN] = 1; | |
535 | } | |
536 | ||
537 | static void init_bars(PCIDevice *pdev) | |
538 | { | |
539 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
540 | ||
541 | /* BAR 0 - MSI-X */ | |
542 | memory_region_init(&dev->msix, OBJECT(dev), "pvrdma-msix", | |
543 | RDMA_BAR0_MSIX_SIZE); | |
544 | pci_register_bar(pdev, RDMA_MSIX_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, | |
545 | &dev->msix); | |
546 | ||
547 | /* BAR 1 - Registers */ | |
548 | memset(&dev->regs_data, 0, sizeof(dev->regs_data)); | |
549 | memory_region_init_io(&dev->regs, OBJECT(dev), ®s_ops, dev, | |
35092917 | 550 | "pvrdma-regs", sizeof(dev->regs_data)); |
919ae3dd YS |
551 | pci_register_bar(pdev, RDMA_REG_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, |
552 | &dev->regs); | |
553 | ||
554 | /* BAR 2 - UAR */ | |
555 | memset(&dev->uar_data, 0, sizeof(dev->uar_data)); | |
556 | memory_region_init_io(&dev->uar, OBJECT(dev), &uar_ops, dev, "rdma-uar", | |
35092917 | 557 | sizeof(dev->uar_data)); |
919ae3dd YS |
558 | pci_register_bar(pdev, RDMA_UAR_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, |
559 | &dev->uar); | |
560 | } | |
561 | ||
562 | static void init_regs(PCIDevice *pdev) | |
563 | { | |
564 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
565 | ||
566 | set_reg_val(dev, PVRDMA_REG_VERSION, PVRDMA_HW_VERSION); | |
567 | set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF); | |
568 | } | |
569 | ||
919ae3dd YS |
570 | static void init_dev_caps(PVRDMADev *dev) |
571 | { | |
572 | size_t pg_tbl_bytes = TARGET_PAGE_SIZE * | |
573 | (TARGET_PAGE_SIZE / sizeof(uint64_t)); | |
574 | size_t wr_sz = MAX(sizeof(struct pvrdma_sq_wqe_hdr), | |
575 | sizeof(struct pvrdma_rq_wqe_hdr)); | |
576 | ||
577 | dev->dev_attr.max_qp_wr = pg_tbl_bytes / | |
ffef4775 YS |
578 | (wr_sz + sizeof(struct pvrdma_sge) * |
579 | dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; | |
580 | /* First page is ring state ^^^^ */ | |
919ae3dd YS |
581 | |
582 | dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - | |
583 | TARGET_PAGE_SIZE; /* First page is ring state */ | |
355b7cf3 KH |
584 | |
585 | dev->dev_attr.max_srq_wr = pg_tbl_bytes / | |
586 | ((sizeof(struct pvrdma_rq_wqe_hdr) + | |
587 | sizeof(struct pvrdma_sge)) * | |
588 | dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; | |
919ae3dd YS |
589 | } |
590 | ||
591 | static int pvrdma_check_ram_shared(Object *obj, void *opaque) | |
592 | { | |
593 | bool *shared = opaque; | |
594 | ||
595 | if (object_dynamic_cast(obj, "memory-backend-ram")) { | |
596 | *shared = object_property_get_bool(obj, "share", NULL); | |
597 | } | |
598 | ||
599 | return 0; | |
600 | } | |
601 | ||
ffa65d97 YS |
602 | static void pvrdma_shutdown_notifier(Notifier *n, void *opaque) |
603 | { | |
604 | PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier); | |
605 | PCIDevice *pci_dev = PCI_DEVICE(dev); | |
606 | ||
607 | pvrdma_fini(pci_dev); | |
608 | } | |
609 | ||
919ae3dd YS |
610 | static void pvrdma_realize(PCIDevice *pdev, Error **errp) |
611 | { | |
cce64861 | 612 | int rc = 0; |
919ae3dd YS |
613 | PVRDMADev *dev = PVRDMA_DEV(pdev); |
614 | Object *memdev_root; | |
615 | bool ram_shared = false; | |
d961ead1 | 616 | PCIDevice *func0; |
919ae3dd | 617 | |
e9a54265 TH |
618 | warn_report_once("pvrdma is deprecated and will be removed in a future release"); |
619 | ||
4d71b38a YS |
620 | rdma_info_report("Initializing device %s %x.%x", pdev->name, |
621 | PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); | |
919ae3dd | 622 | |
8e3b0cbb | 623 | if (TARGET_PAGE_SIZE != qemu_real_host_page_size()) { |
919ae3dd YS |
624 | error_setg(errp, "Target page size must be the same as host page size"); |
625 | return; | |
626 | } | |
627 | ||
d961ead1 YS |
628 | func0 = pci_get_function_0(pdev); |
629 | /* Break if not vmxnet3 device in slot 0 */ | |
db8b88bf | 630 | if (strcmp(object_get_typename(OBJECT(func0)), TYPE_VMXNET3)) { |
d961ead1 YS |
631 | error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn), |
632 | TYPE_VMXNET3); | |
633 | return; | |
634 | } | |
635 | dev->func0 = VMXNET3(func0); | |
636 | ||
028c3f93 YS |
637 | addrconf_addr_eui48((unsigned char *)&dev->node_guid, |
638 | (const char *)&dev->func0->conf.macaddr.a); | |
639 | ||
919ae3dd YS |
640 | memdev_root = object_resolve_path("/objects", NULL); |
641 | if (memdev_root) { | |
642 | object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared); | |
643 | } | |
644 | if (!ram_shared) { | |
645 | error_setg(errp, "Only shared memory backed ram is supported"); | |
646 | return; | |
647 | } | |
648 | ||
649 | dev->dsr_info.dsr = NULL; | |
650 | ||
651 | init_pci_config(pdev); | |
652 | ||
653 | init_bars(pdev); | |
654 | ||
655 | init_regs(pdev); | |
656 | ||
4d71b38a | 657 | rc = init_msix(pdev); |
919ae3dd YS |
658 | if (rc) { |
659 | goto out; | |
660 | } | |
661 | ||
430e440c | 662 | rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, |
919ae3dd | 663 | dev->backend_device_name, dev->backend_port_num, |
4d71b38a | 664 | &dev->dev_attr, &dev->mad_chr); |
919ae3dd YS |
665 | if (rc) { |
666 | goto out; | |
667 | } | |
668 | ||
ffef4775 YS |
669 | init_dev_caps(dev); |
670 | ||
4d71b38a | 671 | rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr); |
919ae3dd YS |
672 | if (rc) { |
673 | goto out; | |
674 | } | |
675 | ||
919ae3dd YS |
676 | rc = pvrdma_qp_ops_init(); |
677 | if (rc) { | |
678 | goto out; | |
679 | } | |
680 | ||
c2dd117b YS |
681 | memset(&dev->stats, 0, sizeof(dev->stats)); |
682 | ||
ffa65d97 YS |
683 | dev->shutdown_notifier.notify = pvrdma_shutdown_notifier; |
684 | qemu_register_shutdown_notifier(&dev->shutdown_notifier); | |
685 | ||
68b89aee YS |
686 | #ifdef LEGACY_RDMA_REG_MR |
687 | rdma_info_report("Using legacy reg_mr"); | |
688 | #else | |
689 | rdma_info_report("Using iova reg_mr"); | |
690 | #endif | |
691 | ||
919ae3dd YS |
692 | out: |
693 | if (rc) { | |
cce64861 | 694 | pvrdma_fini(pdev); |
4d71b38a | 695 | error_append_hint(errp, "Device failed to load\n"); |
919ae3dd YS |
696 | } |
697 | } | |
698 | ||
919ae3dd YS |
699 | static void pvrdma_class_init(ObjectClass *klass, void *data) |
700 | { | |
701 | DeviceClass *dc = DEVICE_CLASS(klass); | |
702 | PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); | |
7b4433bb | 703 | RdmaProviderClass *ir = RDMA_PROVIDER_CLASS(klass); |
919ae3dd YS |
704 | |
705 | k->realize = pvrdma_realize; | |
919ae3dd YS |
706 | k->vendor_id = PCI_VENDOR_ID_VMWARE; |
707 | k->device_id = PCI_DEVICE_ID_VMWARE_PVRDMA; | |
708 | k->revision = 0x00; | |
709 | k->class_id = PCI_CLASS_NETWORK_OTHER; | |
710 | ||
711 | dc->desc = "RDMA Device"; | |
4f67d30b | 712 | device_class_set_props(dc, pvrdma_dev_properties); |
919ae3dd | 713 | set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); |
f4b2c02a | 714 | |
8dbbca5c | 715 | ir->format_statistics = pvrdma_format_statistics; |
919ae3dd YS |
716 | } |
717 | ||
718 | static const TypeInfo pvrdma_info = { | |
719 | .name = PVRDMA_HW_NAME, | |
720 | .parent = TYPE_PCI_DEVICE, | |
721 | .instance_size = sizeof(PVRDMADev), | |
722 | .class_init = pvrdma_class_init, | |
723 | .interfaces = (InterfaceInfo[]) { | |
724 | { INTERFACE_CONVENTIONAL_PCI_DEVICE }, | |
f4b2c02a | 725 | { INTERFACE_RDMA_PROVIDER }, |
919ae3dd YS |
726 | { } |
727 | } | |
728 | }; | |
729 | ||
730 | static void register_types(void) | |
731 | { | |
732 | type_register_static(&pvrdma_info); | |
733 | } | |
734 | ||
735 | type_init(register_types) |