]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * QEMU paravirtual RDMA | |
3 | * | |
4 | * Copyright (C) 2018 Oracle | |
5 | * Copyright (C) 2018 Red Hat Inc | |
6 | * | |
7 | * Authors: | |
8 | * Yuval Shaia <yuval.shaia@oracle.com> | |
9 | * Marcel Apfelbaum <marcel@redhat.com> | |
10 | * | |
11 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
12 | * See the COPYING file in the top-level directory. | |
13 | * | |
14 | */ | |
15 | ||
16 | #include "qemu/osdep.h" | |
17 | #include "qapi/error.h" | |
18 | #include "hw/hw.h" | |
19 | #include "hw/pci/pci.h" | |
20 | #include "hw/pci/pci_ids.h" | |
21 | #include "hw/pci/msi.h" | |
22 | #include "hw/pci/msix.h" | |
23 | #include "hw/qdev-core.h" | |
24 | #include "hw/qdev-properties.h" | |
25 | #include "cpu.h" | |
26 | #include "trace.h" | |
27 | #include "sysemu/sysemu.h" | |
28 | #include "monitor/monitor.h" | |
29 | #include "hw/rdma/rdma.h" | |
30 | ||
31 | #include "../rdma_rm.h" | |
32 | #include "../rdma_backend.h" | |
33 | #include "../rdma_utils.h" | |
34 | ||
35 | #include <infiniband/verbs.h> | |
36 | #include "pvrdma.h" | |
37 | #include "standard-headers/rdma/vmw_pvrdma-abi.h" | |
38 | #include "standard-headers/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h" | |
39 | #include "pvrdma_qp_ops.h" | |
40 | ||
41 | static Property pvrdma_dev_properties[] = { | |
42 | DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name), | |
43 | DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name), | |
44 | DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1), | |
45 | DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size, | |
46 | MAX_MR_SIZE), | |
47 | DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP), | |
48 | DEFINE_PROP_INT32("dev-caps-max-cq", PVRDMADev, dev_attr.max_cq, MAX_CQ), | |
49 | DEFINE_PROP_INT32("dev-caps-max-mr", PVRDMADev, dev_attr.max_mr, MAX_MR), | |
50 | DEFINE_PROP_INT32("dev-caps-max-pd", PVRDMADev, dev_attr.max_pd, MAX_PD), | |
51 | DEFINE_PROP_INT32("dev-caps-qp-rd-atom", PVRDMADev, dev_attr.max_qp_rd_atom, | |
52 | MAX_QP_RD_ATOM), | |
53 | DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev, | |
54 | dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM), | |
55 | DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH), | |
56 | DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr), | |
57 | DEFINE_PROP_END_OF_LIST(), | |
58 | }; | |
59 | ||
60 | static void pvrdma_print_statistics(Monitor *mon, RdmaProvider *obj) | |
61 | { | |
62 | PVRDMADev *dev = PVRDMA_DEV(obj); | |
63 | PCIDevice *pdev = PCI_DEVICE(dev); | |
64 | ||
65 | monitor_printf(mon, "%s, %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn), | |
66 | PCI_FUNC(pdev->devfn)); | |
67 | monitor_printf(mon, "\tcommands : %" PRId64 "\n", | |
68 | dev->stats.commands); | |
69 | monitor_printf(mon, "\tregs_reads : %" PRId64 "\n", | |
70 | dev->stats.regs_reads); | |
71 | monitor_printf(mon, "\tregs_writes : %" PRId64 "\n", | |
72 | dev->stats.regs_writes); | |
73 | monitor_printf(mon, "\tuar_writes : %" PRId64 "\n", | |
74 | dev->stats.uar_writes); | |
75 | monitor_printf(mon, "\tinterrupts : %" PRId64 "\n", | |
76 | dev->stats.interrupts); | |
77 | rdma_dump_device_counters(mon, &dev->rdma_dev_res); | |
78 | } | |
79 | ||
80 | static void free_dev_ring(PCIDevice *pci_dev, PvrdmaRing *ring, | |
81 | void *ring_state) | |
82 | { | |
83 | pvrdma_ring_free(ring); | |
84 | rdma_pci_dma_unmap(pci_dev, ring_state, TARGET_PAGE_SIZE); | |
85 | } | |
86 | ||
87 | static int init_dev_ring(PvrdmaRing *ring, struct pvrdma_ring **ring_state, | |
88 | const char *name, PCIDevice *pci_dev, | |
89 | dma_addr_t dir_addr, uint32_t num_pages) | |
90 | { | |
91 | uint64_t *dir, *tbl; | |
92 | int rc = 0; | |
93 | ||
94 | dir = rdma_pci_dma_map(pci_dev, dir_addr, TARGET_PAGE_SIZE); | |
95 | if (!dir) { | |
96 | rdma_error_report("Failed to map to page directory (ring %s)", name); | |
97 | rc = -ENOMEM; | |
98 | goto out; | |
99 | } | |
100 | tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE); | |
101 | if (!tbl) { | |
102 | rdma_error_report("Failed to map to page table (ring %s)", name); | |
103 | rc = -ENOMEM; | |
104 | goto out_free_dir; | |
105 | } | |
106 | ||
107 | *ring_state = rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE); | |
108 | if (!*ring_state) { | |
109 | rdma_error_report("Failed to map to ring state (ring %s)", name); | |
110 | rc = -ENOMEM; | |
111 | goto out_free_tbl; | |
112 | } | |
113 | /* RX ring is the second */ | |
114 | (*ring_state)++; | |
115 | rc = pvrdma_ring_init(ring, name, pci_dev, | |
116 | (struct pvrdma_ring *)*ring_state, | |
117 | (num_pages - 1) * TARGET_PAGE_SIZE / | |
118 | sizeof(struct pvrdma_cqne), | |
119 | sizeof(struct pvrdma_cqne), | |
120 | (dma_addr_t *)&tbl[1], (dma_addr_t)num_pages - 1); | |
121 | if (rc) { | |
122 | rc = -ENOMEM; | |
123 | goto out_free_ring_state; | |
124 | } | |
125 | ||
126 | goto out_free_tbl; | |
127 | ||
128 | out_free_ring_state: | |
129 | rdma_pci_dma_unmap(pci_dev, *ring_state, TARGET_PAGE_SIZE); | |
130 | ||
131 | out_free_tbl: | |
132 | rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE); | |
133 | ||
134 | out_free_dir: | |
135 | rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE); | |
136 | ||
137 | out: | |
138 | return rc; | |
139 | } | |
140 | ||
141 | static void free_dsr(PVRDMADev *dev) | |
142 | { | |
143 | PCIDevice *pci_dev = PCI_DEVICE(dev); | |
144 | ||
145 | if (!dev->dsr_info.dsr) { | |
146 | return; | |
147 | } | |
148 | ||
149 | free_dev_ring(pci_dev, &dev->dsr_info.async, | |
150 | dev->dsr_info.async_ring_state); | |
151 | ||
152 | free_dev_ring(pci_dev, &dev->dsr_info.cq, dev->dsr_info.cq_ring_state); | |
153 | ||
154 | rdma_pci_dma_unmap(pci_dev, dev->dsr_info.req, | |
155 | sizeof(union pvrdma_cmd_req)); | |
156 | ||
157 | rdma_pci_dma_unmap(pci_dev, dev->dsr_info.rsp, | |
158 | sizeof(union pvrdma_cmd_resp)); | |
159 | ||
160 | rdma_pci_dma_unmap(pci_dev, dev->dsr_info.dsr, | |
161 | sizeof(struct pvrdma_device_shared_region)); | |
162 | ||
163 | dev->dsr_info.dsr = NULL; | |
164 | } | |
165 | ||
166 | static int load_dsr(PVRDMADev *dev) | |
167 | { | |
168 | int rc = 0; | |
169 | PCIDevice *pci_dev = PCI_DEVICE(dev); | |
170 | DSRInfo *dsr_info; | |
171 | struct pvrdma_device_shared_region *dsr; | |
172 | ||
173 | free_dsr(dev); | |
174 | ||
175 | /* Map to DSR */ | |
176 | dev->dsr_info.dsr = rdma_pci_dma_map(pci_dev, dev->dsr_info.dma, | |
177 | sizeof(struct pvrdma_device_shared_region)); | |
178 | if (!dev->dsr_info.dsr) { | |
179 | rdma_error_report("Failed to map to DSR"); | |
180 | rc = -ENOMEM; | |
181 | goto out; | |
182 | } | |
183 | ||
184 | /* Shortcuts */ | |
185 | dsr_info = &dev->dsr_info; | |
186 | dsr = dsr_info->dsr; | |
187 | ||
188 | /* Map to command slot */ | |
189 | dsr_info->req = rdma_pci_dma_map(pci_dev, dsr->cmd_slot_dma, | |
190 | sizeof(union pvrdma_cmd_req)); | |
191 | if (!dsr_info->req) { | |
192 | rdma_error_report("Failed to map to command slot address"); | |
193 | rc = -ENOMEM; | |
194 | goto out_free_dsr; | |
195 | } | |
196 | ||
197 | /* Map to response slot */ | |
198 | dsr_info->rsp = rdma_pci_dma_map(pci_dev, dsr->resp_slot_dma, | |
199 | sizeof(union pvrdma_cmd_resp)); | |
200 | if (!dsr_info->rsp) { | |
201 | rdma_error_report("Failed to map to response slot address"); | |
202 | rc = -ENOMEM; | |
203 | goto out_free_req; | |
204 | } | |
205 | ||
206 | /* Map to CQ notification ring */ | |
207 | rc = init_dev_ring(&dsr_info->cq, &dsr_info->cq_ring_state, "dev_cq", | |
208 | pci_dev, dsr->cq_ring_pages.pdir_dma, | |
209 | dsr->cq_ring_pages.num_pages); | |
210 | if (rc) { | |
211 | rc = -ENOMEM; | |
212 | goto out_free_rsp; | |
213 | } | |
214 | ||
215 | /* Map to event notification ring */ | |
216 | rc = init_dev_ring(&dsr_info->async, &dsr_info->async_ring_state, | |
217 | "dev_async", pci_dev, dsr->async_ring_pages.pdir_dma, | |
218 | dsr->async_ring_pages.num_pages); | |
219 | if (rc) { | |
220 | rc = -ENOMEM; | |
221 | goto out_free_rsp; | |
222 | } | |
223 | ||
224 | goto out; | |
225 | ||
226 | out_free_rsp: | |
227 | rdma_pci_dma_unmap(pci_dev, dsr_info->rsp, sizeof(union pvrdma_cmd_resp)); | |
228 | ||
229 | out_free_req: | |
230 | rdma_pci_dma_unmap(pci_dev, dsr_info->req, sizeof(union pvrdma_cmd_req)); | |
231 | ||
232 | out_free_dsr: | |
233 | rdma_pci_dma_unmap(pci_dev, dsr_info->dsr, | |
234 | sizeof(struct pvrdma_device_shared_region)); | |
235 | dsr_info->dsr = NULL; | |
236 | ||
237 | out: | |
238 | return rc; | |
239 | } | |
240 | ||
241 | static void init_dsr_dev_caps(PVRDMADev *dev) | |
242 | { | |
243 | struct pvrdma_device_shared_region *dsr; | |
244 | ||
245 | if (dev->dsr_info.dsr == NULL) { | |
246 | rdma_error_report("Can't initialized DSR"); | |
247 | return; | |
248 | } | |
249 | ||
250 | dsr = dev->dsr_info.dsr; | |
251 | dsr->caps.fw_ver = PVRDMA_FW_VERSION; | |
252 | dsr->caps.mode = PVRDMA_DEVICE_MODE_ROCE; | |
253 | dsr->caps.gid_types |= PVRDMA_GID_TYPE_FLAG_ROCE_V1; | |
254 | dsr->caps.max_uar = RDMA_BAR2_UAR_SIZE; | |
255 | dsr->caps.max_mr_size = dev->dev_attr.max_mr_size; | |
256 | dsr->caps.max_qp = dev->dev_attr.max_qp; | |
257 | dsr->caps.max_qp_wr = dev->dev_attr.max_qp_wr; | |
258 | dsr->caps.max_sge = dev->dev_attr.max_sge; | |
259 | dsr->caps.max_cq = dev->dev_attr.max_cq; | |
260 | dsr->caps.max_cqe = dev->dev_attr.max_cqe; | |
261 | dsr->caps.max_mr = dev->dev_attr.max_mr; | |
262 | dsr->caps.max_pd = dev->dev_attr.max_pd; | |
263 | dsr->caps.max_ah = dev->dev_attr.max_ah; | |
264 | dsr->caps.gid_tbl_len = MAX_GIDS; | |
265 | dsr->caps.sys_image_guid = 0; | |
266 | dsr->caps.node_guid = dev->node_guid; | |
267 | dsr->caps.phys_port_cnt = MAX_PORTS; | |
268 | dsr->caps.max_pkeys = MAX_PKEYS; | |
269 | } | |
270 | ||
271 | static void uninit_msix(PCIDevice *pdev, int used_vectors) | |
272 | { | |
273 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
274 | int i; | |
275 | ||
276 | for (i = 0; i < used_vectors; i++) { | |
277 | msix_vector_unuse(pdev, i); | |
278 | } | |
279 | ||
280 | msix_uninit(pdev, &dev->msix, &dev->msix); | |
281 | } | |
282 | ||
283 | static int init_msix(PCIDevice *pdev) | |
284 | { | |
285 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
286 | int i; | |
287 | int rc; | |
288 | ||
289 | rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX, | |
290 | RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX, | |
291 | RDMA_MSIX_PBA, 0, NULL); | |
292 | ||
293 | if (rc < 0) { | |
294 | rdma_error_report("Failed to initialize MSI-X"); | |
295 | return rc; | |
296 | } | |
297 | ||
298 | for (i = 0; i < RDMA_MAX_INTRS; i++) { | |
299 | rc = msix_vector_use(PCI_DEVICE(dev), i); | |
300 | if (rc < 0) { | |
301 | rdma_error_report("Fail mark MSI-X vector %d", i); | |
302 | uninit_msix(pdev, i); | |
303 | return rc; | |
304 | } | |
305 | } | |
306 | ||
307 | return 0; | |
308 | } | |
309 | ||
310 | static void pvrdma_fini(PCIDevice *pdev) | |
311 | { | |
312 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
313 | ||
314 | pvrdma_qp_ops_fini(); | |
315 | ||
316 | rdma_backend_stop(&dev->backend_dev); | |
317 | ||
318 | rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev, | |
319 | dev->backend_eth_device_name); | |
320 | ||
321 | rdma_backend_fini(&dev->backend_dev); | |
322 | ||
323 | free_dsr(dev); | |
324 | ||
325 | if (msix_enabled(pdev)) { | |
326 | uninit_msix(pdev, RDMA_MAX_INTRS); | |
327 | } | |
328 | ||
329 | rdma_info_report("Device %s %x.%x is down", pdev->name, | |
330 | PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); | |
331 | } | |
332 | ||
333 | static void pvrdma_stop(PVRDMADev *dev) | |
334 | { | |
335 | rdma_backend_stop(&dev->backend_dev); | |
336 | } | |
337 | ||
338 | static void pvrdma_start(PVRDMADev *dev) | |
339 | { | |
340 | rdma_backend_start(&dev->backend_dev); | |
341 | } | |
342 | ||
343 | static void activate_device(PVRDMADev *dev) | |
344 | { | |
345 | pvrdma_start(dev); | |
346 | set_reg_val(dev, PVRDMA_REG_ERR, 0); | |
347 | } | |
348 | ||
349 | static int unquiesce_device(PVRDMADev *dev) | |
350 | { | |
351 | return 0; | |
352 | } | |
353 | ||
354 | static void reset_device(PVRDMADev *dev) | |
355 | { | |
356 | pvrdma_stop(dev); | |
357 | } | |
358 | ||
359 | static uint64_t pvrdma_regs_read(void *opaque, hwaddr addr, unsigned size) | |
360 | { | |
361 | PVRDMADev *dev = opaque; | |
362 | uint32_t val; | |
363 | ||
364 | dev->stats.regs_reads++; | |
365 | ||
366 | if (get_reg_val(dev, addr, &val)) { | |
367 | rdma_error_report("Failed to read REG value from address 0x%x", | |
368 | (uint32_t)addr); | |
369 | return -EINVAL; | |
370 | } | |
371 | ||
372 | trace_pvrdma_regs_read(addr, val); | |
373 | ||
374 | return val; | |
375 | } | |
376 | ||
377 | static void pvrdma_regs_write(void *opaque, hwaddr addr, uint64_t val, | |
378 | unsigned size) | |
379 | { | |
380 | PVRDMADev *dev = opaque; | |
381 | ||
382 | dev->stats.regs_writes++; | |
383 | ||
384 | if (set_reg_val(dev, addr, val)) { | |
385 | rdma_error_report("Failed to set REG value, addr=0x%"PRIx64 ", val=0x%"PRIx64, | |
386 | addr, val); | |
387 | return; | |
388 | } | |
389 | ||
390 | switch (addr) { | |
391 | case PVRDMA_REG_DSRLOW: | |
392 | trace_pvrdma_regs_write(addr, val, "DSRLOW", ""); | |
393 | dev->dsr_info.dma = val; | |
394 | break; | |
395 | case PVRDMA_REG_DSRHIGH: | |
396 | trace_pvrdma_regs_write(addr, val, "DSRHIGH", ""); | |
397 | dev->dsr_info.dma |= val << 32; | |
398 | load_dsr(dev); | |
399 | init_dsr_dev_caps(dev); | |
400 | break; | |
401 | case PVRDMA_REG_CTL: | |
402 | switch (val) { | |
403 | case PVRDMA_DEVICE_CTL_ACTIVATE: | |
404 | trace_pvrdma_regs_write(addr, val, "CTL", "ACTIVATE"); | |
405 | activate_device(dev); | |
406 | break; | |
407 | case PVRDMA_DEVICE_CTL_UNQUIESCE: | |
408 | trace_pvrdma_regs_write(addr, val, "CTL", "UNQUIESCE"); | |
409 | unquiesce_device(dev); | |
410 | break; | |
411 | case PVRDMA_DEVICE_CTL_RESET: | |
412 | trace_pvrdma_regs_write(addr, val, "CTL", "URESET"); | |
413 | reset_device(dev); | |
414 | break; | |
415 | } | |
416 | break; | |
417 | case PVRDMA_REG_IMR: | |
418 | trace_pvrdma_regs_write(addr, val, "INTR_MASK", ""); | |
419 | dev->interrupt_mask = val; | |
420 | break; | |
421 | case PVRDMA_REG_REQUEST: | |
422 | if (val == 0) { | |
423 | trace_pvrdma_regs_write(addr, val, "REQUEST", ""); | |
424 | pvrdma_exec_cmd(dev); | |
425 | } | |
426 | break; | |
427 | default: | |
428 | break; | |
429 | } | |
430 | } | |
431 | ||
432 | static const MemoryRegionOps regs_ops = { | |
433 | .read = pvrdma_regs_read, | |
434 | .write = pvrdma_regs_write, | |
435 | .endianness = DEVICE_LITTLE_ENDIAN, | |
436 | .impl = { | |
437 | .min_access_size = sizeof(uint32_t), | |
438 | .max_access_size = sizeof(uint32_t), | |
439 | }, | |
440 | }; | |
441 | ||
442 | static uint64_t pvrdma_uar_read(void *opaque, hwaddr addr, unsigned size) | |
443 | { | |
444 | return 0xffffffff; | |
445 | } | |
446 | ||
447 | static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val, | |
448 | unsigned size) | |
449 | { | |
450 | PVRDMADev *dev = opaque; | |
451 | ||
452 | dev->stats.uar_writes++; | |
453 | ||
454 | switch (addr & 0xFFF) { /* Mask with 0xFFF as each UC gets page */ | |
455 | case PVRDMA_UAR_QP_OFFSET: | |
456 | if (val & PVRDMA_UAR_QP_SEND) { | |
457 | trace_pvrdma_uar_write(addr, val, "QP", "SEND", | |
458 | val & PVRDMA_UAR_HANDLE_MASK, 0); | |
459 | pvrdma_qp_send(dev, val & PVRDMA_UAR_HANDLE_MASK); | |
460 | } | |
461 | if (val & PVRDMA_UAR_QP_RECV) { | |
462 | trace_pvrdma_uar_write(addr, val, "QP", "RECV", | |
463 | val & PVRDMA_UAR_HANDLE_MASK, 0); | |
464 | pvrdma_qp_recv(dev, val & PVRDMA_UAR_HANDLE_MASK); | |
465 | } | |
466 | break; | |
467 | case PVRDMA_UAR_CQ_OFFSET: | |
468 | if (val & PVRDMA_UAR_CQ_ARM) { | |
469 | trace_pvrdma_uar_write(addr, val, "CQ", "ARM", | |
470 | val & PVRDMA_UAR_HANDLE_MASK, | |
471 | !!(val & PVRDMA_UAR_CQ_ARM_SOL)); | |
472 | rdma_rm_req_notify_cq(&dev->rdma_dev_res, | |
473 | val & PVRDMA_UAR_HANDLE_MASK, | |
474 | !!(val & PVRDMA_UAR_CQ_ARM_SOL)); | |
475 | } | |
476 | if (val & PVRDMA_UAR_CQ_ARM_SOL) { | |
477 | trace_pvrdma_uar_write(addr, val, "CQ", "ARMSOL - not supported", 0, | |
478 | 0); | |
479 | } | |
480 | if (val & PVRDMA_UAR_CQ_POLL) { | |
481 | trace_pvrdma_uar_write(addr, val, "CQ", "POLL", | |
482 | val & PVRDMA_UAR_HANDLE_MASK, 0); | |
483 | pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK); | |
484 | } | |
485 | break; | |
486 | default: | |
487 | rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64, | |
488 | addr, val); | |
489 | break; | |
490 | } | |
491 | } | |
492 | ||
493 | static const MemoryRegionOps uar_ops = { | |
494 | .read = pvrdma_uar_read, | |
495 | .write = pvrdma_uar_write, | |
496 | .endianness = DEVICE_LITTLE_ENDIAN, | |
497 | .impl = { | |
498 | .min_access_size = sizeof(uint32_t), | |
499 | .max_access_size = sizeof(uint32_t), | |
500 | }, | |
501 | }; | |
502 | ||
503 | static void init_pci_config(PCIDevice *pdev) | |
504 | { | |
505 | pdev->config[PCI_INTERRUPT_PIN] = 1; | |
506 | } | |
507 | ||
508 | static void init_bars(PCIDevice *pdev) | |
509 | { | |
510 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
511 | ||
512 | /* BAR 0 - MSI-X */ | |
513 | memory_region_init(&dev->msix, OBJECT(dev), "pvrdma-msix", | |
514 | RDMA_BAR0_MSIX_SIZE); | |
515 | pci_register_bar(pdev, RDMA_MSIX_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, | |
516 | &dev->msix); | |
517 | ||
518 | /* BAR 1 - Registers */ | |
519 | memset(&dev->regs_data, 0, sizeof(dev->regs_data)); | |
520 | memory_region_init_io(&dev->regs, OBJECT(dev), ®s_ops, dev, | |
521 | "pvrdma-regs", sizeof(dev->regs_data)); | |
522 | pci_register_bar(pdev, RDMA_REG_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, | |
523 | &dev->regs); | |
524 | ||
525 | /* BAR 2 - UAR */ | |
526 | memset(&dev->uar_data, 0, sizeof(dev->uar_data)); | |
527 | memory_region_init_io(&dev->uar, OBJECT(dev), &uar_ops, dev, "rdma-uar", | |
528 | sizeof(dev->uar_data)); | |
529 | pci_register_bar(pdev, RDMA_UAR_BAR_IDX, PCI_BASE_ADDRESS_SPACE_MEMORY, | |
530 | &dev->uar); | |
531 | } | |
532 | ||
533 | static void init_regs(PCIDevice *pdev) | |
534 | { | |
535 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
536 | ||
537 | set_reg_val(dev, PVRDMA_REG_VERSION, PVRDMA_HW_VERSION); | |
538 | set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF); | |
539 | } | |
540 | ||
541 | static void init_dev_caps(PVRDMADev *dev) | |
542 | { | |
543 | size_t pg_tbl_bytes = TARGET_PAGE_SIZE * | |
544 | (TARGET_PAGE_SIZE / sizeof(uint64_t)); | |
545 | size_t wr_sz = MAX(sizeof(struct pvrdma_sq_wqe_hdr), | |
546 | sizeof(struct pvrdma_rq_wqe_hdr)); | |
547 | ||
548 | dev->dev_attr.max_qp_wr = pg_tbl_bytes / | |
549 | (wr_sz + sizeof(struct pvrdma_sge) * | |
550 | dev->dev_attr.max_sge) - TARGET_PAGE_SIZE; | |
551 | /* First page is ring state ^^^^ */ | |
552 | ||
553 | dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) - | |
554 | TARGET_PAGE_SIZE; /* First page is ring state */ | |
555 | } | |
556 | ||
557 | static int pvrdma_check_ram_shared(Object *obj, void *opaque) | |
558 | { | |
559 | bool *shared = opaque; | |
560 | ||
561 | if (object_dynamic_cast(obj, "memory-backend-ram")) { | |
562 | *shared = object_property_get_bool(obj, "share", NULL); | |
563 | } | |
564 | ||
565 | return 0; | |
566 | } | |
567 | ||
568 | static void pvrdma_shutdown_notifier(Notifier *n, void *opaque) | |
569 | { | |
570 | PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier); | |
571 | PCIDevice *pci_dev = PCI_DEVICE(dev); | |
572 | ||
573 | pvrdma_fini(pci_dev); | |
574 | } | |
575 | ||
576 | static void pvrdma_realize(PCIDevice *pdev, Error **errp) | |
577 | { | |
578 | int rc = 0; | |
579 | PVRDMADev *dev = PVRDMA_DEV(pdev); | |
580 | Object *memdev_root; | |
581 | bool ram_shared = false; | |
582 | PCIDevice *func0; | |
583 | ||
584 | rdma_info_report("Initializing device %s %x.%x", pdev->name, | |
585 | PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); | |
586 | ||
587 | if (TARGET_PAGE_SIZE != getpagesize()) { | |
588 | error_setg(errp, "Target page size must be the same as host page size"); | |
589 | return; | |
590 | } | |
591 | ||
592 | func0 = pci_get_function_0(pdev); | |
593 | /* Break if not vmxnet3 device in slot 0 */ | |
594 | if (strcmp(object_get_typename(&func0->qdev.parent_obj), TYPE_VMXNET3)) { | |
595 | error_setg(errp, "Device on %x.0 must be %s", PCI_SLOT(pdev->devfn), | |
596 | TYPE_VMXNET3); | |
597 | return; | |
598 | } | |
599 | dev->func0 = VMXNET3(func0); | |
600 | ||
601 | addrconf_addr_eui48((unsigned char *)&dev->node_guid, | |
602 | (const char *)&dev->func0->conf.macaddr.a); | |
603 | ||
604 | memdev_root = object_resolve_path("/objects", NULL); | |
605 | if (memdev_root) { | |
606 | object_child_foreach(memdev_root, pvrdma_check_ram_shared, &ram_shared); | |
607 | } | |
608 | if (!ram_shared) { | |
609 | error_setg(errp, "Only shared memory backed ram is supported"); | |
610 | return; | |
611 | } | |
612 | ||
613 | dev->dsr_info.dsr = NULL; | |
614 | ||
615 | init_pci_config(pdev); | |
616 | ||
617 | init_bars(pdev); | |
618 | ||
619 | init_regs(pdev); | |
620 | ||
621 | rc = init_msix(pdev); | |
622 | if (rc) { | |
623 | goto out; | |
624 | } | |
625 | ||
626 | rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res, | |
627 | dev->backend_device_name, dev->backend_port_num, | |
628 | &dev->dev_attr, &dev->mad_chr); | |
629 | if (rc) { | |
630 | goto out; | |
631 | } | |
632 | ||
633 | init_dev_caps(dev); | |
634 | ||
635 | rc = rdma_rm_init(&dev->rdma_dev_res, &dev->dev_attr); | |
636 | if (rc) { | |
637 | goto out; | |
638 | } | |
639 | ||
640 | rc = pvrdma_qp_ops_init(); | |
641 | if (rc) { | |
642 | goto out; | |
643 | } | |
644 | ||
645 | memset(&dev->stats, 0, sizeof(dev->stats)); | |
646 | ||
647 | dev->shutdown_notifier.notify = pvrdma_shutdown_notifier; | |
648 | qemu_register_shutdown_notifier(&dev->shutdown_notifier); | |
649 | ||
650 | out: | |
651 | if (rc) { | |
652 | pvrdma_fini(pdev); | |
653 | error_append_hint(errp, "Device failed to load\n"); | |
654 | } | |
655 | } | |
656 | ||
657 | static void pvrdma_class_init(ObjectClass *klass, void *data) | |
658 | { | |
659 | DeviceClass *dc = DEVICE_CLASS(klass); | |
660 | PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); | |
661 | RdmaProviderClass *ir = INTERFACE_RDMA_PROVIDER_CLASS(klass); | |
662 | ||
663 | k->realize = pvrdma_realize; | |
664 | k->vendor_id = PCI_VENDOR_ID_VMWARE; | |
665 | k->device_id = PCI_DEVICE_ID_VMWARE_PVRDMA; | |
666 | k->revision = 0x00; | |
667 | k->class_id = PCI_CLASS_NETWORK_OTHER; | |
668 | ||
669 | dc->desc = "RDMA Device"; | |
670 | dc->props = pvrdma_dev_properties; | |
671 | set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); | |
672 | ||
673 | ir->print_statistics = pvrdma_print_statistics; | |
674 | } | |
675 | ||
676 | static const TypeInfo pvrdma_info = { | |
677 | .name = PVRDMA_HW_NAME, | |
678 | .parent = TYPE_PCI_DEVICE, | |
679 | .instance_size = sizeof(PVRDMADev), | |
680 | .class_init = pvrdma_class_init, | |
681 | .interfaces = (InterfaceInfo[]) { | |
682 | { INTERFACE_CONVENTIONAL_PCI_DEVICE }, | |
683 | { INTERFACE_RDMA_PROVIDER }, | |
684 | { } | |
685 | } | |
686 | }; | |
687 | ||
688 | static void register_types(void) | |
689 | { | |
690 | type_register_static(&pvrdma_info); | |
691 | } | |
692 | ||
693 | type_init(register_types) |