1 | /* |
2 | * QEMU paravirtual RDMA - Resource Manager Implementation | |
3 | * | |
4 | * Copyright (C) 2018 Oracle | |
5 | * Copyright (C) 2018 Red Hat Inc | |
6 | * | |
7 | * Authors: | |
8 | * Yuval Shaia <yuval.shaia@oracle.com> | |
9 | * Marcel Apfelbaum <marcel@redhat.com> | |
10 | * | |
11 | * This work is licensed under the terms of the GNU GPL, version 2 or later. | |
12 | * See the COPYING file in the top-level directory. | |
13 | * | |
14 | */ | |
15 | ||
16 | #include <qemu/osdep.h> | |
17 | #include <qapi/error.h> | |
18 | #include <cpu.h> | |
19 | ||
20 | #include "rdma_utils.h" | |
21 | #include "rdma_backend.h" | |
22 | #include "rdma_rm.h" | |
23 | ||
#define MAX_RM_TBL_NAME 16

/*
 * Page directory and page table sizes, in 64-bit entries per target page.
 * These must expand to parenthesized expressions: the previous brace form
 * `{ TARGET_PAGE_SIZE / sizeof(__u64) }` is an initializer list and breaks
 * any use of the macro in expression context (array bounds, arithmetic).
 */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
29 | ||
30 | static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl, | |
31 | uint32_t tbl_sz, uint32_t res_sz) | |
32 | { | |
33 | tbl->tbl = g_malloc(tbl_sz * res_sz); | |
34 | ||
35 | strncpy(tbl->name, name, MAX_RM_TBL_NAME); | |
36 | tbl->name[MAX_RM_TBL_NAME - 1] = 0; | |
37 | ||
38 | tbl->bitmap = bitmap_new(tbl_sz); | |
39 | tbl->tbl_sz = tbl_sz; | |
40 | tbl->res_sz = res_sz; | |
41 | qemu_mutex_init(&tbl->lock); | |
42 | } | |
43 | ||
44 | static inline void res_tbl_free(RdmaRmResTbl *tbl) | |
45 | { | |
46 | qemu_mutex_destroy(&tbl->lock); | |
47 | g_free(tbl->tbl); | |
48 | bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0); | |
49 | } | |
50 | ||
51 | static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle) | |
52 | { | |
53 | pr_dbg("%s, handle=%d\n", tbl->name, handle); | |
54 | ||
55 | if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) { | |
56 | return tbl->tbl + handle * tbl->res_sz; | |
57 | } else { | |
58 | pr_dbg("Invalid handle %d\n", handle); | |
59 | return NULL; | |
60 | } | |
61 | } | |
62 | ||
63 | static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle) | |
64 | { | |
65 | qemu_mutex_lock(&tbl->lock); | |
66 | ||
67 | *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz); | |
68 | if (*handle > tbl->tbl_sz) { | |
69 | pr_dbg("Failed to alloc, bitmap is full\n"); | |
70 | qemu_mutex_unlock(&tbl->lock); | |
71 | return NULL; | |
72 | } | |
73 | ||
74 | set_bit(*handle, tbl->bitmap); | |
75 | ||
76 | qemu_mutex_unlock(&tbl->lock); | |
77 | ||
78 | memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz); | |
79 | ||
80 | pr_dbg("%s, handle=%d\n", tbl->name, *handle); | |
81 | ||
82 | return tbl->tbl + *handle * tbl->res_sz; | |
83 | } | |
84 | ||
85 | static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle) | |
86 | { | |
87 | pr_dbg("%s, handle=%d\n", tbl->name, handle); | |
88 | ||
89 | qemu_mutex_lock(&tbl->lock); | |
90 | ||
91 | if (handle < tbl->tbl_sz) { | |
92 | clear_bit(handle, tbl->bitmap); | |
93 | } | |
94 | ||
95 | qemu_mutex_unlock(&tbl->lock); | |
96 | } | |
97 | ||
98 | int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, | |
99 | uint32_t *pd_handle, uint32_t ctx_handle) | |
100 | { | |
101 | RdmaRmPD *pd; | |
102 | int ret = -ENOMEM; | |
103 | ||
104 | pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle); | |
105 | if (!pd) { | |
106 | goto out; | |
107 | } | |
108 | ||
109 | ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd); | |
110 | if (ret) { | |
111 | ret = -EIO; | |
112 | goto out_tbl_dealloc; | |
113 | } | |
114 | ||
115 | pd->ctx_handle = ctx_handle; | |
116 | ||
117 | return 0; | |
118 | ||
119 | out_tbl_dealloc: | |
120 | res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle); | |
121 | ||
122 | out: | |
123 | return ret; | |
124 | } | |
125 | ||
126 | RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle) | |
127 | { | |
128 | return res_tbl_get(&dev_res->pd_tbl, pd_handle); | |
129 | } | |
130 | ||
131 | void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle) | |
132 | { | |
133 | RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle); | |
134 | ||
135 | if (pd) { | |
136 | rdma_backend_destroy_pd(&pd->backend_pd); | |
137 | res_tbl_dealloc(&dev_res->pd_tbl, pd_handle); | |
138 | } | |
139 | } | |
140 | ||
141 | int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle, | |
142 | uint64_t guest_start, size_t guest_length, void *host_virt, | |
143 | int access_flags, uint32_t *mr_handle, uint32_t *lkey, | |
144 | uint32_t *rkey) | |
145 | { | |
146 | RdmaRmMR *mr; | |
147 | int ret = 0; | |
148 | RdmaRmPD *pd; | |
149 | uint64_t addr; | |
150 | size_t length; | |
151 | ||
152 | pd = rdma_rm_get_pd(dev_res, pd_handle); | |
153 | if (!pd) { | |
154 | pr_dbg("Invalid PD\n"); | |
155 | return -EINVAL; | |
156 | } | |
157 | ||
158 | mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle); | |
159 | if (!mr) { | |
160 | pr_dbg("Failed to allocate obj in table\n"); | |
161 | return -ENOMEM; | |
162 | } | |
163 | ||
164 | if (!host_virt) { | |
165 | /* TODO: This is my guess but not so sure that this needs to be | |
166 | * done */ | |
167 | length = TARGET_PAGE_SIZE; | |
168 | addr = (uint64_t)g_malloc(length); | |
169 | } else { | |
170 | mr->user_mr.host_virt = (uint64_t) host_virt; | |
171 | pr_dbg("host_virt=0x%lx\n", mr->user_mr.host_virt); | |
172 | mr->user_mr.length = guest_length; | |
173 | pr_dbg("length=0x%lx\n", guest_length); | |
174 | mr->user_mr.guest_start = guest_start; | |
175 | pr_dbg("guest_start=0x%lx\n", mr->user_mr.guest_start); | |
176 | ||
177 | length = mr->user_mr.length; | |
178 | addr = mr->user_mr.host_virt; | |
179 | } | |
180 | ||
181 | ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length, | |
182 | access_flags); | |
183 | if (ret) { | |
184 | pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret); | |
185 | ret = -EIO; | |
186 | goto out_dealloc_mr; | |
187 | } | |
188 | ||
189 | if (!host_virt) { | |
190 | *lkey = mr->lkey = rdma_backend_mr_lkey(&mr->backend_mr); | |
191 | *rkey = mr->rkey = rdma_backend_mr_rkey(&mr->backend_mr); | |
192 | } else { | |
193 | /* We keep mr_handle in lkey so send and recv get get mr ptr */ | |
194 | *lkey = *mr_handle; | |
195 | *rkey = -1; | |
196 | } | |
197 | ||
198 | mr->pd_handle = pd_handle; | |
199 | ||
200 | return 0; | |
201 | ||
202 | out_dealloc_mr: | |
203 | res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle); | |
204 | ||
205 | return ret; | |
206 | } | |
207 | ||
208 | RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) | |
209 | { | |
210 | return res_tbl_get(&dev_res->mr_tbl, mr_handle); | |
211 | } | |
212 | ||
213 | void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle) | |
214 | { | |
215 | RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle); | |
216 | ||
217 | if (mr) { | |
218 | rdma_backend_destroy_mr(&mr->backend_mr); | |
219 | munmap((void *)mr->user_mr.host_virt, mr->user_mr.length); | |
220 | res_tbl_dealloc(&dev_res->mr_tbl, mr_handle); | |
221 | } | |
222 | } | |
223 | ||
224 | int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn, | |
225 | uint32_t *uc_handle) | |
226 | { | |
227 | RdmaRmUC *uc; | |
228 | ||
229 | /* TODO: Need to make sure pfn is between bar start address and | |
230 | * bsd+RDMA_BAR2_UAR_SIZE | |
231 | if (pfn > RDMA_BAR2_UAR_SIZE) { | |
232 | pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE); | |
233 | return -ENOMEM; | |
234 | } | |
235 | */ | |
236 | ||
237 | uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle); | |
238 | if (!uc) { | |
239 | return -ENOMEM; | |
240 | } | |
241 | ||
242 | return 0; | |
243 | } | |
244 | ||
245 | RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle) | |
246 | { | |
247 | return res_tbl_get(&dev_res->uc_tbl, uc_handle); | |
248 | } | |
249 | ||
250 | void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle) | |
251 | { | |
252 | RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle); | |
253 | ||
254 | if (uc) { | |
255 | res_tbl_dealloc(&dev_res->uc_tbl, uc_handle); | |
256 | } | |
257 | } | |
258 | ||
259 | RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle) | |
260 | { | |
261 | return res_tbl_get(&dev_res->cq_tbl, cq_handle); | |
262 | } | |
263 | ||
264 | int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev, | |
265 | uint32_t cqe, uint32_t *cq_handle, void *opaque) | |
266 | { | |
267 | int rc; | |
268 | RdmaRmCQ *cq; | |
269 | ||
270 | cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle); | |
271 | if (!cq) { | |
272 | return -ENOMEM; | |
273 | } | |
274 | ||
275 | cq->opaque = opaque; | |
276 | cq->notify = false; | |
277 | ||
278 | rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe); | |
279 | if (rc) { | |
280 | rc = -EIO; | |
281 | goto out_dealloc_cq; | |
282 | } | |
283 | ||
284 | return 0; | |
285 | ||
286 | out_dealloc_cq: | |
287 | rdma_rm_dealloc_cq(dev_res, *cq_handle); | |
288 | ||
289 | return rc; | |
290 | } | |
291 | ||
292 | void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle, | |
293 | bool notify) | |
294 | { | |
295 | RdmaRmCQ *cq; | |
296 | ||
297 | pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify); | |
298 | ||
299 | cq = rdma_rm_get_cq(dev_res, cq_handle); | |
300 | if (!cq) { | |
301 | return; | |
302 | } | |
303 | ||
304 | cq->notify = notify; | |
305 | pr_dbg("notify=%d\n", cq->notify); | |
306 | } | |
307 | ||
308 | void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle) | |
309 | { | |
310 | RdmaRmCQ *cq; | |
311 | ||
312 | cq = rdma_rm_get_cq(dev_res, cq_handle); | |
313 | if (!cq) { | |
314 | return; | |
315 | } | |
316 | ||
317 | rdma_backend_destroy_cq(&cq->backend_cq); | |
318 | ||
319 | res_tbl_dealloc(&dev_res->cq_tbl, cq_handle); | |
320 | } | |
321 | ||
322 | RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn) | |
323 | { | |
324 | GBytes *key = g_bytes_new(&qpn, sizeof(qpn)); | |
325 | ||
326 | RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key); | |
327 | ||
328 | g_bytes_unref(key); | |
329 | ||
330 | return qp; | |
331 | } | |
332 | ||
333 | int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle, | |
334 | uint8_t qp_type, uint32_t max_send_wr, | |
335 | uint32_t max_send_sge, uint32_t send_cq_handle, | |
336 | uint32_t max_recv_wr, uint32_t max_recv_sge, | |
337 | uint32_t recv_cq_handle, void *opaque, uint32_t *qpn) | |
338 | { | |
339 | int rc; | |
340 | RdmaRmQP *qp; | |
341 | RdmaRmCQ *scq, *rcq; | |
342 | RdmaRmPD *pd; | |
343 | uint32_t rm_qpn; | |
344 | ||
345 | pr_dbg("qp_type=%d\n", qp_type); | |
346 | ||
347 | pd = rdma_rm_get_pd(dev_res, pd_handle); | |
348 | if (!pd) { | |
349 | pr_err("Invalid pd handle (%d)\n", pd_handle); | |
350 | return -EINVAL; | |
351 | } | |
352 | ||
353 | scq = rdma_rm_get_cq(dev_res, send_cq_handle); | |
354 | rcq = rdma_rm_get_cq(dev_res, recv_cq_handle); | |
355 | ||
356 | if (!scq || !rcq) { | |
357 | pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n", | |
358 | send_cq_handle, recv_cq_handle); | |
359 | return -EINVAL; | |
360 | } | |
361 | ||
362 | qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn); | |
363 | if (!qp) { | |
364 | return -ENOMEM; | |
365 | } | |
366 | pr_dbg("rm_qpn=%d\n", rm_qpn); | |
367 | ||
368 | qp->qpn = rm_qpn; | |
369 | qp->qp_state = IBV_QPS_RESET; | |
370 | qp->qp_type = qp_type; | |
371 | qp->send_cq_handle = send_cq_handle; | |
372 | qp->recv_cq_handle = recv_cq_handle; | |
373 | qp->opaque = opaque; | |
374 | ||
375 | rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd, | |
376 | &scq->backend_cq, &rcq->backend_cq, max_send_wr, | |
377 | max_recv_wr, max_send_sge, max_recv_sge); | |
378 | if (rc) { | |
379 | rc = -EIO; | |
380 | goto out_dealloc_qp; | |
381 | } | |
382 | ||
383 | *qpn = rdma_backend_qpn(&qp->backend_qp); | |
384 | pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn); | |
385 | g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp); | |
386 | ||
387 | return 0; | |
388 | ||
389 | out_dealloc_qp: | |
390 | res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); | |
391 | ||
392 | return rc; | |
393 | } | |
394 | ||
/*
 * Apply a guest modify-QP request. When IBV_QP_STATE is set in
 * @attr_mask, the backend QP is driven through the corresponding
 * INIT/RTR/RTS transition; dgid/dqpn/rq_psn/sq_psn/qkey are forwarded
 * to the backend as each transition requires.
 *
 * Returns 0 on success, -EINVAL for an unknown qp_handle, -EPERM for
 * QP0 (SMI, unsupported), -EIO when a backend transition fails. A QP1
 * (GSI) request is accepted but intentionally ignored.
 */
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int ret;

    pr_dbg("qpn=%d\n", qp_handle);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);
    pr_dbg("attr_mask=0x%x\n", attr_mask);

    /* Special QPs: QP0 is rejected outright, QP1 is a silent success. */
    if (qp->qp_type == IBV_QPT_SMI) {
        pr_dbg("QP0 unsupported\n");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        pr_dbg("QP1\n");
        return 0;
    }

    if (attr_mask & IBV_QP_STATE) {
        /* Record the new state first; exactly one branch below matches. */
        qp->qp_state = qp_state;
        pr_dbg("qp_state=%d\n", qp->qp_state);

        if (qp->qp_state == IBV_QPS_INIT) {
            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                             qp->qp_type, qkey);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTR) {
            /* RTR needs the remote endpoint: destination GID and QPN. */
            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                            qp->qp_type, dgid, dqpn, rq_psn,
                                            qkey, attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTS) {
            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                            sq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }
    }

    return 0;
}
455 | ||
456 | void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle) | |
457 | { | |
458 | RdmaRmQP *qp; | |
459 | GBytes *key; | |
460 | ||
461 | key = g_bytes_new(&qp_handle, sizeof(qp_handle)); | |
462 | qp = g_hash_table_lookup(dev_res->qp_hash, key); | |
463 | g_hash_table_remove(dev_res->qp_hash, key); | |
464 | g_bytes_unref(key); | |
465 | ||
466 | if (!qp) { | |
467 | return; | |
468 | } | |
469 | ||
470 | rdma_backend_destroy_qp(&qp->backend_qp); | |
471 | ||
472 | res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn); | |
473 | } | |
474 | ||
475 | void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id) | |
476 | { | |
477 | void **cqe_ctx; | |
478 | ||
479 | cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id); | |
480 | if (!cqe_ctx) { | |
481 | return NULL; | |
482 | } | |
483 | ||
484 | pr_dbg("ctx=%p\n", *cqe_ctx); | |
485 | ||
486 | return *cqe_ctx; | |
487 | } | |
488 | ||
489 | int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id, | |
490 | void *ctx) | |
491 | { | |
492 | void **cqe_ctx; | |
493 | ||
494 | cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id); | |
495 | if (!cqe_ctx) { | |
496 | return -ENOMEM; | |
497 | } | |
498 | ||
499 | pr_dbg("ctx=%p\n", ctx); | |
500 | *cqe_ctx = ctx; | |
501 | ||
502 | return 0; | |
503 | } | |
504 | ||
/* Release a completion-context slot; the stored pointer is not freed. */
void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}
509 | ||
/* GDestroyNotify for qp_hash keys: drop the GBytes reference. */
static void destroy_qp_hash_key(gpointer data)
{
    g_bytes_unref(data);
}
514 | ||
/*
 * Allocate all per-device resource tables, sized from the backend device
 * attributes, plus the QP-number hash. Returns 0 on success, -ENOMEM if
 * the hash cannot be created.
 */
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
                 Error **errp)
{
    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (!dev_res->qp_hash) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
    /* One cqe-context slot per outstanding WR across all QPs.
     * NOTE(review): max_qp * max_qp_wr is an int multiply that could
     * overflow for large device caps — confirm the attrs are bounded. */
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                       dev_attr->max_qp_wr, sizeof(void *));
    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));

    return 0;
}
534 | ||
/*
 * Free every resource table created by rdma_rm_init(), then destroy the
 * QP hash (its GBytes keys are released via destroy_qp_hash_key).
 */
void rdma_rm_fini(RdmaDeviceResources *dev_res)
{
    res_tbl_free(&dev_res->uc_tbl);
    res_tbl_free(&dev_res->cqe_ctx_tbl);
    res_tbl_free(&dev_res->qp_tbl);
    res_tbl_free(&dev_res->cq_tbl);
    res_tbl_free(&dev_res->mr_tbl);
    res_tbl_free(&dev_res->pd_tbl);
    g_hash_table_destroy(dev_res->qp_hash);
}