/*
 * QEMU paravirtual RDMA - Resource Manager Implementation
 *
 * Copyright (C) 2018 Oracle
 * Copyright (C) 2018 Red Hat Inc
 *
 * Authors:
 *     Yuval Shaia <yuval.shaia@oracle.com>
 *     Marcel Apfelbaum <marcel@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "cpu.h"

#include "rdma_utils.h"
#include "rdma_backend.h"
#include "rdma_rm.h"

#define MAX_RM_TBL_NAME 16

/* Page directory and page tables */
#define PG_DIR_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
#define PG_TBL_SZ (TARGET_PAGE_SIZE / sizeof(__u64))
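
/*
 * Generic resource table: a flat array of fixed-size entries plus an
 * allocation bitmap.  A handle is simply an entry's index, so lookup and
 * deallocation are O(1); allocation scans the bitmap for the first clear
 * bit.
 */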
static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    tbl->tbl = g_malloc(tbl_sz * res_sz);

    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
    tbl->name[MAX_RM_TBL_NAME - 1] = 0;

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    qemu_mutex_init(&tbl->lock);
}

static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    qemu_mutex_destroy(&tbl->lock);
    g_free(tbl->tbl);
    g_free(tbl->bitmap);
}

static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    if ((handle < tbl->tbl_sz) && (test_bit(handle, tbl->bitmap))) {
        return tbl->tbl + handle * tbl->res_sz;
    } else {
        pr_dbg("Invalid handle %d\n", handle);
        return NULL;
    }
}

static inline void *res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    qemu_mutex_lock(&tbl->lock);

    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    /* find_first_zero_bit returns tbl_sz when no bit is clear */
    if (*handle >= tbl->tbl_sz) {
        pr_dbg("Failed to alloc, bitmap is full\n");
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);

    qemu_mutex_unlock(&tbl->lock);

    memset(tbl->tbl + *handle * tbl->res_sz, 0, tbl->res_sz);

    pr_dbg("%s, handle=%d\n", tbl->name, *handle);

    return tbl->tbl + *handle * tbl->res_sz;
}

static inline void res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    pr_dbg("%s, handle=%d\n", tbl->name, handle);

    qemu_mutex_lock(&tbl->lock);

    if (handle < tbl->tbl_sz) {
        clear_bit(handle, tbl->bitmap);
    }

    qemu_mutex_unlock(&tbl->lock);
}
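
/*
 * Protection domains (PDs): a table slot is reserved first, then the
 * matching backend PD is created; on backend failure the slot is released
 * again so the handle is not leaked.
 */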
int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *pd;
    int ret = -ENOMEM;

    pd = res_tbl_alloc(&dev_res->pd_tbl, pd_handle);
    if (!pd) {
        goto out;
    }

    ret = rdma_backend_create_pd(backend_dev, &pd->backend_pd);
    if (ret) {
        ret = -EIO;
        goto out_tbl_dealloc;
    }

    pd->ctx_handle = ctx_handle;

    return 0;

out_tbl_dealloc:
    res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);

out:
    return ret;
}

RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    return res_tbl_get(&dev_res->pd_tbl, pd_handle);
}

void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *pd = rdma_rm_get_pd(dev_res, pd_handle);

    if (pd) {
        rdma_backend_destroy_pd(&pd->backend_pd);
        res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
    }
}
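
/*
 * Memory regions (MRs) come in two flavors: with a NULL host_virt a small
 * placeholder buffer is registered (see the TODO below), otherwise the
 * guest-backed mapping described by guest_start/guest_length/host_virt is
 * registered with the backend.  For guest-backed MRs the returned lkey is
 * the rm handle itself, so send/recv processing can map a key straight
 * back to its RdmaRmMR.
 */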
int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, size_t guest_length, void *host_virt,
                     int access_flags, uint32_t *mr_handle, uint32_t *lkey,
                     uint32_t *rkey)
{
    RdmaRmMR *mr;
    int ret = 0;
    RdmaRmPD *pd;
    void *addr;
    size_t length;

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        pr_dbg("Invalid PD\n");
        return -EINVAL;
    }

    mr = res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (!mr) {
        pr_dbg("Failed to allocate obj in table\n");
        return -ENOMEM;
    }

    if (!host_virt) {
        /* TODO: This is a guess; it is not clear that a placeholder
         * region is really needed here */
        length = TARGET_PAGE_SIZE;
        addr = g_malloc(length);
    } else {
        mr->user_mr.host_virt = host_virt;
        pr_dbg("host_virt=%p\n", mr->user_mr.host_virt);
        mr->user_mr.length = guest_length;
        pr_dbg("length=0x%zx\n", guest_length);
        mr->user_mr.guest_start = guest_start;
        pr_dbg("guest_start=0x%" PRIx64 "\n", mr->user_mr.guest_start);

        length = mr->user_mr.length;
        addr = mr->user_mr.host_virt;
    }

    ret = rdma_backend_create_mr(&mr->backend_mr, &pd->backend_pd, addr, length,
                                 access_flags);
    if (ret) {
        pr_dbg("Fail in rdma_backend_create_mr, err=%d\n", ret);
        ret = -EIO;
        goto out_dealloc_mr;
    }

    if (!host_virt) {
        *lkey = mr->lkey = rdma_backend_mr_lkey(&mr->backend_mr);
        *rkey = mr->rkey = rdma_backend_mr_rkey(&mr->backend_mr);
    } else {
        /* We keep mr_handle in lkey so send and recv can get the mr ptr */
        *lkey = *mr_handle;
        *rkey = -1;
    }

    mr->pd_handle = pd_handle;

    return 0;

out_dealloc_mr:
    if (!host_virt) {
        g_free(addr); /* don't leak the placeholder buffer on failure */
    }
    res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);

    return ret;
}

RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    return res_tbl_get(&dev_res->mr_tbl, mr_handle);
}

void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *mr = rdma_rm_get_mr(dev_res, mr_handle);

    if (mr) {
        rdma_backend_destroy_mr(&mr->backend_mr);
        munmap(mr->user_mr.host_virt, mr->user_mr.length);
        res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
    }
}
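
/*
 * User contexts (UCs) are pure bookkeeping for now: no backend object is
 * created, the table slot only pins the handle the guest driver uses.
 */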
int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    RdmaRmUC *uc;

    /* TODO: Need to make sure pfn is between bar start address and
     * bar start address + RDMA_BAR2_UAR_SIZE
    if (pfn > RDMA_BAR2_UAR_SIZE) {
        pr_err("pfn out of range (%d > %d)\n", pfn, RDMA_BAR2_UAR_SIZE);
        return -ENOMEM;
    }
    */

    uc = res_tbl_alloc(&dev_res->uc_tbl, uc_handle);
    if (!uc) {
        return -ENOMEM;
    }

    return 0;
}

RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    return res_tbl_get(&dev_res->uc_tbl, uc_handle);
}

void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    RdmaRmUC *uc = rdma_rm_get_uc(dev_res, uc_handle);

    if (uc) {
        res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
    }
}

RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    return res_tbl_get(&dev_res->cq_tbl, cq_handle);
}
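
/*
 * Completion queues (CQs): the table entry carries the caller's opaque
 * pointer and a notify flag (armed via rdma_rm_req_notify_cq) alongside a
 * backend CQ sized for cqe entries.
 */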
int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    int rc;
    RdmaRmCQ *cq;

    cq = res_tbl_alloc(&dev_res->cq_tbl, cq_handle);
    if (!cq) {
        return -ENOMEM;
    }

    cq->opaque = opaque;
    cq->notify = false;

    rc = rdma_backend_create_cq(backend_dev, &cq->backend_cq, cqe);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_cq;
    }

    return 0;

out_dealloc_cq:
    rdma_rm_dealloc_cq(dev_res, *cq_handle);

    return rc;
}

void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *cq;

    pr_dbg("cq_handle=%d, notify=0x%x\n", cq_handle, notify);

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    cq->notify = notify;
    pr_dbg("notify=%d\n", cq->notify);
}

void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq;

    cq = rdma_rm_get_cq(dev_res, cq_handle);
    if (!cq) {
        return;
    }

    rdma_backend_destroy_cq(&cq->backend_cq);

    res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
}
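
/*
 * QPs are looked up by the backend-assigned queue pair number rather than
 * by the table index (rm_qpn), so a GBytes-keyed hash maps qpn -> RdmaRmQP
 * on top of the regular table.
 */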
RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
{
    GBytes *key = g_bytes_new(&qpn, sizeof(qpn));

    RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);

    g_bytes_unref(key);

    return qp;
}

int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn)
{
    int rc;
    RdmaRmQP *qp;
    RdmaRmCQ *scq, *rcq;
    RdmaRmPD *pd;
    uint32_t rm_qpn;

    pr_dbg("qp_type=%d\n", qp_type);

    pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!pd) {
        pr_err("Invalid pd handle (%d)\n", pd_handle);
        return -EINVAL;
    }

    scq = rdma_rm_get_cq(dev_res, send_cq_handle);
    rcq = rdma_rm_get_cq(dev_res, recv_cq_handle);

    if (!scq || !rcq) {
        pr_err("Invalid send_cqn or recv_cqn (%d, %d)\n",
               send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    qp = res_tbl_alloc(&dev_res->qp_tbl, &rm_qpn);
    if (!qp) {
        return -ENOMEM;
    }
    pr_dbg("rm_qpn=%d\n", rm_qpn);

    qp->qpn = rm_qpn;
    qp->qp_state = IBV_QPS_RESET;
    qp->qp_type = qp_type;
    qp->send_cq_handle = send_cq_handle;
    qp->recv_cq_handle = recv_cq_handle;
    qp->opaque = opaque;

    rc = rdma_backend_create_qp(&qp->backend_qp, qp_type, &pd->backend_pd,
                                &scq->backend_cq, &rcq->backend_cq, max_send_wr,
                                max_recv_wr, max_send_sge, max_recv_sge);
    if (rc) {
        rc = -EIO;
        goto out_dealloc_qp;
    }

    *qpn = rdma_backend_qpn(&qp->backend_qp);
    pr_dbg("rm_qpn=%d, backend_qpn=0x%x\n", rm_qpn, *qpn);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)), qp);

    return 0;

out_dealloc_qp:
    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);

    return rc;
}
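
/*
 * rdma_rm_modify_qp drives the usual IB verbs state machine: when
 * IBV_QP_STATE is set in attr_mask the backend QP is moved through
 * INIT/RTR/RTS as requested.  QP0 (SMI) is not supported and QP1 (GSI)
 * is accepted as a no-op here.
 */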
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int ret;

    pr_dbg("qpn=%d\n", qp_handle);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);
    pr_dbg("attr_mask=0x%x\n", attr_mask);

    if (qp->qp_type == IBV_QPT_SMI) {
        pr_dbg("QP0 unsupported\n");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        pr_dbg("QP1\n");
        return 0;
    }

    if (attr_mask & IBV_QP_STATE) {
        qp->qp_state = qp_state;
        pr_dbg("qp_state=%d\n", qp->qp_state);

        if (qp->qp_state == IBV_QPS_INIT) {
            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                             qp->qp_type, qkey);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTR) {
            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                            qp->qp_type, dgid, dqpn, rq_psn,
                                            qkey, attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTS) {
            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                            sq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }
    }

    return 0;
}

int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *qp;

    pr_dbg("qpn=%d\n", qp_handle);

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    pr_dbg("qp_type=%d\n", qp->qp_type);

    return rdma_backend_query_qp(&qp->backend_qp, attr, attr_mask, init_attr);
}
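
/*
 * Teardown mirrors creation: the hash entry is dropped first (its GBytes
 * key is freed by destroy_qp_hash_key), then the backend QP is destroyed
 * and the table slot keyed by the internal rm qpn is released.
 */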
void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    RdmaRmQP *qp;
    GBytes *key;

    key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    qp = g_hash_table_lookup(dev_res->qp_hash, key);
    g_hash_table_remove(dev_res->qp_hash, key);
    g_bytes_unref(key);

    if (!qp) {
        return;
    }

    rdma_backend_destroy_qp(&qp->backend_qp);

    res_tbl_dealloc(&dev_res->qp_tbl, qp->qpn);
}
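
/*
 * CQE contexts map a completion identifier back to the opaque context the
 * caller registered (typically when posting a work request); the table is
 * sized for max_qp * max_qp_wr outstanding entries, see rdma_rm_init.
 */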
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    void **cqe_ctx;

    cqe_ctx = res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return NULL;
    }

    pr_dbg("ctx=%p\n", *cqe_ctx);

    return *cqe_ctx;
}

int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **cqe_ctx;

    cqe_ctx = res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
    if (!cqe_ctx) {
        return -ENOMEM;
    }

    pr_dbg("ctx=%p\n", ctx);
    *cqe_ctx = ctx;

    return 0;
}

void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
}

static void destroy_qp_hash_key(gpointer data)
{
    g_bytes_unref(data);
}
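
/*
 * rdma_rm_init sizes every table from the backend device attributes;
 * rdma_rm_fini releases them in reverse order.
 */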
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
                 Error **errp)
{
    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (!dev_res->qp_hash) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, dev_attr->max_qp *
                 dev_attr->max_qp_wr, sizeof(void *));
    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));

    return 0;
}

void rdma_rm_fini(RdmaDeviceResources *dev_res)
{
    res_tbl_free(&dev_res->uc_tbl);
    res_tbl_free(&dev_res->cqe_ctx_tbl);
    res_tbl_free(&dev_res->qp_tbl);
    res_tbl_free(&dev_res->cq_tbl);
    res_tbl_free(&dev_res->mr_tbl);
    res_tbl_free(&dev_res->pd_tbl);
    g_hash_table_destroy(dev_res->qp_hash);
}