/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2019 Mellanox Technologies, Ltd
 */
#include <stdlib.h>

#include <rte_malloc.h>
#include <rte_errno.h>
#include <rte_common.h>
#include <rte_sched_common.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_vdpa_utils.h"
#include "mlx5_vdpa.h"
18 mlx5_vdpa_pd_prepare(struct mlx5_vdpa_priv
*priv
)
20 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
23 priv
->pd
= mlx5_glue
->alloc_pd(priv
->ctx
);
24 if (priv
->pd
== NULL
) {
25 DRV_LOG(ERR
, "Failed to allocate PD.");
26 return errno
? -errno
: -ENOMEM
;
28 struct mlx5dv_obj obj
;
29 struct mlx5dv_pd pd_info
;
33 obj
.pd
.out
= &pd_info
;
34 ret
= mlx5_glue
->dv_init_obj(&obj
, MLX5DV_OBJ_PD
);
36 DRV_LOG(ERR
, "Fail to get PD object info.");
37 mlx5_glue
->dealloc_pd(priv
->pd
);
41 priv
->pdn
= pd_info
.pdn
;
45 DRV_LOG(ERR
, "Cannot get pdn - no DV support.");
47 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
51 mlx5_vdpa_mem_dereg(struct mlx5_vdpa_priv
*priv
)
53 struct mlx5_vdpa_query_mr
*entry
;
54 struct mlx5_vdpa_query_mr
*next
;
56 entry
= SLIST_FIRST(&priv
->mr_list
);
58 next
= SLIST_NEXT(entry
, next
);
59 claim_zero(mlx5_devx_cmd_destroy(entry
->mkey
));
60 if (!entry
->is_indirect
)
61 claim_zero(mlx5_glue
->devx_umem_dereg(entry
->umem
));
62 SLIST_REMOVE(&priv
->mr_list
, entry
, mlx5_vdpa_query_mr
, next
);
66 SLIST_INIT(&priv
->mr_list
);
68 claim_zero(mlx5_glue
->dereg_mr(priv
->null_mr
));
72 claim_zero(mlx5_glue
->dealloc_pd(priv
->pd
));
82 mlx5_vdpa_regions_addr_cmp(const void *a
, const void *b
)
84 const struct rte_vhost_mem_region
*region_a
= a
;
85 const struct rte_vhost_mem_region
*region_b
= b
;
87 if (region_a
->guest_phys_addr
< region_b
->guest_phys_addr
)
89 if (region_a
->guest_phys_addr
> region_b
->guest_phys_addr
)
/* Number of KLM entries needed to cover @sz bytes with max-sized entries. */
#define KLM_NUM_MAX_ALIGN(sz) (RTE_ALIGN_CEIL(sz, MLX5_MAX_KLM_BYTE_COUNT) / \
			       MLX5_MAX_KLM_BYTE_COUNT)
/*
 * Allocate and sort the region list and choose the indirect mkey mode:
 *   1. Calculate GCD, guest memory size and indirect mkey entries num per mode.
 *   2. Align GCD to the maximum allowed size (2G) and to be a power of 2.
 *   3. Decide the indirect mkey mode according to the next rules:
 *     a. If both KLM_FBS entries number and KLM entries number are bigger
 *        than the maximum allowed (MLX5_DEVX_MAX_KLM_ENTRIES) - error.
 *     b. KLM mode if KLM_FBS entries number is bigger than the maximum
 *        allowed (MLX5_DEVX_MAX_KLM_ENTRIES).
 *     c. KLM mode if GCD is smaller than the minimum allowed (4K).
 *     d. KLM mode if the total size of KLM entries is in one cache line
 *        and the total size of KLM_FBS entries is not in one cache line.
 *     e. Otherwise, KLM_FBS mode.
 */
111 static struct rte_vhost_memory
*
112 mlx5_vdpa_vhost_mem_regions_prepare(int vid
, uint8_t *mode
, uint64_t *mem_size
,
113 uint64_t *gcd
, uint32_t *entries_num
)
115 struct rte_vhost_memory
*mem
;
117 uint64_t klm_entries_num
= 0;
118 uint64_t klm_fbs_entries_num
;
120 int ret
= rte_vhost_get_mem_table(vid
, &mem
);
123 DRV_LOG(ERR
, "Failed to get VM memory layout vid =%d.", vid
);
127 qsort(mem
->regions
, mem
->nregions
, sizeof(mem
->regions
[0]),
128 mlx5_vdpa_regions_addr_cmp
);
129 *mem_size
= (mem
->regions
[(mem
->nregions
- 1)].guest_phys_addr
) +
130 (mem
->regions
[(mem
->nregions
- 1)].size
) -
131 (mem
->regions
[0].guest_phys_addr
);
133 for (i
= 0; i
< mem
->nregions
; ++i
) {
134 DRV_LOG(INFO
, "Region %u: HVA 0x%" PRIx64
", GPA 0x%" PRIx64
135 ", size 0x%" PRIx64
".", i
,
136 mem
->regions
[i
].host_user_addr
,
137 mem
->regions
[i
].guest_phys_addr
, mem
->regions
[i
].size
);
140 size
= mem
->regions
[i
].guest_phys_addr
-
141 (mem
->regions
[i
- 1].guest_phys_addr
+
142 mem
->regions
[i
- 1].size
);
143 *gcd
= rte_get_gcd(*gcd
, size
);
144 klm_entries_num
+= KLM_NUM_MAX_ALIGN(size
);
146 size
= mem
->regions
[i
].size
;
147 *gcd
= rte_get_gcd(*gcd
, size
);
148 klm_entries_num
+= KLM_NUM_MAX_ALIGN(size
);
150 if (*gcd
> MLX5_MAX_KLM_BYTE_COUNT
)
151 *gcd
= rte_get_gcd(*gcd
, MLX5_MAX_KLM_BYTE_COUNT
);
152 if (!RTE_IS_POWER_OF_2(*gcd
)) {
153 uint64_t candidate_gcd
= rte_align64prevpow2(*gcd
);
155 while (candidate_gcd
> 1 && (*gcd
% candidate_gcd
))
157 DRV_LOG(DEBUG
, "GCD 0x%" PRIx64
" is not power of 2. Adjusted "
158 "GCD is 0x%" PRIx64
".", *gcd
, candidate_gcd
);
159 *gcd
= candidate_gcd
;
161 klm_fbs_entries_num
= *mem_size
/ *gcd
;
162 if (*gcd
< MLX5_MIN_KLM_FIXED_BUFFER_SIZE
|| klm_fbs_entries_num
>
163 MLX5_DEVX_MAX_KLM_ENTRIES
||
164 ((klm_entries_num
* sizeof(struct mlx5_klm
)) <=
165 RTE_CACHE_LINE_SIZE
&& (klm_fbs_entries_num
*
166 sizeof(struct mlx5_klm
)) >
167 RTE_CACHE_LINE_SIZE
)) {
168 *mode
= MLX5_MKC_ACCESS_MODE_KLM
;
169 *entries_num
= klm_entries_num
;
170 DRV_LOG(INFO
, "Indirect mkey mode is KLM.");
172 *mode
= MLX5_MKC_ACCESS_MODE_KLM_FBS
;
173 *entries_num
= klm_fbs_entries_num
;
174 DRV_LOG(INFO
, "Indirect mkey mode is KLM Fixed Buffer Size.");
176 DRV_LOG(DEBUG
, "Memory registration information: nregions = %u, "
177 "mem_size = 0x%" PRIx64
", GCD = 0x%" PRIx64
178 ", klm_fbs_entries_num = 0x%" PRIx64
", klm_entries_num = 0x%"
179 PRIx64
".", mem
->nregions
, *mem_size
, *gcd
, klm_fbs_entries_num
,
181 if (*entries_num
> MLX5_DEVX_MAX_KLM_ENTRIES
) {
182 DRV_LOG(ERR
, "Failed to prepare memory of vid %d - memory is "
183 "too fragmented.", vid
);
/* Clamp a KLM entry size to the maximum byte count a single entry allows. */
#define KLM_SIZE_MAX_ALIGN(sz) ((sz) > MLX5_MAX_KLM_BYTE_COUNT ? \
				MLX5_MAX_KLM_BYTE_COUNT : (sz))
/*
 * The target here is to group all the physical memory regions of the
 * virtio device in one indirect mkey.
 * For KLM Fixed Buffer Size mode (HW finds the translation entry in one
 * read according to the guest physical address):
 * All the sub-direct mkeys of it must be in the same size, hence, each
 * one of them should be in the GCD size of all the virtio memory
 * regions and the holes between them.
 * For KLM mode (each entry may be in a different size so HW must iterate
 * the entries):
 * Each virtio memory region and each hole between them have one entry,
 * just need to cover the maximum allowed size (2G) by splitting entries
 * whose associated memory regions are bigger than 2G.
 * It means that each virtio memory region may be mapped to more than
 * one direct mkey in the 2 modes.
 * All the holes of invalid memory between the virtio memory regions
 * will be mapped to the null memory region for security.
 */
212 mlx5_vdpa_mem_register(struct mlx5_vdpa_priv
*priv
)
214 struct mlx5_devx_mkey_attr mkey_attr
;
215 struct mlx5_vdpa_query_mr
*entry
= NULL
;
216 struct rte_vhost_mem_region
*reg
= NULL
;
218 uint32_t entries_num
= 0;
226 struct rte_vhost_memory
*mem
= mlx5_vdpa_vhost_mem_regions_prepare
227 (priv
->vid
, &mode
, &mem_size
, &gcd
, &entries_num
);
228 struct mlx5_klm klm_array
[entries_num
];
233 ret
= mlx5_vdpa_pd_prepare(priv
);
236 priv
->null_mr
= mlx5_glue
->alloc_null_mr(priv
->pd
);
237 if (!priv
->null_mr
) {
238 DRV_LOG(ERR
, "Failed to allocate null MR.");
242 DRV_LOG(DEBUG
, "Dump fill Mkey = %u.", priv
->null_mr
->lkey
);
243 for (i
= 0; i
< mem
->nregions
; i
++) {
244 reg
= &mem
->regions
[i
];
245 entry
= rte_zmalloc(__func__
, sizeof(*entry
), 0);
248 DRV_LOG(ERR
, "Failed to allocate mem entry memory.");
251 entry
->umem
= mlx5_glue
->devx_umem_reg(priv
->ctx
,
252 (void *)(uintptr_t)reg
->host_user_addr
,
253 reg
->size
, IBV_ACCESS_LOCAL_WRITE
);
255 DRV_LOG(ERR
, "Failed to register Umem by Devx.");
259 mkey_attr
.addr
= (uintptr_t)(reg
->guest_phys_addr
);
260 mkey_attr
.size
= reg
->size
;
261 mkey_attr
.umem_id
= entry
->umem
->umem_id
;
262 mkey_attr
.pd
= priv
->pdn
;
263 mkey_attr
.pg_access
= 1;
264 mkey_attr
.klm_array
= NULL
;
265 mkey_attr
.klm_num
= 0;
266 mkey_attr
.relaxed_ordering
= 0;
267 entry
->mkey
= mlx5_devx_cmd_mkey_create(priv
->ctx
, &mkey_attr
);
269 DRV_LOG(ERR
, "Failed to create direct Mkey.");
273 entry
->addr
= (void *)(uintptr_t)(reg
->host_user_addr
);
274 entry
->length
= reg
->size
;
275 entry
->is_indirect
= 0;
278 uint64_t empty_region_sz
= reg
->guest_phys_addr
-
279 (mem
->regions
[i
- 1].guest_phys_addr
+
280 mem
->regions
[i
- 1].size
);
282 if (empty_region_sz
> 0) {
283 sadd
= mem
->regions
[i
- 1].guest_phys_addr
+
284 mem
->regions
[i
- 1].size
;
285 klm_size
= mode
== MLX5_MKC_ACCESS_MODE_KLM
?
286 KLM_SIZE_MAX_ALIGN(empty_region_sz
) : gcd
;
287 for (k
= 0; k
< empty_region_sz
;
289 klm_array
[klm_index
].byte_count
=
290 k
+ klm_size
> empty_region_sz
?
291 empty_region_sz
- k
: klm_size
;
292 klm_array
[klm_index
].mkey
=
294 klm_array
[klm_index
].address
= sadd
+ k
;
299 klm_size
= mode
== MLX5_MKC_ACCESS_MODE_KLM
?
300 KLM_SIZE_MAX_ALIGN(reg
->size
) : gcd
;
301 for (k
= 0; k
< reg
->size
; k
+= klm_size
) {
302 klm_array
[klm_index
].byte_count
= k
+ klm_size
>
303 reg
->size
? reg
->size
- k
: klm_size
;
304 klm_array
[klm_index
].mkey
= entry
->mkey
->id
;
305 klm_array
[klm_index
].address
= reg
->guest_phys_addr
+ k
;
308 SLIST_INSERT_HEAD(&priv
->mr_list
, entry
, next
);
310 mkey_attr
.addr
= (uintptr_t)(mem
->regions
[0].guest_phys_addr
);
311 mkey_attr
.size
= mem_size
;
312 mkey_attr
.pd
= priv
->pdn
;
313 mkey_attr
.umem_id
= 0;
314 /* Must be zero for KLM mode. */
315 mkey_attr
.log_entity_size
= mode
== MLX5_MKC_ACCESS_MODE_KLM_FBS
?
316 rte_log2_u64(gcd
) : 0;
317 mkey_attr
.pg_access
= 0;
318 mkey_attr
.klm_array
= klm_array
;
319 mkey_attr
.klm_num
= klm_index
;
320 entry
= rte_zmalloc(__func__
, sizeof(*entry
), 0);
322 DRV_LOG(ERR
, "Failed to allocate memory for indirect entry.");
326 entry
->mkey
= mlx5_devx_cmd_mkey_create(priv
->ctx
, &mkey_attr
);
328 DRV_LOG(ERR
, "Failed to create indirect Mkey.");
332 entry
->is_indirect
= 1;
333 SLIST_INSERT_HEAD(&priv
->mr_list
, entry
, next
);
334 priv
->gpa_mkey_index
= entry
->mkey
->id
;
339 mlx5_devx_cmd_destroy(entry
->mkey
);
341 mlx5_glue
->devx_umem_dereg(entry
->umem
);
344 mlx5_vdpa_mem_dereg(priv
);