Commit 11fdf7f2 | Line | Data |
---|---|---|
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2018 Intel Corporation | |
3 | */ | |
4 | ||
5 | #include <inttypes.h> | |
6 | #include <string.h> | |
7 | #include <fcntl.h> | |
8 | #include <unistd.h> | |
9 | #include <sys/ioctl.h> | |
10 | ||
11 | #include <rte_errno.h> | |
12 | #include <rte_log.h> | |
13 | #include <rte_memory.h> | |
14 | #include <rte_eal_memconfig.h> | |
15 | #include <rte_vfio.h> | |
16 | ||
17 | #include "eal_filesystem.h" | |
18 | #include "eal_vfio.h" | |
19 | #include "eal_private.h" | |
20 | ||
21 | #ifdef VFIO_PRESENT | |
22 | ||
23 | #define VFIO_MEM_EVENT_CLB_NAME "vfio_mem_event_clb" | |
24 | ||
25 | /* hot plug/unplug of VFIO groups may cause all DMA maps to be dropped. we can | |
26 | * recreate the mappings for DPDK segments, but we cannot do so for memory that | |
27 | * was registered by the user themselves, so we need to store the user mappings | |
28 | * somewhere, to recreate them later. | |
29 | */ | |
30 | #define VFIO_MAX_USER_MEM_MAPS 256 | |
31 | struct user_mem_map { | |
32 | uint64_t addr; | |
33 | uint64_t iova; | |
34 | uint64_t len; | |
35 | }; | |
36 | ||
37 | struct user_mem_maps { | |
38 | rte_spinlock_recursive_t lock; | |
39 | int n_maps; | |
40 | struct user_mem_map maps[VFIO_MAX_USER_MEM_MAPS]; | |
41 | }; | |
42 | ||
43 | struct vfio_config { | |
44 | int vfio_enabled; | |
45 | int vfio_container_fd; | |
46 | int vfio_active_groups; | |
47 | const struct vfio_iommu_type *vfio_iommu_type; | |
48 | struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; | |
49 | struct user_mem_maps mem_maps; | |
50 | }; | |
51 | ||
52 | /* per-process VFIO config */ | |
53 | static struct vfio_config vfio_cfgs[VFIO_MAX_CONTAINERS]; | |
54 | static struct vfio_config *default_vfio_cfg = &vfio_cfgs[0]; | |
55 | ||
56 | static int vfio_type1_dma_map(int); | |
57 | static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); | |
58 | static int vfio_spapr_dma_map(int); | |
59 | static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); | |
60 | static int vfio_noiommu_dma_map(int); | |
61 | static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int); | |
62 | static int vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, | |
63 | uint64_t iova, uint64_t len, int do_map); | |
64 | ||
65 | /* IOMMU types we support */ | |
66 | static const struct vfio_iommu_type iommu_types[] = { | |
67 | /* x86 IOMMU, otherwise known as type 1 */ | |
68 | { | |
69 | .type_id = RTE_VFIO_TYPE1, | |
70 | .name = "Type 1", | |
71 | .dma_map_func = &vfio_type1_dma_map, | |
72 | .dma_user_map_func = &vfio_type1_dma_mem_map | |
73 | }, | |
74 | /* ppc64 IOMMU, otherwise known as spapr */ | |
75 | { | |
76 | .type_id = RTE_VFIO_SPAPR, | |
77 | .name = "sPAPR", | |
78 | .dma_map_func = &vfio_spapr_dma_map, | |
79 | .dma_user_map_func = &vfio_spapr_dma_mem_map | |
80 | }, | |
81 | /* IOMMU-less mode */ | |
82 | { | |
83 | .type_id = RTE_VFIO_NOIOMMU, | |
84 | .name = "No-IOMMU", | |
85 | .dma_map_func = &vfio_noiommu_dma_map, | |
86 | .dma_user_map_func = &vfio_noiommu_dma_mem_map | |
87 | }, | |
88 | }; | |
89 | ||
90 | static int | |
91 | is_null_map(const struct user_mem_map *map) | |
92 | { | |
93 | return map->addr == 0 && map->iova == 0 && map->len == 0; | |
94 | } | |
95 | ||
96 | /* we may need to merge user mem maps together in case of user mapping/unmapping | |
97 | * chunks of memory, so we'll need a comparator function to sort segments. | |
98 | */ | |
99 | static int | |
100 | user_mem_map_cmp(const void *a, const void *b) | |
101 | { | |
102 | const struct user_mem_map *umm_a = a; | |
103 | const struct user_mem_map *umm_b = b; | |
104 | ||
105 | /* move null entries to end */ | |
106 | if (is_null_map(umm_a)) | |
107 | return 1; | |
108 | if (is_null_map(umm_b)) | |
109 | return -1; | |
110 | ||
111 | /* sort by iova first */ | |
112 | if (umm_a->iova < umm_b->iova) | |
113 | return -1; | |
114 | if (umm_a->iova > umm_b->iova) | |
115 | return 1; | |
116 | ||
117 | if (umm_a->addr < umm_b->addr) | |
118 | return -1; | |
119 | if (umm_a->addr > umm_b->addr) | |
120 | return 1; | |
121 | ||
122 | if (umm_a->len < umm_b->len) | |
123 | return -1; | |
124 | if (umm_a->len > umm_b->len) | |
125 | return 1; | |
126 | ||
127 | return 0; | |
128 | } | |
129 | ||
130 | /* adjust user map entry. this may result in shortening of the existing map, | |
131 | * or in splitting it into two pieces. | |
132 | */ | |
133 | static void | |
134 | adjust_map(struct user_mem_map *src, struct user_mem_map *end, | |
135 | uint64_t remove_va_start, uint64_t remove_len) | |
136 | { | |
137 | /* if va start is same as start address, we're simply moving start */ | |
138 | if (remove_va_start == src->addr) { | |
139 | src->addr += remove_len; | |
140 | src->iova += remove_len; | |
141 | src->len -= remove_len; | |
142 | } else if (remove_va_start + remove_len == src->addr + src->len) { | |
143 | /* we're shrinking mapping from the end */ | |
144 | src->len -= remove_len; | |
145 | } else { | |
146 | /* we're blowing a hole in the middle */ | |
147 | struct user_mem_map tmp; | |
148 | uint64_t total_len = src->len; | |
149 | ||
150 | /* adjust source segment length */ | |
151 | src->len = remove_va_start - src->addr; | |
152 | ||
153 | /* create temporary segment in the middle */ | |
154 | tmp.addr = src->addr + src->len; | |
155 | tmp.iova = src->iova + src->len; | |
156 | tmp.len = remove_len; | |
157 | ||
158 | /* populate end segment - this one we will be keeping */ | |
159 | end->addr = tmp.addr + tmp.len; | |
160 | end->iova = tmp.iova + tmp.len; | |
161 | end->len = total_len - src->len - tmp.len; | |
162 | } | |
163 | } | |
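
To make the middle-removal case above concrete, here is a small standalone sketch; it re-states the arithmetic with example numbers rather than calling the static adjust_map() itself. Removing [0x2000, 0x3000) from a map covering [0x1000, 0x4000) leaves a 4 KiB head in `src` and a 4 KiB tail in `end`.

```c
#include <assert.h>
#include <stdint.h>

struct range { uint64_t addr, iova, len; };

int main(void)
{
	/* original mapping covers [0x1000, 0x4000); remove [0x2000, 0x3000) */
	struct range src = { .addr = 0x1000, .iova = 0x1000, .len = 0x3000 };
	struct range end;
	uint64_t remove_va_start = 0x2000, remove_len = 0x1000;
	uint64_t total_len = src.len;

	src.len  = remove_va_start - src.addr;       /* head keeps [0x1000, 0x2000) */
	end.addr = src.addr + src.len + remove_len;  /* tail resumes at 0x3000 */
	end.iova = src.iova + src.len + remove_len;
	end.len  = total_len - src.len - remove_len; /* tail keeps [0x3000, 0x4000) */

	assert(src.len == 0x1000 && end.addr == 0x3000 && end.len == 0x1000);
	return 0;
}
```
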
164 | ||
165 | /* try merging two maps into one, return 1 if succeeded */ | |
166 | static int | |
167 | merge_map(struct user_mem_map *left, struct user_mem_map *right) | |
168 | { | |
169 | if (left->addr + left->len != right->addr) | |
170 | return 0; | |
171 | if (left->iova + left->len != right->iova) | |
172 | return 0; | |
173 | ||
174 | left->len += right->len; | |
175 | ||
176 | memset(right, 0, sizeof(*right)); | |
177 | ||
178 | return 1; | |
179 | } | |
180 | ||
181 | static struct user_mem_map * | |
182 | find_user_mem_map(struct user_mem_maps *user_mem_maps, uint64_t addr, | |
183 | uint64_t iova, uint64_t len) | |
184 | { | |
185 | uint64_t va_end = addr + len; | |
186 | uint64_t iova_end = iova + len; | |
187 | int i; | |
188 | ||
189 | for (i = 0; i < user_mem_maps->n_maps; i++) { | |
190 | struct user_mem_map *map = &user_mem_maps->maps[i]; | |
191 | uint64_t map_va_end = map->addr + map->len; | |
192 | uint64_t map_iova_end = map->iova + map->len; | |
193 | ||
194 | /* check start VA */ | |
195 | if (addr < map->addr || addr >= map_va_end) | |
196 | continue; | |
197 | /* check if VA end is within boundaries */ | |
198 | if (va_end <= map->addr || va_end > map_va_end) | |
199 | continue; | |
200 | ||
201 | /* check start IOVA */ | |
202 | if (iova < map->iova || iova >= map_iova_end) | |
203 | continue; | |
204 | /* check if IOVA end is within boundaries */ | |
205 | if (iova_end <= map->iova || iova_end > map_iova_end) | |
206 | continue; | |
207 | ||
208 | /* we've found our map */ | |
209 | return map; | |
210 | } | |
211 | return NULL; | |
212 | } | |
213 | ||
214 | /* this will sort all user maps, and merge/compact any adjacent maps */ | |
215 | static void | |
216 | compact_user_maps(struct user_mem_maps *user_mem_maps) | |
217 | { | |
218 | int i, n_merged, cur_idx; | |
219 | ||
220 | qsort(user_mem_maps->maps, user_mem_maps->n_maps, | |
221 | sizeof(user_mem_maps->maps[0]), user_mem_map_cmp); | |
222 | ||
223 | /* we'll go over the list backwards when merging */ | |
224 | n_merged = 0; | |
225 | for (i = user_mem_maps->n_maps - 2; i >= 0; i--) { | |
226 | struct user_mem_map *l, *r; | |
227 | ||
228 | l = &user_mem_maps->maps[i]; | |
229 | r = &user_mem_maps->maps[i + 1]; | |
230 | ||
231 | if (is_null_map(l) || is_null_map(r)) | |
232 | continue; | |
233 | ||
234 | if (merge_map(l, r)) | |
235 | n_merged++; | |
236 | } | |
237 | ||
238 | /* the entries are still sorted, but now they have holes in them, so | |
239 | * walk through the list and remove the holes | |
240 | */ | |
241 | if (n_merged > 0) { | |
242 | cur_idx = 0; | |
243 | for (i = 0; i < user_mem_maps->n_maps; i++) { | |
244 | if (!is_null_map(&user_mem_maps->maps[i])) { | |
245 | struct user_mem_map *src, *dst; | |
246 | ||
247 | src = &user_mem_maps->maps[i]; | |
248 | dst = &user_mem_maps->maps[cur_idx++]; | |
249 | ||
250 | if (src != dst) { | |
251 | memcpy(dst, src, sizeof(*src)); | |
252 | memset(src, 0, sizeof(*src)); | |
253 | } | |
254 | } | |
255 | } | |
256 | user_mem_maps->n_maps = cur_idx; | |
257 | } | |
258 | } | |
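
A usage-level illustration of what the merge step buys: two adjacent registrations collapse into one tracked entry. This is a minimal sketch assuming the caller owns a buffer that is contiguous in both VA and IOVA space; `va` and `iova` are hypothetical caller-supplied values describing such a buffer.

```c
#include <stdint.h>
#include <rte_vfio.h>

/* register two adjacent 4 KiB chunks of a VA- and IOVA-contiguous buffer */
static int
register_adjacent_chunks(uint64_t va, uint64_t iova)
{
	if (rte_vfio_dma_map(va, iova, 0x1000) < 0)
		return -1;
	if (rte_vfio_dma_map(va + 0x1000, iova + 0x1000, 0x1000) < 0)
		return -1;
	/* container_dma_map() runs compact_user_maps() after each call, so the
	 * bookkeeping ends up holding a single {va, iova, len = 0x2000} entry.
	 */
	return 0;
}
```
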
259 | ||
260 | static int | |
261 | vfio_open_group_fd(int iommu_group_num) | |
262 | { | |
263 | int vfio_group_fd; | |
264 | char filename[PATH_MAX]; | |
265 | struct rte_mp_msg mp_req, *mp_rep; | |
266 | struct rte_mp_reply mp_reply; | |
267 | struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; | |
268 | struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; | |
269 | ||
270 | /* if primary, try to open the group */ | |
271 | if (internal_config.process_type == RTE_PROC_PRIMARY) { | |
272 | /* try regular group format */ | |
273 | snprintf(filename, sizeof(filename), | |
274 | VFIO_GROUP_FMT, iommu_group_num); | |
275 | vfio_group_fd = open(filename, O_RDWR); | |
276 | if (vfio_group_fd < 0) { | |
277 | /* if file not found, it's not an error */ | |
278 | if (errno != ENOENT) { | |
279 | RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, | |
280 | strerror(errno)); | |
281 | return -1; | |
282 | } | |
283 | ||
284 | /* special case: try no-IOMMU path as well */ | |
285 | snprintf(filename, sizeof(filename), | |
286 | VFIO_NOIOMMU_GROUP_FMT, | |
287 | iommu_group_num); | |
288 | vfio_group_fd = open(filename, O_RDWR); | |
289 | if (vfio_group_fd < 0) { | |
290 | if (errno != ENOENT) { | |
291 | RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, | |
292 | strerror(errno)); | |
293 | return -1; | |
294 | } | |
295 | return 0; | |
296 | } | |
297 | /* noiommu group found */ | |
298 | } | |
299 | ||
300 | return vfio_group_fd; | |
301 | } | |
302 | /* if we're in a secondary process, request group fd from the primary | |
303 | * process via mp channel. | |
304 | */ | |
305 | p->req = SOCKET_REQ_GROUP; | |
306 | p->group_num = iommu_group_num; | |
307 | strcpy(mp_req.name, EAL_VFIO_MP); | |
308 | mp_req.len_param = sizeof(*p); | |
309 | mp_req.num_fds = 0; | |
310 | ||
311 | vfio_group_fd = -1; | |
312 | if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && | |
313 | mp_reply.nb_received == 1) { | |
314 | mp_rep = &mp_reply.msgs[0]; | |
315 | p = (struct vfio_mp_param *)mp_rep->param; | |
316 | if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { | |
317 | vfio_group_fd = mp_rep->fds[0]; | |
318 | } else if (p->result == SOCKET_NO_FD) { | |
319 | RTE_LOG(ERR, EAL, " bad VFIO group fd\n"); | |
320 | vfio_group_fd = 0; | |
321 | } | |
322 | free(mp_reply.msgs); | |
323 | } | |
324 | ||
325 | if (vfio_group_fd < 0) | |
326 | RTE_LOG(ERR, EAL, " cannot request group fd\n"); | |
327 | return vfio_group_fd; | |
328 | } | |
329 | ||
330 | static struct vfio_config * | |
331 | get_vfio_cfg_by_group_num(int iommu_group_num) | |
332 | { | |
333 | struct vfio_config *vfio_cfg; | |
334 | int i, j; | |
335 | ||
336 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { | |
337 | vfio_cfg = &vfio_cfgs[i]; | |
338 | for (j = 0; j < VFIO_MAX_GROUPS; j++) { | |
339 | if (vfio_cfg->vfio_groups[j].group_num == | |
340 | iommu_group_num) | |
341 | return vfio_cfg; | |
342 | } | |
343 | } | |
344 | ||
345 | return NULL; | |
346 | } | |
347 | ||
348 | static struct vfio_config * | |
349 | get_vfio_cfg_by_group_fd(int vfio_group_fd) | |
350 | { | |
351 | struct vfio_config *vfio_cfg; | |
352 | int i, j; | |
353 | ||
354 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { | |
355 | vfio_cfg = &vfio_cfgs[i]; | |
356 | for (j = 0; j < VFIO_MAX_GROUPS; j++) | |
357 | if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) | |
358 | return vfio_cfg; | |
359 | } | |
360 | ||
361 | return NULL; | |
362 | } | |
363 | ||
364 | static struct vfio_config * | |
365 | get_vfio_cfg_by_container_fd(int container_fd) | |
366 | { | |
367 | int i; | |
368 | ||
369 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { | |
370 | if (vfio_cfgs[i].vfio_container_fd == container_fd) | |
371 | return &vfio_cfgs[i]; | |
372 | } | |
373 | ||
374 | return NULL; | |
375 | } | |
376 | ||
377 | int | |
378 | rte_vfio_get_group_fd(int iommu_group_num) | |
379 | { | |
380 | int i; | |
381 | int vfio_group_fd; | |
382 | struct vfio_group *cur_grp; | |
383 | struct vfio_config *vfio_cfg; | |
384 | ||
385 | /* get the vfio_config it belongs to */ | |
386 | vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); | |
387 | vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; | |
388 | ||
389 | /* check if we already have the group descriptor open */ | |
390 | for (i = 0; i < VFIO_MAX_GROUPS; i++) | |
391 | if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) | |
392 | return vfio_cfg->vfio_groups[i].fd; | |
393 | ||
394 | /* Let's first see if there is room for a new group */ | |
395 | if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { | |
396 | RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); | |
397 | return -1; | |
398 | } | |
399 | ||
400 | /* Now let's get an index for the new group */ | |
401 | for (i = 0; i < VFIO_MAX_GROUPS; i++) | |
402 | if (vfio_cfg->vfio_groups[i].group_num == -1) { | |
403 | cur_grp = &vfio_cfg->vfio_groups[i]; | |
404 | break; | |
405 | } | |
406 | ||
407 | /* This should not happen */ | |
408 | if (i == VFIO_MAX_GROUPS) { | |
409 | RTE_LOG(ERR, EAL, "No VFIO group free slot found\n"); | |
410 | return -1; | |
411 | } | |
412 | ||
413 | vfio_group_fd = vfio_open_group_fd(iommu_group_num); | |
414 | if (vfio_group_fd < 0) { | |
415 | RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); | |
416 | return -1; | |
417 | } | |
418 | ||
419 | cur_grp->group_num = iommu_group_num; | |
420 | cur_grp->fd = vfio_group_fd; | |
421 | vfio_cfg->vfio_active_groups++; | |
422 | ||
423 | return vfio_group_fd; | |
424 | } | |
425 | ||
426 | static int | |
427 | get_vfio_group_idx(int vfio_group_fd) | |
428 | { | |
429 | struct vfio_config *vfio_cfg; | |
430 | int i, j; | |
431 | ||
432 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { | |
433 | vfio_cfg = &vfio_cfgs[i]; | |
434 | for (j = 0; j < VFIO_MAX_GROUPS; j++) | |
435 | if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd) | |
436 | return j; | |
437 | } | |
438 | ||
439 | return -1; | |
440 | } | |
441 | ||
442 | static void | |
443 | vfio_group_device_get(int vfio_group_fd) | |
444 | { | |
445 | struct vfio_config *vfio_cfg; | |
446 | int i; | |
447 | ||
448 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); | |
449 | if (vfio_cfg == NULL) { | |
450 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); | |
451 | return; | |
452 | } | |
453 | ||
454 | i = get_vfio_group_idx(vfio_group_fd); | |
455 | if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) | |
456 | RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); | |
457 | else | |
458 | vfio_cfg->vfio_groups[i].devices++; | |
459 | } | |
460 | ||
461 | static void | |
462 | vfio_group_device_put(int vfio_group_fd) | |
463 | { | |
464 | struct vfio_config *vfio_cfg; | |
465 | int i; | |
466 | ||
467 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); | |
468 | if (vfio_cfg == NULL) { | |
469 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); | |
470 | return; | |
471 | } | |
472 | ||
473 | i = get_vfio_group_idx(vfio_group_fd); | |
474 | if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) | |
475 | RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); | |
476 | else | |
477 | vfio_cfg->vfio_groups[i].devices--; | |
478 | } | |
479 | ||
480 | static int | |
481 | vfio_group_device_count(int vfio_group_fd) | |
482 | { | |
483 | struct vfio_config *vfio_cfg; | |
484 | int i; | |
485 | ||
486 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); | |
487 | if (vfio_cfg == NULL) { | |
488 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); | |
489 | return -1; | |
490 | } | |
491 | ||
492 | i = get_vfio_group_idx(vfio_group_fd); | |
493 | if (i < 0 || i > (VFIO_MAX_GROUPS - 1)) { | |
494 | RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i); | |
495 | return -1; | |
496 | } | |
497 | ||
498 | return vfio_cfg->vfio_groups[i].devices; | |
499 | } | |
500 | ||
501 | static void | |
502 | vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len, | |
503 | void *arg __rte_unused) | |
504 | { | |
505 | struct rte_memseg_list *msl; | |
506 | struct rte_memseg *ms; | |
507 | size_t cur_len = 0; | |
508 | ||
509 | msl = rte_mem_virt2memseg_list(addr); | |
510 | ||
511 | /* for IOVA as VA mode, no need to care for IOVA addresses */ | |
512 | if (rte_eal_iova_mode() == RTE_IOVA_VA) { | |
513 | uint64_t vfio_va = (uint64_t)(uintptr_t)addr; | |
514 | if (type == RTE_MEM_EVENT_ALLOC) | |
515 | vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, | |
516 | len, 1); | |
517 | else | |
518 | vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va, | |
519 | len, 0); | |
520 | return; | |
521 | } | |
522 | ||
523 | /* memsegs are contiguous in memory */ | |
524 | ms = rte_mem_virt2memseg(addr, msl); | |
525 | while (cur_len < len) { | |
526 | if (type == RTE_MEM_EVENT_ALLOC) | |
527 | vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, | |
528 | ms->iova, ms->len, 1); | |
529 | else | |
530 | vfio_dma_mem_map(default_vfio_cfg, ms->addr_64, | |
531 | ms->iova, ms->len, 0); | |
532 | ||
533 | cur_len += ms->len; | |
534 | ++ms; | |
535 | } | |
536 | } | |
537 | ||
538 | int | |
539 | rte_vfio_clear_group(int vfio_group_fd) | |
540 | { | |
541 | int i; | |
542 | struct vfio_config *vfio_cfg; | |
543 | ||
544 | vfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd); | |
545 | if (vfio_cfg == NULL) { | |
546 | RTE_LOG(ERR, EAL, " invalid group fd!\n"); | |
547 | return -1; | |
548 | } | |
549 | ||
550 | i = get_vfio_group_idx(vfio_group_fd); | |
551 | if (i < 0) | |
552 | return -1; | |
553 | vfio_cfg->vfio_groups[i].group_num = -1; | |
554 | vfio_cfg->vfio_groups[i].fd = -1; | |
555 | vfio_cfg->vfio_groups[i].devices = 0; | |
556 | vfio_cfg->vfio_active_groups--; | |
557 | ||
558 | return 0; | |
559 | } | |
560 | ||
561 | int | |
562 | rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, | |
563 | int *vfio_dev_fd, struct vfio_device_info *device_info) | |
564 | { | |
565 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; | |
566 | rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock; | |
567 | struct vfio_group_status group_status = { | |
568 | .argsz = sizeof(group_status) | |
569 | }; | |
570 | struct vfio_config *vfio_cfg; | |
571 | struct user_mem_maps *user_mem_maps; | |
572 | int vfio_container_fd; | |
573 | int vfio_group_fd; | |
574 | int iommu_group_num; | |
575 | int i, ret; | |
576 | ||
577 | /* get group number */ | |
578 | ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num); | |
579 | if (ret == 0) { | |
580 | RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", | |
581 | dev_addr); | |
582 | return 1; | |
583 | } | |
584 | ||
585 | /* if negative, something failed */ | |
586 | if (ret < 0) | |
587 | return -1; | |
588 | ||
589 | /* get the actual group fd */ | |
590 | vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num); | |
591 | if (vfio_group_fd < 0) | |
592 | return -1; | |
593 | ||
594 | /* if group_fd == 0, that means the device isn't managed by VFIO */ | |
595 | if (vfio_group_fd == 0) { | |
596 | RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", | |
597 | dev_addr); | |
598 | return 1; | |
599 | } | |
600 | ||
601 | /* | |
602 | * before going further, make sure the group is viable (meaning, all devices | |
603 | * in it are either bound to VFIO or not bound to anything) | |
604 | */ | |
605 | ||
606 | /* check if the group is viable */ | |
607 | ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status); | |
608 | if (ret) { | |
609 | RTE_LOG(ERR, EAL, " %s cannot get group status, " | |
610 | "error %i (%s)\n", dev_addr, errno, strerror(errno)); | |
611 | close(vfio_group_fd); | |
612 | rte_vfio_clear_group(vfio_group_fd); | |
613 | return -1; | |
614 | } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { | |
615 | RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr); | |
616 | close(vfio_group_fd); | |
617 | rte_vfio_clear_group(vfio_group_fd); | |
618 | return -1; | |
619 | } | |
620 | ||
621 | /* get the vfio_config it belongs to */ | |
622 | vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); | |
623 | vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; | |
624 | vfio_container_fd = vfio_cfg->vfio_container_fd; | |
625 | user_mem_maps = &vfio_cfg->mem_maps; | |
626 | ||
627 | /* check if group does not have a container yet */ | |
628 | if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { | |
629 | ||
630 | /* add group to a container */ | |
631 | ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, | |
632 | &vfio_container_fd); | |
633 | if (ret) { | |
634 | RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " | |
635 | "error %i (%s)\n", dev_addr, errno, strerror(errno)); | |
636 | close(vfio_group_fd); | |
637 | rte_vfio_clear_group(vfio_group_fd); | |
638 | return -1; | |
639 | } | |
640 | ||
641 | /* | |
642 | * pick an IOMMU type and set up DMA mappings for container | |
643 | * | |
644 | * needs to be done only once, only when first group is | |
645 | * assigned to a container and only in primary process. | |
646 | * Note this can happen several times with the hotplug | |
647 | * functionality. | |
648 | */ | |
649 | if (internal_config.process_type == RTE_PROC_PRIMARY && | |
650 | vfio_cfg->vfio_active_groups == 1 && | |
651 | vfio_group_device_count(vfio_group_fd) == 0) { | |
652 | const struct vfio_iommu_type *t; | |
653 | ||
654 | /* select an IOMMU type which we will be using */ | |
655 | t = vfio_set_iommu_type(vfio_container_fd); | |
656 | if (!t) { | |
657 | RTE_LOG(ERR, EAL, | |
658 | " %s failed to select IOMMU type\n", | |
659 | dev_addr); | |
660 | close(vfio_group_fd); | |
661 | rte_vfio_clear_group(vfio_group_fd); | |
662 | return -1; | |
663 | } | |
664 | /* lock memory hotplug before mapping and release it | |
665 | * after registering callback, to prevent races | |
666 | */ | |
667 | rte_rwlock_read_lock(mem_lock); | |
668 | if (vfio_cfg == default_vfio_cfg && | |
669 | (internal_config.single_file_segments == 0 || | |
670 | internal_config.legacy_mem == 0)) | |
671 | ret = t->dma_map_func(vfio_container_fd); | |
672 | else | |
673 | ret = 0; | |
674 | if (ret) { | |
675 | RTE_LOG(ERR, EAL, | |
676 | " %s DMA remapping failed, error %i (%s)\n", | |
677 | dev_addr, errno, strerror(errno)); | |
678 | close(vfio_group_fd); | |
679 | rte_vfio_clear_group(vfio_group_fd); | |
680 | rte_rwlock_read_unlock(mem_lock); | |
681 | return -1; | |
682 | } | |
683 | ||
684 | vfio_cfg->vfio_iommu_type = t; | |
685 | ||
686 | /* re-map all user-mapped segments */ | |
687 | rte_spinlock_recursive_lock(&user_mem_maps->lock); | |
688 | ||
689 | /* this IOMMU type may not support DMA mapping, but | |
690 | * if we have mappings in the list - that means we have | |
691 | * previously mapped something successfully, so we can | |
692 | * be sure that DMA mapping is supported. | |
693 | */ | |
694 | for (i = 0; i < user_mem_maps->n_maps; i++) { | |
695 | struct user_mem_map *map; | |
696 | map = &user_mem_maps->maps[i]; | |
697 | ||
698 | ret = t->dma_user_map_func( | |
699 | vfio_container_fd, | |
700 | map->addr, map->iova, map->len, | |
701 | 1); | |
702 | if (ret) { | |
703 | RTE_LOG(ERR, EAL, "Couldn't map user memory for DMA: " | |
704 | "va: 0x%" PRIx64 " " | |
705 | "iova: 0x%" PRIx64 " " | |
706 | "len: 0x%" PRIu64 "\n", | |
707 | map->addr, map->iova, | |
708 | map->len); | |
709 | rte_spinlock_recursive_unlock( | |
710 | &user_mem_maps->lock); | |
711 | rte_rwlock_read_unlock(mem_lock); | |
712 | return -1; | |
713 | } | |
714 | } | |
715 | rte_spinlock_recursive_unlock(&user_mem_maps->lock); | |
716 | ||
717 | /* register callback for mem events */ | |
718 | if (vfio_cfg == default_vfio_cfg) | |
719 | ret = rte_mem_event_callback_register( | |
720 | VFIO_MEM_EVENT_CLB_NAME, | |
721 | vfio_mem_event_callback, NULL); | |
722 | else | |
723 | ret = 0; | |
724 | /* unlock memory hotplug */ | |
725 | rte_rwlock_read_unlock(mem_lock); | |
726 | ||
727 | if (ret && rte_errno != ENOTSUP) { | |
728 | RTE_LOG(ERR, EAL, "Could not install memory event callback for VFIO\n"); | |
729 | return -1; | |
730 | } | |
731 | if (ret) | |
732 | RTE_LOG(DEBUG, EAL, "Memory event callbacks not supported\n"); | |
733 | else | |
734 | RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n"); | |
735 | } | |
736 | } | |
737 | ||
738 | /* get a file descriptor for the device */ | |
739 | *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr); | |
740 | if (*vfio_dev_fd < 0) { | |
741 | /* if we cannot get a device fd, this implies a problem with | |
742 | * the VFIO group or the container not having IOMMU configured. | |
743 | */ | |
744 | ||
745 | RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n", | |
746 | dev_addr); | |
747 | close(vfio_group_fd); | |
748 | rte_vfio_clear_group(vfio_group_fd); | |
749 | return -1; | |
750 | } | |
751 | ||
752 | /* test and setup the device */ | |
753 | ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info); | |
754 | if (ret) { | |
755 | RTE_LOG(ERR, EAL, " %s cannot get device info, " | |
756 | "error %i (%s)\n", dev_addr, errno, | |
757 | strerror(errno)); | |
758 | close(*vfio_dev_fd); | |
759 | close(vfio_group_fd); | |
760 | rte_vfio_clear_group(vfio_group_fd); | |
761 | return -1; | |
762 | } | |
763 | vfio_group_device_get(vfio_group_fd); | |
764 | ||
765 | return 0; | |
766 | } | |
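
For context, this is roughly how a bus driver consumes the function above. A minimal sketch; the sysfs base "/sys/bus/pci/devices" and the PCI address are made-up example values a caller would supply.

```c
#include <string.h>
#include <linux/vfio.h>
#include <rte_vfio.h>

/* attach and later detach one device through VFIO */
static int
attach_example_device(void)
{
	struct vfio_device_info dev_info;
	int dev_fd, ret;

	memset(&dev_info, 0, sizeof(dev_info));
	dev_info.argsz = sizeof(dev_info);

	ret = rte_vfio_setup_device("/sys/bus/pci/devices", "0000:06:00.0",
			&dev_fd, &dev_info);
	if (ret < 0)
		return -1;	/* setup failed */
	if (ret > 0)
		return 0;	/* device not managed by VFIO, nothing to do */

	/* ... query regions/interrupts via dev_fd and program the device ... */

	return rte_vfio_release_device("/sys/bus/pci/devices", "0000:06:00.0",
			dev_fd);
}
```
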
767 | ||
768 | int | |
769 | rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, | |
770 | int vfio_dev_fd) | |
771 | { | |
772 | struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; | |
773 | rte_rwlock_t *mem_lock = &mcfg->memory_hotplug_lock; | |
774 | struct vfio_group_status group_status = { | |
775 | .argsz = sizeof(group_status) | |
776 | }; | |
777 | struct vfio_config *vfio_cfg; | |
778 | int vfio_group_fd; | |
779 | int iommu_group_num; | |
780 | int ret; | |
781 | ||
782 | /* we don't want any DMA mapping messages to come while we're detaching | |
783 | * VFIO device, because this might be the last device and we might need | |
784 | * to unregister the callback. | |
785 | */ | |
786 | rte_rwlock_read_lock(mem_lock); | |
787 | ||
788 | /* get group number */ | |
789 | ret = rte_vfio_get_group_num(sysfs_base, dev_addr, &iommu_group_num); | |
790 | if (ret <= 0) { | |
791 | RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n", | |
792 | dev_addr); | |
793 | /* This is an error at this point. */ | |
794 | ret = -1; | |
795 | goto out; | |
796 | } | |
797 | ||
798 | /* get the actual group fd */ | |
799 | vfio_group_fd = rte_vfio_get_group_fd(iommu_group_num); | |
800 | if (vfio_group_fd <= 0) { | |
801 | RTE_LOG(INFO, EAL, "rte_vfio_get_group_fd failed for %s\n", | |
802 | dev_addr); | |
803 | ret = -1; | |
804 | goto out; | |
805 | } | |
806 | ||
807 | /* get the vfio_config it belongs to */ | |
808 | vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num); | |
809 | vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg; | |
810 | ||
811 | /* At this point we have an active group. Closing it will detach it from | |
812 | * the container. If this is the last active group, VFIO kernel | |
813 | * code will unset the container and the IOMMU mappings. | |
814 | */ | |
815 | ||
816 | /* Closing a device */ | |
817 | if (close(vfio_dev_fd) < 0) { | |
818 | RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n", | |
819 | dev_addr); | |
820 | ret = -1; | |
821 | goto out; | |
822 | } | |
823 | ||
824 | /* A VFIO group can have several devices attached. Only when there are | |
825 | * no devices remaining should the group be closed. | |
826 | */ | |
827 | vfio_group_device_put(vfio_group_fd); | |
828 | if (!vfio_group_device_count(vfio_group_fd)) { | |
829 | ||
830 | if (close(vfio_group_fd) < 0) { | |
831 | RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n", | |
832 | dev_addr); | |
833 | ret = -1; | |
834 | goto out; | |
835 | } | |
836 | ||
837 | if (rte_vfio_clear_group(vfio_group_fd) < 0) { | |
838 | RTE_LOG(INFO, EAL, "Error when clearing group for %s\n", | |
839 | dev_addr); | |
840 | ret = -1; | |
841 | goto out; | |
842 | } | |
843 | } | |
844 | ||
845 | /* if there are no active device groups, unregister the callback to | |
846 | * avoid spurious attempts to map/unmap memory from VFIO. | |
847 | */ | |
848 | if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0) | |
849 | rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME, | |
850 | NULL); | |
851 | ||
852 | /* success */ | |
853 | ret = 0; | |
854 | ||
855 | out: | |
856 | rte_rwlock_read_unlock(mem_lock); | |
857 | return ret; | |
858 | } | |
859 | ||
860 | int | |
861 | rte_vfio_enable(const char *modname) | |
862 | { | |
863 | /* initialize group list */ | |
864 | int i, j; | |
865 | int vfio_available; | |
866 | ||
867 | rte_spinlock_recursive_t lock = RTE_SPINLOCK_RECURSIVE_INITIALIZER; | |
868 | ||
869 | for (i = 0; i < VFIO_MAX_CONTAINERS; i++) { | |
870 | vfio_cfgs[i].vfio_container_fd = -1; | |
871 | vfio_cfgs[i].vfio_active_groups = 0; | |
872 | vfio_cfgs[i].vfio_iommu_type = NULL; | |
873 | vfio_cfgs[i].mem_maps.lock = lock; | |
874 | ||
875 | for (j = 0; j < VFIO_MAX_GROUPS; j++) { | |
876 | vfio_cfgs[i].vfio_groups[j].fd = -1; | |
877 | vfio_cfgs[i].vfio_groups[j].group_num = -1; | |
878 | vfio_cfgs[i].vfio_groups[j].devices = 0; | |
879 | } | |
880 | } | |
881 | ||
882 | /* inform the user that we are probing for VFIO */ | |
883 | RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); | |
884 | ||
885 | /* check if vfio module is loaded */ | |
886 | vfio_available = rte_eal_check_module(modname); | |
887 | ||
888 | /* return error directly */ | |
889 | if (vfio_available == -1) { | |
890 | RTE_LOG(INFO, EAL, "Could not get loaded module details!\n"); | |
891 | return -1; | |
892 | } | |
893 | ||
894 | /* return 0 if VFIO modules not loaded */ | |
895 | if (vfio_available == 0) { | |
896 | RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, " | |
897 | "skipping VFIO support...\n"); | |
898 | return 0; | |
899 | } | |
900 | ||
901 | default_vfio_cfg->vfio_container_fd = rte_vfio_get_container_fd(); | |
902 | ||
903 | /* check if we have VFIO driver enabled */ | |
904 | if (default_vfio_cfg->vfio_container_fd != -1) { | |
905 | RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); | |
906 | default_vfio_cfg->vfio_enabled = 1; | |
907 | } else { | |
908 | RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); | |
909 | } | |
910 | ||
911 | return 0; | |
912 | } | |
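
A short sketch of how initialization code typically drives the probe above. Passing "vfio" as the module name mirrors how EAL init is expected to call it; that caller lives outside this file, so treat the module name as an assumption.

```c
#include <stdio.h>
#include <rte_vfio.h>

/* probe for VFIO support and report whether it is usable */
static void
probe_vfio(void)
{
	if (rte_vfio_enable("vfio") < 0)
		return;		/* could not even query loaded modules */

	if (rte_vfio_is_enabled("vfio"))
		printf("VFIO is usable\n");
	else
		printf("VFIO module not loaded or container unavailable\n");
}
```
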
913 | ||
914 | int | |
915 | rte_vfio_is_enabled(const char *modname) | |
916 | { | |
917 | const int mod_available = rte_eal_check_module(modname) > 0; | |
918 | return default_vfio_cfg->vfio_enabled && mod_available; | |
919 | } | |
920 | ||
921 | const struct vfio_iommu_type * | |
922 | vfio_set_iommu_type(int vfio_container_fd) | |
923 | { | |
924 | unsigned idx; | |
925 | for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { | |
926 | const struct vfio_iommu_type *t = &iommu_types[idx]; | |
927 | ||
928 | int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, | |
929 | t->type_id); | |
930 | if (!ret) { | |
931 | RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n", | |
932 | t->type_id, t->name); | |
933 | return t; | |
934 | } | |
935 | /* not an error, there may be more supported IOMMU types */ | |
936 | RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, " | |
937 | "error %i (%s)\n", t->type_id, t->name, errno, | |
938 | strerror(errno)); | |
939 | } | |
940 | /* if we didn't find a suitable IOMMU type, fail */ | |
941 | return NULL; | |
942 | } | |
943 | ||
944 | int | |
945 | vfio_has_supported_extensions(int vfio_container_fd) | |
946 | { | |
947 | int ret; | |
948 | unsigned idx, n_extensions = 0; | |
949 | for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { | |
950 | const struct vfio_iommu_type *t = &iommu_types[idx]; | |
951 | ||
952 | ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, | |
953 | t->type_id); | |
954 | if (ret < 0) { | |
955 | RTE_LOG(ERR, EAL, " could not get IOMMU type, " | |
956 | "error %i (%s)\n", errno, | |
957 | strerror(errno)); | |
958 | close(vfio_container_fd); | |
959 | return -1; | |
960 | } else if (ret == 1) { | |
961 | /* we found a supported extension */ | |
962 | n_extensions++; | |
963 | } | |
964 | RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n", | |
965 | t->type_id, t->name, | |
966 | ret ? "supported" : "not supported"); | |
967 | } | |
968 | ||
969 | /* if we didn't find any supported IOMMU types, fail */ | |
970 | if (!n_extensions) { | |
971 | close(vfio_container_fd); | |
972 | return -1; | |
973 | } | |
974 | ||
975 | return 0; | |
976 | } | |
977 | ||
978 | int | |
979 | rte_vfio_get_container_fd(void) | |
980 | { | |
981 | int ret, vfio_container_fd; | |
982 | struct rte_mp_msg mp_req, *mp_rep; | |
983 | struct rte_mp_reply mp_reply; | |
984 | struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; | |
985 | struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; | |
986 | ||
987 | ||
988 | /* if we're in a primary process, try to open the container */ | |
989 | if (internal_config.process_type == RTE_PROC_PRIMARY) { | |
990 | vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); | |
991 | if (vfio_container_fd < 0) { | |
992 | RTE_LOG(ERR, EAL, " cannot open VFIO container, " | |
993 | "error %i (%s)\n", errno, strerror(errno)); | |
994 | return -1; | |
995 | } | |
996 | ||
997 | /* check VFIO API version */ | |
998 | ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION); | |
999 | if (ret != VFIO_API_VERSION) { | |
1000 | if (ret < 0) | |
1001 | RTE_LOG(ERR, EAL, " could not get VFIO API version, " | |
1002 | "error %i (%s)\n", errno, strerror(errno)); | |
1003 | else | |
1004 | RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n"); | |
1005 | close(vfio_container_fd); | |
1006 | return -1; | |
1007 | } | |
1008 | ||
1009 | ret = vfio_has_supported_extensions(vfio_container_fd); | |
1010 | if (ret) { | |
1011 | RTE_LOG(ERR, EAL, " no supported IOMMU " | |
1012 | "extensions found!\n"); | |
1013 | return -1; | |
1014 | } | |
1015 | ||
1016 | return vfio_container_fd; | |
1017 | } | |
1018 | /* | |
1019 | * if we're in a secondary process, request container fd from the | |
1020 | * primary process via mp channel | |
1021 | */ | |
1022 | p->req = SOCKET_REQ_CONTAINER; | |
1023 | strcpy(mp_req.name, EAL_VFIO_MP); | |
1024 | mp_req.len_param = sizeof(*p); | |
1025 | mp_req.num_fds = 0; | |
1026 | ||
1027 | vfio_container_fd = -1; | |
1028 | if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && | |
1029 | mp_reply.nb_received == 1) { | |
1030 | mp_rep = &mp_reply.msgs[0]; | |
1031 | p = (struct vfio_mp_param *)mp_rep->param; | |
1032 | if (p->result == SOCKET_OK && mp_rep->num_fds == 1) { | |
1033 | free(mp_reply.msgs); | |
1034 | return mp_rep->fds[0]; | |
1035 | } | |
1036 | free(mp_reply.msgs); | |
1037 | } | |
1038 | ||
1039 | RTE_LOG(ERR, EAL, " cannot request container fd\n"); | |
1040 | return -1; | |
1041 | } | |
1042 | ||
1043 | int | |
1044 | rte_vfio_get_group_num(const char *sysfs_base, | |
1045 | const char *dev_addr, int *iommu_group_num) | |
1046 | { | |
1047 | char linkname[PATH_MAX]; | |
1048 | char filename[PATH_MAX]; | |
1049 | char *tok[16], *group_tok, *end; | |
1050 | int ret; | |
1051 | ||
1052 | memset(linkname, 0, sizeof(linkname)); | |
1053 | memset(filename, 0, sizeof(filename)); | |
1054 | ||
1055 | /* try to find out IOMMU group for this device */ | |
1056 | snprintf(linkname, sizeof(linkname), | |
1057 | "%s/%s/iommu_group", sysfs_base, dev_addr); | |
1058 | ||
1059 | ret = readlink(linkname, filename, sizeof(filename)); | |
1060 | ||
1061 | /* if the link doesn't exist, no VFIO for us */ | |
1062 | if (ret < 0) | |
1063 | return 0; | |
1064 | ||
1065 | ret = rte_strsplit(filename, sizeof(filename), | |
1066 | tok, RTE_DIM(tok), '/'); | |
1067 | ||
1068 | if (ret <= 0) { | |
1069 | RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", dev_addr); | |
1070 | return -1; | |
1071 | } | |
1072 | ||
1073 | /* IOMMU group is always the last token */ | |
1074 | errno = 0; | |
1075 | group_tok = tok[ret - 1]; | |
1076 | end = group_tok; | |
1077 | *iommu_group_num = strtol(group_tok, &end, 10); | |
1078 | if ((end != group_tok && *end != '\0') || errno != 0) { | |
1079 | RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr); | |
1080 | return -1; | |
1081 | } | |
1082 | ||
1083 | return 1; | |
1084 | } | |
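
Concretely, for a device whose sysfs iommu_group symlink ends in .../kernel/iommu_groups/42, the function returns 1 and yields 42. A minimal sketch with a made-up PCI address:

```c
#include <stdio.h>
#include <rte_vfio.h>

/* resolve and print the IOMMU group of an example device */
static void
print_iommu_group(void)
{
	int group_num, ret;

	ret = rte_vfio_get_group_num("/sys/bus/pci/devices", "0000:06:00.0",
			&group_num);
	if (ret == 1)
		printf("IOMMU group: %d\n", group_num);
	else if (ret == 0)
		printf("device has no IOMMU group (not usable with VFIO)\n");
	else
		printf("failed to resolve IOMMU group\n");
}
```
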
1085 | ||
1086 | static int | |
1087 | type1_map(const struct rte_memseg_list *msl __rte_unused, | |
1088 | const struct rte_memseg *ms, void *arg) | |
1089 | { | |
1090 | int *vfio_container_fd = arg; | |
1091 | ||
1092 | return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, | |
1093 | ms->len, 1); | |
1094 | } | |
1095 | ||
1096 | static int | |
1097 | vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, | |
1098 | uint64_t len, int do_map) | |
1099 | { | |
1100 | struct vfio_iommu_type1_dma_map dma_map; | |
1101 | struct vfio_iommu_type1_dma_unmap dma_unmap; | |
1102 | int ret; | |
1103 | ||
1104 | if (do_map != 0) { | |
1105 | memset(&dma_map, 0, sizeof(dma_map)); | |
1106 | dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); | |
1107 | dma_map.vaddr = vaddr; | |
1108 | dma_map.size = len; | |
1109 | dma_map.iova = iova; | |
1110 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | | |
1111 | VFIO_DMA_MAP_FLAG_WRITE; | |
1112 | ||
1113 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); | |
1114 | if (ret) { | |
1115 | RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n", | |
1116 | errno, strerror(errno)); | |
1117 | return -1; | |
1118 | } | |
1119 | } else { | |
1120 | memset(&dma_unmap, 0, sizeof(dma_unmap)); | |
1121 | dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); | |
1122 | dma_unmap.size = len; | |
1123 | dma_unmap.iova = iova; | |
1124 | ||
1125 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, | |
1126 | &dma_unmap); | |
1127 | if (ret) { | |
1128 | RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n", | |
1129 | errno, strerror(errno)); | |
1130 | return -1; | |
1131 | } | |
1132 | } | |
1133 | ||
1134 | return 0; | |
1135 | } | |
1136 | ||
1137 | static int | |
1138 | vfio_type1_dma_map(int vfio_container_fd) | |
1139 | { | |
1140 | return rte_memseg_walk(type1_map, &vfio_container_fd); | |
1141 | } | |
1142 | ||
1143 | static int | |
1144 | vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, | |
1145 | uint64_t len, int do_map) | |
1146 | { | |
1147 | struct vfio_iommu_type1_dma_map dma_map; | |
1148 | struct vfio_iommu_type1_dma_unmap dma_unmap; | |
1149 | int ret; | |
1150 | ||
1151 | if (do_map != 0) { | |
1152 | memset(&dma_map, 0, sizeof(dma_map)); | |
1153 | dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); | |
1154 | dma_map.vaddr = vaddr; | |
1155 | dma_map.size = len; | |
1156 | dma_map.iova = iova; | |
1157 | dma_map.flags = VFIO_DMA_MAP_FLAG_READ | | |
1158 | VFIO_DMA_MAP_FLAG_WRITE; | |
1159 | ||
1160 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); | |
1161 | if (ret) { | |
1162 | RTE_LOG(ERR, EAL, " cannot set up DMA remapping, error %i (%s)\n", | |
1163 | errno, strerror(errno)); | |
1164 | return -1; | |
1165 | } | |
1166 | ||
1167 | } else { | |
1168 | struct vfio_iommu_spapr_register_memory reg = { | |
1169 | .argsz = sizeof(reg), | |
1170 | .flags = 0 | |
1171 | }; | |
1172 | reg.vaddr = (uintptr_t) vaddr; | |
1173 | reg.size = len; | |
1174 | ||
1175 | ret = ioctl(vfio_container_fd, | |
1176 | VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, ®); | |
1177 | if (ret) { | |
1178 | RTE_LOG(ERR, EAL, " cannot unregister vaddr for IOMMU, error %i (%s)\n", | |
1179 | errno, strerror(errno)); | |
1180 | return -1; | |
1181 | } | |
1182 | ||
1183 | memset(&dma_unmap, 0, sizeof(dma_unmap)); | |
1184 | dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap); | |
1185 | dma_unmap.size = len; | |
1186 | dma_unmap.iova = iova; | |
1187 | ||
1188 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, | |
1189 | &dma_unmap); | |
1190 | if (ret) { | |
1191 | RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n", | |
1192 | errno, strerror(errno)); | |
1193 | return -1; | |
1194 | } | |
1195 | } | |
1196 | ||
1197 | return 0; | |
1198 | } | |
1199 | ||
1200 | static int | |
1201 | vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused, | |
1202 | const struct rte_memseg *ms, void *arg) | |
1203 | { | |
1204 | int *vfio_container_fd = arg; | |
1205 | ||
1206 | return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova, | |
1207 | ms->len, 1); | |
1208 | } | |
1209 | ||
1210 | struct spapr_walk_param { | |
1211 | uint64_t window_size; | |
1212 | uint64_t hugepage_sz; | |
1213 | }; | |
1214 | static int | |
1215 | vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused, | |
1216 | const struct rte_memseg *ms, void *arg) | |
1217 | { | |
1218 | struct spapr_walk_param *param = arg; | |
1219 | uint64_t max = ms->iova + ms->len; | |
1220 | ||
1221 | if (max > param->window_size) { | |
1222 | param->hugepage_sz = ms->hugepage_sz; | |
1223 | param->window_size = max; | |
1224 | } | |
1225 | ||
1226 | return 0; | |
1227 | } | |
1228 | ||
1229 | static int | |
1230 | vfio_spapr_create_new_dma_window(int vfio_container_fd, | |
1231 | struct vfio_iommu_spapr_tce_create *create) { | |
1232 | struct vfio_iommu_spapr_tce_remove remove = { | |
1233 | .argsz = sizeof(remove), | |
1234 | }; | |
1235 | struct vfio_iommu_spapr_tce_info info = { | |
1236 | .argsz = sizeof(info), | |
1237 | }; | |
1238 | int ret; | |
1239 | ||
1240 | /* query spapr iommu info */ | |
1241 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info); | |
1242 | if (ret) { | |
1243 | RTE_LOG(ERR, EAL, " cannot get iommu info, " | |
1244 | "error %i (%s)\n", errno, strerror(errno)); | |
1245 | return -1; | |
1246 | } | |
1247 | ||
1248 | /* remove the default 32-bit DMA window */ | |
1249 | remove.start_addr = info.dma32_window_start; | |
1250 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove); | |
1251 | if (ret) { | |
1252 | RTE_LOG(ERR, EAL, " cannot remove default DMA window, " | |
1253 | "error %i (%s)\n", errno, strerror(errno)); | |
1254 | return -1; | |
1255 | } | |
1256 | ||
1257 | /* create new DMA window */ | |
1258 | ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create); | |
1259 | if (ret) { | |
1260 | RTE_LOG(ERR, EAL, " cannot create new DMA window, " | |
1261 | "error %i (%s)\n", errno, strerror(errno)); | |
1262 | return -1; | |
1263 | } | |
1264 | ||
1265 | if (create->start_addr != 0) { | |
1266 | RTE_LOG(ERR, EAL, " DMA window start address != 0\n"); | |
1267 | return -1; | |
1268 | } | |
1269 | ||
1270 | return 0; | |
1271 | } | |
1272 | ||
1273 | static int | |
1274 | vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova, | |
1275 | uint64_t len, int do_map) | |
1276 | { | |
1277 | struct spapr_walk_param param; | |
1278 | struct vfio_iommu_spapr_tce_create create = { | |
1279 | .argsz = sizeof(create), | |
1280 | }; | |
1281 | struct vfio_config *vfio_cfg; | |
1282 | struct user_mem_maps *user_mem_maps; | |
1283 | int i, ret = 0; | |
1284 | ||
1285 | vfio_cfg = get_vfio_cfg_by_container_fd(vfio_container_fd); | |
1286 | if (vfio_cfg == NULL) { | |
1287 | RTE_LOG(ERR, EAL, " invalid container fd!\n"); | |
1288 | return -1; | |
1289 | } | |
1290 | ||
1291 | user_mem_maps = &vfio_cfg->mem_maps; | |
1292 | rte_spinlock_recursive_lock(&user_mem_maps->lock); | |
1293 | ||
1294 | /* check if window size needs to be adjusted */ | |
1295 | memset(¶m, 0, sizeof(param)); | |
1296 | ||
1297 | /* we're inside a callback so use thread-unsafe version */ | |
1298 | if (rte_memseg_walk_thread_unsafe(vfio_spapr_window_size_walk, | |
1299 | ¶m) < 0) { | |
1300 | RTE_LOG(ERR, EAL, "Could not get window size\n"); | |
1301 | ret = -1; | |
1302 | goto out; | |
1303 | } | |
1304 | ||
1305 | /* also check user maps */ | |
1306 | for (i = 0; i < user_mem_maps->n_maps; i++) { | |
1307 | uint64_t max = user_mem_maps->maps[i].iova + | |
1308 | user_mem_maps->maps[i].len; | |
1309 | param.window_size = RTE_MAX(param.window_size, max); | |
1310 | } | |
1311 | ||
1312 | /* sPAPR requires window size to be a power of 2 */ | |
1313 | create.window_size = rte_align64pow2(param.window_size); | |
1314 | create.page_shift = __builtin_ctzll(param.hugepage_sz); | |
1315 | create.levels = 1; | |
1316 | ||
1317 | if (do_map) { | |
1318 | void *addr; | |
1319 | /* re-create window and remap the entire memory */ | |
1320 | if (iova > create.window_size) { | |
1321 | if (vfio_spapr_create_new_dma_window(vfio_container_fd, | |
1322 | &create) < 0) { | |
1323 | RTE_LOG(ERR, EAL, "Could not create new DMA window\n"); | |
1324 | ret = -1; | |
1325 | goto out; | |
1326 | } | |
1327 | /* we're inside a callback, so use thread-unsafe version | |
1328 | */ | |
1329 | if (rte_memseg_walk_thread_unsafe(vfio_spapr_map_walk, | |
1330 | &vfio_container_fd) < 0) { | |
1331 | RTE_LOG(ERR, EAL, "Could not recreate DMA maps\n"); | |
1332 | ret = -1; | |
1333 | goto out; | |
1334 | } | |
1335 | /* remap all user maps */ | |
1336 | for (i = 0; i < user_mem_maps->n_maps; i++) { | |
1337 | struct user_mem_map *map = | |
1338 | &user_mem_maps->maps[i]; | |
1339 | if (vfio_spapr_dma_do_map(vfio_container_fd, | |
1340 | map->addr, map->iova, map->len, | |
1341 | 1)) { | |
1342 | RTE_LOG(ERR, EAL, "Could not recreate user DMA maps\n"); | |
1343 | ret = -1; | |
1344 | goto out; | |
1345 | } | |
1346 | } | |
1347 | } | |
1348 | ||
1349 | /* now that we've remapped all of the memory that was present | |
1350 | * before, map the segment that we were requested to map. | |
1351 | * | |
1352 | * however, if we were called by the callback, the memory we | |
1353 | * were called with was already in the memseg list, so previous | |
1354 | * mapping should've mapped that segment already. | |
1355 | * | |
1356 | * virt2memseg_list is a relatively cheap check, so use that. if | |
1357 | * memory is within any memseg list, it's a memseg, so it's | |
1358 | * already mapped. | |
1359 | */ | |
1360 | addr = (void *)(uintptr_t)vaddr; | |
1361 | if (rte_mem_virt2memseg_list(addr) == NULL && | |
1362 | vfio_spapr_dma_do_map(vfio_container_fd, | |
1363 | vaddr, iova, len, 1) < 0) { | |
1364 | RTE_LOG(ERR, EAL, "Could not map segment\n"); | |
1365 | ret = -1; | |
1366 | goto out; | |
1367 | } | |
1368 | } else { | |
1369 | /* for unmap, check if iova within DMA window */ | |
1370 | if (iova > create.window_size) { | |
1371 | RTE_LOG(ERR, EAL, "iova beyond DMA window for unmap"); | |
1372 | ret = -1; | |
1373 | goto out; | |
1374 | } | |
1375 | ||
1376 | vfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0); | |
1377 | } | |
1378 | out: | |
1379 | rte_spinlock_recursive_unlock(&user_mem_maps->lock); | |
1380 | return ret; | |
1381 | } | |
1382 | ||
1383 | static int | |
1384 | vfio_spapr_dma_map(int vfio_container_fd) | |
1385 | { | |
1386 | struct vfio_iommu_spapr_tce_create create = { | |
1387 | .argsz = sizeof(create), | |
1388 | }; | |
1389 | struct spapr_walk_param param; | |
1390 | ||
1391 | memset(¶m, 0, sizeof(param)); | |
1392 | ||
1393 | /* create DMA window from 0 to max(phys_addr + len) */ | |
1394 | rte_memseg_walk(vfio_spapr_window_size_walk, ¶m); | |
1395 | ||
1396 | /* sPAPR requires window size to be a power of 2 */ | |
1397 | create.window_size = rte_align64pow2(param.window_size); | |
1398 | create.page_shift = __builtin_ctzll(param.hugepage_sz); | |
1399 | create.levels = 1; | |
1400 | ||
1401 | if (vfio_spapr_create_new_dma_window(vfio_container_fd, &create) < 0) { | |
1402 | RTE_LOG(ERR, EAL, "Could not create new DMA window\n"); | |
1403 | return -1; | |
1404 | } | |
1405 | ||
1406 | /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ | |
1407 | if (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0) | |
1408 | return -1; | |
1409 | ||
1410 | return 0; | |
1411 | } | |
1412 | ||
1413 | static int | |
1414 | vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) | |
1415 | { | |
1416 | /* No-IOMMU mode does not need DMA mapping */ | |
1417 | return 0; | |
1418 | } | |
1419 | ||
1420 | static int | |
1421 | vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd, | |
1422 | uint64_t __rte_unused vaddr, | |
1423 | uint64_t __rte_unused iova, uint64_t __rte_unused len, | |
1424 | int __rte_unused do_map) | |
1425 | { | |
1426 | /* No-IOMMU mode does not need DMA mapping */ | |
1427 | return 0; | |
1428 | } | |
1429 | ||
1430 | static int | |
1431 | vfio_dma_mem_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, | |
1432 | uint64_t len, int do_map) | |
1433 | { | |
1434 | const struct vfio_iommu_type *t = vfio_cfg->vfio_iommu_type; | |
1435 | ||
1436 | if (!t) { | |
1437 | RTE_LOG(ERR, EAL, " VFIO support not initialized\n"); | |
1438 | rte_errno = ENODEV; | |
1439 | return -1; | |
1440 | } | |
1441 | ||
1442 | if (!t->dma_user_map_func) { | |
1443 | RTE_LOG(ERR, EAL, | |
1444 | " VFIO custom DMA region maping not supported by IOMMU %s\n", | |
1445 | t->name); | |
1446 | rte_errno = ENOTSUP; | |
1447 | return -1; | |
1448 | } | |
1449 | ||
1450 | return t->dma_user_map_func(vfio_cfg->vfio_container_fd, vaddr, iova, | |
1451 | len, do_map); | |
1452 | } | |
1453 | ||
1454 | static int | |
1455 | container_dma_map(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, | |
1456 | uint64_t len) | |
1457 | { | |
1458 | struct user_mem_map *new_map; | |
1459 | struct user_mem_maps *user_mem_maps; | |
1460 | int ret = 0; | |
1461 | ||
1462 | user_mem_maps = &vfio_cfg->mem_maps; | |
1463 | rte_spinlock_recursive_lock(&user_mem_maps->lock); | |
1464 | if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { | |
1465 | RTE_LOG(ERR, EAL, "No more space for user mem maps\n"); | |
1466 | rte_errno = ENOMEM; | |
1467 | ret = -1; | |
1468 | goto out; | |
1469 | } | |
1470 | /* map the entry */ | |
1471 | if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 1)) { | |
1472 | /* technically, this will fail if there are currently no devices | |
1473 | * plugged in, even if a device were added later, this mapping | |
1474 | * might have succeeded. however, since we cannot verify if this | |
1475 | * is a valid mapping without having a device attached, consider | |
1476 | * this to be unsupported, because we can't just store any old | |
1477 | * mapping and pollute the list of active mappings willy-nilly. | |
1478 | */ | |
1479 | RTE_LOG(ERR, EAL, "Couldn't map new region for DMA\n"); | |
1480 | ret = -1; | |
1481 | goto out; | |
1482 | } | |
1483 | /* create new user mem map entry */ | |
1484 | new_map = &user_mem_maps->maps[user_mem_maps->n_maps++]; | |
1485 | new_map->addr = vaddr; | |
1486 | new_map->iova = iova; | |
1487 | new_map->len = len; | |
1488 | ||
1489 | compact_user_maps(user_mem_maps); | |
1490 | out: | |
1491 | rte_spinlock_recursive_unlock(&user_mem_maps->lock); | |
1492 | return ret; | |
1493 | } | |
1494 | ||
1495 | static int | |
1496 | container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova, | |
1497 | uint64_t len) | |
1498 | { | |
1499 | struct user_mem_map *map, *new_map = NULL; | |
1500 | struct user_mem_maps *user_mem_maps; | |
1501 | int ret = 0; | |
1502 | ||
1503 | user_mem_maps = &vfio_cfg->mem_maps; | |
1504 | rte_spinlock_recursive_lock(&user_mem_maps->lock); | |
1505 | ||
1506 | /* find our mapping */ | |
1507 | map = find_user_mem_map(user_mem_maps, vaddr, iova, len); | |
1508 | if (!map) { | |
1509 | RTE_LOG(ERR, EAL, "Couldn't find previously mapped region\n"); | |
1510 | rte_errno = EINVAL; | |
1511 | ret = -1; | |
1512 | goto out; | |
1513 | } | |
1514 | if (map->addr != vaddr || map->iova != iova || map->len != len) { | |
1515 | /* we're partially unmapping a previously mapped region, so we | |
1516 | * need to split entry into two. | |
1517 | */ | |
1518 | if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) { | |
1519 | RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n"); | |
1520 | rte_errno = ENOMEM; | |
1521 | ret = -1; | |
1522 | goto out; | |
1523 | } | |
1524 | new_map = &user_mem_maps->maps[user_mem_maps->n_maps++]; | |
1525 | } | |
1526 | ||
1527 | /* unmap the entry */ | |
1528 | if (vfio_dma_mem_map(vfio_cfg, vaddr, iova, len, 0)) { | |
1529 | /* there may not be any devices plugged in, so unmapping will | |
1530 | * fail with ENODEV/ENOTSUP rte_errno values, but that doesn't | |
1531 | * stop us from removing the mapping, as the assumption is we | |
1532 | * won't be needing this memory any more and thus will want to | |
1533 | * prevent it from being remapped again on hotplug. so, only | |
1534 | * fail if we indeed failed to unmap (e.g. if the mapping was | |
1535 | * within our mapped range but had invalid alignment). | |
1536 | */ | |
1537 | if (rte_errno != ENODEV && rte_errno != ENOTSUP) { | |
1538 | RTE_LOG(ERR, EAL, "Couldn't unmap region for DMA\n"); | |
1539 | ret = -1; | |
1540 | goto out; | |
1541 | } else { | |
1542 | RTE_LOG(DEBUG, EAL, "DMA unmapping failed, but removing mappings anyway\n"); | |
1543 | } | |
1544 | } | |
1545 | /* remove map from the list of active mappings */ | |
1546 | if (new_map != NULL) { | |
1547 | adjust_map(map, new_map, vaddr, len); | |
1548 | ||
1549 | /* if we've created a new map by splitting, sort everything */ | |
1550 | if (!is_null_map(new_map)) { | |
1551 | compact_user_maps(user_mem_maps); | |
1552 | } else { | |
1553 | /* we've created a new mapping, but it was unused */ | |
1554 | user_mem_maps->n_maps--; | |
1555 | } | |
1556 | } else { | |
1557 | memset(map, 0, sizeof(*map)); | |
1558 | compact_user_maps(user_mem_maps); | |
1559 | user_mem_maps->n_maps--; | |
1560 | } | |
1561 | ||
1562 | out: | |
1563 | rte_spinlock_recursive_unlock(&user_mem_maps->lock); | |
1564 | return ret; | |
1565 | } | |
1566 | ||
1567 | int | |
1568 | rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len) | |
1569 | { | |
1570 | if (len == 0) { | |
1571 | rte_errno = EINVAL; | |
1572 | return -1; | |
1573 | } | |
1574 | ||
1575 | return container_dma_map(default_vfio_cfg, vaddr, iova, len); | |
1576 | } | |
1577 | ||
1578 | int | |
1579 | rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len) | |
1580 | { | |
1581 | if (len == 0) { | |
1582 | rte_errno = EINVAL; | |
1583 | return -1; | |
1584 | } | |
1585 | ||
1586 | return container_dma_unmap(default_vfio_cfg, vaddr, iova, len); | |
1587 | } | |
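
A minimal usage sketch of the pair above: registering an externally allocated, IOVA-contiguous buffer with the default container and withdrawing it again. Obtaining the buffer's IOVA is the caller's responsibility (with IOVA-as-VA it simply equals the virtual address).

```c
#include <stdint.h>
#include <rte_vfio.h>

/* make a caller-owned buffer visible to devices in the default container */
static int
dma_register_and_release(void *addr, uint64_t iova, uint64_t len)
{
	uint64_t va = (uint64_t)(uintptr_t)addr;

	if (rte_vfio_dma_map(va, iova, len) < 0)
		return -1;

	/* ... hand the buffer to a device for DMA ... */

	return rte_vfio_dma_unmap(va, iova, len);
}
```

Because the region is recorded in user_mem_maps, it is also replayed automatically when a group is (re)attached on hotplug.
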
1588 | ||
1589 | int | |
1590 | rte_vfio_noiommu_is_enabled(void) | |
1591 | { | |
1592 | int fd; | |
1593 | ssize_t cnt; | |
1594 | char c; | |
1595 | ||
1596 | fd = open(VFIO_NOIOMMU_MODE, O_RDONLY); | |
1597 | if (fd < 0) { | |
1598 | if (errno != ENOENT) { | |
1599 | RTE_LOG(ERR, EAL, " cannot open vfio noiommu file %i (%s)\n", | |
1600 | errno, strerror(errno)); | |
1601 | return -1; | |
1602 | } | |
1603 | /* | |
1604 | * else the file does not exist | |
1605 | * i.e. noiommu is not enabled | |
1606 | */ | |
1607 | return 0; | |
1608 | } | |
1609 | ||
1610 | cnt = read(fd, &c, 1); | |
1611 | close(fd); | |
1612 | if (cnt != 1) { | |
1613 | RTE_LOG(ERR, EAL, " unable to read from vfio noiommu " | |
1614 | "file %i (%s)\n", errno, strerror(errno)); | |
1615 | return -1; | |
1616 | } | |
1617 | ||
1618 | return c == 'Y'; | |
1619 | } | |
1620 | ||
1621 | int | |
1622 | rte_vfio_container_create(void) | |
1623 | { | |
1624 | int i; | |
1625 | ||
1626 | /* Find an empty slot to store new vfio config */ | |
1627 | for (i = 1; i < VFIO_MAX_CONTAINERS; i++) { | |
1628 | if (vfio_cfgs[i].vfio_container_fd == -1) | |
1629 | break; | |
1630 | } | |
1631 | ||
1632 | if (i == VFIO_MAX_CONTAINERS) { | |
1633 | RTE_LOG(ERR, EAL, "exceed max vfio container limit\n"); | |
1634 | return -1; | |
1635 | } | |
1636 | ||
1637 | vfio_cfgs[i].vfio_container_fd = rte_vfio_get_container_fd(); | |
1638 | if (vfio_cfgs[i].vfio_container_fd < 0) { | |
1639 | RTE_LOG(NOTICE, EAL, "fail to create a new container\n"); | |
1640 | return -1; | |
1641 | } | |
1642 | ||
1643 | return vfio_cfgs[i].vfio_container_fd; | |
1644 | } | |
1645 | ||
1646 | int __rte_experimental | |
1647 | rte_vfio_container_destroy(int container_fd) | |
1648 | { | |
1649 | struct vfio_config *vfio_cfg; | |
1650 | int i; | |
1651 | ||
1652 | vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); | |
1653 | if (vfio_cfg == NULL) { | |
1654 | RTE_LOG(ERR, EAL, "Invalid container fd\n"); | |
1655 | return -1; | |
1656 | } | |
1657 | ||
1658 | for (i = 0; i < VFIO_MAX_GROUPS; i++) | |
1659 | if (vfio_cfg->vfio_groups[i].group_num != -1) | |
1660 | rte_vfio_container_group_unbind(container_fd, | |
1661 | vfio_cfg->vfio_groups[i].group_num); | |
1662 | ||
1663 | close(container_fd); | |
1664 | vfio_cfg->vfio_container_fd = -1; | |
1665 | vfio_cfg->vfio_active_groups = 0; | |
1666 | vfio_cfg->vfio_iommu_type = NULL; | |
1667 | ||
1668 | return 0; | |
1669 | } | |
1670 | ||
1671 | int | |
1672 | rte_vfio_container_group_bind(int container_fd, int iommu_group_num) | |
1673 | { | |
1674 | struct vfio_config *vfio_cfg; | |
1675 | struct vfio_group *cur_grp = NULL; | |
1676 | int vfio_group_fd; | |
1677 | int i; | |
1678 | ||
1679 | vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); | |
1680 | if (vfio_cfg == NULL) { | |
1681 | RTE_LOG(ERR, EAL, "Invalid container fd\n"); | |
1682 | return -1; | |
1683 | } | |
1684 | ||
1685 | /* Check room for new group */ | |
1686 | if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) { | |
1687 | RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); | |
1688 | return -1; | |
1689 | } | |
1690 | ||
1691 | /* Get an index for the new group */ | |
1692 | for (i = 0; i < VFIO_MAX_GROUPS; i++) | |
1693 | if (vfio_cfg->vfio_groups[i].group_num == -1) { | |
1694 | cur_grp = &vfio_cfg->vfio_groups[i]; | |
1695 | break; | |
1696 | } | |
1697 | ||
1698 | /* This should not happen */ | |
1699 | if (i == VFIO_MAX_GROUPS || cur_grp == NULL) { | |
1700 | RTE_LOG(ERR, EAL, "No free slot found for the VFIO group\n"); | |
1701 | return -1; | |
1702 | } | |
1703 | ||
1704 | vfio_group_fd = vfio_open_group_fd(iommu_group_num); | |
1705 | if (vfio_group_fd < 0) { | |
1706 | RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num); | |
1707 | return -1; | |
1708 | } | |
1709 | cur_grp->group_num = iommu_group_num; | |
1710 | cur_grp->fd = vfio_group_fd; | |
1711 | cur_grp->devices = 0; | |
1712 | vfio_cfg->vfio_active_groups++; | |
1713 | ||
1714 | return vfio_group_fd; | |
1715 | } | |
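/*
 * Usage sketch, not part of the original source: a driver managing its own
 * container typically creates one and binds the device's IOMMU group to it
 * before mapping any memory. The sysfs base and PCI address below are
 * hypothetical values.
 *
 *	int container_fd, group_num, group_fd;
 *
 *	container_fd = rte_vfio_container_create();
 *	if (container_fd < 0)
 *		return -1;
 *	if (rte_vfio_get_group_num("/sys/bus/pci/devices", "0000:06:00.0",
 *			&group_num) <= 0)
 *		return -1;
 *	group_fd = rte_vfio_container_group_bind(container_fd, group_num);
 *	if (group_fd < 0)
 *		return -1;
 */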
1716 | ||
1717 | int | |
1718 | rte_vfio_container_group_unbind(int container_fd, int iommu_group_num) | |
1719 | { | |
1720 | struct vfio_config *vfio_cfg; | |
1721 | struct vfio_group *cur_grp = NULL; | |
1722 | int i; | |
1723 | ||
1724 | vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); | |
1725 | if (vfio_cfg == NULL) { | |
1726 | RTE_LOG(ERR, EAL, "Invalid container fd\n"); | |
1727 | return -1; | |
1728 | } | |
1729 | ||
1730 | for (i = 0; i < VFIO_MAX_GROUPS; i++) { | |
1731 | if (vfio_cfg->vfio_groups[i].group_num == iommu_group_num) { | |
1732 | cur_grp = &vfio_cfg->vfio_groups[i]; | |
1733 | break; | |
1734 | } | |
1735 | } | |
1736 | ||
1737 | /* This should not happen */ | |
1738 | if (i == VFIO_MAX_GROUPS || cur_grp == NULL) { | |
1739 | RTE_LOG(ERR, EAL, "Specified group number not found\n"); | |
1740 | return -1; | |
1741 | } | |
1742 | ||
1743 | if (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) { | |
1744 | RTE_LOG(ERR, EAL, "Error when closing vfio_group_fd for" | |
1745 | " iommu_group_num %d\n", iommu_group_num); | |
1746 | return -1; | |
1747 | } | |
1748 | cur_grp->group_num = -1; | |
1749 | cur_grp->fd = -1; | |
1750 | cur_grp->devices = 0; | |
1751 | vfio_cfg->vfio_active_groups--; | |
1752 | ||
1753 | return 0; | |
1754 | } | |
1755 | ||
1756 | int | |
1757 | rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova, | |
1758 | uint64_t len) | |
1759 | { | |
1760 | struct vfio_config *vfio_cfg; | |
1761 | ||
1762 | if (len == 0) { | |
1763 | rte_errno = EINVAL; | |
1764 | return -1; | |
1765 | } | |
1766 | ||
1767 | vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); | |
1768 | if (vfio_cfg == NULL) { | |
1769 | RTE_LOG(ERR, EAL, "Invalid container fd\n"); | |
1770 | return -1; | |
1771 | } | |
1772 | ||
1773 | return container_dma_map(vfio_cfg, vaddr, iova, len); | |
1774 | } | |
1775 | ||
1776 | int | |
1777 | rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova, | |
1778 | uint64_t len) | |
1779 | { | |
1780 | struct vfio_config *vfio_cfg; | |
1781 | ||
1782 | if (len == 0) { | |
1783 | rte_errno = EINVAL; | |
1784 | return -1; | |
1785 | } | |
1786 | ||
1787 | vfio_cfg = get_vfio_cfg_by_container_fd(container_fd); | |
1788 | if (vfio_cfg == NULL) { | |
1789 | RTE_LOG(ERR, EAL, "Invalid container fd\n"); | |
1790 | return -1; | |
1791 | } | |
1792 | ||
1793 | return container_dma_unmap(vfio_cfg, vaddr, iova, len); | |
1794 | } | |
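/*
 * Usage sketch, not part of the original source: continuing the hypothetical
 * container_fd/group_num from the sketch above with a page-aligned buffer
 * (va, len), DMA-map it into that container and tear everything down when
 * done. As before, iova == vaddr assumes IOVA-as-VA mode.
 *
 *	if (rte_vfio_container_dma_map(container_fd, (uint64_t)va,
 *			(uint64_t)va, len) < 0)
 *		return -1;
 *	... run DMA through the device ...
 *	rte_vfio_container_dma_unmap(container_fd, (uint64_t)va,
 *			(uint64_t)va, len);
 *	rte_vfio_container_group_unbind(container_fd, group_num);
 *	rte_vfio_container_destroy(container_fd);
 */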
1795 | ||
1796 | #else | |
1797 | ||
1798 | int | |
1799 | rte_vfio_dma_map(__rte_unused uint64_t vaddr, __rte_unused uint64_t iova, | |
1800 | __rte_unused uint64_t len) | |
1801 | { | |
1802 | return -1; | |
1803 | } | |
1804 | ||
1805 | int | |
1806 | rte_vfio_dma_unmap(__rte_unused uint64_t vaddr, __rte_unused uint64_t iova, | |
1807 | __rte_unused uint64_t len) | |
1808 | { | |
1809 | return -1; | |
1810 | } | |
1811 | ||
1812 | int | |
1813 | rte_vfio_setup_device(__rte_unused const char *sysfs_base, | |
1814 | __rte_unused const char *dev_addr, | |
1815 | __rte_unused int *vfio_dev_fd, | |
1816 | __rte_unused struct vfio_device_info *device_info) | |
1817 | { | |
1818 | return -1; | |
1819 | } | |
1820 | ||
1821 | int | |
1822 | rte_vfio_release_device(__rte_unused const char *sysfs_base, | |
1823 | __rte_unused const char *dev_addr, __rte_unused int fd) | |
1824 | { | |
1825 | return -1; | |
1826 | } | |
1827 | ||
1828 | int | |
1829 | rte_vfio_enable(__rte_unused const char *modname) | |
1830 | { | |
1831 | return -1; | |
1832 | } | |
1833 | ||
1834 | int | |
1835 | rte_vfio_is_enabled(__rte_unused const char *modname) | |
1836 | { | |
1837 | return -1; | |
1838 | } | |
1839 | ||
1840 | int | |
1841 | rte_vfio_noiommu_is_enabled(void) | |
1842 | { | |
1843 | return -1; | |
1844 | } | |
1845 | ||
1846 | int | |
1847 | rte_vfio_clear_group(__rte_unused int vfio_group_fd) | |
1848 | { | |
1849 | return -1; | |
1850 | } | |
1851 | ||
1852 | int | |
1853 | rte_vfio_get_group_num(__rte_unused const char *sysfs_base, | |
1854 | __rte_unused const char *dev_addr, | |
1855 | __rte_unused int *iommu_group_num) | |
1856 | { | |
1857 | return -1; | |
1858 | } | |
1859 | ||
1860 | int | |
1861 | rte_vfio_get_container_fd(void) | |
1862 | { | |
1863 | return -1; | |
1864 | } | |
1865 | ||
1866 | int | |
1867 | rte_vfio_get_group_fd(__rte_unused int iommu_group_num) | |
1868 | { | |
1869 | return -1; | |
1870 | } | |
1871 | ||
1872 | int | |
1873 | rte_vfio_container_create(void) | |
1874 | { | |
1875 | return -1; | |
1876 | } | |
1877 | ||
1878 | int | |
1879 | rte_vfio_container_destroy(__rte_unused int container_fd) | |
1880 | { | |
1881 | return -1; | |
1882 | } | |
1883 | ||
1884 | int | |
1885 | rte_vfio_container_group_bind(__rte_unused int container_fd, | |
1886 | __rte_unused int iommu_group_num) | |
1887 | { | |
1888 | return -1; | |
1889 | } | |
1890 | ||
1891 | int | |
1892 | rte_vfio_container_group_unbind(__rte_unused int container_fd, | |
1893 | __rte_unused int iommu_group_num) | |
1894 | { | |
1895 | return -1; | |
1896 | } | |
1897 | ||
1898 | int | |
1899 | rte_vfio_container_dma_map(__rte_unused int container_fd, | |
1900 | __rte_unused uint64_t vaddr, | |
1901 | __rte_unused uint64_t iova, | |
1902 | __rte_unused uint64_t len) | |
1903 | { | |
1904 | return -1; | |
1905 | } | |
1906 | ||
1907 | int | |
1908 | rte_vfio_container_dma_unmap(__rte_unused int container_fd, | |
1909 | __rte_unused uint64_t vaddr, | |
1910 | __rte_unused uint64_t iova, | |
1911 | __rte_unused uint64_t len) | |
1912 | { | |
1913 | return -1; | |
1914 | } | |
1915 | ||
1916 | #endif /* VFIO_PRESENT */ |