/*-
 *   BSD LICENSE
 *
 *   Copyright (c) 2017 Red Hat, Inc.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef RTE_LIBRTE_VHOST_NUMA
#include <numaif.h>
#endif

#include <rte_tailq.h>

#include "iotlb.h"
#include "vhost.h"

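/*
 * A single IOTLB entry, used both for the translation cache
 * (IOVA -> host user address) and for the pending-miss list,
 * where only the iova and perm fields are meaningful.
 */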
struct vhost_iotlb_entry {
	TAILQ_ENTRY(vhost_iotlb_entry) next;

	uint64_t iova;
	uint64_t uaddr;
	uint64_t size;
	uint8_t perm;
};

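/* Size of the per-virtqueue mempool backing both cache and pending entries */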
#define IOTLB_CACHE_SIZE 2048

static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq);

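/* Drop all pending-miss entries and return them to the mempool. */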
static void
vhost_user_iotlb_pending_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		rte_mempool_put(vq->iotlb_pool, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

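/*
 * Check whether an IOTLB miss for this IOVA and permission has already
 * been recorded in the pending list.
 */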
bool
vhost_user_iotlb_pending_miss(struct vhost_virtqueue *vq, uint64_t iova,
				uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	bool found = false;

	rte_rwlock_read_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH(node, &vq->iotlb_pending_list, next) {
		if ((node->iova == iova) && (node->perm == perm)) {
			found = true;
			break;
		}
	}

	rte_rwlock_read_unlock(&vq->iotlb_pending_lock);

	return found;
}

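/*
 * Record a new pending IOTLB miss. If the entry pool is exhausted,
 * either flush the pending list or evict a random cache entry to
 * make room before giving up.
 */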
void
vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq,
				uint64_t iova, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	int ret;

	ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
	if (ret) {
		RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, clear entries\n");
		if (!TAILQ_EMPTY(&vq->iotlb_pending_list))
			vhost_user_iotlb_pending_remove_all(vq);
		else
			vhost_user_iotlb_cache_random_evict(vq);
		ret = rte_mempool_get(vq->iotlb_pool, (void **)&node);
		if (ret) {
			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
			return;
		}
	}

	node->iova = iova;
	node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_INSERT_TAIL(&vq->iotlb_pending_list, node, next);

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

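/*
 * Remove pending misses whose IOVA lies in [iova, iova + size) and
 * whose permissions are covered by 'perm'.
 */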
void
vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_pending_lock);

	TAILQ_FOREACH_SAFE(node, &vq->iotlb_pending_list, next, temp_node) {
		if (node->iova < iova)
			continue;
		if (node->iova >= iova + size)
			continue;
		if ((node->perm & perm) != node->perm)
			continue;
		TAILQ_REMOVE(&vq->iotlb_pending_list, node, next);
		rte_mempool_put(vq->iotlb_pool, node);
	}

	rte_rwlock_write_unlock(&vq->iotlb_pending_lock);
}

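/* Drop every cached translation and reset the cache entry counter. */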
static void
vhost_user_iotlb_cache_remove_all(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		TAILQ_REMOVE(&vq->iotlb_list, node, next);
		rte_mempool_put(vq->iotlb_pool, node);
	}

	vq->iotlb_cache_nr = 0;

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

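/* Evict a single randomly chosen entry from the translation cache. */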
static void
vhost_user_iotlb_cache_random_evict(struct vhost_virtqueue *vq)
{
	struct vhost_iotlb_entry *node, *temp_node;
	int entry_idx;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	entry_idx = rte_rand() % vq->iotlb_cache_nr;

	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		if (!entry_idx) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			rte_mempool_put(vq->iotlb_pool, node);
			vq->iotlb_cache_nr--;
			break;
		}
		entry_idx--;
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

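/*
 * Insert a new translation into the cache, keeping the list sorted by
 * IOVA, and clear any pending miss the new entry satisfies.
 */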
void
vhost_user_iotlb_cache_insert(struct vhost_virtqueue *vq, uint64_t iova,
				uint64_t uaddr, uint64_t size, uint8_t perm)
{
	struct vhost_iotlb_entry *node, *new_node;
	int ret;

	ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
	if (ret) {
		RTE_LOG(DEBUG, VHOST_CONFIG, "IOTLB pool empty, clear entries\n");
		if (!TAILQ_EMPTY(&vq->iotlb_list))
			vhost_user_iotlb_cache_random_evict(vq);
		else
			vhost_user_iotlb_pending_remove_all(vq);
		ret = rte_mempool_get(vq->iotlb_pool, (void **)&new_node);
		if (ret) {
			RTE_LOG(ERR, VHOST_CONFIG, "IOTLB pool still empty, failure\n");
			return;
		}
	}

	new_node->iova = iova;
	new_node->uaddr = uaddr;
	new_node->size = size;
	new_node->perm = perm;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/*
		 * Entries must be invalidated before being updated.
		 * So if iova already in list, assume identical.
		 */
		if (node->iova == new_node->iova) {
			rte_mempool_put(vq->iotlb_pool, new_node);
			goto unlock;
		} else if (node->iova > new_node->iova) {
			TAILQ_INSERT_BEFORE(node, new_node, next);
			vq->iotlb_cache_nr++;
			goto unlock;
		}
	}

	TAILQ_INSERT_TAIL(&vq->iotlb_list, new_node, next);
	vq->iotlb_cache_nr++;

unlock:
	vhost_user_iotlb_pending_remove(vq, iova, size, perm);

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

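/* Invalidate all cached translations overlapping [iova, iova + size). */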
void
vhost_user_iotlb_cache_remove(struct vhost_virtqueue *vq,
				uint64_t iova, uint64_t size)
{
	struct vhost_iotlb_entry *node, *temp_node;

	if (unlikely(!size))
		return;

	rte_rwlock_write_lock(&vq->iotlb_lock);

	TAILQ_FOREACH_SAFE(node, &vq->iotlb_list, next, temp_node) {
		/* Sorted list */
		if (unlikely(iova + size < node->iova))
			break;

		if (iova < node->iova + node->size) {
			TAILQ_REMOVE(&vq->iotlb_list, node, next);
			rte_mempool_put(vq->iotlb_pool, node);
			vq->iotlb_cache_nr--;
		}
	}

	rte_rwlock_write_unlock(&vq->iotlb_lock);
}

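/*
 * Translate 'iova' into a host virtual address, possibly spanning
 * several contiguous cache entries. On return, *size is shrunk to the
 * length actually mapped; 0 is returned when no translation with the
 * requested permission is found. Unlike the other helpers, this
 * function takes no lock itself.
 */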
uint64_t
vhost_user_iotlb_cache_find(struct vhost_virtqueue *vq, uint64_t iova,
				uint64_t *size, uint8_t perm)
{
	struct vhost_iotlb_entry *node;
	uint64_t offset, vva = 0, mapped = 0;

	if (unlikely(!*size))
		goto out;

	TAILQ_FOREACH(node, &vq->iotlb_list, next) {
		/* List sorted by iova */
		if (unlikely(iova < node->iova))
			break;

		if (iova >= node->iova + node->size)
			continue;

		if (unlikely((perm & node->perm) != perm)) {
			vva = 0;
			break;
		}

		offset = iova - node->iova;
		if (!vva)
			vva = node->uaddr + offset;

		mapped += node->size - offset;
		iova = node->iova + node->size;

		if (mapped >= *size)
			break;
	}

out:
	/* Only part of the requested chunk is mapped */
	if (unlikely(mapped < *size))
		*size = mapped;

	return vva;
}

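/* Flush the whole IOTLB: both cached translations and pending misses. */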
void
vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
{
	vhost_user_iotlb_cache_remove_all(vq);
	vhost_user_iotlb_pending_remove_all(vq);
}

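/*
 * (Re)initialize the IOTLB for a virtqueue: flush any previous state,
 * set up the locks and lists, and create the per-virtqueue mempool of
 * IOTLB entries on the NUMA node the virtqueue resides on when NUMA
 * awareness is built in.
 */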
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
	char pool_name[RTE_MEMPOOL_NAMESIZE];
	struct vhost_virtqueue *vq = dev->virtqueue[vq_index];
	int socket = 0;

	if (vq->iotlb_pool) {
		/*
		 * The cache has already been initialized,
		 * just drop all cached and pending entries.
		 */
		vhost_user_iotlb_flush_all(vq);
	}

#ifdef RTE_LIBRTE_VHOST_NUMA
	if (get_mempolicy(&socket, NULL, 0, vq, MPOL_F_NODE | MPOL_F_ADDR) != 0)
		socket = 0;
#endif

	rte_rwlock_init(&vq->iotlb_lock);
	rte_rwlock_init(&vq->iotlb_pending_lock);

	TAILQ_INIT(&vq->iotlb_list);
	TAILQ_INIT(&vq->iotlb_pending_list);

	snprintf(pool_name, sizeof(pool_name), "iotlb_cache_%d_%d",
			dev->vid, vq_index);

	/* If already created, free it and recreate */
	vq->iotlb_pool = rte_mempool_lookup(pool_name);
	if (vq->iotlb_pool)
		rte_mempool_free(vq->iotlb_pool);

	vq->iotlb_pool = rte_mempool_create(pool_name,
			IOTLB_CACHE_SIZE, sizeof(struct vhost_iotlb_entry), 0,
			0, 0, NULL, NULL, NULL, socket,
			MEMPOOL_F_NO_CACHE_ALIGN |
			MEMPOOL_F_SP_PUT |
			MEMPOOL_F_SC_GET);
	if (!vq->iotlb_pool) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"Failed to create IOTLB cache pool (%s)\n",
			pool_name);
		return -1;
	}

	vq->iotlb_cache_nr = 0;

	return 0;
}