/*
 * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <joerg.roedel@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
ed96f228 JR |
19 | #include <linux/amd-iommu.h> |
20 | #include <linux/mm_types.h> | |
e3c495c7 | 21 | #include <linux/module.h> |
2d5503b6 | 22 | #include <linux/sched.h> |
ed96f228 JR |
23 | #include <linux/iommu.h> |
24 | #include <linux/pci.h> | |
25 | #include <linux/gfp.h> | |
26 | ||
27 | #include "amd_iommu_proto.h" | |
e3c495c7 JR |
28 | |
/* Module metadata */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");
31 | ||
ed96f228 JR |
#define MAX_DEVICES	0x10000		/* 16-bit PCI device-id space */
#define PRI_QUEUE_SIZE	512		/* Number of PRI tags tracked per PASID */

/*
 * Per-PRI-tag state kept for each PASID. The fields are not used in
 * this file; presumably they track in-flight page requests and whether
 * a finish response is needed - confirm against the fault handler.
 */
struct pri_queue {
	atomic_t inflight;
	bool finish;
};
39 | ||
/*
 * State kept per bound PASID. Reference counted; the final reference
 * is dropped via put_pasid_state(), which also releases the mm and
 * device_state references the structure holds.
 */
struct pasid_state {
	struct list_head list;			/* For global state-list */
	atomic_t count;				/* Reference count */
	struct task_struct *task;		/* Task bound to this PASID */
	struct mm_struct *mm;			/* mm_struct for the faults */
	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
	struct device_state *device_state;	/* Link to our device_state */
	int pasid;				/* PASID index */
};
49 | ||
/*
 * Per-device state created by amd_iommu_init_device(). Looked up by
 * PCI device-id in state_table and reference counted via 'count'.
 */
struct device_state {
	atomic_t count;			/* Reference count */
	struct pci_dev *pdev;		/* The PCI device we handle */
	struct pasid_state **states;	/* Root page of the PASID radix tree */
	struct iommu_domain *domain;	/* Direct-mapped IOMMUv2 domain */
	int pasid_levels;		/* Depth of the PASID radix tree */
	int max_pasids;			/* Number of PASIDs supported */
	spinlock_t lock;		/* Protects the 'states' tree */
};
59 | ||
60 | struct device_state **state_table; | |
61 | static spinlock_t state_lock; | |
62 | ||
63 | /* List and lock for all pasid_states */ | |
64 | static LIST_HEAD(pasid_state_list); | |
2d5503b6 JR |
65 | static DEFINE_SPINLOCK(ps_lock); |
66 | ||
67 | static void free_pasid_states(struct device_state *dev_state); | |
68 | static void unbind_pasid(struct device_state *dev_state, int pasid); | |
ed96f228 JR |
69 | |
70 | static u16 device_id(struct pci_dev *pdev) | |
71 | { | |
72 | u16 devid; | |
73 | ||
74 | devid = pdev->bus->number; | |
75 | devid = (devid << 8) | pdev->devfn; | |
76 | ||
77 | return devid; | |
78 | } | |
79 | ||
80 | static struct device_state *get_device_state(u16 devid) | |
81 | { | |
82 | struct device_state *dev_state; | |
83 | unsigned long flags; | |
84 | ||
85 | spin_lock_irqsave(&state_lock, flags); | |
86 | dev_state = state_table[devid]; | |
87 | if (dev_state != NULL) | |
88 | atomic_inc(&dev_state->count); | |
89 | spin_unlock_irqrestore(&state_lock, flags); | |
90 | ||
91 | return dev_state; | |
92 | } | |
93 | ||
/*
 * Final teardown of a device_state - called from put_device_state()
 * when the last reference is gone.
 */
static void free_device_state(struct device_state *dev_state)
{
	/*
	 * First detach device from domain - No more PRI requests will arrive
	 * from that device after it is unbound from the IOMMUv2 domain.
	 */
	iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);

	/* Everything is down now, free the IOMMUv2 domain */
	iommu_domain_free(dev_state->domain);

	/* Finally get rid of the device-state */
	kfree(dev_state);
}
108 | ||
/* Drop a reference on @dev_state; frees it when the count reaches zero */
static void put_device_state(struct device_state *dev_state)
{
	if (atomic_dec_and_test(&dev_state->count))
		free_device_state(dev_state);
}
114 | ||
2d5503b6 JR |
/* Add a pasid_state to the global list, protected by ps_lock */
static void link_pasid_state(struct pasid_state *pasid_state)
{
	spin_lock(&ps_lock);
	list_add_tail(&pasid_state->list, &pasid_state_list);
	spin_unlock(&ps_lock);
}
121 | ||
/* Remove from the global list - caller must hold ps_lock */
static void __unlink_pasid_state(struct pasid_state *pasid_state)
{
	list_del(&pasid_state->list);
}
126 | ||
/* Locked variant of __unlink_pasid_state() */
static void unlink_pasid_state(struct pasid_state *pasid_state)
{
	spin_lock(&ps_lock);
	__unlink_pasid_state(pasid_state);
	spin_unlock(&ps_lock);
}
133 | ||
/*
 * Walk the radix tree mapping a PASID to its pasid_state pointer and
 * return the address of the leaf slot for @pasid. With @alloc true,
 * missing intermediate table pages are allocated on the way down; with
 * @alloc false, NULL is returned when the path does not exist. NULL is
 * also returned when a page allocation fails.
 *
 * Must be called under dev_state->lock.
 */
static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
						  int pasid, bool alloc)
{
	struct pasid_state **root, **ptr;
	int level, index;

	level = dev_state->pasid_levels;
	root  = dev_state->states;

	while (true) {

		/* 9 bits of the PASID index each tree level (512 slots/page) */
		index = (pasid >> (9 * level)) & 0x1ff;
		ptr   = &root[index];

		if (level == 0)
			break;

		if (*ptr == NULL) {
			if (!alloc)
				return NULL;

			/* GFP_ATOMIC - callers hold a spinlock */
			*ptr = (void *)get_zeroed_page(GFP_ATOMIC);
			if (*ptr == NULL)
				return NULL;
		}

		root   = (struct pasid_state **)*ptr;
		level -= 1;
	}

	return ptr;
}
167 | ||
168 | static int set_pasid_state(struct device_state *dev_state, | |
169 | struct pasid_state *pasid_state, | |
170 | int pasid) | |
171 | { | |
172 | struct pasid_state **ptr; | |
173 | unsigned long flags; | |
174 | int ret; | |
175 | ||
176 | spin_lock_irqsave(&dev_state->lock, flags); | |
177 | ptr = __get_pasid_state_ptr(dev_state, pasid, true); | |
178 | ||
179 | ret = -ENOMEM; | |
180 | if (ptr == NULL) | |
181 | goto out_unlock; | |
182 | ||
183 | ret = -ENOMEM; | |
184 | if (*ptr != NULL) | |
185 | goto out_unlock; | |
186 | ||
187 | *ptr = pasid_state; | |
188 | ||
189 | ret = 0; | |
190 | ||
191 | out_unlock: | |
192 | spin_unlock_irqrestore(&dev_state->lock, flags); | |
193 | ||
194 | return ret; | |
195 | } | |
196 | ||
197 | static void clear_pasid_state(struct device_state *dev_state, int pasid) | |
198 | { | |
199 | struct pasid_state **ptr; | |
200 | unsigned long flags; | |
201 | ||
202 | spin_lock_irqsave(&dev_state->lock, flags); | |
203 | ptr = __get_pasid_state_ptr(dev_state, pasid, true); | |
204 | ||
205 | if (ptr == NULL) | |
206 | goto out_unlock; | |
207 | ||
208 | *ptr = NULL; | |
209 | ||
210 | out_unlock: | |
211 | spin_unlock_irqrestore(&dev_state->lock, flags); | |
212 | } | |
213 | ||
214 | static struct pasid_state *get_pasid_state(struct device_state *dev_state, | |
215 | int pasid) | |
216 | { | |
217 | struct pasid_state **ptr, *ret = NULL; | |
218 | unsigned long flags; | |
219 | ||
220 | spin_lock_irqsave(&dev_state->lock, flags); | |
221 | ptr = __get_pasid_state_ptr(dev_state, pasid, false); | |
222 | ||
223 | if (ptr == NULL) | |
224 | goto out_unlock; | |
225 | ||
226 | ret = *ptr; | |
227 | if (ret) | |
228 | atomic_inc(&ret->count); | |
229 | ||
230 | out_unlock: | |
231 | spin_unlock_irqrestore(&dev_state->lock, flags); | |
232 | ||
233 | return ret; | |
234 | } | |
235 | ||
/* Final destruction of a pasid_state */
static void free_pasid_state(struct pasid_state *pasid_state)
{
	kfree(pasid_state);
}
240 | ||
241 | static void put_pasid_state(struct pasid_state *pasid_state) | |
242 | { | |
243 | if (atomic_dec_and_test(&pasid_state->count)) { | |
244 | put_device_state(pasid_state->device_state); | |
245 | mmput(pasid_state->mm); | |
246 | free_pasid_state(pasid_state); | |
247 | } | |
248 | } | |
249 | ||
/*
 * Undo a PASID binding: unlink the state from the global list, clear
 * the GCR3 entry so the hardware stops using the mm, remove the state
 * from the device tree, and drop both this function's reference and
 * the one taken in amd_iommu_bind_pasid().
 */
static void unbind_pasid(struct device_state *dev_state, int pasid)
{
	struct pasid_state *pasid_state;

	pasid_state = get_pasid_state(dev_state, pasid);
	if (pasid_state == NULL)
		return;

	unlink_pasid_state(pasid_state);

	/* Clear the hardware GCR3 entry before tearing down the state */
	amd_iommu_domain_clear_gcr3(dev_state->domain, pasid);
	clear_pasid_state(dev_state, pasid);

	put_pasid_state(pasid_state); /* Reference taken in this function */
	put_pasid_state(pasid_state); /* Reference taken in bind() function */
}
266 | ||
267 | static void free_pasid_states_level1(struct pasid_state **tbl) | |
268 | { | |
269 | int i; | |
270 | ||
271 | for (i = 0; i < 512; ++i) { | |
272 | if (tbl[i] == NULL) | |
273 | continue; | |
274 | ||
275 | free_page((unsigned long)tbl[i]); | |
276 | } | |
277 | } | |
278 | ||
279 | static void free_pasid_states_level2(struct pasid_state **tbl) | |
280 | { | |
281 | struct pasid_state **ptr; | |
282 | int i; | |
283 | ||
284 | for (i = 0; i < 512; ++i) { | |
285 | if (tbl[i] == NULL) | |
286 | continue; | |
287 | ||
288 | ptr = (struct pasid_state **)tbl[i]; | |
289 | free_pasid_states_level1(ptr); | |
290 | } | |
291 | } | |
292 | ||
/*
 * Unbind every PASID still bound on the device, then free the pages of
 * the PASID radix tree. Called from amd_iommu_free_device() after the
 * device was removed from state_table.
 */
static void free_pasid_states(struct device_state *dev_state)
{
	struct pasid_state *pasid_state;
	int i;

	for (i = 0; i < dev_state->max_pasids; ++i) {
		pasid_state = get_pasid_state(dev_state, i);
		if (pasid_state == NULL)
			continue;

		unbind_pasid(dev_state, i);
		put_pasid_state(pasid_state); /* Ref from get_pasid_state() */
	}

	/* Free intermediate table pages depending on the tree depth */
	if (dev_state->pasid_levels == 2)
		free_pasid_states_level2(dev_state->states);
	else if (dev_state->pasid_levels == 1)
		free_pasid_states_level1(dev_state->states);
	else if (dev_state->pasid_levels != 0)
		BUG();

	/* Finally the root table page */
	free_page((unsigned long)dev_state->states);
}
316 | ||
317 | int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, | |
318 | struct task_struct *task) | |
319 | { | |
320 | struct pasid_state *pasid_state; | |
321 | struct device_state *dev_state; | |
322 | u16 devid; | |
323 | int ret; | |
324 | ||
325 | might_sleep(); | |
326 | ||
327 | if (!amd_iommu_v2_supported()) | |
328 | return -ENODEV; | |
329 | ||
330 | devid = device_id(pdev); | |
331 | dev_state = get_device_state(devid); | |
332 | ||
333 | if (dev_state == NULL) | |
334 | return -EINVAL; | |
335 | ||
336 | ret = -EINVAL; | |
337 | if (pasid < 0 || pasid >= dev_state->max_pasids) | |
338 | goto out; | |
339 | ||
340 | ret = -ENOMEM; | |
341 | pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); | |
342 | if (pasid_state == NULL) | |
343 | goto out; | |
344 | ||
345 | atomic_set(&pasid_state->count, 1); | |
346 | pasid_state->task = task; | |
347 | pasid_state->mm = get_task_mm(task); | |
348 | pasid_state->device_state = dev_state; | |
349 | pasid_state->pasid = pasid; | |
350 | ||
351 | if (pasid_state->mm == NULL) | |
352 | goto out_free; | |
353 | ||
354 | ret = set_pasid_state(dev_state, pasid_state, pasid); | |
355 | if (ret) | |
356 | goto out_free; | |
357 | ||
358 | ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, | |
359 | __pa(pasid_state->mm->pgd)); | |
360 | if (ret) | |
361 | goto out_clear_state; | |
362 | ||
363 | link_pasid_state(pasid_state); | |
364 | ||
365 | return 0; | |
366 | ||
367 | out_clear_state: | |
368 | clear_pasid_state(dev_state, pasid); | |
369 | ||
370 | out_free: | |
371 | put_pasid_state(pasid_state); | |
372 | ||
373 | out: | |
374 | put_device_state(dev_state); | |
375 | ||
376 | return ret; | |
377 | } | |
378 | EXPORT_SYMBOL(amd_iommu_bind_pasid); | |
379 | ||
380 | void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) | |
381 | { | |
382 | struct device_state *dev_state; | |
383 | u16 devid; | |
384 | ||
385 | might_sleep(); | |
386 | ||
387 | if (!amd_iommu_v2_supported()) | |
388 | return; | |
389 | ||
390 | devid = device_id(pdev); | |
391 | dev_state = get_device_state(devid); | |
392 | if (dev_state == NULL) | |
393 | return; | |
394 | ||
395 | if (pasid < 0 || pasid >= dev_state->max_pasids) | |
396 | goto out; | |
397 | ||
398 | unbind_pasid(dev_state, pasid); | |
399 | ||
400 | out: | |
401 | put_device_state(dev_state); | |
402 | } | |
403 | EXPORT_SYMBOL(amd_iommu_unbind_pasid); | |
404 | ||
ed96f228 JR |
405 | int amd_iommu_init_device(struct pci_dev *pdev, int pasids) |
406 | { | |
407 | struct device_state *dev_state; | |
408 | unsigned long flags; | |
409 | int ret, tmp; | |
410 | u16 devid; | |
411 | ||
412 | might_sleep(); | |
413 | ||
414 | if (!amd_iommu_v2_supported()) | |
415 | return -ENODEV; | |
416 | ||
417 | if (pasids <= 0 || pasids > (PASID_MASK + 1)) | |
418 | return -EINVAL; | |
419 | ||
420 | devid = device_id(pdev); | |
421 | ||
422 | dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); | |
423 | if (dev_state == NULL) | |
424 | return -ENOMEM; | |
425 | ||
426 | spin_lock_init(&dev_state->lock); | |
427 | dev_state->pdev = pdev; | |
428 | ||
429 | tmp = pasids; | |
430 | for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) | |
431 | dev_state->pasid_levels += 1; | |
432 | ||
433 | atomic_set(&dev_state->count, 1); | |
434 | dev_state->max_pasids = pasids; | |
435 | ||
436 | ret = -ENOMEM; | |
437 | dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); | |
438 | if (dev_state->states == NULL) | |
439 | goto out_free_dev_state; | |
440 | ||
441 | dev_state->domain = iommu_domain_alloc(&pci_bus_type); | |
442 | if (dev_state->domain == NULL) | |
443 | goto out_free_states; | |
444 | ||
445 | amd_iommu_domain_direct_map(dev_state->domain); | |
446 | ||
447 | ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); | |
448 | if (ret) | |
449 | goto out_free_domain; | |
450 | ||
451 | ret = iommu_attach_device(dev_state->domain, &pdev->dev); | |
452 | if (ret != 0) | |
453 | goto out_free_domain; | |
454 | ||
455 | spin_lock_irqsave(&state_lock, flags); | |
456 | ||
457 | if (state_table[devid] != NULL) { | |
458 | spin_unlock_irqrestore(&state_lock, flags); | |
459 | ret = -EBUSY; | |
460 | goto out_free_domain; | |
461 | } | |
462 | ||
463 | state_table[devid] = dev_state; | |
464 | ||
465 | spin_unlock_irqrestore(&state_lock, flags); | |
466 | ||
467 | return 0; | |
468 | ||
469 | out_free_domain: | |
470 | iommu_domain_free(dev_state->domain); | |
471 | ||
472 | out_free_states: | |
473 | free_page((unsigned long)dev_state->states); | |
474 | ||
475 | out_free_dev_state: | |
476 | kfree(dev_state); | |
477 | ||
478 | return ret; | |
479 | } | |
480 | EXPORT_SYMBOL(amd_iommu_init_device); | |
481 | ||
/*
 * Unregister @pdev from the IOMMUv2 driver: remove it from state_table
 * so no new references can be taken, tear down all remaining PASID
 * bindings and drop the table's reference on the device_state.
 */
void amd_iommu_free_device(struct pci_dev *pdev)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;

	if (!amd_iommu_v2_supported())
		return;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	dev_state = state_table[devid];
	if (dev_state == NULL) {
		spin_unlock_irqrestore(&state_lock, flags);
		return;
	}

	/* Unpublish first - lookups after this point return NULL */
	state_table[devid] = NULL;

	spin_unlock_irqrestore(&state_lock, flags);

	/* Get rid of any remaining pasid states */
	free_pasid_states(dev_state);

	/* Drops the state_table reference; may free dev_state */
	put_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_free_device);
511 | ||
e3c495c7 JR |
512 | static int __init amd_iommu_v2_init(void) |
513 | { | |
ed96f228 JR |
514 | size_t state_table_size; |
515 | ||
e3c495c7 JR |
516 | pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>"); |
517 | ||
ed96f228 JR |
518 | spin_lock_init(&state_lock); |
519 | ||
520 | state_table_size = MAX_DEVICES * sizeof(struct device_state *); | |
521 | state_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | |
522 | get_order(state_table_size)); | |
523 | if (state_table == NULL) | |
524 | return -ENOMEM; | |
525 | ||
e3c495c7 JR |
526 | return 0; |
527 | } | |
528 | ||
/*
 * Module exit: device drivers should have called
 * amd_iommu_free_device() for every device already; warn about and
 * clean up any device_state that is still registered, then free the
 * state table.
 */
static void __exit amd_iommu_v2_exit(void)
{
	struct device_state *dev_state;
	size_t state_table_size;
	int i;

	for (i = 0; i < MAX_DEVICES; ++i) {
		dev_state = get_device_state(i);

		if (dev_state == NULL)
			continue;

		/* A leftover device at module exit is a driver bug */
		WARN_ON_ONCE(1);

		/* Drops the state_table reference */
		amd_iommu_free_device(dev_state->pdev);
		/* Drops the reference taken by get_device_state() above */
		put_device_state(dev_state);
	}

	state_table_size = MAX_DEVICES * sizeof(struct device_state *);
	free_pages((unsigned long)state_table, get_order(state_table_size));
}
550 | ||
/* Module entry and exit points */
module_init(amd_iommu_v2_init);
module_exit(amd_iommu_v2_exit);