// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-sva-lib.h"

/**
 * struct iopf_queue - IO Page Fault queue
 * @wq: the fault workqueue
 * @devices: devices attached to this queue
 * @lock: protects the device list
 */
struct iopf_queue {
	struct workqueue_struct *wq;
	struct list_head devices;
	struct mutex lock;
};

/**
 * struct iopf_device_param - IO Page Fault data attached to a device
 * @dev: the device that owns this param
 * @queue: IOPF queue
 * @queue_list: index into queue->devices
 * @partial: faults that are part of a Page Request Group for which the last
 *           request hasn't been submitted yet.
 */
struct iopf_device_param {
	struct device *dev;
	struct iopf_queue *queue;
	struct list_head queue_list;
	struct list_head partial;
};

/* A single page request, pending on a partial or group list */
struct iopf_fault {
	struct iommu_fault fault;
	struct list_head list;
};

/*
 * A Page Request Group: all faults sharing a group ID, handled in one work
 * item. @last_fault is the request flagged LAST_PAGE; it closes the group
 * and carries the PASID/group ID used for the page response.
 */
struct iopf_group {
	struct iopf_fault last_fault;
	struct list_head faults;
	struct work_struct work;
	struct device *dev;
};

static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
			       enum iommu_page_response_code status)
{
	struct iommu_page_response resp = {
		.version = IOMMU_PAGE_RESP_VERSION_1,
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
	    (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
		resp.flags = IOMMU_PAGE_RESP_PASID_VALID;

	return iommu_page_response(dev, &resp);
}

static enum iommu_page_response_code
iopf_handle_single(struct iopf_fault *iopf)
{
	vm_fault_t ret;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned int access_flags = 0;
	unsigned int fault_flags = FAULT_FLAG_REMOTE;
	struct iommu_fault_page_request *prm = &iopf->fault.prm;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;

	if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
		return status;

	mm = iommu_sva_find(prm->pasid);
	if (IS_ERR_OR_NULL(mm))
		return status;

	mmap_read_lock(mm);

	vma = find_extend_vma(mm, prm->addr);
	if (!vma)
		/* Unmapped area */
		goto out_put_mm;

	if (prm->perm & IOMMU_FAULT_PERM_READ)
		access_flags |= VM_READ;

	if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
		access_flags |= VM_WRITE;
		fault_flags |= FAULT_FLAG_WRITE;
	}

	if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
		access_flags |= VM_EXEC;
		fault_flags |= FAULT_FLAG_INSTRUCTION;
	}

	if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
		fault_flags |= FAULT_FLAG_USER;

	if (access_flags & ~vma->vm_flags)
		/* Access fault */
		goto out_put_mm;

	ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
	status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
		IOMMU_PAGE_RESP_SUCCESS;

out_put_mm:
	mmap_read_unlock(mm);
	mmput(mm);

	return status;
}

static void iopf_handle_group(struct work_struct *work)
{
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

	group = container_of(work, struct iopf_group, work);

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		/*
		 * For the moment, errors are sticky: don't handle subsequent
		 * faults in the group if there is an error.
		 */
		if (status == IOMMU_PAGE_RESP_SUCCESS)
			status = iopf_handle_single(iopf);

		if (!(iopf->fault.prm.flags &
		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	iopf_complete_group(group->dev, &group->last_fault, status);
	kfree(group);
}

/**
 * iommu_queue_iopf - IO Page Fault handler
 * @fault: fault event
 * @cookie: struct device, passed to iommu_register_device_fault_handler.
 *
 * Add a fault to the device workqueue, to be handled by mm.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Return: 0 on success and <0 on error.
 */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
	int ret;
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	struct device *dev = cookie;
	struct dev_iommu *param = dev->iommu;

	lockdep_assert_held(&param->lock);

	if (fault->type != IOMMU_FAULT_PAGE_REQ)
		/* Not a recoverable page fault */
		return -EOPNOTSUPP;

	/*
	 * As long as we're holding param->lock, the queue can't be unlinked
	 * from the device and therefore cannot disappear.
	 */
	iopf_param = param->iopf_param;
	if (!iopf_param)
		return -ENODEV;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
		if (!iopf)
			return -ENOMEM;

		iopf->fault = *fault;

		/* Non-last request of a group. Postpone until the last one */
		list_add(&iopf->list, &iopf_param->partial);

		return 0;
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * The caller will send a response to the hardware. But we do
		 * need to clean up before leaving, otherwise partial faults
		 * will be stuck.
		 */
		ret = -ENOMEM;
		goto cleanup_partial;
	}

	group->dev = dev;
	group->last_fault.fault = *fault;
	INIT_LIST_HEAD(&group->faults);
	list_add(&group->last_fault.list, &group->faults);
	INIT_WORK(&group->work, iopf_handle_group);

	/* See if we have partial faults for this group */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}

	queue_work(iopf_param->queue->wq, &group->work);
	return 0;

cleanup_partial:
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);
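
/*
 * Example (editor's sketch, not part of the original file): an IOMMU driver
 * would typically add the device to an IOPF queue and then register
 * iommu_queue_iopf() as the device fault handler, passing the device itself
 * as the cookie. "master->dev" below is a hypothetical per-device handle;
 * error handling is elided.
 *
 *	ret = iopf_queue_add_device(queue, master->dev);
 *	if (!ret)
 *		ret = iommu_register_device_fault_handler(master->dev,
 *							  iommu_queue_iopf,
 *							  master->dev);
 */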

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	int ret = 0;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param)
		flush_workqueue(iopf_param->queue->wq);
	else
		ret = -ENODEV;
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
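
/*
 * Example (editor's sketch, not part of the original file): before a PASID
 * is released, a driver's unbind path might first quiesce its own low-level
 * PRI queue and only then flush the IOPF workqueue, so no fault for the old
 * address space is still in flight when the PASID is reused.
 * "my_driver_flush_pri()" is a hypothetical driver hook.
 *
 *	my_driver_flush_pri(master);		// drain low-level queue first
 *	iopf_queue_flush_dev(master->dev);	// then the IOPF workqueue
 */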

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, last page faults in a group may have been
 * lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
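
/*
 * Example (editor's sketch, not part of the original file): a low-level PRI
 * IRQ handler might call this when it detects that its hardware queue
 * overflowed, since the LAST_PAGE request that would close some groups may
 * never arrive. "priq_overflowed()" and "smmu" are hypothetical names.
 *
 *	if (priq_overflowed(smmu))
 *		iopf_queue_discard_partial(smmu->iopf_queue);
 */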

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EBUSY;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
	if (!iopf_param)
		return -ENOMEM;

	INIT_LIST_HEAD(&iopf_param->partial);
	iopf_param->queue = queue;
	iopf_param->dev = dev;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (!param->iopf_param) {
		list_add(&iopf_param->queue_list, &queue->devices);
		param->iopf_param = iopf_param;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	if (ret)
		kfree(iopf_param);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Caller makes sure that no more faults are reported for this device.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EINVAL;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param || !queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param && iopf_param->queue == queue) {
		list_del(&iopf_param->queue_list);
		param->iopf_param = NULL;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
	if (ret)
		return ret;

	/* Just in case some faults are still stuck */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
		kfree(iopf);

	kfree(iopf_param);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iopf_device_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
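
/*
 * Example (editor's sketch, not part of the original file): the overall
 * queue lifecycle as an IOMMU driver might use it. "drv" and the hooks
 * below are hypothetical names; error handling is elided.
 *
 *	// driver init
 *	drv->iopf_queue = iopf_queue_alloc(dev_name(drv->dev));
 *
 *	// per-device IOPF enable
 *	iopf_queue_add_device(drv->iopf_queue, dev);
 *	iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
 *
 *	// per-device IOPF disable
 *	iommu_unregister_device_fault_handler(dev);
 *	iopf_queue_remove_device(drv->iopf_queue, dev);
 *
 *	// driver teardown
 *	iopf_queue_free(drv->iopf_queue);
 */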