]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blob - drivers/misc/cxl/api.c
the rest of drivers/*: annotate ->poll() instances
[mirror_ubuntu-jammy-kernel.git] / drivers / misc / cxl / api.c
1 /*
2 * Copyright 2014 IBM Corp.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 */
9
10 #include <linux/pci.h>
11 #include <linux/slab.h>
12 #include <linux/file.h>
13 #include <misc/cxl.h>
14 #include <linux/msi.h>
15 #include <linux/module.h>
16 #include <linux/mount.h>
17 #include <linux/sched/mm.h>
18 #include <linux/mmu_context.h>
19
20 #include "cxl.h"
21
22 /*
23 * Since we want to track memory mappings to be able to force-unmap
24 * when the AFU is no longer reachable, we need an inode. For devices
25 * opened through the cxl user API, this is not a problem, but a
26 * userland process can also get a cxl fd through the cxl_get_fd()
27 * API, which is used by the cxlflash driver.
28 *
29 * Therefore we implement our own simple pseudo-filesystem and inode
30 * allocator. We don't use the anonymous inode, as we need the
31 * meta-data associated with it (address_space) and it is shared by
32 * other drivers/processes, so it could lead to cxl unmapping VMAs
33 * from random processes.
34 */
35
/* Magic number handed to mount_pseudo() for the cxl pseudo-filesystem */
#define CXL_PSEUDO_FS_MAGIC	0x1697697f

/*
 * Mount of the cxl pseudo-filesystem and its pin count; both are handed
 * together to simple_pin_fs() / simple_release_fs().
 */
static struct vfsmount *cxl_vfs_mount;
static int cxl_fs_cnt;
40
41 static const struct dentry_operations cxl_fs_dops = {
42 .d_dname = simple_dname,
43 };
44
45 static struct dentry *cxl_fs_mount(struct file_system_type *fs_type, int flags,
46 const char *dev_name, void *data)
47 {
48 return mount_pseudo(fs_type, "cxl:", NULL, &cxl_fs_dops,
49 CXL_PSEUDO_FS_MAGIC);
50 }
51
52 static struct file_system_type cxl_fs_type = {
53 .name = "cxl",
54 .owner = THIS_MODULE,
55 .mount = cxl_fs_mount,
56 .kill_sb = kill_anon_super,
57 };
58
59
60 void cxl_release_mapping(struct cxl_context *ctx)
61 {
62 if (ctx->kernelapi && ctx->mapping)
63 simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
64 }
65
66 static struct file *cxl_getfile(const char *name,
67 const struct file_operations *fops,
68 void *priv, int flags)
69 {
70 struct qstr this;
71 struct path path;
72 struct file *file;
73 struct inode *inode = NULL;
74 int rc;
75
76 /* strongly inspired by anon_inode_getfile() */
77
78 if (fops->owner && !try_module_get(fops->owner))
79 return ERR_PTR(-ENOENT);
80
81 rc = simple_pin_fs(&cxl_fs_type, &cxl_vfs_mount, &cxl_fs_cnt);
82 if (rc < 0) {
83 pr_err("Cannot mount cxl pseudo filesystem: %d\n", rc);
84 file = ERR_PTR(rc);
85 goto err_module;
86 }
87
88 inode = alloc_anon_inode(cxl_vfs_mount->mnt_sb);
89 if (IS_ERR(inode)) {
90 file = ERR_CAST(inode);
91 goto err_fs;
92 }
93
94 file = ERR_PTR(-ENOMEM);
95 this.name = name;
96 this.len = strlen(name);
97 this.hash = 0;
98 path.dentry = d_alloc_pseudo(cxl_vfs_mount->mnt_sb, &this);
99 if (!path.dentry)
100 goto err_inode;
101
102 path.mnt = mntget(cxl_vfs_mount);
103 d_instantiate(path.dentry, inode);
104
105 file = alloc_file(&path, OPEN_FMODE(flags), fops);
106 if (IS_ERR(file))
107 goto err_dput;
108 file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
109 file->private_data = priv;
110
111 return file;
112
113 err_dput:
114 path_put(&path);
115 err_inode:
116 iput(inode);
117 err_fs:
118 simple_release_fs(&cxl_vfs_mount, &cxl_fs_cnt);
119 err_module:
120 module_put(fops->owner);
121 return file;
122 }
123
124 struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
125 {
126 struct cxl_afu *afu;
127 struct cxl_context *ctx;
128 int rc;
129
130 afu = cxl_pci_to_afu(dev);
131 if (IS_ERR(afu))
132 return ERR_CAST(afu);
133
134 ctx = cxl_context_alloc();
135 if (!ctx)
136 return ERR_PTR(-ENOMEM);
137
138 ctx->kernelapi = true;
139
140 /* Make it a slave context. We can promote it later? */
141 rc = cxl_context_init(ctx, afu, false);
142 if (rc)
143 goto err_ctx;
144
145 return ctx;
146
147 err_ctx:
148 kfree(ctx);
149 return ERR_PTR(rc);
150 }
151 EXPORT_SYMBOL_GPL(cxl_dev_context_init);
152
153 struct cxl_context *cxl_get_context(struct pci_dev *dev)
154 {
155 return dev->dev.archdata.cxl_ctx;
156 }
157 EXPORT_SYMBOL_GPL(cxl_get_context);
158
159 int cxl_release_context(struct cxl_context *ctx)
160 {
161 if (ctx->status >= STARTED)
162 return -EBUSY;
163
164 cxl_context_free(ctx);
165
166 return 0;
167 }
168 EXPORT_SYMBOL_GPL(cxl_release_context);
169
170 static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
171 {
172 __u16 range;
173 int r;
174
175 for (r = 0; r < CXL_IRQ_RANGES; r++) {
176 range = ctx->irqs.range[r];
177 if (num < range) {
178 return ctx->irqs.offset[r] + num;
179 }
180 num -= range;
181 }
182 return 0;
183 }
184
185 int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
186 {
187 if (*ctx == NULL || *afu_irq == 0) {
188 *afu_irq = 1;
189 *ctx = cxl_get_context(pdev);
190 } else {
191 (*afu_irq)++;
192 if (*afu_irq > cxl_get_max_irqs_per_process(pdev)) {
193 *ctx = list_next_entry(*ctx, extra_irq_contexts);
194 *afu_irq = 1;
195 }
196 }
197 return cxl_find_afu_irq(*ctx, *afu_irq);
198 }
199 /* Exported via cxl_base */
200
201 int cxl_set_priv(struct cxl_context *ctx, void *priv)
202 {
203 if (!ctx)
204 return -EINVAL;
205
206 ctx->priv = priv;
207
208 return 0;
209 }
210 EXPORT_SYMBOL_GPL(cxl_set_priv);
211
212 void *cxl_get_priv(struct cxl_context *ctx)
213 {
214 if (!ctx)
215 return ERR_PTR(-EINVAL);
216
217 return ctx->priv;
218 }
219 EXPORT_SYMBOL_GPL(cxl_get_priv);
220
221 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
222 {
223 int res;
224 irq_hw_number_t hwirq;
225
226 if (num == 0)
227 num = ctx->afu->pp_irqs;
228 res = afu_allocate_irqs(ctx, num);
229 if (res)
230 return res;
231
232 if (!cpu_has_feature(CPU_FTR_HVMODE)) {
233 /* In a guest, the PSL interrupt is not multiplexed. It was
234 * allocated above, and we need to set its handler
235 */
236 hwirq = cxl_find_afu_irq(ctx, 0);
237 if (hwirq)
238 cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
239 }
240
241 if (ctx->status == STARTED) {
242 if (cxl_ops->update_ivtes)
243 cxl_ops->update_ivtes(ctx);
244 else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
245 }
246
247 return res;
248 }
249 EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
250
251 void cxl_free_afu_irqs(struct cxl_context *ctx)
252 {
253 irq_hw_number_t hwirq;
254 unsigned int virq;
255
256 if (!cpu_has_feature(CPU_FTR_HVMODE)) {
257 hwirq = cxl_find_afu_irq(ctx, 0);
258 if (hwirq) {
259 virq = irq_find_mapping(NULL, hwirq);
260 if (virq)
261 cxl_unmap_irq(virq, ctx);
262 }
263 }
264 afu_irq_name_free(ctx);
265 cxl_ops->release_irq_ranges(&ctx->irqs, ctx->afu->adapter);
266 }
267 EXPORT_SYMBOL_GPL(cxl_free_afu_irqs);
268
269 int cxl_map_afu_irq(struct cxl_context *ctx, int num,
270 irq_handler_t handler, void *cookie, char *name)
271 {
272 irq_hw_number_t hwirq;
273
274 /*
275 * Find interrupt we are to register.
276 */
277 hwirq = cxl_find_afu_irq(ctx, num);
278 if (!hwirq)
279 return -ENOENT;
280
281 return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name);
282 }
283 EXPORT_SYMBOL_GPL(cxl_map_afu_irq);
284
285 void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie)
286 {
287 irq_hw_number_t hwirq;
288 unsigned int virq;
289
290 hwirq = cxl_find_afu_irq(ctx, num);
291 if (!hwirq)
292 return;
293
294 virq = irq_find_mapping(NULL, hwirq);
295 if (virq)
296 cxl_unmap_irq(virq, cookie);
297 }
298 EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq);
299
300 /*
301 * Start a context
302 * Code here similar to afu_ioctl_start_work().
303 */
304 int cxl_start_context(struct cxl_context *ctx, u64 wed,
305 struct task_struct *task)
306 {
307 int rc = 0;
308 bool kernel = true;
309
310 pr_devel("%s: pe: %i\n", __func__, ctx->pe);
311
312 mutex_lock(&ctx->status_mutex);
313 if (ctx->status == STARTED)
314 goto out; /* already started */
315
316 /*
317 * Increment the mapped context count for adapter. This also checks
318 * if adapter_context_lock is taken.
319 */
320 rc = cxl_adapter_context_get(ctx->afu->adapter);
321 if (rc)
322 goto out;
323
324 if (task) {
325 ctx->pid = get_task_pid(task, PIDTYPE_PID);
326 kernel = false;
327 ctx->real_mode = false;
328
329 /* acquire a reference to the task's mm */
330 ctx->mm = get_task_mm(current);
331
332 /* ensure this mm_struct can't be freed */
333 cxl_context_mm_count_get(ctx);
334
335 if (ctx->mm) {
336 /* decrement the use count from above */
337 mmput(ctx->mm);
338 /* make TLBIs for this context global */
339 mm_context_add_copro(ctx->mm);
340 }
341 }
342
343 /*
344 * Increment driver use count. Enables global TLBIs for hash
345 * and callbacks to handle the segment table
346 */
347 cxl_ctx_get();
348
349 /* See the comment in afu_ioctl_start_work() */
350 smp_mb();
351
352 if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
353 put_pid(ctx->pid);
354 ctx->pid = NULL;
355 cxl_adapter_context_put(ctx->afu->adapter);
356 cxl_ctx_put();
357 if (task) {
358 cxl_context_mm_count_put(ctx);
359 if (ctx->mm)
360 mm_context_remove_copro(ctx->mm);
361 }
362 goto out;
363 }
364
365 ctx->status = STARTED;
366 out:
367 mutex_unlock(&ctx->status_mutex);
368 return rc;
369 }
370 EXPORT_SYMBOL_GPL(cxl_start_context);
371
372 int cxl_process_element(struct cxl_context *ctx)
373 {
374 return ctx->external_pe;
375 }
376 EXPORT_SYMBOL_GPL(cxl_process_element);
377
/*
 * Stop a context by detaching it with __detach_context().
 * Returns 0 on success, otherwise -Errno.
 */
int cxl_stop_context(struct cxl_context *ctx)
{
	int rc = __detach_context(ctx);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_stop_context);
384
385 void cxl_set_master(struct cxl_context *ctx)
386 {
387 ctx->master = true;
388 }
389 EXPORT_SYMBOL_GPL(cxl_set_master);
390
391 int cxl_set_translation_mode(struct cxl_context *ctx, bool real_mode)
392 {
393 if (ctx->status == STARTED) {
394 /*
395 * We could potentially update the PE and issue an update LLCMD
396 * to support this, but it doesn't seem to have a good use case
397 * since it's trivial to just create a second kernel context
398 * with different translation modes, so until someone convinces
399 * me otherwise:
400 */
401 return -EBUSY;
402 }
403
404 ctx->real_mode = real_mode;
405 return 0;
406 }
407 EXPORT_SYMBOL_GPL(cxl_set_translation_mode);
408
409 /* wrappers around afu_* file ops which are EXPORTED */
/* Exported open() file operation: forwards to afu_open() */
int cxl_fd_open(struct inode *inode, struct file *file)
{
	int rc = afu_open(inode, file);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_open);
/* Exported release() file operation: forwards to afu_release() */
int cxl_fd_release(struct inode *inode, struct file *file)
{
	int rc = afu_release(inode, file);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_release);
/* Exported ioctl file operation: forwards to afu_ioctl() */
long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long rc = afu_ioctl(file, cmd, arg);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_ioctl);
/* Exported mmap() file operation: forwards to afu_mmap() */
int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm)
{
	int rc = afu_mmap(file, vm);

	return rc;
}
EXPORT_SYMBOL_GPL(cxl_fd_mmap);
430 __poll_t cxl_fd_poll(struct file *file, struct poll_table_struct *poll)
431 {
432 return afu_poll(file, poll);
433 }
434 EXPORT_SYMBOL_GPL(cxl_fd_poll);
435 ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
436 loff_t *off)
437 {
438 return afu_read(file, buf, count, off);
439 }
440 EXPORT_SYMBOL_GPL(cxl_fd_read);
441
/*
 * Fill in fops->NAME from afu_fops when the caller left it unset.
 *
 * Expects a pointer named "fops" and an object named "afu_fops" to be in
 * scope at the point of use. Wrapped in do { } while (0) so that it expands
 * to a single statement and cannot capture a following "else".
 */
#define PATCH_FOPS(NAME)				\
	do {						\
		if (!fops->NAME)			\
			fops->NAME = afu_fops.NAME;	\
	} while (0)
443
444 /* Get a struct file and fd for a context and attach the ops */
445 struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops,
446 int *fd)
447 {
448 struct file *file;
449 int rc, flags, fdtmp;
450 char *name = NULL;
451
452 /* only allow one per context */
453 if (ctx->mapping)
454 return ERR_PTR(-EEXIST);
455
456 flags = O_RDWR | O_CLOEXEC;
457
458 /* This code is similar to anon_inode_getfd() */
459 rc = get_unused_fd_flags(flags);
460 if (rc < 0)
461 return ERR_PTR(rc);
462 fdtmp = rc;
463
464 /*
465 * Patch the file ops. Needs to be careful that this is rentrant safe.
466 */
467 if (fops) {
468 PATCH_FOPS(open);
469 PATCH_FOPS(poll);
470 PATCH_FOPS(read);
471 PATCH_FOPS(release);
472 PATCH_FOPS(unlocked_ioctl);
473 PATCH_FOPS(compat_ioctl);
474 PATCH_FOPS(mmap);
475 } else /* use default ops */
476 fops = (struct file_operations *)&afu_fops;
477
478 name = kasprintf(GFP_KERNEL, "cxl:%d", ctx->pe);
479 file = cxl_getfile(name, fops, ctx, flags);
480 kfree(name);
481 if (IS_ERR(file))
482 goto err_fd;
483
484 cxl_context_set_mapping(ctx, file->f_mapping);
485 *fd = fdtmp;
486 return file;
487
488 err_fd:
489 put_unused_fd(fdtmp);
490 return NULL;
491 }
492 EXPORT_SYMBOL_GPL(cxl_get_fd);
493
494 struct cxl_context *cxl_fops_get_context(struct file *file)
495 {
496 return file->private_data;
497 }
498 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
499
500 void cxl_set_driver_ops(struct cxl_context *ctx,
501 struct cxl_afu_driver_ops *ops)
502 {
503 WARN_ON(!ops->fetch_event || !ops->event_delivered);
504 atomic_set(&ctx->afu_driver_events, 0);
505 ctx->afu_driver_ops = ops;
506 }
507 EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
508
509 void cxl_context_events_pending(struct cxl_context *ctx,
510 unsigned int new_events)
511 {
512 atomic_add(new_events, &ctx->afu_driver_events);
513 wake_up_all(&ctx->wq);
514 }
515 EXPORT_SYMBOL_GPL(cxl_context_events_pending);
516
517 int cxl_start_work(struct cxl_context *ctx,
518 struct cxl_ioctl_start_work *work)
519 {
520 int rc;
521
522 /* code taken from afu_ioctl_start_work */
523 if (!(work->flags & CXL_START_WORK_NUM_IRQS))
524 work->num_interrupts = ctx->afu->pp_irqs;
525 else if ((work->num_interrupts < ctx->afu->pp_irqs) ||
526 (work->num_interrupts > ctx->afu->irqs_max)) {
527 return -EINVAL;
528 }
529
530 rc = afu_register_irqs(ctx, work->num_interrupts);
531 if (rc)
532 return rc;
533
534 rc = cxl_start_context(ctx, work->work_element_descriptor, current);
535 if (rc < 0) {
536 afu_release_irqs(ctx, ctx);
537 return rc;
538 }
539
540 return 0;
541 }
542 EXPORT_SYMBOL_GPL(cxl_start_work);
543
544 void __iomem *cxl_psa_map(struct cxl_context *ctx)
545 {
546 if (ctx->status != STARTED)
547 return NULL;
548
549 pr_devel("%s: psn_phys%llx size:%llx\n",
550 __func__, ctx->psn_phys, ctx->psn_size);
551 return ioremap(ctx->psn_phys, ctx->psn_size);
552 }
553 EXPORT_SYMBOL_GPL(cxl_psa_map);
554
555 void cxl_psa_unmap(void __iomem *addr)
556 {
557 iounmap(addr);
558 }
559 EXPORT_SYMBOL_GPL(cxl_psa_unmap);
560
561 int cxl_afu_reset(struct cxl_context *ctx)
562 {
563 struct cxl_afu *afu = ctx->afu;
564 int rc;
565
566 rc = cxl_ops->afu_reset(afu);
567 if (rc)
568 return rc;
569
570 return cxl_ops->afu_check_and_enable(afu);
571 }
572 EXPORT_SYMBOL_GPL(cxl_afu_reset);
573
574 void cxl_perst_reloads_same_image(struct cxl_afu *afu,
575 bool perst_reloads_same_image)
576 {
577 afu->adapter->perst_same_image = perst_reloads_same_image;
578 }
579 EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
580
581 ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
582 {
583 struct cxl_afu *afu = cxl_pci_to_afu(dev);
584 if (IS_ERR(afu))
585 return -ENODEV;
586
587 return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
588 }
589 EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);
590
591 int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
592 {
593 struct cxl_afu *afu = cxl_pci_to_afu(dev);
594 if (IS_ERR(afu))
595 return -ENODEV;
596
597 if (irqs > afu->adapter->user_irqs)
598 return -EINVAL;
599
600 /* Limit user_irqs to prevent the user increasing this via sysfs */
601 afu->adapter->user_irqs = irqs;
602 afu->irqs_max = irqs;
603
604 return 0;
605 }
606 EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);
607
608 int cxl_get_max_irqs_per_process(struct pci_dev *dev)
609 {
610 struct cxl_afu *afu = cxl_pci_to_afu(dev);
611 if (IS_ERR(afu))
612 return -ENODEV;
613
614 return afu->irqs_max;
615 }
616 EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);
617
618 /*
619 * This is a special interrupt allocation routine called from the PHB's MSI
620 * setup function. When capi interrupts are allocated in this manner they must
621 * still be associated with a running context, but since the MSI APIs have no
622 * way to specify this we use the default context associated with the device.
623 *
624 * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
625 * interrupt number, so in order to overcome this their driver informs us of
626 * the restriction by setting the maximum interrupts per context, and we
627 * allocate additional contexts as necessary so that we can keep the AFU
628 * interrupt number within the supported range.
629 */
630 int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
631 {
632 struct cxl_context *ctx, *new_ctx, *default_ctx;
633 int remaining;
634 int rc;
635
636 ctx = default_ctx = cxl_get_context(pdev);
637 if (WARN_ON(!default_ctx))
638 return -ENODEV;
639
640 remaining = nvec;
641 while (remaining > 0) {
642 rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
643 if (rc) {
644 pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
645 return rc;
646 }
647 remaining -= ctx->afu->irqs_max;
648
649 if (ctx != default_ctx && default_ctx->status == STARTED) {
650 WARN_ON(cxl_start_context(ctx,
651 be64_to_cpu(default_ctx->elem->common.wed),
652 NULL));
653 }
654
655 if (remaining > 0) {
656 new_ctx = cxl_dev_context_init(pdev);
657 if (IS_ERR(new_ctx)) {
658 pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
659 return -ENOSPC;
660 }
661 list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
662 ctx = new_ctx;
663 }
664 }
665
666 return 0;
667 }
668 /* Exported via cxl_base */
669
670 void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
671 {
672 struct cxl_context *ctx, *pos, *tmp;
673
674 ctx = cxl_get_context(pdev);
675 if (WARN_ON(!ctx))
676 return;
677
678 cxl_free_afu_irqs(ctx);
679 list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
680 cxl_stop_context(pos);
681 cxl_free_afu_irqs(pos);
682 list_del(&pos->extra_irq_contexts);
683 cxl_release_context(pos);
684 }
685 }
686 /* Exported via cxl_base */