/*
 * Copyright(c) 2015-2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
#include <linux/sched/mm.h>
#include <linux/bitmap.h>

#include <rdma/ib.h>

#include "hfi.h"
#include "pio.h"
#include "device.h"
#include "common.h"
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
#include "aspm.h"
#include "mmu_rb.h"

#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt

#define SEND_CTXT_HALT_TIMEOUT 1000 /* msecs */

/*
 * File operation functions
 */
static int hfi1_file_open(struct inode *inode, struct file *fp);
static int hfi1_file_close(struct inode *inode, struct file *fp);
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from);
static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt);
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma);

static u64 kvirt_to_phys(void *addr);
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo);
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
			 const struct hfi1_user_info *uinfo);
static int init_user_ctxt(struct hfi1_filedata *fd);
static void user_init(struct hfi1_ctxtdata *uctxt);
static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len);
static int setup_base_ctxt(struct hfi1_filedata *fd);
static int setup_subctxt(struct hfi1_ctxtdata *uctxt);

static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo);
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo);
static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt);
static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt);
static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt);
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events);
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop);
static int vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg);

static const struct file_operations hfi1_file_ops = {
	.owner = THIS_MODULE,
	.write_iter = hfi1_write_iter,
	.open = hfi1_file_open,
	.release = hfi1_file_close,
	.unlocked_ioctl = hfi1_file_ioctl,
	.poll = hfi1_poll,
	.mmap = hfi1_file_mmap,
	.llseek = noop_llseek,
};

static struct vm_operations_struct vm_ops = {
	.fault = vma_fault,
};

/*
 * Types of memories mapped into user processes' space
 */
enum mmap_types {
	PIO_BUFS = 1,
	PIO_BUFS_SOP,
	PIO_CRED,
	RCV_HDRQ,
	RCV_EGRBUF,
	UREGS,
	EVENTS,
	STATUS,
	RTAIL,
	SUBCTXT_UREGS,
	SUBCTXT_RCV_HDRQ,
	SUBCTXT_EGRBUF,
	SDMA_COMP
};

/*
 * Masks and offsets defining the mmap tokens
 */
#define HFI1_MMAP_OFFSET_MASK 0xfffULL
#define HFI1_MMAP_OFFSET_SHIFT 0
#define HFI1_MMAP_SUBCTXT_MASK 0xfULL
#define HFI1_MMAP_SUBCTXT_SHIFT 12
#define HFI1_MMAP_CTXT_MASK 0xffULL
#define HFI1_MMAP_CTXT_SHIFT 16
#define HFI1_MMAP_TYPE_MASK 0xfULL
#define HFI1_MMAP_TYPE_SHIFT 24
#define HFI1_MMAP_MAGIC_MASK 0xffffffffULL
#define HFI1_MMAP_MAGIC_SHIFT 32

#define HFI1_MMAP_MAGIC 0xdabbad00

#define HFI1_MMAP_TOKEN_SET(field, val)	\
	(((val) & HFI1_MMAP_##field##_MASK) << HFI1_MMAP_##field##_SHIFT)
#define HFI1_MMAP_TOKEN_GET(field, token) \
	(((token) >> HFI1_MMAP_##field##_SHIFT) & HFI1_MMAP_##field##_MASK)
#define HFI1_MMAP_TOKEN(type, ctxt, subctxt, addr)   \
	(HFI1_MMAP_TOKEN_SET(MAGIC, HFI1_MMAP_MAGIC) | \
	HFI1_MMAP_TOKEN_SET(TYPE, type) | \
	HFI1_MMAP_TOKEN_SET(CTXT, ctxt) | \
	HFI1_MMAP_TOKEN_SET(SUBCTXT, subctxt) | \
	HFI1_MMAP_TOKEN_SET(OFFSET, (offset_in_page(addr))))
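
/*
 * Worked example (illustrative): the token for the UREGS page of
 * context 3, sub-context 1 packs as
 *	bits [63:32] MAGIC   = 0xdabbad00
 *	bits [27:24] TYPE    = UREGS (6)
 *	bits [23:16] CTXT    = 3
 *	bits [15:12] SUBCTXT = 1
 *	bits [11:0]  OFFSET  = offset_in_page() of the mapped address
 * User level passes the token back as the mmap() offset and
 * hfi1_file_mmap() decodes it with HFI1_MMAP_TOKEN_GET().
 */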

#define dbg(fmt, ...)				\
	pr_info(fmt, ##__VA_ARGS__)

static inline int is_valid_mmap(u64 token)
{
	return (HFI1_MMAP_TOKEN_GET(MAGIC, token) == HFI1_MMAP_MAGIC);
}

static int hfi1_file_open(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fd;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);

	if (!((dd->flags & HFI1_PRESENT) && dd->kregbase1))
		return -EINVAL;

	if (!atomic_inc_not_zero(&dd->user_refcount))
		return -ENXIO;

	/* Just take a ref now. Not all opens result in a context assign */
	kobject_get(&dd->kobj);

	/* The real work is performed later in assign_ctxt() */

	fd = kzalloc(sizeof(*fd), GFP_KERNEL);

	if (fd) {
		fd->rec_cpu_num = -1; /* no cpu affinity by default */
		fd->mm = current->mm;
		mmgrab(fd->mm);
		fd->dd = dd;
		fp->private_data = fd;
	} else {
		fp->private_data = NULL;

		if (atomic_dec_and_test(&dd->user_refcount))
			complete(&dd->user_comp);

		return -ENOMEM;
	}

	return 0;
}

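/*
 * Top-level ioctl dispatcher for user contexts. Every command other
 * than ASSIGN_CTXT and GET_VERS requires that a context has already
 * been assigned to this file descriptor.
 */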
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
			    unsigned long arg)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_info uinfo;
	struct hfi1_tid_info tinfo;
	int ret = 0;
	unsigned long addr;
	int uval = 0;
	unsigned long ul_uval = 0;
	u16 uval16 = 0;

	hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
	if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
	    cmd != HFI1_IOCTL_GET_VERS &&
	    !uctxt)
		return -EINVAL;

	switch (cmd) {
	case HFI1_IOCTL_ASSIGN_CTXT:
		if (uctxt)
			return -EINVAL;

		if (copy_from_user(&uinfo,
				   (struct hfi1_user_info __user *)arg,
				   sizeof(uinfo)))
			return -EFAULT;

		ret = assign_ctxt(fd, &uinfo);
		break;
	case HFI1_IOCTL_CTXT_INFO:
		ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_ctxt_info));
		break;
	case HFI1_IOCTL_USER_INFO:
		ret = get_base_info(fd, (void __user *)(unsigned long)arg,
				    sizeof(struct hfi1_base_info));
		break;
	case HFI1_IOCTL_CREDIT_UPD:
		if (uctxt)
			sc_return_credits(uctxt->sc);
		break;

	case HFI1_IOCTL_TID_UPDATE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
		if (!ret) {
			/*
			 * Copy the number of tidlist entries we used
			 * and the length of the buffer we registered.
			 * These fields are adjacent in the structure so
			 * we can copy them at the same time.
			 */
			addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
			if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
					 sizeof(tinfo.tidcnt) +
					 sizeof(tinfo.length)))
				ret = -EFAULT;
		}
		break;

	case HFI1_IOCTL_TID_FREE:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_TID_INVAL_READ:
		if (copy_from_user(&tinfo,
				   (struct hfi1_tid_info __user *)arg,
				   sizeof(tinfo)))
			return -EFAULT;

		ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
		if (ret)
			break;
		addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
		if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
				 sizeof(tinfo.tidcnt)))
			ret = -EFAULT;
		break;

	case HFI1_IOCTL_RECV_CTRL:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = manage_rcvq(uctxt, fd->subctxt, uval);
		break;

	case HFI1_IOCTL_POLL_TYPE:
		ret = get_user(uval, (int __user *)arg);
		if (ret != 0)
			return -EFAULT;
		uctxt->poll_type = (typeof(uctxt->poll_type))uval;
		break;

	case HFI1_IOCTL_ACK_EVENT:
		ret = get_user(ul_uval, (unsigned long __user *)arg);
		if (ret != 0)
			return -EFAULT;
		ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
		break;

	case HFI1_IOCTL_SET_PKEY:
		ret = get_user(uval16, (u16 __user *)arg);
		if (ret != 0)
			return -EFAULT;
		if (HFI1_CAP_IS_USET(PKEY_CHECK))
			ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
		else
			return -EPERM;
		break;

	case HFI1_IOCTL_CTXT_RESET: {
		struct send_context *sc;
		struct hfi1_devdata *dd;

		if (!uctxt || !uctxt->dd || !uctxt->sc)
			return -EINVAL;

		/*
		 * There is no protection here. User level has to
		 * guarantee that no one will be writing to the send
		 * context while it is being re-initialized.
		 * If user level breaks that guarantee, it will break
		 * its own context and no one else's.
		 */
		dd = uctxt->dd;
		sc = uctxt->sc;
		/*
		 * Wait until the interrupt handler has marked the
		 * context as halted or frozen. Report error if we time
		 * out.
		 */
		wait_event_interruptible_timeout(
			sc->halt_wait, (sc->flags & SCF_HALTED),
			msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
		if (!(sc->flags & SCF_HALTED))
			return -ENOLCK;

		/*
		 * If the send context was halted due to a Freeze,
		 * wait until the device has been "unfrozen" before
		 * resetting the context.
		 */
		if (sc->flags & SCF_FROZEN) {
			wait_event_interruptible_timeout(
				dd->event_queue,
				!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
				msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
			if (dd->flags & HFI1_FROZEN)
				return -ENOLCK;

			if (dd->flags & HFI1_FORCED_FREEZE)
				/*
				 * Don't allow context reset if we are in a
				 * forced freeze
				 */
				return -ENODEV;

			sc_disable(sc);
			ret = sc_enable(sc);
			hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
				     uctxt->ctxt);
		} else {
			ret = sc_restart(sc);
		}
		if (!ret)
			sc_return_credits(sc);
		break;
	}

	case HFI1_IOCTL_GET_VERS:
		uval = HFI1_USER_SWVERSION;
		if (put_user(uval, (int __user *)arg))
			return -EFAULT;
		break;

	default:
		return -EINVAL;
	}

	return ret;
}

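/*
 * The write() path for user SDMA: hfi1_user_sdma_process_request()
 * consumes one or more iovec segments per request. Returns the number
 * of requests queued, or the error from the first failing request.
 */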
static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct hfi1_filedata *fd = kiocb->ki_filp->private_data;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	int done = 0, reqs = 0;
	unsigned long dim = from->nr_segs;

	if (!cq || !pq)
		return -EIO;

	if (!iter_is_iovec(from) || !dim)
		return -EINVAL;

	hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
		  fd->uctxt->ctxt, fd->subctxt, dim);

	if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
		return -ENOSPC;

	while (dim) {
		int ret;
		unsigned long count = 0;

		ret = hfi1_user_sdma_process_request(
			fd, (struct iovec *)(from->iov + done),
			dim, &count);
		if (ret) {
			reqs = ret;
			break;
		}
		dim -= count;
		done += count;
		reqs++;
	}

	return reqs;
}

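/*
 * mmap() handler. The token in the offset (see the HFI1_MMAP_* macros
 * above) selects which chip or driver memory region to map. Chip MMIO
 * is mapped with io_remap_pfn_range(), kernel memory with
 * remap_pfn_range(), and vmalloc'ed areas are faulted in on demand
 * through vma_fault().
 */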
static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd;
	unsigned long flags;
	u64 token = vma->vm_pgoff << PAGE_SHIFT,
		memaddr = 0;
	void *memvirt = NULL;
	u8 subctxt, mapio = 0, vmf = 0, type;
	ssize_t memlen = 0;
	int ret = 0;
	u16 ctxt;

	if (!is_valid_mmap(token) || !uctxt ||
	    !(vma->vm_flags & VM_SHARED)) {
		ret = -EINVAL;
		goto done;
	}
	dd = uctxt->dd;
	ctxt = HFI1_MMAP_TOKEN_GET(CTXT, token);
	subctxt = HFI1_MMAP_TOKEN_GET(SUBCTXT, token);
	type = HFI1_MMAP_TOKEN_GET(TYPE, token);
	if (ctxt != uctxt->ctxt || subctxt != fd->subctxt) {
		ret = -EINVAL;
		goto done;
	}

	flags = vma->vm_flags;

	switch (type) {
	case PIO_BUFS:
	case PIO_BUFS_SOP:
		memaddr = ((dd->physaddr + TXE_PIO_SEND) +
				/* chip pio base */
			   (uctxt->sc->hw_context * BIT(16))) +
				/* 64K PIO space / ctxt */
			(type == PIO_BUFS_SOP ?
				(TXE_PIO_SIZE / 2) : 0); /* sop? */
		/*
		 * Map only the amount allocated to the context, not the
		 * entire available context's PIO space.
		 */
		memlen = PAGE_ALIGN(uctxt->sc->credits * PIO_BLOCK_SIZE);
		flags &= ~VM_MAYREAD;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		mapio = 1;
		break;
	case PIO_CRED:
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		/*
		 * The credit return location for this context could be on the
		 * second or third page allocated for credit returns (if number
		 * of enabled contexts > 64 and 128 respectively).
		 */
		memvirt = dd->cr_base[uctxt->numa_id].va;
		memaddr = virt_to_phys(memvirt) +
			(((u64)uctxt->sc->hw_free -
			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
		memlen = PAGE_SIZE;
		flags &= ~VM_MAYWRITE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		/*
		 * The driver has already allocated memory for credit
		 * returns and programmed it into the chip. Has that
		 * memory been flagged as non-cached?
		 */
		/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
		mapio = 1;
		break;
	case RCV_HDRQ:
		memlen = uctxt->rcvhdrq_size;
		memvirt = uctxt->rcvhdrq;
		break;
	case RCV_EGRBUF: {
		unsigned long addr;
		int i;
		/*
		 * The RcvEgr buffer needs to be handled differently
		 * as multiple non-contiguous pages need to be mapped
		 * into the user process.
		 */
		memlen = uctxt->egrbufs.size;
		if ((vma->vm_end - vma->vm_start) != memlen) {
			dd_dev_err(dd, "Eager buffer map size invalid (%lu != %lu)\n",
				   (vma->vm_end - vma->vm_start), memlen);
			ret = -EINVAL;
			goto done;
		}
		if (vma->vm_flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		vma->vm_flags &= ~VM_MAYWRITE;
		addr = vma->vm_start;
		for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
			memlen = uctxt->egrbufs.buffers[i].len;
			memvirt = uctxt->egrbufs.buffers[i].addr;
			ret = remap_pfn_range(
				vma, addr,
				/*
				 * virt_to_pfn() does the same, but
				 * it's not available on x86_64
				 * when CONFIG_MMU is enabled.
				 */
				PFN_DOWN(__pa(memvirt)),
				memlen,
				vma->vm_page_prot);
			if (ret < 0)
				goto done;
			addr += memlen;
		}
		ret = 0;
		goto done;
	}
	case UREGS:
		/*
		 * Map only the page that contains this context's user
		 * registers.
		 */
		memaddr = (unsigned long)
			(dd->physaddr + RXE_PER_CONTEXT_USER)
			+ (uctxt->ctxt * RXE_PER_CONTEXT_SIZE);
		/*
		 * TidFlow table is on the same page as the rest of the
		 * user registers.
		 */
		memlen = PAGE_SIZE;
		flags |= VM_DONTCOPY | VM_DONTEXPAND;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		mapio = 1;
		break;
	case EVENTS:
		/*
		 * Use the page where this context's flags are. User level
		 * knows where its own bitmap is within the page.
		 */
		memaddr = (unsigned long)(dd->events +
					  ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
					   HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK;
		memlen = PAGE_SIZE;
		/*
		 * v3.7 removes VM_RESERVED but the effect is kept by
		 * using VM_IO.
		 */
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case STATUS:
		if (flags & (unsigned long)(VM_WRITE | VM_EXEC)) {
			ret = -EPERM;
			goto done;
		}
		memaddr = kvirt_to_phys((void *)dd->status);
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		break;
	case RTAIL:
		if (!HFI1_CAP_IS_USET(DMA_RTAIL)) {
			/*
			 * If the memory allocation failed, the context alloc
			 * also would have failed, so we would never get here
			 */
			ret = -EINVAL;
			goto done;
		}
		if (flags & VM_WRITE) {
			ret = -EPERM;
			goto done;
		}
		memlen = PAGE_SIZE;
		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
		flags &= ~VM_MAYWRITE;
		break;
	case SUBCTXT_UREGS:
		memaddr = (u64)uctxt->subctxt_uregbase;
		memlen = PAGE_SIZE;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_RCV_HDRQ:
		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	case SUBCTXT_EGRBUF:
		memaddr = (u64)uctxt->subctxt_rcvegrbuf;
		memlen = uctxt->egrbufs.size * uctxt->subctxt_cnt;
		flags |= VM_IO | VM_DONTEXPAND;
		flags &= ~VM_MAYWRITE;
		vmf = 1;
		break;
	case SDMA_COMP: {
		struct hfi1_user_sdma_comp_q *cq = fd->cq;

		if (!cq) {
			ret = -EFAULT;
			goto done;
		}
		memaddr = (u64)cq->comps;
		memlen = PAGE_ALIGN(sizeof(*cq->comps) * cq->nentries);
		flags |= VM_IO | VM_DONTEXPAND;
		vmf = 1;
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}

	if ((vma->vm_end - vma->vm_start) != memlen) {
		hfi1_cdbg(PROC, "%u:%u Memory size mismatch %lu:%lu",
			  uctxt->ctxt, fd->subctxt,
			  (vma->vm_end - vma->vm_start), memlen);
		ret = -EINVAL;
		goto done;
	}

	vma->vm_flags = flags;
	hfi1_cdbg(PROC,
		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
		  ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
		  vma->vm_end - vma->vm_start, vma->vm_flags);
	if (vmf) {
		vma->vm_pgoff = PFN_DOWN(memaddr);
		vma->vm_ops = &vm_ops;
		ret = 0;
	} else if (mapio) {
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 PFN_DOWN(memaddr),
					 memlen,
					 vma->vm_page_prot);
	} else if (memvirt) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(__pa(memvirt)),
				      memlen,
				      vma->vm_page_prot);
	} else {
		ret = remap_pfn_range(vma, vma->vm_start,
				      PFN_DOWN(memaddr),
				      memlen,
				      vma->vm_page_prot);
	}
done:
	return ret;
}

/*
 * Local (non-chip) user memory is not mapped right away but as it is
 * accessed by the user-level code.
 */
static int vma_fault(struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;

	get_page(page);
	vmf->page = page;

	return 0;
}

static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned pollflag;

	uctxt = ((struct hfi1_filedata *)fp->private_data)->uctxt;
	if (!uctxt)
		pollflag = POLLERR;
	else if (uctxt->poll_type == HFI1_POLL_TYPE_URGENT)
		pollflag = poll_urgent(fp, pt);
	else if (uctxt->poll_type == HFI1_POLL_TYPE_ANYRCV)
		pollflag = poll_next(fp, pt);
	else /* invalid */
		pollflag = POLLERR;

	return pollflag;
}

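/*
 * Release a file descriptor: drop this fd's sub-context slot and, once
 * the last sharer is gone, quiesce the hardware context and deallocate
 * it.
 */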
static int hfi1_file_close(struct inode *inode, struct file *fp)
{
	struct hfi1_filedata *fdata = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fdata->uctxt;
	struct hfi1_devdata *dd = container_of(inode->i_cdev,
					       struct hfi1_devdata,
					       user_cdev);
	unsigned long flags, *ev;

	fp->private_data = NULL;

	if (!uctxt)
		goto done;

	hfi1_cdbg(PROC, "freeing ctxt %u:%u", uctxt->ctxt, fdata->subctxt);
	mutex_lock(&hfi1_mutex);

	flush_wc();
	/* drain user sdma queue */
	hfi1_user_sdma_free_queues(fdata);

	/* release the cpu */
	hfi1_put_proc_affinity(fdata->rec_cpu_num);

	/* clean up rcv side */
	hfi1_user_exp_rcv_free(fdata);

	/*
	 * Clear any left over, unhandled events so the next process that
	 * gets this context doesn't get confused.
	 */
	ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			   HFI1_MAX_SHARED_CTXTS) + fdata->subctxt;
	*ev = 0;

	__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
	fdata->uctxt = NULL;
	hfi1_rcd_put(uctxt); /* fdata reference */
	if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
		mutex_unlock(&hfi1_mutex);
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	/*
	 * Disable receive context and interrupt available, reset all
	 * RcvCtxtCtrl bits to default values.
	 */
	hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
		     HFI1_RCVCTRL_TIDFLOW_DIS |
		     HFI1_RCVCTRL_INTRAVAIL_DIS |
		     HFI1_RCVCTRL_TAILUPD_DIS |
		     HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
		     HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
		     HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt->ctxt);
	/* Clear the context's J_KEY */
	hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
	/*
	 * If a send context is allocated, reset context integrity
	 * checks to default and disable the send context.
	 */
	if (uctxt->sc) {
		set_pio_integrity(uctxt->sc);
		sc_disable(uctxt->sc);
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);

	hfi1_free_ctxt_rcv_groups(uctxt);
	hfi1_clear_ctxt_pkey(dd, uctxt);

	uctxt->event_flags = 0;
	mutex_unlock(&hfi1_mutex);

	deallocate_ctxt(uctxt);
done:
	mmdrop(fdata->mm);
	kobject_put(&dd->kobj);

	if (atomic_dec_and_test(&dd->user_refcount))
		complete(&dd->user_comp);

	kfree(fdata);
	return 0;
}

/*
 * Convert kernel *virtual* addresses to physical addresses.
 * This is used for vmalloc'ed addresses.
 */
static u64 kvirt_to_phys(void *addr)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(addr);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}

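/*
 * Handle HFI1_IOCTL_ASSIGN_CTXT: join an existing shared context as a
 * sub-context if one matches, otherwise allocate a new base context.
 * hfi1_mutex serializes the search/allocate step; a sub-context then
 * waits for the base context owner to finish initialization.
 */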
static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
{
	int ret;
	unsigned int swmajor, swminor;

	swmajor = uinfo->userversion >> 16;
	if (swmajor != HFI1_USER_SWMAJOR)
		return -ENODEV;

	swminor = uinfo->userversion & 0xffff;

	mutex_lock(&hfi1_mutex);
	/*
	 * Get a sub context if necessary.
	 * ret < 0 error, 0 no context, 1 sub-context found
	 */
	ret = 0;
	if (uinfo->subctxt_cnt) {
		ret = find_sub_ctxt(fd, uinfo);
		if (ret > 0)
			fd->rec_cpu_num =
				hfi1_get_proc_affinity(fd->uctxt->numa_id);
	}

	/*
	 * Allocate a base context if context sharing is not required or we
	 * couldn't find a sub context.
	 */
	if (!ret)
		ret = allocate_ctxt(fd, fd->dd, uinfo);

	mutex_unlock(&hfi1_mutex);

	/* Depending on the context type, do the appropriate init */
	if (ret > 0) {
		/*
		 * sub-context info can only be set up after the base
		 * context has been completed.
		 */
		ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
					       HFI1_CTXT_BASE_UNINIT,
					       &fd->uctxt->event_flags));
		if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
			ret = -ENOMEM;

		/* The only thing a sub context needs is the user_xxx stuff */
		if (!ret)
			ret = init_user_ctxt(fd);

		if (ret)
			clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);

	} else if (!ret) {
		ret = setup_base_ctxt(fd);
		if (fd->uctxt->subctxt_cnt) {
			/* If there is an error, set the failed bit. */
			if (ret)
				set_bit(HFI1_CTXT_BASE_FAILED,
					&fd->uctxt->event_flags);
			/*
			 * Base context is done, notify anybody using a
			 * sub-context that is waiting for this completion
			 */
			clear_bit(HFI1_CTXT_BASE_UNINIT,
				  &fd->uctxt->event_flags);
			wake_up(&fd->uctxt->wait);
		}
		if (ret)
			deallocate_ctxt(fd->uctxt);
	}

	/* If an error occurred, clear the reference */
	if (ret && fd->uctxt) {
		hfi1_rcd_put(fd->uctxt);
		fd->uctxt = NULL;
	}

	return ret;
}

/*
 * The hfi1_mutex must be held when this function is called. It is
 * necessary to ensure serialized access to the bitmask in_use_ctxts.
 */
static int find_sub_ctxt(struct hfi1_filedata *fd,
			 const struct hfi1_user_info *uinfo)
{
	int i;
	struct hfi1_devdata *dd = fd->dd;
	u16 subctxt;

	for (i = dd->first_dyn_alloc_ctxt; i < dd->num_rcv_contexts; i++) {
		struct hfi1_ctxtdata *uctxt = dd->rcd[i];

		/* Skip ctxts which are not yet open */
		if (!uctxt ||
		    bitmap_empty(uctxt->in_use_ctxts,
				 HFI1_MAX_SHARED_CTXTS))
			continue;

		/* Skip dynamically allocated kernel contexts */
		if (uctxt->sc && (uctxt->sc->type == SC_KERNEL))
			continue;

		/* Skip ctxt if it doesn't match the requested one */
		if (memcmp(uctxt->uuid, uinfo->uuid,
			   sizeof(uctxt->uuid)) ||
		    uctxt->jkey != generate_jkey(current_uid()) ||
		    uctxt->subctxt_id != uinfo->subctxt_id ||
		    uctxt->subctxt_cnt != uinfo->subctxt_cnt)
			continue;

		/* Verify the sharing process matches the master */
		if (uctxt->userversion != uinfo->userversion)
			return -EINVAL;

		/* Find an unused context */
		subctxt = find_first_zero_bit(uctxt->in_use_ctxts,
					      HFI1_MAX_SHARED_CTXTS);
		if (subctxt >= uctxt->subctxt_cnt)
			return -EBUSY;

		fd->uctxt = uctxt;
		fd->subctxt = subctxt;

		hfi1_rcd_get(uctxt);
		__set_bit(fd->subctxt, uctxt->in_use_ctxts);

		return 1;
	}

	return 0;
}

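/*
 * Allocate a new base context: find a free receive context, create its
 * context data on a NUMA node near the selected CPU, and allocate and
 * enable a PIO send context. Sub-context bookkeeping is set up here so
 * later sharers can find their master.
 */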
static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
			 struct hfi1_user_info *uinfo)
{
	struct hfi1_ctxtdata *uctxt;
	unsigned int ctxt;
	int ret, numa;

	if (dd->flags & HFI1_FROZEN) {
		/*
		 * Pick an error that is unique from all other errors
		 * that are returned so the user process knows that
		 * it tried to allocate while the SPC was frozen. It
		 * should be able to retry with success in a short
		 * while.
		 */
		return -EIO;
	}

	/*
	 * This check is sort of redundant to the next EBUSY error. It would
	 * also indicate an inconsistency in the driver if this value was
	 * zero, but there were still contexts available.
	 */
	if (!dd->freectxts)
		return -EBUSY;

	for (ctxt = dd->first_dyn_alloc_ctxt;
	     ctxt < dd->num_rcv_contexts; ctxt++)
		if (!dd->rcd[ctxt])
			break;

	if (ctxt == dd->num_rcv_contexts)
		return -EBUSY;

	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node.
	 */
	fd->rec_cpu_num = hfi1_get_proc_affinity(dd->node);
	if (fd->rec_cpu_num != -1)
		numa = cpu_to_node(fd->rec_cpu_num);
	else
		numa = numa_node_id();
	uctxt = hfi1_create_ctxtdata(dd->pport, ctxt, numa);
	if (!uctxt) {
		dd_dev_err(dd,
			   "Unable to allocate ctxtdata memory, failing open\n");
		return -ENOMEM;
	}
	hfi1_cdbg(PROC, "[%u:%u] pid %u assigned to CPU %d (NUMA %u)",
		  uctxt->ctxt, fd->subctxt, current->pid, fd->rec_cpu_num,
		  uctxt->numa_id);

	/*
	 * Allocate and enable a PIO send context.
	 */
	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
			     uctxt->dd->node);
	if (!uctxt->sc) {
		ret = -ENOMEM;
		goto ctxdata_free;
	}
	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
		  uctxt->sc->hw_context);
	ret = sc_enable(uctxt->sc);
	if (ret)
		goto ctxdata_free;

	/*
	 * Setup sub context resources if the user-level has requested
	 * sub contexts.
	 * This has to be done here so the rest of the sub-contexts find the
	 * proper master.
	 */
	if (uinfo->subctxt_cnt) {
		ret = init_subctxts(uctxt, uinfo);
		/*
		 * On error, we don't need to disable and de-allocate the
		 * send context because it will be done during file close
		 */
		if (ret)
			goto ctxdata_free;
	}
	uctxt->userversion = uinfo->userversion;
	uctxt->flags = hfi1_cap_mask; /* save current flag state */
	init_waitqueue_head(&uctxt->wait);
	strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm));
	memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid));
	uctxt->jkey = generate_jkey(current_uid());
	hfi1_stats.sps_ctxts++;
	/*
	 * Disable ASPM when there are open user/PSM contexts to avoid
	 * issues with ASPM L1 exit latency
	 */
	if (dd->freectxts-- == dd->num_user_contexts)
		aspm_disable_all(dd);
	fd->uctxt = uctxt;

	/* Count the reference for the fd */
	hfi1_rcd_get(uctxt);

	return 0;

ctxdata_free:
	dd->rcd[ctxt] = NULL;
	hfi1_rcd_put(uctxt);
	return ret;
}

static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt)
{
	mutex_lock(&hfi1_mutex);
	hfi1_stats.sps_ctxts--;
	if (++uctxt->dd->freectxts == uctxt->dd->num_user_contexts)
		aspm_enable_all(uctxt->dd);

	/* _rcd_put() should be done after releasing mutex */
	uctxt->dd->rcd[uctxt->ctxt] = NULL;
	mutex_unlock(&hfi1_mutex);
	hfi1_rcd_put(uctxt); /* dd reference */
}

static int init_subctxts(struct hfi1_ctxtdata *uctxt,
			 const struct hfi1_user_info *uinfo)
{
	u16 num_subctxts;

	num_subctxts = uinfo->subctxt_cnt;
	if (num_subctxts > HFI1_MAX_SHARED_CTXTS)
		return -EINVAL;

	uctxt->subctxt_cnt = uinfo->subctxt_cnt;
	uctxt->subctxt_id = uinfo->subctxt_id;
	uctxt->redirect_seq_cnt = 1;
	set_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);

	return 0;
}

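/*
 * Allocate the vmalloc'ed areas that sub-contexts share with the
 * master: a copy of the user registers, the RcvHdr queues, and the
 * eager buffers. These back the SUBCTXT_* mmap types and are faulted
 * in through vma_fault().
 */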
static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;
	u16 num_subctxts = uctxt->subctxt_cnt;

	uctxt->subctxt_uregbase = vmalloc_user(PAGE_SIZE);
	if (!uctxt->subctxt_uregbase)
		return -ENOMEM;

	/* We can take the size of the RcvHdr Queue from the master */
	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
						  num_subctxts);
	if (!uctxt->subctxt_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	uctxt->subctxt_rcvegrbuf = vmalloc_user(uctxt->egrbufs.size *
						num_subctxts);
	if (!uctxt->subctxt_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	return 0;

bail_rhdr:
	vfree(uctxt->subctxt_rcvhdr_base);
	uctxt->subctxt_rcvhdr_base = NULL;
bail_ureg:
	vfree(uctxt->subctxt_uregbase);
	uctxt->subctxt_uregbase = NULL;

	return ret;
}

static void user_init(struct hfi1_ctxtdata *uctxt)
{
	unsigned int rcvctrl_ops = 0;

	/* initialize poll variables... */
	uctxt->urgent = 0;
	uctxt->urgent_poll = 0;

	/*
	 * Now enable the ctxt for receive.
	 * For chips that are set to DMA the tail register to memory
	 * when it changes (and when the update bit transitions from
	 * 0 to 1), we turn it off and then back on.
	 * This will (very briefly) affect any other open ctxts, but the
	 * duration is very short, and therefore isn't an issue. We
	 * explicitly set the in-memory tail copy to 0 beforehand, so we
	 * don't have to wait to be sure the DMA update has happened
	 * (chip resets head/tail to 0 on transition to enable).
	 */
	if (uctxt->rcvhdrtail_kvaddr)
		clear_rcvhdrtail(uctxt);

	/* Setup J_KEY before enabling the context */
	hfi1_set_ctxt_jkey(uctxt->dd, uctxt->ctxt, uctxt->jkey);

	rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
		rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
	/*
	 * Ignore the bit in the flags for now until proper
	 * support for multiple packet per rcv array entry is
	 * added.
	 */
	if (!HFI1_CAP_UGET_MASK(uctxt->flags, MULTI_PKT_EGR))
		rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_EGR_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
	if (HFI1_CAP_UGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
		rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
	/*
	 * The RcvCtxtCtrl.TailUpd bit has to be explicitly written.
	 * We can't rely on the correct value to be set from prior
	 * uses of the chip or ctxt. Therefore, add the rcvctrl op
	 * for both cases.
	 */
	if (HFI1_CAP_UGET_MASK(uctxt->flags, DMA_RTAIL))
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
	else
		rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_DIS;
	hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt->ctxt);
}

static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_ctxt_info cinfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret = 0;

	memset(&cinfo, 0, sizeof(cinfo));
	cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) &
				HFI1_CAP_MISC_MASK) << HFI1_CAP_USER_SHIFT) |
			HFI1_CAP_UGET_MASK(uctxt->flags, MASK) |
			HFI1_CAP_KGET_MASK(uctxt->flags, K2U);
	/* adjust flag if this fd is not able to cache */
	if (!fd->handler)
		cinfo.runtime_flags |= HFI1_CAP_TID_UNMAP; /* no caching */

	cinfo.num_active = hfi1_count_active_units();
	cinfo.unit = uctxt->dd->unit;
	cinfo.ctxt = uctxt->ctxt;
	cinfo.subctxt = fd->subctxt;
	cinfo.rcvtids = roundup(uctxt->egrbufs.alloced,
				uctxt->dd->rcv_entries.group_size) +
		uctxt->expected_count;
	cinfo.credits = uctxt->sc->credits;
	cinfo.numa_node = uctxt->numa_id;
	cinfo.rec_cpu = fd->rec_cpu_num;
	cinfo.send_ctxt = uctxt->sc->hw_context;

	cinfo.egrtids = uctxt->egrbufs.alloced;
	cinfo.rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
	cinfo.rcvhdrq_entsize = uctxt->rcvhdrqentsize << 2;
	cinfo.sdma_ring_size = fd->cq->nentries;
	cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size;

	trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo);
	if (copy_to_user(ubase, &cinfo, sizeof(cinfo)))
		ret = -EFAULT;

	return ret;
}

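/*
 * Per-fd initialization common to base and sub-contexts: the user SDMA
 * queues and the expected receive (TID) state.
 */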
static int init_user_ctxt(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	int ret;

	ret = hfi1_user_sdma_alloc_queues(uctxt, fd);
	if (ret)
		return ret;

	ret = hfi1_user_exp_rcv_init(fd);

	return ret;
}

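/*
 * One-time initialization of a newly allocated base context: RcvHdr
 * queue, eager buffers, receive array groups, and per-fd state, then
 * enable the context for receive via user_init().
 */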
static int setup_base_ctxt(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	int ret = 0;

	hfi1_init_ctxt(uctxt->sc);

	/* Now allocate the RcvHdr queue and eager buffers. */
	ret = hfi1_create_rcvhdrq(dd, uctxt);
	if (ret)
		return ret;

	ret = hfi1_setup_eagerbufs(uctxt);
	if (ret)
		goto setup_failed;

	/* If sub-contexts are enabled, do the appropriate setup */
	if (uctxt->subctxt_cnt)
		ret = setup_subctxt(uctxt);
	if (ret)
		goto setup_failed;

	ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
	if (ret)
		goto setup_failed;

	ret = init_user_ctxt(fd);
	if (ret)
		goto setup_failed;

	user_init(uctxt);

	return 0;

setup_failed:
	/* Call _free_ctxtdata, not _rcd_put(). We still need the context. */
	hfi1_free_ctxtdata(dd, uctxt);
	return ret;
}

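/*
 * Fill in hfi1_base_info for the process: versions, J_KEY, and one
 * mmap token per mappable region, which user level later passes back
 * as the mmap() offset.
 */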
static int get_base_info(struct hfi1_filedata *fd, void __user *ubase,
			 __u32 len)
{
	struct hfi1_base_info binfo;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	ssize_t sz;
	unsigned offset;
	int ret = 0;

	trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt);

	memset(&binfo, 0, sizeof(binfo));
	binfo.hw_version = dd->revision;
	binfo.sw_version = HFI1_KERN_SWVERSION;
	binfo.bthqp = kdeth_qp;
	binfo.jkey = uctxt->jkey;
	/*
	 * If more than 64 contexts are enabled the allocated credit
	 * return will span two or three contiguous pages. Since we only
	 * map the page containing the context's credit return address,
	 * we need to calculate the offset in the proper page.
	 */
	offset = ((u64)uctxt->sc->hw_free -
		  (u64)dd->cr_base[uctxt->numa_id].va) % PAGE_SIZE;
	binfo.sc_credits_addr = HFI1_MMAP_TOKEN(PIO_CRED, uctxt->ctxt,
						fd->subctxt, offset);
	binfo.pio_bufbase = HFI1_MMAP_TOKEN(PIO_BUFS, uctxt->ctxt,
					    fd->subctxt,
					    uctxt->sc->base_addr);
	binfo.pio_bufbase_sop = HFI1_MMAP_TOKEN(PIO_BUFS_SOP,
						uctxt->ctxt,
						fd->subctxt,
						uctxt->sc->base_addr);
	binfo.rcvhdr_bufbase = HFI1_MMAP_TOKEN(RCV_HDRQ, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->rcvhdrq);
	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
					       fd->subctxt,
					       uctxt->egrbufs.rcvtids[0].dma);
	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
						  fd->subctxt, 0);
	/*
	 * user regs are at
	 * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE))
	 */
	binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt,
					     fd->subctxt, 0);
	offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
		    HFI1_MAX_SHARED_CTXTS) + fd->subctxt) *
		  sizeof(*dd->events));
	binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt,
					       fd->subctxt,
					       offset);
	binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt,
					       fd->subctxt,
					       dd->status);
	if (HFI1_CAP_IS_USET(DMA_RTAIL))
		binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt,
							fd->subctxt, 0);
	if (uctxt->subctxt_cnt) {
		binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS,
							 uctxt->ctxt,
							 fd->subctxt, 0);
		binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ,
							  uctxt->ctxt,
							  fd->subctxt, 0);
		binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF,
							  uctxt->ctxt,
							  fd->subctxt, 0);
	}
	sz = (len < sizeof(binfo)) ? len : sizeof(binfo);
	if (copy_to_user(ubase, &binfo, sz))
		ret = -EFAULT;
	return ret;
}

static unsigned int poll_urgent(struct file *fp,
				struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (uctxt->urgent != uctxt->urgent_poll) {
		pollflag = POLLIN | POLLRDNORM;
		uctxt->urgent_poll = uctxt->urgent;
	} else {
		pollflag = 0;
		set_bit(HFI1_CTXT_WAITING_URG, &uctxt->event_flags);
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

static unsigned int poll_next(struct file *fp,
			      struct poll_table_struct *pt)
{
	struct hfi1_filedata *fd = fp->private_data;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned pollflag;

	poll_wait(fp, &uctxt->wait, pt);

	spin_lock_irq(&dd->uctxt_lock);
	if (hdrqempty(uctxt)) {
		set_bit(HFI1_CTXT_WAITING_RCV, &uctxt->event_flags);
		hfi1_rcvctrl(dd, HFI1_RCVCTRL_INTRAVAIL_ENB, uctxt->ctxt);
		pollflag = 0;
	} else {
		pollflag = POLLIN | POLLRDNORM;
	}
	spin_unlock_irq(&dd->uctxt_lock);

	return pollflag;
}

/*
 * Find all user contexts in use, and set the specified bit in their
 * event mask.
 * See also find_ctxt() for a similar use, that is specific to send buffers.
 */
int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit)
{
	struct hfi1_ctxtdata *uctxt;
	struct hfi1_devdata *dd = ppd->dd;
	unsigned ctxt;
	int ret = 0;
	unsigned long flags;

	if (!dd->events) {
		ret = -EINVAL;
		goto done;
	}

	spin_lock_irqsave(&dd->uctxt_lock, flags);
	for (ctxt = dd->first_dyn_alloc_ctxt; ctxt < dd->num_rcv_contexts;
	     ctxt++) {
		uctxt = dd->rcd[ctxt];
		if (uctxt) {
			unsigned long *evs = dd->events +
				(uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
				HFI1_MAX_SHARED_CTXTS;
			int i;
			/*
			 * subctxt_cnt is 0 if not shared, so do base
			 * separately, first, then remaining subctxt, if any
			 */
			set_bit(evtbit, evs);
			for (i = 1; i < uctxt->subctxt_cnt; i++)
				set_bit(evtbit, evs + i);
		}
	}
	spin_unlock_irqrestore(&dd->uctxt_lock, flags);
done:
	return ret;
}

/**
 * manage_rcvq - manage a context's receive queue
 * @uctxt: the context
 * @subctxt: the sub-context
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the context, for use in queue
 * overflow conditions. start_stop == 1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
		       int start_stop)
{
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int rcvctrl_op;

	if (subctxt)
		goto bail;
	/* atomically clear receive enable ctxt. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call. The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.
		 */
		if (uctxt->rcvhdrtail_kvaddr)
			clear_rcvhdrtail(uctxt);
		rcvctrl_op = HFI1_RCVCTRL_CTXT_ENB;
	} else {
		rcvctrl_op = HFI1_RCVCTRL_CTXT_DIS;
	}
	hfi1_rcvctrl(dd, rcvctrl_op, uctxt->ctxt);
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}

/*
 * Clear the event notifier events for this context.
 * User process then performs actions appropriate to bit having been
 * set, if desired, and checks again in future.
 */
static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt,
			  unsigned long events)
{
	int i;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned long *evs;

	if (!dd->events)
		return 0;

	evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) *
			    HFI1_MAX_SHARED_CTXTS) + subctxt;

	for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) {
		if (!test_bit(i, &events))
			continue;
		clear_bit(i, evs);
	}
	return 0;
}

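/*
 * Set the partition key for the context. The key must not be one of
 * the management keys and must be present in the port's pkey table.
 */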
static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey)
{
	int ret = -ENOENT, i, intable = 0;
	struct hfi1_pportdata *ppd = uctxt->ppd;
	struct hfi1_devdata *dd = uctxt->dd;

	if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) {
		ret = -EINVAL;
		goto done;
	}

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++)
		if (pkey == ppd->pkeys[i]) {
			intable = 1;
			break;
		}

	if (intable)
		ret = hfi1_set_ctxt_pkey(dd, uctxt->ctxt, pkey);
done:
	return ret;
}

static void user_remove(struct hfi1_devdata *dd)
{
	hfi1_cdev_cleanup(&dd->user_cdev, &dd->user_device);
}

static int user_add(struct hfi1_devdata *dd)
{
	char name[10];
	int ret;

	snprintf(name, sizeof(name), "%s_%d", class_name(), dd->unit);
	ret = hfi1_cdev_init(dd->unit, name, &hfi1_file_ops,
			     &dd->user_cdev, &dd->user_device,
			     true, &dd->kobj);
	if (ret)
		user_remove(dd);

	return ret;
}

/*
 * Create per-unit files in /dev
 */
int hfi1_device_create(struct hfi1_devdata *dd)
{
	return user_add(dd);
}

/*
 * Remove per-unit files in /dev
 * void, core kernel returns no errors for this stuff
 */
void hfi1_device_remove(struct hfi1_devdata *dd)
{
	user_remove(dd);
}