drivers/infiniband/hw/hfi1/tid_rdma.c
1// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
2/*
3 * Copyright(c) 2018 Intel Corporation.
4 *
5 */
6
7#include "hfi.h"
8#include "qp.h"
9#include "rc.h"
10#include "verbs.h"
11#include "tid_rdma.h"
12#include "exp_rcv.h"
13#include "trace.h"
14
15/**
16 * DOC: TID RDMA READ protocol
17 *
18 * This is an end-to-end protocol at the hfi1 level between two nodes that
19 * improves performance by avoiding data copy on the requester side. It
20 * converts a qualified RDMA READ request into a TID RDMA READ request on
21 * the requester side and thereafter handles the request and response
22 * differently. To be qualified, the RDMA READ request should meet the
23 * following:
24 * -- The total data length should be greater than 256K;
25 * -- The total data length should be a multiple of 4K page size;
26 * -- Each local scatter-gather entry should be 4K page aligned;
27 * -- Each local scatter-gather entry should be a multiple of 4K page size;
28 */
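/*
 * Illustration only, not driver code: a minimal sketch of the
 * qualification test described above for one local scatter-gather
 * entry, using hypothetical total_len/sge_addr/sge_len parameters.
 * The driver's own check (see the references to hfi1_check_sge_align()
 * later in this file) may differ in detail; this just spells out the
 * arithmetic.
 */
#if 0
static bool example_tid_read_qualifies(u64 total_len, u64 sge_addr,
				       u64 sge_len)
{
	if (total_len <= 256 * 1024)	/* total length > 256K */
		return false;
	if (total_len & (4096 - 1))	/* total length multiple of 4K */
		return false;
	if (sge_addr & (4096 - 1))	/* SGE 4K page aligned */
		return false;
	if (sge_len & (4096 - 1))	/* SGE length multiple of 4K */
		return false;
	return true;
}
#endif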
29
30#define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
31#define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
32#define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
33#define RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK BIT_ULL(35)
34#define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
35#define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)
36
37/* Maximum number of packets within a flow generation. */
38#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)
39
40#define GENERATION_MASK 0xFFFFF
41
42static u32 mask_generation(u32 a)
43{
44 return a & GENERATION_MASK;
45}
46
47/* Reserved generation value to set to unused flows for kernel contexts */
48#define KERN_GENERATION_RESERVED mask_generation(U32_MAX)
49
50/*
51 * J_KEY for kernel contexts when TID RDMA is used.
52 * See generate_jkey() in hfi.h for more information.
53 */
54#define TID_RDMA_JKEY 32
55#define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
56#define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
57
58/* Maximum number of segments in flight per QP request. */
59#define TID_RDMA_MAX_READ_SEGS_PER_REQ 6
60#define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
61#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
62 TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
63#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)
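/*
 * Worked example: with the limits above, MAX_REQ is max(6, 4) = 6 and
 * MAX_FLOWS is roundup_pow_of_two(6 + 1) = 8 flow entries per request.
 */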
64
65#define MAX_EXPECTED_PAGES (MAX_EXPECTED_BUFFER / PAGE_SIZE)
66
67#define TID_RDMA_DESTQP_FLOW_SHIFT 11
68#define TID_RDMA_DESTQP_FLOW_MASK 0x1f
69
70#define TID_OPFN_QP_CTXT_MASK 0xff
71#define TID_OPFN_QP_CTXT_SHIFT 56
72#define TID_OPFN_QP_KDETH_MASK 0xff
73#define TID_OPFN_QP_KDETH_SHIFT 48
74#define TID_OPFN_MAX_LEN_MASK 0x7ff
75#define TID_OPFN_MAX_LEN_SHIFT 37
76#define TID_OPFN_TIMEOUT_MASK 0x1f
77#define TID_OPFN_TIMEOUT_SHIFT 32
78#define TID_OPFN_RESERVED_MASK 0x3f
79#define TID_OPFN_RESERVED_SHIFT 26
80#define TID_OPFN_URG_MASK 0x1
81#define TID_OPFN_URG_SHIFT 25
82#define TID_OPFN_VER_MASK 0x7
83#define TID_OPFN_VER_SHIFT 22
84#define TID_OPFN_JKEY_MASK 0x3f
85#define TID_OPFN_JKEY_SHIFT 16
86#define TID_OPFN_MAX_READ_MASK 0x3f
87#define TID_OPFN_MAX_READ_SHIFT 10
88#define TID_OPFN_MAX_WRITE_MASK 0x3f
89#define TID_OPFN_MAX_WRITE_SHIFT 4
90
91/*
92 * OPFN TID layout
93 *
94 * 63 47 31 15
95 * NNNNNNNNKKKKKKKK MMMMMMMMMMMTTTTT DDDDDDUVVVJJJJJJ RRRRRRWWWWWWCCCC
96 * 3210987654321098 7654321098765432 1098765432109876 5432109876543210
97 * N - the context Number
98 * K - the Kdeth_qp
99 * M - Max_len
100 * T - Timeout
101 * D - reserveD
102 * V - version
103 * U - Urg capable
104 * J - Jkey
105 * R - max_Read
106 * W - max_Write
107 * C - Capcode
108 */
109
110static void tid_rdma_trigger_resume(struct work_struct *work);
111static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
112static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
113 gfp_t gfp);
114static void hfi1_init_trdma_req(struct rvt_qp *qp,
115 struct tid_rdma_request *req);
116
117static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
118{
119 return
120 (((u64)p->qp & TID_OPFN_QP_CTXT_MASK) <<
121 TID_OPFN_QP_CTXT_SHIFT) |
122 ((((u64)p->qp >> 16) & TID_OPFN_QP_KDETH_MASK) <<
123 TID_OPFN_QP_KDETH_SHIFT) |
124 (((u64)((p->max_len >> PAGE_SHIFT) - 1) &
125 TID_OPFN_MAX_LEN_MASK) << TID_OPFN_MAX_LEN_SHIFT) |
126 (((u64)p->timeout & TID_OPFN_TIMEOUT_MASK) <<
127 TID_OPFN_TIMEOUT_SHIFT) |
128 (((u64)p->urg & TID_OPFN_URG_MASK) << TID_OPFN_URG_SHIFT) |
129 (((u64)p->jkey & TID_OPFN_JKEY_MASK) << TID_OPFN_JKEY_SHIFT) |
130 (((u64)p->max_read & TID_OPFN_MAX_READ_MASK) <<
131 TID_OPFN_MAX_READ_SHIFT) |
132 (((u64)p->max_write & TID_OPFN_MAX_WRITE_MASK) <<
133 TID_OPFN_MAX_WRITE_SHIFT);
134}
135
136static void tid_rdma_opfn_decode(struct tid_rdma_params *p, u64 data)
137{
138 p->max_len = (((data >> TID_OPFN_MAX_LEN_SHIFT) &
139 TID_OPFN_MAX_LEN_MASK) + 1) << PAGE_SHIFT;
140 p->jkey = (data >> TID_OPFN_JKEY_SHIFT) & TID_OPFN_JKEY_MASK;
141 p->max_write = (data >> TID_OPFN_MAX_WRITE_SHIFT) &
142 TID_OPFN_MAX_WRITE_MASK;
143 p->max_read = (data >> TID_OPFN_MAX_READ_SHIFT) &
144 TID_OPFN_MAX_READ_MASK;
145 p->qp =
146 ((((data >> TID_OPFN_QP_KDETH_SHIFT) & TID_OPFN_QP_KDETH_MASK)
147 << 16) |
148 ((data >> TID_OPFN_QP_CTXT_SHIFT) & TID_OPFN_QP_CTXT_MASK));
149 p->urg = (data >> TID_OPFN_URG_SHIFT) & TID_OPFN_URG_MASK;
150 p->timeout = (data >> TID_OPFN_TIMEOUT_SHIFT) & TID_OPFN_TIMEOUT_MASK;
151}
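/*
 * Illustration only, not driver code: a round-trip sketch of the
 * encode/decode pair above. The parameter values are hypothetical and
 * assume 4K pages; the point is simply that tid_rdma_opfn_decode()
 * recovers every field that tid_rdma_opfn_encode() packs into the
 * 64-bit OPFN value (reserved bits and the capcode are not part of
 * this helper pair).
 */
#if 0
static void example_opfn_round_trip(void)
{
	struct tid_rdma_params in = {
		.qp = (0x80 << 16) | 5,	/* KDETH QP high byte, context 5 */
		.max_len = 256 * 1024,	/* packed as (max_len >> PAGE_SHIFT) - 1 */
		.jkey = 32,
		.max_read = 6,
		.max_write = 4,
		.timeout = 17,
		.urg = 0,
	};
	struct tid_rdma_params out = {};

	tid_rdma_opfn_decode(&out, tid_rdma_opfn_encode(&in));
	/* out now matches in for all of the fields set above */
}
#endif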
152
153void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p)
154{
155 struct hfi1_qp_priv *priv = qp->priv;
156
157 p->qp = (kdeth_qp << 16) | priv->rcd->ctxt;
158 p->max_len = TID_RDMA_MAX_SEGMENT_SIZE;
159 p->jkey = priv->rcd->jkey;
160 p->max_read = TID_RDMA_MAX_READ_SEGS_PER_REQ;
161 p->max_write = TID_RDMA_MAX_WRITE_SEGS_PER_REQ;
162 p->timeout = qp->timeout;
163 p->urg = is_urg_masked(priv->rcd);
164}
165
166bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data)
167{
168 struct hfi1_qp_priv *priv = qp->priv;
169
170 *data = tid_rdma_opfn_encode(&priv->tid_rdma.local);
171 return true;
172}
173
174bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data)
175{
176 struct hfi1_qp_priv *priv = qp->priv;
177 struct tid_rdma_params *remote, *old;
178 bool ret = true;
179
180 old = rcu_dereference_protected(priv->tid_rdma.remote,
181 lockdep_is_held(&priv->opfn.lock));
182 data &= ~0xfULL;
183 /*
184 * If data passed in is zero, return true so as not to continue the
185 * negotiation process
186 */
187 if (!data || !HFI1_CAP_IS_KSET(TID_RDMA))
188 goto null;
189 /*
190 * If kzalloc fails, return false. This will result in:
191 * * at the requester a new OPFN request being generated to retry
192 * the negotiation
193 * * at the responder, 0 being returned to the requester so as to
194 * disable TID RDMA at both the requester and the responder
195 */
196 remote = kzalloc(sizeof(*remote), GFP_ATOMIC);
197 if (!remote) {
198 ret = false;
199 goto null;
200 }
201
202 tid_rdma_opfn_decode(remote, data);
203 priv->tid_timer_timeout_jiffies =
204 usecs_to_jiffies((((4096UL * (1UL << remote->timeout)) /
205 1000UL) << 3) * 7);
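	/*
	 * Worked example for the line above: remote->timeout is in IB
	 * units of 4.096 usec * 2^timeout, so for timeout = 17 the base
	 * interval is 4096 ns * 2^17 ~= 536.9 msec; scaling by 8 * 7 = 56
	 * yields a TID timer of roughly 30 seconds.
	 */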
206 trace_hfi1_opfn_param(qp, 0, &priv->tid_rdma.local);
207 trace_hfi1_opfn_param(qp, 1, remote);
208 rcu_assign_pointer(priv->tid_rdma.remote, remote);
209 /*
210 * A TID RDMA READ request's segment size is not equal to
211 * remote->max_len only when the request's data length is smaller
212 * than remote->max_len. In that case, there will be only one segment.
213 * Therefore, when priv->pkts_ps is used to calculate req->cur_seg
214 * during retry, it will lead to req->cur_seg = 0, which is exactly
215 * what is expected.
216 */
217 priv->pkts_ps = (u16)rvt_div_mtu(qp, remote->max_len);
218 priv->timeout_shift = ilog2(priv->pkts_ps - 1) + 1;
219 goto free;
220null:
221 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
222 priv->timeout_shift = 0;
223free:
224 if (old)
225 kfree_rcu(old, rcu_head);
226 return ret;
227}
228
229bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data)
230{
231 bool ret;
232
233 ret = tid_rdma_conn_reply(qp, *data);
234 *data = 0;
235 /*
236 * If tid_rdma_conn_reply() returns error, set *data as 0 to indicate
237 * TID RDMA could not be enabled. This will result in TID RDMA being
238 * disabled at the requester too.
239 */
240 if (ret)
241 (void)tid_rdma_conn_req(qp, data);
242 return ret;
243}
244
245void tid_rdma_conn_error(struct rvt_qp *qp)
246{
247 struct hfi1_qp_priv *priv = qp->priv;
248 struct tid_rdma_params *old;
249
250 old = rcu_dereference_protected(priv->tid_rdma.remote,
251 lockdep_is_held(&priv->opfn.lock));
252 RCU_INIT_POINTER(priv->tid_rdma.remote, NULL);
253 if (old)
254 kfree_rcu(old, rcu_head);
255}
256
257/* This is called at context initialization time */
258int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
259{
260 if (reinit)
261 return 0;
262
263 BUILD_BUG_ON(TID_RDMA_JKEY < HFI1_KERNEL_MIN_JKEY);
264 BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
265 rcd->jkey = TID_RDMA_JKEY;
266 hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
267 return hfi1_alloc_ctxt_rcv_groups(rcd);
268}
269
270/**
271 * qp_to_rcd - determine the receive context used by a qp
272 * @qp - the qp
273 *
274 * This routine returns the receive context associated
275 * with a qp's qpn.
276 *
277 * Returns the context.
278 */
279static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
280 struct rvt_qp *qp)
281{
282 struct hfi1_ibdev *verbs_dev = container_of(rdi,
283 struct hfi1_ibdev,
284 rdi);
285 struct hfi1_devdata *dd = container_of(verbs_dev,
286 struct hfi1_devdata,
287 verbs_dev);
288 unsigned int ctxt;
289
290 if (qp->ibqp.qp_num == 0)
291 ctxt = 0;
292 else
293 ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
294 (dd->n_krcv_queues - 1)) + 1;
295
296 return dd->rcd[ctxt];
297}
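/*
 * Worked example (hypothetical numbers): with dd->qos_shift = 1 and
 * dd->n_krcv_queues = 9, QP number 0x1234 maps to receive context
 * ((0x1234 >> 1) % 8) + 1 = 3, while QP0 always uses context 0.
 */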
298
299int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
300 struct ib_qp_init_attr *init_attr)
301{
302 struct hfi1_qp_priv *qpriv = qp->priv;
303 int i, ret;
304
305 qpriv->rcd = qp_to_rcd(rdi, qp);
306
307 spin_lock_init(&qpriv->opfn.lock);
308 INIT_WORK(&qpriv->opfn.opfn_work, opfn_send_conn_request);
309 INIT_WORK(&qpriv->tid_rdma.trigger_work, tid_rdma_trigger_resume);
310 qpriv->flow_state.psn = 0;
311 qpriv->flow_state.index = RXE_NUM_TID_FLOWS;
312 qpriv->flow_state.last_index = RXE_NUM_TID_FLOWS;
313 qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
314 INIT_LIST_HEAD(&qpriv->tid_wait);
315
316 if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
317 struct hfi1_devdata *dd = qpriv->rcd->dd;
318
319 qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
320 sizeof(*qpriv->pages),
321 GFP_KERNEL, dd->node);
322 if (!qpriv->pages)
323 return -ENOMEM;
324 for (i = 0; i < qp->s_size; i++) {
325 struct hfi1_swqe_priv *priv;
326 struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
327
328 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
329 dd->node);
330 if (!priv)
331 return -ENOMEM;
332
333 hfi1_init_trdma_req(qp, &priv->tid_req);
334 priv->tid_req.e.swqe = wqe;
335 wqe->priv = priv;
336 }
337 for (i = 0; i < rvt_max_atomic(rdi); i++) {
338 struct hfi1_ack_priv *priv;
339
340 priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
341 dd->node);
342 if (!priv)
343 return -ENOMEM;
344
345 hfi1_init_trdma_req(qp, &priv->tid_req);
346 priv->tid_req.e.ack = &qp->s_ack_queue[i];
347
348 ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
349 GFP_KERNEL);
350 if (ret) {
351 kfree(priv);
352 return ret;
353 }
354 qp->s_ack_queue[i].priv = priv;
355 }
356 }
357
358 return 0;
359}
360
361void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
362{
363 struct hfi1_qp_priv *qpriv = qp->priv;
364 struct rvt_swqe *wqe;
365 u32 i;
366
367 if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
368 for (i = 0; i < qp->s_size; i++) {
369 wqe = rvt_get_swqe_ptr(qp, i);
370 kfree(wqe->priv);
371 wqe->priv = NULL;
372 }
373 for (i = 0; i < rvt_max_atomic(rdi); i++) {
374 struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
375
376 if (priv)
377 hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
378 kfree(priv);
379 qp->s_ack_queue[i].priv = NULL;
380 }
381 cancel_work_sync(&qpriv->opfn.opfn_work);
382 kfree(qpriv->pages);
383 qpriv->pages = NULL;
384 }
385}
386
387/* Flow and tid waiter functions */
388/**
389 * DOC: lock ordering
390 *
391 * There are two locks involved with the queuing
392 * routines: the qp s_lock and the exp_lock.
393 *
394 * Since the tid space allocation is called from
395 * the send engine, the qp s_lock is already held.
396 *
397 * The allocation routines will get the exp_lock.
398 *
399 * The first_qp() call is provided to allow the head of
400 * the rcd wait queue to be fetched under the exp_lock and
401 * followed by a drop of the exp_lock.
402 *
403 * Any qp in the wait list will have the qp reference count held
404 * to hold the qp in memory.
405 */
406
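/*
 * Illustration only, not driver code: a condensed sketch of the lock
 * ordering described above, modeled on hfi1_kern_setup_hw_flow() and
 * hfi1_kern_exp_rcv_setup() further down in this file. example_alloc()
 * is a hypothetical stand-in for the actual resource allocation done
 * under the exp_lock.
 */
#if 0
static int example_tid_alloc_pattern(struct hfi1_ctxtdata *rcd,
				     struct tid_queue *queue,
				     struct rvt_qp *qp)
	__must_hold(&qp->s_lock)
{
	struct rvt_qp *fqp;
	unsigned long flags;

	/* qp->s_lock is already held by the send engine */
	spin_lock_irqsave(&rcd->exp_lock, flags);
	if (kernel_tid_waiters(rcd, queue, qp) || example_alloc(qp) < 0) {
		queue_qp_for_tid_wait(rcd, queue, qp);
		spin_unlock_irqrestore(&rcd->exp_lock, flags);
		return -EAGAIN;
	}
	dequeue_tid_waiter(rcd, queue, qp);
	/* fetch the wait-queue head (and its reference) under exp_lock */
	fqp = first_qp(rcd, queue);
	spin_unlock_irqrestore(&rcd->exp_lock, flags);
	/* dispatches or drops the reference taken by first_qp() */
	tid_rdma_schedule_tid_wakeup(fqp);
	return 0;
}
#endif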
407/*
408 * return head of rcd wait list
409 *
410 * Must hold the exp_lock.
411 *
412 * Get a reference to the QP to hold the QP in memory.
413 *
414 * The caller must release the reference when the local
415 * is no longer being used.
416 */
417static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd,
418 struct tid_queue *queue)
419 __must_hold(&rcd->exp_lock)
420{
421 struct hfi1_qp_priv *priv;
422
423 lockdep_assert_held(&rcd->exp_lock);
424 priv = list_first_entry_or_null(&queue->queue_head,
425 struct hfi1_qp_priv,
426 tid_wait);
427 if (!priv)
428 return NULL;
429 rvt_get_qp(priv->owner);
430 return priv->owner;
431}
432
433/**
434 * kernel_tid_waiters - determine rcd wait
435 * @rcd: the receive context
436 * @qp: the head of the qp being processed
437 *
438 * This routine will return false IFF
439 * the list is NULL or the head of the
440 * list is the indicated qp.
441 *
442 * Must hold the qp s_lock and the exp_lock.
443 *
444 * Return:
445 * false if either of the conditions below is satisfied:
446 * 1. The list is empty or
447 * 2. The indicated qp is at the head of the list and the
448 * HFI1_S_WAIT_TID_SPACE bit is set in qp->s_flags.
449 * true is returned otherwise.
450 */
451static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd,
452 struct tid_queue *queue, struct rvt_qp *qp)
453 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
454{
455 struct rvt_qp *fqp;
456 bool ret = true;
457
458 lockdep_assert_held(&qp->s_lock);
459 lockdep_assert_held(&rcd->exp_lock);
460 fqp = first_qp(rcd, queue);
461 if (!fqp || (fqp == qp && (qp->s_flags & HFI1_S_WAIT_TID_SPACE)))
462 ret = false;
463 rvt_put_qp(fqp);
464 return ret;
465}
466
467/**
468 * dequeue_tid_waiter - dequeue the qp from the list
469 * @qp - the qp to remove from the wait list
470 *
471 * This routine removes the indicated qp from the
472 * wait list if it is there.
473 *
474 * This should be done after the hardware flow and
475 * tid array resources have been allocated.
476 *
477 * Must hold the qp s_lock and the rcd exp_lock.
478 *
479 * It assumes the s_lock to protect the s_flags
480 * field and to reliably test the HFI1_S_WAIT_TID_SPACE flag.
481 */
482static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd,
483 struct tid_queue *queue, struct rvt_qp *qp)
484 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
485{
486 struct hfi1_qp_priv *priv = qp->priv;
487
488 lockdep_assert_held(&qp->s_lock);
489 lockdep_assert_held(&rcd->exp_lock);
490 if (list_empty(&priv->tid_wait))
491 return;
492 list_del_init(&priv->tid_wait);
493 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
494 queue->dequeue++;
495 rvt_put_qp(qp);
496}
497
498/**
499 * queue_qp_for_tid_wait - suspend QP on tid space
500 * @rcd: the receive context
501 * @qp: the qp
502 *
503 * The qp is inserted at the tail of the rcd
504 * wait queue and the HFI1_S_WAIT_TID_SPACE s_flag is set.
505 *
506 * Must hold the qp s_lock and the exp_lock.
507 */
508static void queue_qp_for_tid_wait(struct hfi1_ctxtdata *rcd,
509 struct tid_queue *queue, struct rvt_qp *qp)
510 __must_hold(&rcd->exp_lock) __must_hold(&qp->s_lock)
511{
512 struct hfi1_qp_priv *priv = qp->priv;
513
514 lockdep_assert_held(&qp->s_lock);
515 lockdep_assert_held(&rcd->exp_lock);
516 if (list_empty(&priv->tid_wait)) {
517 qp->s_flags |= HFI1_S_WAIT_TID_SPACE;
518 list_add_tail(&priv->tid_wait, &queue->queue_head);
519 priv->tid_enqueue = ++queue->enqueue;
520 rcd->dd->verbs_dev.n_tidwait++;
521 trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TID_SPACE);
522 rvt_get_qp(qp);
523 }
524}
525
526/**
527 * __trigger_tid_waiter - trigger tid waiter
528 * @qp: the qp
529 *
530 * This is a private entrance to schedule the qp
531 * assuming the caller is holding the qp->s_lock.
532 */
533static void __trigger_tid_waiter(struct rvt_qp *qp)
534 __must_hold(&qp->s_lock)
535{
536 lockdep_assert_held(&qp->s_lock);
537 if (!(qp->s_flags & HFI1_S_WAIT_TID_SPACE))
538 return;
539 trace_hfi1_qpwakeup(qp, HFI1_S_WAIT_TID_SPACE);
540 hfi1_schedule_send(qp);
541}
542
543/**
544 * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp
545 * @qp - the qp
546 *
547 * trigger a schedule for a waiting qp in a deadlock
548 * safe manner. The qp reference is held prior
549 * to this call via first_qp().
550 *
551 * If the qp trigger was already scheduled (!rval)
552 * the reference is dropped; otherwise the resume
553 * or the destroy cancel will dispatch the reference.
554 */
555static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp)
556{
557 struct hfi1_qp_priv *priv;
558 struct hfi1_ibport *ibp;
559 struct hfi1_pportdata *ppd;
560 struct hfi1_devdata *dd;
561 bool rval;
562
563 if (!qp)
564 return;
565
566 priv = qp->priv;
567 ibp = to_iport(qp->ibqp.device, qp->port_num);
568 ppd = ppd_from_ibp(ibp);
569 dd = dd_from_ibdev(qp->ibqp.device);
570
571 rval = queue_work_on(priv->s_sde ?
572 priv->s_sde->cpu :
573 cpumask_first(cpumask_of_node(dd->node)),
574 ppd->hfi1_wq,
575 &priv->tid_rdma.trigger_work);
576 if (!rval)
577 rvt_put_qp(qp);
578}
579
580/**
581 * tid_rdma_trigger_resume - field a trigger work request
582 * @work - the work item
583 *
584 * Complete the off qp trigger processing by directly
585 * calling the progress routine.
586 */
587static void tid_rdma_trigger_resume(struct work_struct *work)
588{
589 struct tid_rdma_qp_params *tr;
590 struct hfi1_qp_priv *priv;
591 struct rvt_qp *qp;
592
593 tr = container_of(work, struct tid_rdma_qp_params, trigger_work);
594 priv = container_of(tr, struct hfi1_qp_priv, tid_rdma);
595 qp = priv->owner;
596 spin_lock_irq(&qp->s_lock);
597 if (qp->s_flags & HFI1_S_WAIT_TID_SPACE) {
598 spin_unlock_irq(&qp->s_lock);
599 hfi1_do_send(priv->owner, true);
600 } else {
601 spin_unlock_irq(&qp->s_lock);
602 }
603 rvt_put_qp(qp);
604}
605
606/**
607 * _tid_rdma_flush_wait - unwind any tid space wait
608 *
609 * This is called when resetting a qp to
610 * allow a destroy or reset to get rid
611 * of any tid space linkage and reference counts.
612 */
613static void _tid_rdma_flush_wait(struct rvt_qp *qp, struct tid_queue *queue)
614 __must_hold(&qp->s_lock)
615{
616 struct hfi1_qp_priv *priv;
617
618 if (!qp)
619 return;
620 lockdep_assert_held(&qp->s_lock);
621 priv = qp->priv;
622 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
623 spin_lock(&priv->rcd->exp_lock);
624 if (!list_empty(&priv->tid_wait)) {
625 list_del_init(&priv->tid_wait);
626 qp->s_flags &= ~HFI1_S_WAIT_TID_SPACE;
627 queue->dequeue++;
628 rvt_put_qp(qp);
629 }
630 spin_unlock(&priv->rcd->exp_lock);
631}
632
633void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
634 __must_hold(&qp->s_lock)
635{
636 struct hfi1_qp_priv *priv = qp->priv;
637
638 _tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
639 _tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
640}
641
642/* Flow functions */
643/**
644 * kern_reserve_flow - allocate a hardware flow
645 * @rcd - the context to use for allocation
646 * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to
647 * signify "don't care".
648 *
649 * Use a bit mask based allocation to reserve a hardware
650 * flow for use in receiving KDETH data packets. If a preferred flow is
651 * specified the function will attempt to reserve that flow again, if
652 * available.
653 *
654 * The exp_lock must be held.
655 *
656 * Return:
657 * On success: a value between 0 and RXE_NUM_TID_FLOWS - 1
658 * On failure: -EAGAIN
659 */
660static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
661 __must_hold(&rcd->exp_lock)
662{
663 int nr;
664
665 /* Attempt to reserve the preferred flow index */
666 if (last >= 0 && last < RXE_NUM_TID_FLOWS &&
667 !test_and_set_bit(last, &rcd->flow_mask))
668 return last;
669
670 nr = ffz(rcd->flow_mask);
671 BUILD_BUG_ON(RXE_NUM_TID_FLOWS >=
672 (sizeof(rcd->flow_mask) * BITS_PER_BYTE));
673 if (nr > (RXE_NUM_TID_FLOWS - 1))
674 return -EAGAIN;
675 set_bit(nr, &rcd->flow_mask);
676 return nr;
677}
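/*
 * Worked example: with rcd->flow_mask = 0b0101 and a preferred
 * last = 2, bit 2 is already taken, so the preferred slot is skipped;
 * ffz() then returns 1, bit 1 is set and flow 1 is reserved. Once all
 * RXE_NUM_TID_FLOWS bits are set, the function returns -EAGAIN.
 */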
678
679static void kern_set_hw_flow(struct hfi1_ctxtdata *rcd, u32 generation,
680 u32 flow_idx)
681{
682 u64 reg;
683
684 reg = ((u64)generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
685 RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK |
686 RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK |
687 RCV_TID_FLOW_TABLE_CTRL_KEEP_ON_GEN_ERR_SMASK |
688 RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK |
689 RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK;
690
691 if (generation != KERN_GENERATION_RESERVED)
692 reg |= RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK;
693
694 write_uctxt_csr(rcd->dd, rcd->ctxt,
695 RCV_TID_FLOW_TABLE + 8 * flow_idx, reg);
696}
697
698static u32 kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
699 __must_hold(&rcd->exp_lock)
700{
701 u32 generation = rcd->flows[flow_idx].generation;
702
703 kern_set_hw_flow(rcd, generation, flow_idx);
704 return generation;
705}
706
707static u32 kern_flow_generation_next(u32 gen)
708{
709 u32 generation = mask_generation(gen + 1);
710
711 if (generation == KERN_GENERATION_RESERVED)
712 generation = mask_generation(generation + 1);
713 return generation;
714}
715
716static void kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, u32 flow_idx)
717 __must_hold(&rcd->exp_lock)
718{
719 rcd->flows[flow_idx].generation =
720 kern_flow_generation_next(rcd->flows[flow_idx].generation);
721 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, flow_idx);
722}
723
724int hfi1_kern_setup_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
725{
726 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
727 struct tid_flow_state *fs = &qpriv->flow_state;
728 struct rvt_qp *fqp;
729 unsigned long flags;
730 int ret = 0;
731
732 /* The QP already has an allocated flow */
733 if (fs->index != RXE_NUM_TID_FLOWS)
734 return ret;
735
736 spin_lock_irqsave(&rcd->exp_lock, flags);
737 if (kernel_tid_waiters(rcd, &rcd->flow_queue, qp))
738 goto queue;
739
740 ret = kern_reserve_flow(rcd, fs->last_index);
741 if (ret < 0)
742 goto queue;
743 fs->index = ret;
744 fs->last_index = fs->index;
745
746 /* Generation received in a RESYNC overrides default flow generation */
747 if (fs->generation != KERN_GENERATION_RESERVED)
748 rcd->flows[fs->index].generation = fs->generation;
749 fs->generation = kern_setup_hw_flow(rcd, fs->index);
750 fs->psn = 0;
751 fs->flags = 0;
752 dequeue_tid_waiter(rcd, &rcd->flow_queue, qp);
753 /* get head before dropping lock */
754 fqp = first_qp(rcd, &rcd->flow_queue);
755 spin_unlock_irqrestore(&rcd->exp_lock, flags);
756
757 tid_rdma_schedule_tid_wakeup(fqp);
758 return 0;
759queue:
760 queue_qp_for_tid_wait(rcd, &rcd->flow_queue, qp);
761 spin_unlock_irqrestore(&rcd->exp_lock, flags);
762 return -EAGAIN;
763}
764
765void hfi1_kern_clear_hw_flow(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp)
766{
767 struct hfi1_qp_priv *qpriv = (struct hfi1_qp_priv *)qp->priv;
768 struct tid_flow_state *fs = &qpriv->flow_state;
769 struct rvt_qp *fqp;
770 unsigned long flags;
771
772 if (fs->index >= RXE_NUM_TID_FLOWS)
773 return;
774 spin_lock_irqsave(&rcd->exp_lock, flags);
775 kern_clear_hw_flow(rcd, fs->index);
776 clear_bit(fs->index, &rcd->flow_mask);
777 fs->index = RXE_NUM_TID_FLOWS;
778 fs->psn = 0;
779 fs->generation = KERN_GENERATION_RESERVED;
780
781 /* get head before dropping lock */
782 fqp = first_qp(rcd, &rcd->flow_queue);
783 spin_unlock_irqrestore(&rcd->exp_lock, flags);
784
785 if (fqp == qp) {
786 __trigger_tid_waiter(fqp);
787 rvt_put_qp(fqp);
788 } else {
789 tid_rdma_schedule_tid_wakeup(fqp);
790 }
791}
792
793void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
794{
795 int i;
796
797 for (i = 0; i < RXE_NUM_TID_FLOWS; i++) {
798 rcd->flows[i].generation = mask_generation(prandom_u32());
799 kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
800 }
801}
802
803/* TID allocation functions */
804static u8 trdma_pset_order(struct tid_rdma_pageset *s)
805{
806 u8 count = s->count;
807
808 return ilog2(count) + 1;
809}
810
811/**
812 * tid_rdma_find_phys_blocks_4k - get groups based on mr info
813 * @npages - number of pages
814 * @pages - pointer to an array of page structs
815 * @list - page set array to return
816 *
817 * This routine returns the number of groups associated with
818 * the current sge information. This implementation is based
819 * on the expected receive find_phys_blocks() adjusted to
820 * use the MR information vs. the pfn.
821 *
822 * Return:
823 * the number of RcvArray entries
824 */
825static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
826 struct page **pages,
827 u32 npages,
828 struct tid_rdma_pageset *list)
829{
830 u32 pagecount, pageidx, setcount = 0, i;
831 void *vaddr, *this_vaddr;
832
833 if (!npages)
834 return 0;
835
836 /*
837 * Look for sets of physically contiguous pages in the user buffer.
838 * This will allow us to optimize Expected RcvArray entry usage by
839 * using the bigger supported sizes.
840 */
841 vaddr = page_address(pages[0]);
842 trace_hfi1_tid_flow_page(flow->req->qp, flow, 0, 0, 0, vaddr);
843 for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
844 this_vaddr = i < npages ? page_address(pages[i]) : NULL;
845 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 0, 0,
846 this_vaddr);
847 /*
848 * If the vaddr's are not sequential, pages are not physically
849 * contiguous.
850 */
851 if (this_vaddr != (vaddr + PAGE_SIZE)) {
852 /*
853 * At this point we have to loop over the set of
854 * physically contiguous pages and break them down into
855 * sizes supported by the HW.
856 * There are two main constraints:
857 * 1. The max buffer size is MAX_EXPECTED_BUFFER.
858 * If the total set size is bigger than that
859 * program only a MAX_EXPECTED_BUFFER chunk.
860 * 2. The buffer size has to be a power of two. If
861 * it is not, round down to the closest power of
862 * 2 and program that size.
863 */
864 while (pagecount) {
865 int maxpages = pagecount;
866 u32 bufsize = pagecount * PAGE_SIZE;
867
868 if (bufsize > MAX_EXPECTED_BUFFER)
869 maxpages =
870 MAX_EXPECTED_BUFFER >>
871 PAGE_SHIFT;
872 else if (!is_power_of_2(bufsize))
873 maxpages =
874 rounddown_pow_of_two(bufsize) >>
875 PAGE_SHIFT;
876
877 list[setcount].idx = pageidx;
878 list[setcount].count = maxpages;
879 trace_hfi1_tid_pageset(flow->req->qp, setcount,
880 list[setcount].idx,
881 list[setcount].count);
882 pagecount -= maxpages;
883 pageidx += maxpages;
884 setcount++;
885 }
886 pageidx = i;
887 pagecount = 1;
888 vaddr = this_vaddr;
889 } else {
890 vaddr += PAGE_SIZE;
891 pagecount++;
892 }
893 }
894 /* ensure we always return an even number of sets */
895 if (setcount & 1)
896 list[setcount++].count = 0;
897 return setcount;
898}
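/*
 * Worked example for the chunking loop above (assuming the run fits
 * under MAX_EXPECTED_BUFFER): a run of 13 physically contiguous 4K
 * pages (52K) is not a power-of-two size, so it is emitted as pagesets
 * of 8 pages (32K), 4 pages (16K) and 1 page (4K), each a power-of-two
 * buffer size that an expected-receive entry can describe.
 */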
899
900/**
901 * tid_flush_pages - dump out pages into pagesets
902 * @list - list of pagesets
903 * @idx - pointer to current page index
904 * @pages - number of pages to dump
905 * @sets - current number of pagesets
906 *
907 * This routine flushes out accumulated pages.
908 *
909 * To ensure an even number of sets the
910 * code may add a filler.
911 *
912 * This can happen when pages is not
913 * a power of 2 or pages is a power of 2
914 * less than the maximum pages.
915 *
916 * Return:
917 * The new number of sets
918 */
919
920static u32 tid_flush_pages(struct tid_rdma_pageset *list,
921 u32 *idx, u32 pages, u32 sets)
922{
923 while (pages) {
924 u32 maxpages = pages;
925
926 if (maxpages > MAX_EXPECTED_PAGES)
927 maxpages = MAX_EXPECTED_PAGES;
928 else if (!is_power_of_2(maxpages))
929 maxpages = rounddown_pow_of_two(maxpages);
930 list[sets].idx = *idx;
931 list[sets++].count = maxpages;
932 *idx += maxpages;
933 pages -= maxpages;
934 }
935 /* might need a filler */
936 if (sets & 1)
937 list[sets++].count = 0;
938 return sets;
939}
940
941/**
942 * tid_rdma_find_phys_blocks_8k - get groups based on mr info
943 * @pages - pointer to an array of page structs
944 * @npages - number of pages
945 * @list - page set array to return
946 *
947 * This routine parses an array of pages to compute pagesets
948 * in an 8k compatible way.
949 *
950 * Pages are tested two at a time: i and i + 1 for contiguous
951 * pages, and then i - 1 and i for contiguous pages.
952 *
953 * If any condition is false, any accumulated pages are flushed and
954 * v0,v1 are emitted as separate PAGE_SIZE pagesets
955 *
956 * Otherwise, the current 8k is totaled for a future flush.
957 *
958 * Return:
959 * The number of pagesets
960 * list set with the returned number of pagesets
961 *
962 */
963static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
964 struct page **pages,
965 u32 npages,
966 struct tid_rdma_pageset *list)
967{
968 u32 idx, sets = 0, i;
969 u32 pagecnt = 0;
970 void *v0, *v1, *vm1;
971
972 if (!npages)
973 return 0;
974 for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
975 /* get a new v0 */
976 v0 = page_address(pages[i]);
977 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 0, v0);
978 v1 = i + 1 < npages ?
979 page_address(pages[i + 1]) : NULL;
980 trace_hfi1_tid_flow_page(flow->req->qp, flow, i, 1, 1, v1);
981 /* compare i, i + 1 vaddr */
982 if (v1 != (v0 + PAGE_SIZE)) {
983 /* flush out pages */
984 sets = tid_flush_pages(list, &idx, pagecnt, sets);
985 /* output v0,v1 as two pagesets */
986 list[sets].idx = idx++;
987 list[sets++].count = 1;
988 if (v1) {
989 list[sets].count = 1;
990 list[sets++].idx = idx++;
991 } else {
992 list[sets++].count = 0;
993 }
994 vm1 = NULL;
995 pagecnt = 0;
996 continue;
997 }
998 /* i,i+1 consecutive, look at i-1,i */
999 if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
1000 /* flush out pages */
1001 sets = tid_flush_pages(list, &idx, pagecnt, sets);
1002 pagecnt = 0;
1003 }
1004 /* pages will always be a multiple of 8k */
1005 pagecnt += 2;
1006 /* save i-1 */
1007 vm1 = v1;
1008 /* move to next pair */
1009 }
1010 /* dump residual pages at end */
1011 sets = tid_flush_pages(list, &idx, npages - idx, sets);
1012 /* by design cannot be odd sets */
1013 WARN_ON(sets & 1);
1014 return sets;
1015}
1016
1017/**
1018 * Find pages for one segment of a sge array represented by @ss. The function
1019 * does not check the sge; the sge must have been checked for alignment with a
1020 * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of
1021 * rvt_lkey_ok and rvt_rkey_ok. Also, the function only modifies the local sge
1022 * copy maintained in @ss->sge, the original sge is not modified.
1023 *
1024 * Unlike IB RDMA WRITE, we can't decrement ss->num_sge here because we are not
1025 * releasing the MR reference count at the same time. Otherwise, we'll "leak"
1026 * references to the MR. This difference requires that we keep track of progress
1027 * into the sg_list. This is done by the cur_seg cursor in the tid_rdma_request
1028 * structure.
1029 */
1030static u32 kern_find_pages(struct tid_rdma_flow *flow,
1031 struct page **pages,
1032 struct rvt_sge_state *ss, bool *last)
1033{
1034 struct tid_rdma_request *req = flow->req;
1035 struct rvt_sge *sge = &ss->sge;
1036 u32 length = flow->req->seg_len;
1037 u32 len = PAGE_SIZE;
1038 u32 i = 0;
1039
1040 while (length && req->isge < ss->num_sge) {
1041 pages[i++] = virt_to_page(sge->vaddr);
1042
1043 sge->vaddr += len;
1044 sge->length -= len;
1045 sge->sge_length -= len;
1046 if (!sge->sge_length) {
1047 if (++req->isge < ss->num_sge)
1048 *sge = ss->sg_list[req->isge - 1];
1049 } else if (sge->length == 0 && sge->mr->lkey) {
1050 if (++sge->n >= RVT_SEGSZ) {
1051 ++sge->m;
1052 sge->n = 0;
1053 }
1054 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
1055 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
1056 }
1057 length -= len;
1058 }
1059
1060 flow->length = flow->req->seg_len - length;
1061 *last = req->isge == ss->num_sge ? false : true;
1062 return i;
1063}
1064
1065static void dma_unmap_flow(struct tid_rdma_flow *flow)
1066{
1067 struct hfi1_devdata *dd;
1068 int i;
1069 struct tid_rdma_pageset *pset;
1070
1071 dd = flow->req->rcd->dd;
1072 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1073 i++, pset++) {
1074 if (pset->count && pset->addr) {
1075 dma_unmap_page(&dd->pcidev->dev,
1076 pset->addr,
1077 PAGE_SIZE * pset->count,
1078 DMA_FROM_DEVICE);
1079 pset->mapped = 0;
1080 }
1081 }
1082}
1083
1084static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
1085{
1086 int i;
1087 struct hfi1_devdata *dd = flow->req->rcd->dd;
1088 struct tid_rdma_pageset *pset;
1089
1090 for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
1091 i++, pset++) {
1092 if (pset->count) {
1093 pset->addr = dma_map_page(&dd->pcidev->dev,
1094 pages[pset->idx],
1095 0,
1096 PAGE_SIZE * pset->count,
1097 DMA_FROM_DEVICE);
1098
1099 if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
1100 dma_unmap_flow(flow);
1101 return -ENOMEM;
1102 }
1103 pset->mapped = 1;
1104 }
1105 }
1106 return 0;
1107}
1108
1109static inline bool dma_mapped(struct tid_rdma_flow *flow)
1110{
1111 return !!flow->pagesets[0].mapped;
1112}
1113
1114/*
1115 * Get pages pointers and identify contiguous physical memory chunks for a
1116 * segment. All segments are of length flow->req->seg_len.
1117 */
1118static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
1119 struct page **pages,
1120 struct rvt_sge_state *ss, bool *last)
1121{
1122 u8 npages;
1123
1124 /* Reuse previously computed pagesets, if any */
1125 if (flow->npagesets) {
1126 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head,
1127 flow);
1128 if (!dma_mapped(flow))
1129 return dma_map_flow(flow, pages);
1130 return 0;
1131 }
1132
1133 npages = kern_find_pages(flow, pages, ss, last);
1134
1135 if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
1136 flow->npagesets =
1137 tid_rdma_find_phys_blocks_4k(flow, pages, npages,
1138 flow->pagesets);
1139 else
1140 flow->npagesets =
1141 tid_rdma_find_phys_blocks_8k(flow, pages, npages,
1142 flow->pagesets);
1143
1144 return dma_map_flow(flow, pages);
1145}
1146
1147static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
1148 struct hfi1_ctxtdata *rcd, char *s,
1149 struct tid_group *grp, u8 cnt)
1150{
1151 struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];
1152
1153 WARN_ON_ONCE(flow->tnode_cnt >=
1154 (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
1155 if (WARN_ON_ONCE(cnt & 1))
1156 dd_dev_err(rcd->dd,
1157 "unexpected odd allocation cnt %u map 0x%x used %u",
1158 cnt, grp->map, grp->used);
1159
1160 node->grp = grp;
1161 node->map = grp->map;
1162 node->cnt = cnt;
1163 trace_hfi1_tid_node_add(flow->req->qp, s, flow->tnode_cnt - 1,
1164 grp->base, grp->map, grp->used, cnt);
1165}
1166
1167/*
1168 * Try to allocate pageset_count TID's from TID groups for a context
1169 *
1170 * This function allocates TID's without moving groups between lists or
1171 * modifying grp->map. This is done as follows, being cognizant of the lists
1172 * between which the TID groups will move:
1173 * 1. First allocate complete groups of 8 TID's since this is more efficient,
1174 * these groups will move from group->full without affecting used
1175 * 2. If more TID's are needed allocate from used (will move from used->full or
1176 * stay in used)
1177 * 3. If we still don't have the required number of TID's go back and look again
1178 * at a complete group (will move from group->used)
1179 */
1180static int kern_alloc_tids(struct tid_rdma_flow *flow)
1181{
1182 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1183 struct hfi1_devdata *dd = rcd->dd;
1184 u32 ngroups, pageidx = 0;
1185 struct tid_group *group = NULL, *used;
1186 u8 use;
1187
1188 flow->tnode_cnt = 0;
1189 ngroups = flow->npagesets / dd->rcv_entries.group_size;
1190 if (!ngroups)
1191 goto used_list;
1192
1193 /* First look at complete groups */
1194 list_for_each_entry(group, &rcd->tid_group_list.list, list) {
1195 kern_add_tid_node(flow, rcd, "complete groups", group,
1196 group->size);
1197
1198 pageidx += group->size;
1199 if (!--ngroups)
1200 break;
1201 }
1202
1203 if (pageidx >= flow->npagesets)
1204 goto ok;
1205
1206used_list:
1207 /* Now look at partially used groups */
1208 list_for_each_entry(used, &rcd->tid_used_list.list, list) {
1209 use = min_t(u32, flow->npagesets - pageidx,
1210 used->size - used->used);
1211 kern_add_tid_node(flow, rcd, "used groups", used, use);
1212
1213 pageidx += use;
1214 if (pageidx >= flow->npagesets)
1215 goto ok;
1216 }
1217
1218 /*
1219 * Look again at a complete group, continuing from where we left.
1220 * However, if we are at the head, we have reached the end of the
1221 * complete groups list from the first loop above
1222 */
1223 if (group && &group->list == &rcd->tid_group_list.list)
1224 goto bail_eagain;
1225 group = list_prepare_entry(group, &rcd->tid_group_list.list,
1226 list);
1227 if (list_is_last(&group->list, &rcd->tid_group_list.list))
1228 goto bail_eagain;
1229 group = list_next_entry(group, list);
1230 use = min_t(u32, flow->npagesets - pageidx, group->size);
1231 kern_add_tid_node(flow, rcd, "complete continue", group, use);
1232 pageidx += use;
1233 if (pageidx >= flow->npagesets)
1234 goto ok;
1235bail_eagain:
1236 trace_hfi1_msg_alloc_tids(flow->req->qp, " insufficient tids: needed ",
1237 (u64)flow->npagesets);
1238 return -EAGAIN;
1239ok:
1240 return 0;
1241}
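/*
 * Worked example (hypothetical numbers): for flow->npagesets = 18 and a
 * group size of 8, step 1 takes two complete groups (16 entries) and
 * step 2 takes the remaining 2 entries from a partially used group, so
 * step 3 is never reached. If all three steps together cannot cover
 * npagesets, -EAGAIN is returned and the caller queues the QP to wait
 * for TID space.
 */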
1242
1243static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
1244 u32 *pset_idx)
1245{
1246 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1247 struct hfi1_devdata *dd = rcd->dd;
1248 struct kern_tid_node *node = &flow->tnode[grp_num];
1249 struct tid_group *grp = node->grp;
1250 struct tid_rdma_pageset *pset;
1251 u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
1252 u32 rcventry, npages = 0, pair = 0, tidctrl;
1253 u8 i, cnt = 0;
1254
1255 for (i = 0; i < grp->size; i++) {
1256 rcventry = grp->base + i;
1257
1258 if (node->map & BIT(i) || cnt >= node->cnt) {
1259 rcv_array_wc_fill(dd, rcventry);
1260 continue;
1261 }
1262 pset = &flow->pagesets[(*pset_idx)++];
1263 if (pset->count) {
1264 hfi1_put_tid(dd, rcventry, PT_EXPECTED,
1265 pset->addr, trdma_pset_order(pset));
1266 } else {
1267 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1268 }
1269 npages += pset->count;
1270
1271 rcventry -= rcd->expected_base;
1272 tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
1273 /*
1274 * A single TID entry will be used for an rcvarray pair (with
1275 * tidctrl 0x3), if ALL these are true (a) the bit pos is even
1276 * (b) the group map shows current and the next bits as free
1277 * indicating two consecutive rcvarray entries are available (c)
1278 * we actually need 2 more entries
1279 */
1280 pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
1281 node->cnt >= cnt + 2;
1282 if (!pair) {
1283 if (!pset->count)
1284 tidctrl = 0x1;
1285 flow->tid_entry[flow->tidcnt++] =
1286 EXP_TID_SET(IDX, rcventry >> 1) |
1287 EXP_TID_SET(CTRL, tidctrl) |
1288 EXP_TID_SET(LEN, npages);
1289 trace_hfi1_tid_entry_alloc(/* entry */
1290 flow->req->qp, flow->tidcnt - 1,
1291 flow->tid_entry[flow->tidcnt - 1]);
1292
1293 /* Efficient DIV_ROUND_UP(npages, pmtu_pg) */
1294 flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
1295 npages = 0;
1296 }
1297
1298 if (grp->used == grp->size - 1)
1299 tid_group_move(grp, &rcd->tid_used_list,
1300 &rcd->tid_full_list);
1301 else if (!grp->used)
1302 tid_group_move(grp, &rcd->tid_group_list,
1303 &rcd->tid_used_list);
1304
1305 grp->used++;
1306 grp->map |= BIT(i);
1307 cnt++;
1308 }
1309}
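/*
 * Worked example for the pairing logic above: when an even RcvArray
 * entry and the one after it are both free and at least two more
 * entries are still needed, the even entry is claimed with pair set and
 * no TID word is emitted for it; the following odd entry then emits a
 * single TID word with tidctrl = 0x3 that covers both entries and their
 * combined page count. An unpaired even entry is emitted with
 * tidctrl = 0x1 and an unpaired odd entry with tidctrl = 0x2.
 */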
1310
1311static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
1312{
1313 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1314 struct hfi1_devdata *dd = rcd->dd;
1315 struct kern_tid_node *node = &flow->tnode[grp_num];
1316 struct tid_group *grp = node->grp;
1317 u32 rcventry;
1318 u8 i, cnt = 0;
1319
1320 for (i = 0; i < grp->size; i++) {
1321 rcventry = grp->base + i;
1322
1323 if (node->map & BIT(i) || cnt >= node->cnt) {
1324 rcv_array_wc_fill(dd, rcventry);
1325 continue;
1326 }
1327
1328 hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
1329
1330 grp->used--;
1331 grp->map &= ~BIT(i);
1332 cnt++;
1333
1334 if (grp->used == grp->size - 1)
1335 tid_group_move(grp, &rcd->tid_full_list,
1336 &rcd->tid_used_list);
1337 else if (!grp->used)
1338 tid_group_move(grp, &rcd->tid_used_list,
1339 &rcd->tid_group_list);
1340 }
1341 if (WARN_ON_ONCE(cnt & 1)) {
1342 struct hfi1_ctxtdata *rcd = flow->req->rcd;
1343 struct hfi1_devdata *dd = rcd->dd;
1344
1345 dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
1346 cnt, grp->map, grp->used);
1347 }
1348}
1349
1350static void kern_program_rcvarray(struct tid_rdma_flow *flow)
1351{
1352 u32 pset_idx = 0;
1353 int i;
1354
1355 flow->npkts = 0;
1356 flow->tidcnt = 0;
1357 for (i = 0; i < flow->tnode_cnt; i++)
1358 kern_program_rcv_group(flow, i, &pset_idx);
1359 trace_hfi1_tid_flow_alloc(flow->req->qp, flow->req->setup_head, flow);
1360}
1361
1362/**
1363 * hfi1_kern_exp_rcv_setup() - setup TID's and flow for one segment of a
1364 * TID RDMA request
1365 *
1366 * @req: TID RDMA request for which the segment/flow is being set up
1367 * @ss: sge state, maintains state across successive segments of a sge
1368 * @last: set to true after the last sge segment has been processed
1369 *
1370 * This function
1371 * (1) finds a free flow entry in the flow circular buffer
1372 * (2) finds pages and contiguous physical chunks constituting one segment
1373 * of an sge
1374 * (3) allocates TID group entries for those chunks
1375 * (4) programs rcvarray entries in the hardware corresponding to those
1376 * TID's
1377 * (5) computes a tidarray with formatted TID entries which can be sent
1378 * to the sender
1379 * (6) Reserves and programs HW flows.
1380 * (7) It also manages queuing the QP when TID/flow resources are not
1381 * available.
1382 *
1383 * @req points to struct tid_rdma_request of which the segments are a part. The
1384 * function uses qp, rcd and seg_len members of @req. In the absence of errors,
1385 * req->flow_idx is the index of the flow which has been prepared in this
1386 * invocation of function call. With flow = &req->flows[req->flow_idx],
1387 * flow->tid_entry contains the TID array which the sender can use for TID RDMA
1388 * sends and flow->npkts contains number of packets required to send the
1389 * segment.
1390 *
1391 * hfi1_check_sge_align should be called prior to calling this function and if
1392 * it signals error TID RDMA cannot be used for this sge and this function
1393 * should not be called.
1394 *
1395 * For the queuing, caller must hold the flow->req->qp s_lock from the send
1396 * engine and the function will procure the exp_lock.
1397 *
1398 * Return:
1399 * The function returns -EAGAIN if sufficient number of TID/flow resources to
1400 * map the segment could not be allocated. In this case the function should be
1401 * called again with previous arguments to retry the TID allocation. There are
1402 * no other error returns. The function returns 0 on success.
1403 */
1404int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
1405 struct rvt_sge_state *ss, bool *last)
1406 __must_hold(&req->qp->s_lock)
1407{
1408 struct tid_rdma_flow *flow = &req->flows[req->setup_head];
1409 struct hfi1_ctxtdata *rcd = req->rcd;
1410 struct hfi1_qp_priv *qpriv = req->qp->priv;
1411 unsigned long flags;
1412 struct rvt_qp *fqp;
1413 u16 clear_tail = req->clear_tail;
1414
1415 lockdep_assert_held(&req->qp->s_lock);
1416 /*
1417 * We return error if either (a) we don't have space in the flow
1418 * circular buffer, or (b) we already have max entries in the buffer.
1419 * Max entries depend on the type of request we are processing and the
1420 * negotiated TID RDMA parameters.
1421 */
1422 if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
1423 CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
1424 req->n_flows)
1425 return -EINVAL;
1426
1427 /*
1428 * Get pages, identify contiguous physical memory chunks for the segment
1429 * If we can not determine a DMA address mapping we will treat it just
1430 * like if we ran out of space above.
1431 */
1432 if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
1433 hfi1_wait_kmem(flow->req->qp);
1434 return -ENOMEM;
1435 }
1436
1437 spin_lock_irqsave(&rcd->exp_lock, flags);
1438 if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
1439 goto queue;
1440
1441 /*
1442 * At this point we know the number of pagesets and hence the number of
1443 * TID's to map the segment. Allocate the TID's from the TID groups. If
1444 * we cannot allocate the required number we exit and try again later
1445 */
1446 if (kern_alloc_tids(flow))
1447 goto queue;
1448 /*
1449 * Finally program the TID entries with the pagesets, compute the
1450 * tidarray and enable the HW flow
1451 */
1452 kern_program_rcvarray(flow);
1453
1454 /*
1455 * Setup the flow state with relevant information.
1456 * This information is used for tracking the sequence of data packets
1457 * for the segment.
1458 * The flow is setup here as this is the most accurate time and place
1459 * to do so. Doing so at a later time runs the risk of the flow data in
1460 * qpriv getting out of sync.
1461 */
1462 memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
1463 flow->idx = qpriv->flow_state.index;
1464 flow->flow_state.generation = qpriv->flow_state.generation;
1465 flow->flow_state.spsn = qpriv->flow_state.psn;
1466 flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
1467 flow->flow_state.r_next_psn =
1468 full_flow_psn(flow, flow->flow_state.spsn);
1469 qpriv->flow_state.psn += flow->npkts;
1470
1471 dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
1472 /* get head before dropping lock */
1473 fqp = first_qp(rcd, &rcd->rarr_queue);
1474 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1475 tid_rdma_schedule_tid_wakeup(fqp);
1476
1477 req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
1478 return 0;
1479queue:
1480 queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
1481 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1482 return -EAGAIN;
1483}
1484
1485static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
1486{
1487 flow->npagesets = 0;
1488}
1489
1490/*
1491 * This function is called after one segment has been successfully sent to
1492 * release the flow and TID HW/SW resources for that segment. The segments for a
1493 * TID RDMA request are set up and cleared in FIFO order, which is managed using a
1494 * circular buffer.
1495 */
1496int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
1497 __must_hold(&req->qp->s_lock)
1498{
1499 struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
1500 struct hfi1_ctxtdata *rcd = req->rcd;
1501 unsigned long flags;
1502 int i;
1503 struct rvt_qp *fqp;
1504
1505 lockdep_assert_held(&req->qp->s_lock);
1506 /* Exit if we have nothing in the flow circular buffer */
1507 if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
1508 return -EINVAL;
1509
1510 spin_lock_irqsave(&rcd->exp_lock, flags);
1511
1512 for (i = 0; i < flow->tnode_cnt; i++)
1513 kern_unprogram_rcv_group(flow, i);
1514 /* To prevent double unprogramming */
1515 flow->tnode_cnt = 0;
1516 /* get head before dropping lock */
1517 fqp = first_qp(rcd, &rcd->rarr_queue);
1518 spin_unlock_irqrestore(&rcd->exp_lock, flags);
1519
1520 dma_unmap_flow(flow);
1521
1522 hfi1_tid_rdma_reset_flow(flow);
1523 req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);
1524
1525 if (fqp == req->qp) {
1526 __trigger_tid_waiter(fqp);
1527 rvt_put_qp(fqp);
1528 } else {
1529 tid_rdma_schedule_tid_wakeup(fqp);
1530 }
1531
1532 return 0;
1533}
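/*
 * Worked example of the flow circular buffer (MAX_FLOWS = 8 with the
 * defaults above): starting from an empty request, three calls to
 * hfi1_kern_exp_rcv_setup() leave setup_head = 3 and clear_tail = 0, so
 * CIRC_CNT() reports three outstanding segments. Each successful
 * hfi1_kern_exp_rcv_clear() then advances clear_tail by one, wrapping
 * modulo MAX_FLOWS, until CIRC_CNT() reaches zero.
 */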
1534
1535/*
1536 * This function is called to release all the tid entries for
1537 * a request.
1538 */
1539void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
1540 __must_hold(&req->qp->s_lock)
1541{
1542 /* Use memory barrier for proper ordering */
1543 while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
1544 if (hfi1_kern_exp_rcv_clear(req))
1545 break;
1546 }
1547}
1548
1549/**
1550 * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
1551 * @req - the tid rdma request to be cleaned
1552 */
1553static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
1554{
1555 kfree(req->flows);
1556 req->flows = NULL;
1557}
1558
1559/**
1560 * __trdma_clean_swqe - clean up for large sized QPs
1561 * @qp: the queue pair
1562 * @wqe: the send wqe
1563 */
1564void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
1565{
1566 struct hfi1_swqe_priv *p = wqe->priv;
1567
1568 hfi1_kern_exp_rcv_free_flows(&p->tid_req);
1569}
1570
1571/*
1572 * This can be called at QP create time or in the data path.
1573 */
1574static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
1575 gfp_t gfp)
1576{
1577 struct tid_rdma_flow *flows;
1578 int i;
1579
1580 if (likely(req->flows))
1581 return 0;
1582 flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
1583 req->rcd->numa_id);
1584 if (!flows)
1585 return -ENOMEM;
1586 /* mini init */
1587 for (i = 0; i < MAX_FLOWS; i++) {
1588 flows[i].req = req;
1589 flows[i].npagesets = 0;
1590 flows[i].pagesets[0].mapped = 0;
1591 }
1592 req->flows = flows;
1593 return 0;
1594}
1595
1596static void hfi1_init_trdma_req(struct rvt_qp *qp,
1597 struct tid_rdma_request *req)
1598{
1599 struct hfi1_qp_priv *qpriv = qp->priv;
1600
1601 /*
1602 * Initialize various TID RDMA request variables.
1603 * These variables are "static", which is why they
1604 * can be pre-initialized here before the WRs has
1605 * even been submitted.
1606 * However, non-NULL values for these variables do not
1607 * imply that this WQE has been enabled for TID RDMA.
1608 * Drivers should check the WQE's opcode to determine
1609 * if a request is a TID RDMA one or not.
1610 */
1611 req->qp = qp;
1612 req->rcd = qpriv->rcd;
1613}
1614
1615u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
1616 void *context, int vl, int mode, u64 data)
1617{
1618 struct hfi1_devdata *dd = context;
1619
1620 return dd->verbs_dev.n_tidwait;
1621}
1622
1623/* TID RDMA READ functions */
1624u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
1625 struct ib_other_headers *ohdr, u32 *bth1,
1626 u32 *bth2, u32 *len)
1627{
1628 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1629 struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
1630 struct rvt_qp *qp = req->qp;
1631 struct hfi1_qp_priv *qpriv = qp->priv;
1632 struct hfi1_swqe_priv *wpriv = wqe->priv;
1633 struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
1634 struct tid_rdma_params *remote;
1635 u32 req_len = 0;
1636 void *req_addr = NULL;
1637
1638 /* This is the IB psn used to send the request */
1639 *bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
1640
1641 /* TID Entries for TID RDMA READ payload */
1642 req_addr = &flow->tid_entry[flow->tid_idx];
1643 req_len = sizeof(*flow->tid_entry) *
1644 (flow->tidcnt - flow->tid_idx);
1645
1646 memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
1647 wpriv->ss.sge.vaddr = req_addr;
1648 wpriv->ss.sge.sge_length = req_len;
1649 wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
1650 /*
1651 * We can safely zero these out. Since the first SGE covers the
1652 * entire packet, nothing else should even look at the MR.
1653 */
1654 wpriv->ss.sge.mr = NULL;
1655 wpriv->ss.sge.m = 0;
1656 wpriv->ss.sge.n = 0;
1657
1658 wpriv->ss.sg_list = NULL;
1659 wpriv->ss.total_len = wpriv->ss.sge.sge_length;
1660 wpriv->ss.num_sge = 1;
1661
1662 /* Construct the TID RDMA READ REQ packet header */
1663 rcu_read_lock();
1664 remote = rcu_dereference(qpriv->tid_rdma.remote);
1665
1666 KDETH_RESET(rreq->kdeth0, KVER, 0x1);
1667 KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
1668 rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
1669 req->cur_seg * req->seg_len + flow->sent);
1670 rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
1671 rreq->reth.length = cpu_to_be32(*len);
1672 rreq->tid_flow_psn =
1673 cpu_to_be32((flow->flow_state.generation <<
1674 HFI1_KDETH_BTH_SEQ_SHIFT) |
1675 ((flow->flow_state.spsn + flow->pkt) &
1676 HFI1_KDETH_BTH_SEQ_MASK));
1677 rreq->tid_flow_qp =
1678 cpu_to_be32(qpriv->tid_rdma.local.qp |
1679 ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
1680 TID_RDMA_DESTQP_FLOW_SHIFT) |
1681 qpriv->rcd->ctxt);
1682 rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
1683 *bth1 &= ~RVT_QPN_MASK;
1684 *bth1 |= remote->qp;
1685 *bth2 |= IB_BTH_REQ_ACK;
1686 rcu_read_unlock();
1687
1688 /* We are done with this segment */
1689 flow->sent += *len;
1690 req->cur_seg++;
1691 qp->s_state = TID_OP(READ_REQ);
1692 req->ack_pending++;
1693 req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
1694 qpriv->pending_tid_r_segs++;
1695 qp->s_num_rd_atomic++;
1696
1697 /* Set the TID RDMA READ request payload size */
1698 *len = req_len;
1699
1700 return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
1701}
1702
1703/*
1704 * @len: contains the data length to read upon entry and the read request
1705 * payload length upon exit.
1706 */
1707u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
1708 struct ib_other_headers *ohdr, u32 *bth1,
1709 u32 *bth2, u32 *len)
1710 __must_hold(&qp->s_lock)
1711{
1712 struct hfi1_qp_priv *qpriv = qp->priv;
1713 struct tid_rdma_request *req = wqe_to_tid_req(wqe);
1714 struct tid_rdma_flow *flow = NULL;
1715 u32 hdwords = 0;
1716 bool last;
1717 bool retry = true;
1718 u32 npkts = rvt_div_round_up_mtu(qp, *len);
1719
1720 /*
1721 * Check sync conditions. Make sure that there are no pending
1722 * segments before freeing the flow.
1723 */
1724sync_check:
1725 if (req->state == TID_REQUEST_SYNC) {
1726 if (qpriv->pending_tid_r_segs)
1727 goto done;
1728
1729 hfi1_kern_clear_hw_flow(req->rcd, qp);
1730 req->state = TID_REQUEST_ACTIVE;
1731 }
1732
1733 /*
1734 * If the request for this segment is resent, the tid resources should
1735 * have been allocated before. In this case, req->flow_idx should
1736 * fall behind req->setup_head.
1737 */
1738 if (req->flow_idx == req->setup_head) {
1739 retry = false;
1740 if (req->state == TID_REQUEST_RESEND) {
1741 /*
1742 * This is the first new segment for a request whose
1743 * earlier segments have been re-sent. We need to
1744 * set up the sge pointer correctly.
1745 */
1746 restart_sge(&qp->s_sge, wqe, req->s_next_psn,
1747 qp->pmtu);
1748 req->isge = 0;
1749 req->state = TID_REQUEST_ACTIVE;
1750 }
1751
1752 /*
1753 * Check sync. The last PSN of each generation is reserved for
1754 * RESYNC.
1755 */
1756 if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
1757 req->state = TID_REQUEST_SYNC;
1758 goto sync_check;
1759 }
1760
1761 /* Allocate the flow if not yet */
1762 if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
1763 goto done;
1764
1765 /*
1766 * The following call will advance req->setup_head after
1767 * allocating the tid entries.
1768 */
1769 if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
1770 req->state = TID_REQUEST_QUEUED;
1771
1772 /*
1773 * We don't have resources for this segment. The QP has
1774 * already been queued.
1775 */
1776 goto done;
1777 }
1778 }
1779
1780 /* req->flow_idx should only be one slot behind req->setup_head */
1781 flow = &req->flows[req->flow_idx];
1782 flow->pkt = 0;
1783 flow->tid_idx = 0;
1784 flow->sent = 0;
1785 if (!retry) {
1786 /* Set the first and last IB PSN for the flow in use.*/
1787 flow->flow_state.ib_spsn = req->s_next_psn;
1788 flow->flow_state.ib_lpsn =
1789 flow->flow_state.ib_spsn + flow->npkts - 1;
1790 }
1791
1792 /* Calculate the next segment start psn.*/
1793 req->s_next_psn += flow->npkts;
1794
1795 /* Build the packet header */
1796 hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
1797done:
1798 return hdwords;
1799}