git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/commitdiff
IB/hfi1: Add an s_acked_ack_queue pointer
author Kaike Wan <kaike.wan@intel.com>
Thu, 24 Jan 2019 05:48:48 +0000 (21:48 -0800)
committer Doug Ledford <dledford@redhat.com>
Tue, 5 Feb 2019 23:07:43 +0000 (18:07 -0500)
The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.

In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.

The detection of these two conditions is important in determining when an
old entry can safely be overwritten with a newly received request and the
resources associated with the old request can safely be released.

When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.

What this means is that entries in the ring which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) can no
longer be assumed to be complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.

The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.

This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE requests, it may fall behind
s_tail_ack_queue.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/rc.h
drivers/infiniband/hw/hfi1/tid_rdma.c
drivers/infiniband/hw/hfi1/trace_tid.h
drivers/infiniband/sw/rdmavt/qp.c
include/rdma/rdmavt_qp.h

index 6c9ef572fc69c2068f990e3e64a8c262eae61f07..9dc8e524510e0653c5148d0729e5709f05930581 100644 (file)
@@ -120,6 +120,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
        struct hfi1_qp_priv *priv = qp->priv;
        bool last_pkt;
        u32 delta;
+       u8 next = qp->s_tail_ack_queue;
 
        trace_hfi1_rsp_make_rc_ack(qp, 0);
        lockdep_assert_held(&qp->s_lock);
@@ -149,9 +150,17 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
                 * response has been sent instead of only being
                 * constructed.
                 */
-               if (++qp->s_tail_ack_queue >
-                   rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
-                       qp->s_tail_ack_queue = 0;
+               if (++next > rvt_size_atomic(&dev->rdi))
+                       next = 0;
+               /*
+                * Only advance the s_acked_ack_queue pointer if there
+                * have been no TID RDMA requests.
+                */
+               e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+               if (e->opcode != TID_OP(WRITE_REQ) &&
+                   qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+                       qp->s_acked_ack_queue = next;
+               qp->s_tail_ack_queue = next;
                /* FALLTHROUGH */
        case OP(SEND_ONLY):
        case OP(ACKNOWLEDGE):
@@ -172,6 +181,10 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
                         */
                        len = e->rdma_sge.sge_length;
                        if (len && !e->rdma_sge.mr) {
+                               if (qp->s_acked_ack_queue ==
+                                   qp->s_tail_ack_queue)
+                                       qp->s_acked_ack_queue =
+                                               qp->r_head_ack_queue;
                                qp->s_tail_ack_queue = qp->r_head_ack_queue;
                                goto bail;
                        }
@@ -202,6 +215,10 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
                         */
                        len = e->rdma_sge.sge_length;
                        if (len && !e->rdma_sge.mr) {
+                               if (qp->s_acked_ack_queue ==
+                                   qp->s_tail_ack_queue)
+                                       qp->s_acked_ack_queue =
+                                               qp->r_head_ack_queue;
                                qp->s_tail_ack_queue = qp->r_head_ack_queue;
                                goto bail;
                        }
@@ -2235,6 +2252,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
                e->psn = psn;
                if (old_req)
                        goto unlock_done;
+               if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+                       qp->s_acked_ack_queue = prev;
                qp->s_tail_ack_queue = prev;
                break;
        }
@@ -2248,6 +2267,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
                 */
                if (!e || e->opcode != (u8)opcode || old_req)
                        goto unlock_done;
+               if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
+                       qp->s_acked_ack_queue = prev;
                qp->s_tail_ack_queue = prev;
                break;
        }
@@ -2274,6 +2295,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
                 * Resend the RDMA read or atomic op which
                 * ACKs this duplicate request.
                 */
+               if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
+                       qp->s_acked_ack_queue = mra;
                qp->s_tail_ack_queue = mra;
                break;
        }
@@ -2646,7 +2669,7 @@ send_last:
                if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
                        next = 0;
                spin_lock_irqsave(&qp->s_lock, flags);
-               if (unlikely(next == qp->s_tail_ack_queue)) {
+               if (unlikely(next == qp->s_acked_ack_queue)) {
                        if (!qp->s_ack_queue[next].sent)
                                goto nack_inv_unlck;
                        update_ack_queue(qp, next);
@@ -2723,7 +2746,7 @@ send_last:
                if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
                        next = 0;
                spin_lock_irqsave(&qp->s_lock, flags);
-               if (unlikely(next == qp->s_tail_ack_queue)) {
+               if (unlikely(next == qp->s_acked_ack_queue)) {
                        if (!qp->s_ack_queue[next].sent)
                                goto nack_inv_unlck;
                        update_ack_queue(qp, next);
index 4329eadcb3dfa40ce712b70ec98256f95a0aee70..8e0935b9bf2a6166881580f5780a39e3c38b3e37 100644 (file)
@@ -18,6 +18,7 @@ static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
        if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
                next = 0;
        qp->s_tail_ack_queue = next;
+       qp->s_acked_ack_queue = next;
        qp->s_ack_state = OP(ACKNOWLEDGE);
 }
 
index 089e301d9bcdac0e076f4f131b69d716f72458fa..c320a99afb35818c1a19d0e978e10430332d4cc1 100644 (file)
@@ -2044,6 +2044,8 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
                        goto unlock;
        }
        /* Re-process old requests.*/
+       if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+               qp->s_acked_ack_queue = prev;
        qp->s_tail_ack_queue = prev;
        /*
         * Since the qp->s_tail_ack_queue is modified, the
index b71638c22d4b3a74eaf1e3a36d67e5f6233f4a7a..51f5b0e8da714f6e24fa0c8b475d79ddd6412f5a 100644 (file)
@@ -40,7 +40,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
 #define RSP_INFO_PRN "[%s] qpn 0x%x state 0x%x s_state 0x%x psn 0x%x " \
                     "r_psn 0x%x r_state 0x%x r_flags 0x%x " \
                     "r_head_ack_queue %u s_tail_ack_queue %u " \
-                    "s_ack_state 0x%x " \
+                    "s_acked_ack_queue %u s_ack_state 0x%x " \
                     "s_nak_state 0x%x s_flags 0x%x ps_flags 0x%x " \
                     "iow_flags 0x%lx"
 
@@ -62,7 +62,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
                    "s_next_psn 0x%x"
 
 #define RCV_ERR_PRN "[%s] qpn 0x%x s_flags 0x%x state 0x%x " \
-                   "s_tail_ack_queue %u " \
+                   "s_acked_ack_queue %u s_tail_ack_queue %u " \
                    "r_head_ack_queue %u opcode 0x%x psn 0x%x r_psn 0x%x " \
                    " diff %d"
 
@@ -671,6 +671,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
                __field(u8, r_flags)
                __field(u8, r_head_ack_queue)
                __field(u8, s_tail_ack_queue)
+               __field(u8, s_acked_ack_queue)
                __field(u8, s_ack_state)
                __field(u8, s_nak_state)
                __field(u8, r_nak_state)
@@ -691,6 +692,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
                __entry->r_flags = qp->r_flags;
                __entry->r_head_ack_queue = qp->r_head_ack_queue;
                __entry->s_tail_ack_queue = qp->s_tail_ack_queue;
+               __entry->s_acked_ack_queue = qp->s_acked_ack_queue;
                __entry->s_ack_state = qp->s_ack_state;
                __entry->s_nak_state = qp->s_nak_state;
                __entry->s_flags = qp->s_flags;
@@ -709,6 +711,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
                __entry->r_flags,
                __entry->r_head_ack_queue,
                __entry->s_tail_ack_queue,
+               __entry->s_acked_ack_queue,
                __entry->s_ack_state,
                __entry->s_nak_state,
                __entry->s_flags,
@@ -1007,6 +1010,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
                __field(u32, qpn)
                __field(u32, s_flags)
                __field(u8, state)
+               __field(u8, s_acked_ack_queue)
                __field(u8, s_tail_ack_queue)
                __field(u8, r_head_ack_queue)
                __field(u32, opcode)
@@ -1019,6 +1023,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
                __entry->qpn = qp->ibqp.qp_num;
                __entry->s_flags = qp->s_flags;
                __entry->state = qp->state;
+               __entry->s_acked_ack_queue = qp->s_acked_ack_queue;
                __entry->s_tail_ack_queue = qp->s_tail_ack_queue;
                __entry->r_head_ack_queue = qp->r_head_ack_queue;
                __entry->opcode = opcode;
@@ -1032,6 +1037,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
                __entry->qpn,
                __entry->s_flags,
                __entry->state,
+               __entry->s_acked_ack_queue,
                __entry->s_tail_ack_queue,
                __entry->r_head_ack_queue,
                __entry->opcode,
index 2769ebdf89fb605a8071f2e0142d3b4f16cd2769..14ec2577bcaa13283d325988bcd9fa5eb4e10782 100644 (file)
@@ -854,6 +854,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
        qp->s_mig_state = IB_MIG_MIGRATED;
        qp->r_head_ack_queue = 0;
        qp->s_tail_ack_queue = 0;
+       qp->s_acked_ack_queue = 0;
        qp->s_num_rd_atomic = 0;
        if (qp->r_rq.wq) {
                qp->r_rq.wq->head = 0;
index d8d88d0230921862eff80a71b4ccd369007f850b..4ee612ab6cb4a226fa94a6abd9ec613174fc6917 100644 (file)
@@ -375,6 +375,7 @@ struct rvt_qp {
        u8 s_rnr_retry;         /* requester RNR retry counter */
        u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
        u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
+       u8 s_acked_ack_queue;   /* index into s_ack_queue[] */
 
        struct rvt_sge_state s_ack_rdma_sge;
        struct timer_list s_timer;