]>
Commit | Line | Data |
---|---|---|
b4e64397 DD |
1 | #ifndef DEF_RDMAVT_INCQP_H |
2 | #define DEF_RDMAVT_INCQP_H | |
3 | ||
4 | /* | |
5 | * Copyright(c) 2015 Intel Corporation. | |
6 | * | |
7 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
8 | * redistributing this file, you may do so under either license. | |
9 | * | |
10 | * GPL LICENSE SUMMARY | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify | |
13 | * it under the terms of version 2 of the GNU General Public License as | |
14 | * published by the Free Software Foundation. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, but | |
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * General Public License for more details. | |
20 | * | |
21 | * BSD LICENSE | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * | |
27 | * - Redistributions of source code must retain the above copyright | |
28 | * notice, this list of conditions and the following disclaimer. | |
29 | * - Redistributions in binary form must reproduce the above copyright | |
30 | * notice, this list of conditions and the following disclaimer in | |
31 | * the documentation and/or other materials provided with the | |
32 | * distribution. | |
33 | * - Neither the name of Intel Corporation nor the names of its | |
34 | * contributors may be used to endorse or promote products derived | |
35 | * from this software without specific prior written permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
38 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
39 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
40 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
41 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
44 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
45 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 | * | |
49 | */ | |
50 | ||
050eb7fb DD |
51 | #include <rdma/ib_pack.h> |
52 | /* | |
53 | * Atomic bit definitions for r_aflags. These are bit positions (0, 1), not masks, unlike the r_flags values below; presumably meant for set_bit()/test_bit()-style helpers on the unsigned long r_aflags member of struct rvt_qp (confirm against users). | |
54 | */ | |
55 | #define RVT_R_WRID_VALID 0 | |
56 | #define RVT_R_REWIND_SGE 1 | |
57 | ||
58 | /* | |
59 | * Bit definitions for r_flags. Each value is a distinct single-bit mask, so several may be OR-ed together; all of them fit in the u8 r_flags member of struct rvt_qp. | |
60 | */ | |
61 | #define RVT_R_REUSE_SGE 0x01 | |
62 | #define RVT_R_RDMAR_SEQ 0x02 | |
63 | #define RVT_R_RSP_NAK 0x04 | |
64 | #define RVT_R_RSP_SEND 0x08 | |
65 | #define RVT_R_COMM_EST 0x10 | |
66 | ||
67 | /* | |
68 | * Bit definitions for s_flags (single-bit masks kept in the u32 s_flags member of struct rvt_qp). | |
69 | * | |
70 | * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled | |
71 | * RVT_S_BUSY - send tasklet is processing the QP | |
72 | * RVT_S_TIMER - the RC retry timer is active | |
73 | * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics | |
74 | * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs | |
75 | * before processing the next SWQE | |
76 | * RVT_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete | |
77 | * before processing the next SWQE | |
78 | * RVT_S_WAIT_RNR - waiting for RNR timeout | |
79 | * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE | |
80 | * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating | |
81 | * next send completion entry not via send DMA | |
82 | * RVT_S_WAIT_PIO - waiting for a send buffer to be available | |
83 | * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available | |
84 | * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available | |
85 | * RVT_S_WAIT_KMEM - waiting for kernel memory to be available | |
86 | * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue | |
87 | * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests | |
88 | * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK | |
89 | * RVT_S_ECN - a BECN was queued to the send engine | |
90 | */ | |
91 | #define RVT_S_SIGNAL_REQ_WR 0x0001 | |
92 | #define RVT_S_BUSY 0x0002 | |
93 | #define RVT_S_TIMER 0x0004 | |
94 | #define RVT_S_RESP_PENDING 0x0008 /* NOTE(review): not described in the list above */ | |
95 | #define RVT_S_ACK_PENDING 0x0010 | |
96 | #define RVT_S_WAIT_FENCE 0x0020 | |
97 | #define RVT_S_WAIT_RDMAR 0x0040 | |
98 | #define RVT_S_WAIT_RNR 0x0080 | |
99 | #define RVT_S_WAIT_SSN_CREDIT 0x0100 | |
100 | #define RVT_S_WAIT_DMA 0x0200 | |
101 | #define RVT_S_WAIT_PIO 0x0400 | |
102 | #define RVT_S_WAIT_TX 0x0800 | |
103 | #define RVT_S_WAIT_DMA_DESC 0x1000 | |
104 | #define RVT_S_WAIT_KMEM 0x2000 | |
105 | #define RVT_S_WAIT_PSN 0x4000 | |
106 | #define RVT_S_WAIT_ACK 0x8000 | |
107 | #define RVT_S_SEND_ONE 0x10000 | |
108 | #define RVT_S_UNLIMITED_CREDIT 0x20000 /* NOTE(review): not described in the list above */ | |
109 | #define RVT_S_AHG_VALID 0x40000 /* NOTE(review): not described in the list above */ | |
110 | #define RVT_S_AHG_CLEAR 0x80000 /* NOTE(review): not described in the list above */ | |
111 | #define RVT_S_ECN 0x100000 | |
112 | ||
113 | /* | |
114 | * Wait flags that would prevent any packet type from being sent: the four resource waits (send buffer, txreq, DMA descriptors, kernel memory). | |
115 | */ | |
116 | #define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ | |
117 | RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) | |
118 | ||
119 | /* | |
120 | * Wait flags that would prevent send work requests from making progress (fence, RDMA read, RNR, credit, DMA drain, PSN and ACK waits). | |
121 | */ | |
122 | #define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \ | |
123 | RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \ | |
124 | RVT_S_WAIT_PSN | RVT_S_WAIT_ACK) | |
125 | ||
126 | #define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) /* union of the two wait-flag groups above */ | |
127 | ||
128 | /* Mask of the opcode bits to pay attention to when checking qp type (0xE0 selects the three high-order bits of an 8-bit opcode) */ | |
129 | #define RVT_OPCODE_QP_MASK 0xE0 | |
130 | ||
b4e64397 DD |
131 | /* |
132 | * Send work request queue entry. | |
133 | * The size of the sg_list is determined when the QP is created and stored | |
134 | * in qp->s_max_sge. | |
135 | */ | |
136 | struct rvt_swqe { | |
137 | union { /* one work request, viewed as whichever ib_*_wr type applies (presumably chosen by the WR opcode - confirm) */ | |
138 | struct ib_send_wr wr; /* don't use wr.sg_list */ | |
139 | struct ib_ud_wr ud_wr; | |
140 | struct ib_reg_wr reg_wr; | |
141 | struct ib_rdma_wr rdma_wr; | |
142 | struct ib_atomic_wr atomic_wr; | |
143 | }; | |
144 | u32 psn; /* first packet sequence number */ | |
145 | u32 lpsn; /* last packet sequence number */ | |
146 | u32 ssn; /* send sequence number */ | |
147 | u32 length; /* total length of data in sg_list */ | |
148 | struct rvt_sge sg_list[0]; /* variable-length trailing array; must remain the last member */ | |
149 | }; | |
150 | ||
151 | /* | |
152 | * Receive work request queue entry. | |
153 | * The size of the sg_list is determined when the QP (or SRQ) is created | |
154 | * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). | |
155 | */ | |
156 | struct rvt_rwqe { | |
157 | u64 wr_id; /* work request ID; presumably copied from the posted receive WR - confirm */ | |
158 | u8 num_sge; /* presumably the number of sg_list entries in use (capacity is max_sge) - confirm */ | |
159 | struct ib_sge sg_list[0]; /* variable-length trailing array; must remain the last member */ | |
160 | }; | |
161 | ||
162 | /* | |
163 | * This structure is used to contain the head pointer, tail pointer, | |
164 | * and receive work queue entries as a single memory allocation so | |
165 | * it can be mmap'ed into user space. | |
166 | * Note that the wq array elements are variable size so you can't | |
167 | * just index into the array to get the N'th element; | |
168 | * use get_rwqe_ptr() instead. | |
169 | */ | |
170 | struct rvt_rwq { | |
171 | u32 head; /* new work requests posted to the head */ | |
172 | u32 tail; /* receives pull requests from here. */ | |
173 | struct rvt_rwqe wq[0]; /* variable-size entries; never index this array directly */ | |
174 | }; | |
175 | ||
176 | struct rvt_rq { /* receive queue: the rvt_rwq allocation plus its sizing and lock */ | |
177 | struct rvt_rwq *wq; /* head/tail indices and the RWQE array */ | |
178 | u32 size; /* size of RWQE array */ | |
179 | u8 max_sge; /* sg_list capacity of each RWQE in wq */ | |
180 | /* protect changes in this struct */ | |
181 | spinlock_t lock ____cacheline_aligned_in_smp; | |
182 | }; | |
183 | ||
184 | /* | |
185 | * This structure is used by rvt_mmap() to validate an offset | |
186 | * when an mmap() request is made. The vm_area_struct then uses | |
187 | * this as its vm_private_data. | |
188 | */ | |
189 | struct rvt_mmap_info { | |
190 | struct list_head pending_mmaps; /* list linkage; presumably on a list of not-yet-mapped entries - confirm */ | |
191 | struct ib_ucontext *context; /* presumably the user context allowed to map obj - confirm */ | |
192 | void *obj; /* presumably the kernel memory being exposed - confirm */ | |
193 | __u64 offset; /* the offset that rvt_mmap() validates a request against */ | |
194 | struct kref ref; /* reference count for this structure */ | |
195 | unsigned size; /* presumably the size in bytes of obj - confirm */ | |
196 | }; | |
197 | ||
198 | #define RVT_MAX_RDMA_ATOMIC 16 /* s_ack_queue[] in struct rvt_qp is sized to this value plus one */ | |
199 | ||
200 | /* | |
201 | * This structure holds the information that the send tasklet needs | |
202 | * to send a RDMA read response or atomic operation. | |
203 | */ | |
204 | struct rvt_ack_entry { | |
205 | u8 opcode; /* presumably the opcode of the request being answered - confirm */ | |
206 | u8 sent; /* NOTE(review): meaning not evident from this header; presumably non-zero once the reply has gone out */ | |
207 | u32 psn; /* presumably first PSN, as in struct rvt_swqe - confirm */ | |
208 | u32 lpsn; /* presumably last PSN, as in struct rvt_swqe - confirm */ | |
209 | union { /* payload source: rdma_sge for an RDMA read response, atomic_data for an atomic (presumably selected by opcode) */ | |
210 | struct rvt_sge rdma_sge; | |
211 | u64 atomic_data; | |
212 | }; | |
213 | }; | |
214 | ||
215 | /* | |
216 | * Variables prefixed with s_ are for the requester (sender). | |
217 | * Variables prefixed with r_ are for the responder (receiver). | |
218 | * Variables prefixed with ack_ are for responder replies. NOTE(review): no member below starts with ack_; the reply state uses s_ack_* / r_ack_* / *_ack_queue names. | |
219 | * | |
220 | * Common variables are protected by both r_rq.lock and s_lock, taken in that order, | |
221 | * which only happens in modify_qp() or when changing the QP 'state'. | |
222 | */ | |
223 | struct rvt_qp { | |
224 | struct ib_qp ibqp; /* embedded struct ib_qp; first member */ | |
225 | void *priv; /* Driver private data */ | |
226 | /* read mostly fields above and below */ | |
227 | struct ib_ah_attr remote_ah_attr; | |
228 | struct ib_ah_attr alt_ah_attr; | |
229 | struct rvt_qp __rcu *next; /* linked list for QPN hash table */ | |
230 | struct rvt_swqe *s_wq; /* send work queue */ | |
231 | struct rvt_mmap_info *ip; /* mmap bookkeeping, see struct rvt_mmap_info */ | |
232 | ||
233 | unsigned long timeout_jiffies; /* computed from timeout */ | |
234 | ||
235 | enum ib_mtu path_mtu; | |
236 | int srate_mbps; /* s_srate (below) converted to Mbit/s */ | |
237 | u32 remote_qpn; | |
238 | u32 pmtu; /* decoded from path_mtu */ | |
239 | u32 qkey; /* QKEY for this QP (for UD or RD) */ | |
240 | u32 s_size; /* send work queue size */ | |
241 | u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ | |
242 | u32 s_ahgpsn; /* set to the psn in the copy of the header */ | |
243 | ||
244 | u8 state; /* QP state */ | |
245 | u8 allowed_ops; /* high order bits of allowed opcodes */ | |
246 | u8 qp_access_flags; | |
247 | u8 alt_timeout; /* Alternate path timeout for this QP */ | |
248 | u8 timeout; /* Timeout for this QP */ | |
249 | u8 s_srate; | |
250 | u8 s_mig_state; | |
251 | u8 port_num; | |
252 | u8 s_pkey_index; /* PKEY index to use */ | |
253 | u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ | |
254 | u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ | |
255 | u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ | |
256 | u8 s_retry_cnt; /* number of times to retry */ | |
257 | u8 s_rnr_retry_cnt; | |
258 | u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ | |
259 | u8 s_max_sge; /* size of s_wq->sg_list */ | |
260 | u8 s_draining; | |
261 | ||
262 | /* start of read/write fields */ | |
263 | atomic_t refcount ____cacheline_aligned_in_smp; | |
264 | wait_queue_head_t wait; | |
265 | ||
266 | struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] | |
267 | ____cacheline_aligned_in_smp; /* indexed by r_head_ack_queue and s_tail_ack_queue */ | |
268 | struct rvt_sge_state s_rdma_read_sge; | |
269 | ||
270 | spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ | |
271 | unsigned long r_aflags; /* atomic bit numbers RVT_R_WRID_VALID / RVT_R_REWIND_SGE */ | |
272 | u64 r_wr_id; /* ID for current receive WQE */ | |
273 | u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ | |
274 | u32 r_len; /* total length of r_sge */ | |
275 | u32 r_rcv_len; /* receive data len processed */ | |
276 | u32 r_psn; /* expected rcv packet sequence number */ | |
277 | u32 r_msn; /* message sequence number */ | |
278 | ||
279 | u8 r_state; /* opcode of last packet received */ | |
280 | u8 r_flags; /* RVT_R_REUSE_SGE ... RVT_R_COMM_EST mask bits */ | |
281 | u8 r_head_ack_queue; /* index into s_ack_queue[] */ | |
282 | ||
283 | struct list_head rspwait; /* link for waiting to respond */ | |
284 | ||
285 | struct rvt_sge_state r_sge; /* current receive data */ | |
286 | struct rvt_rq r_rq; /* receive work queue */ | |
287 | ||
288 | spinlock_t s_lock ____cacheline_aligned_in_smp; | |
289 | struct rvt_sge_state *s_cur_sge; | |
290 | u32 s_flags; /* RVT_S_* mask bits */ | |
291 | struct rvt_swqe *s_wqe; | |
292 | struct rvt_sge_state s_sge; /* current send request data */ | |
293 | struct rvt_mregion *s_rdma_mr; | |
294 | struct sdma_engine *s_sde; /* current sde */ | |
295 | u32 s_cur_size; /* size of send packet in bytes */ | |
296 | u32 s_len; /* total length of s_sge */ | |
297 | u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ | |
298 | u32 s_next_psn; /* PSN for next request */ | |
299 | u32 s_last_psn; /* last response PSN processed */ | |
300 | u32 s_sending_psn; /* lowest PSN that is being sent */ | |
301 | u32 s_sending_hpsn; /* highest PSN that is being sent */ | |
302 | u32 s_psn; /* current packet sequence number */ | |
303 | u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ | |
304 | u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ | |
305 | u32 s_head; /* new entries added here */ | |
306 | u32 s_tail; /* next entry to process */ | |
307 | u32 s_cur; /* current work queue entry */ | |
308 | u32 s_acked; /* last un-ACK'ed entry */ | |
309 | u32 s_last; /* last completed entry */ | |
310 | u32 s_ssn; /* SSN of tail entry */ | |
311 | u32 s_lsn; /* limit sequence number (credit) */ | |
312 | u16 s_hdrwords; /* size of s_hdr in 32 bit words; NOTE(review): s_hdr is not a member of this struct, presumably it lives in the driver private data */ | |
313 | u16 s_rdma_ack_cnt; | |
314 | s8 s_ahgidx; | |
315 | u8 s_state; /* opcode of last packet sent */ | |
316 | u8 s_ack_state; /* opcode of packet to ACK */ | |
317 | u8 s_nak_state; /* non-zero if NAK is pending */ | |
318 | u8 r_nak_state; /* non-zero if NAK is pending */ | |
319 | u8 s_retry; /* requester retry counter */ | |
320 | u8 s_rnr_retry; /* requester RNR retry counter */ | |
321 | u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ | |
322 | u8 s_tail_ack_queue; /* index into s_ack_queue[] */ | |
323 | ||
324 | struct rvt_sge_state s_ack_rdma_sge; | |
325 | struct timer_list s_timer; | |
326 | ||
327 | /* | |
328 | * This sge list MUST be last. Do not add anything below here: it is a variable-length trailing array. | |
329 | */ | |
330 | struct rvt_sge r_sg_list[0] /* verified SGEs */ | |
331 | ____cacheline_aligned_in_smp; | |
332 | }; | |
333 | ||
334 | struct rvt_srq { /* SRQ state: embedded struct ib_srq plus its receive queue */ | |
335 | struct ib_srq ibsrq; /* embedded struct ib_srq; first member */ | |
336 | struct rvt_rq rq; /* receive queue; rq.max_sge bounds each RWQE's sg_list */ | |
337 | struct rvt_mmap_info *ip; /* mmap bookkeeping, see struct rvt_mmap_info */ | |
338 | /* send signal when number of RWQEs < limit */ | |
339 | u32 limit; | |
340 | }; | |
341 | ||
0acb0cc7 DD |
342 | #define RVT_QPN_MAX BIT(24) /* size of the QP number space: 1 << 24 */ |
343 | #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) /* bitmap pages needed for RVT_QPN_MAX bits */ | |
344 | #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) /* QP numbers tracked by one bitmap page */ | |
345 | #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) /* bit offset within a page; a valid mask only if PAGE_SIZE is a power of two */ | |
346 | ||
347 | /* | |
348 | * QPN-map pages start out as NULL, they get allocated upon | |
349 | * first use and are never deallocated. This way, | |
350 | * large bitmaps are not allocated unless large numbers of QPs are used. | |
351 | */ | |
352 | struct rvt_qpn_map { | |
353 | void *page; /* one bitmap page; NULL until first use */ | |
354 | }; | |
355 | ||
356 | struct rvt_qpn_table { /* QP number allocator state: a bitmap split into lazily allocated pages */ | |
357 | spinlock_t lock; /* protect changes to the qp table */ | |
358 | unsigned flags; /* flags for QP0/1 allocated for each port */ | |
359 | u32 last; /* last QP number allocated */ | |
360 | u32 nmaps; /* size of the map table */ | |
361 | u16 limit; /* NOTE(review): meaning not evident from this header - document at the point of use */ | |
362 | u8 incr; /* NOTE(review): meaning not evident from this header; presumably the step between candidate QP numbers - confirm */ | |
363 | /* bit map of free QP numbers other than 0/1 */ | |
364 | struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; | |
365 | }; | |
366 | ||
367 | struct rvt_qp_ibdev { /* per-device QP bookkeeping: QPN hash table plus the QPN allocator */ | |
368 | u32 qp_table_size; /* presumably the number of buckets in qp_table - confirm */ | |
369 | u32 qp_table_bits; /* presumably log2 of qp_table_size - confirm */ | |
370 | struct rvt_qp __rcu **qp_table; /* QPN hash table; entries chain through rvt_qp.next */ | |
371 | spinlock_t qpt_lock; /* qptable lock */ | |
372 | struct rvt_qpn_table qpn_table; /* QP number allocator, see struct rvt_qpn_table */ | |
373 | }; | |
374 | ||
b4e64397 | 375 | #endif /* DEF_RDMAVT_INCQP_H */ |