]>
Commit | Line | Data |
---|---|---|
b4e64397 DD |
1 | #ifndef DEF_RDMAVT_INCQP_H |
2 | #define DEF_RDMAVT_INCQP_H | |
3 | ||
4 | /* | |
5 | * Copyright(c) 2015 Intel Corporation. | |
6 | * | |
7 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
8 | * redistributing this file, you may do so under either license. | |
9 | * | |
10 | * GPL LICENSE SUMMARY | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify | |
13 | * it under the terms of version 2 of the GNU General Public License as | |
14 | * published by the Free Software Foundation. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, but | |
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * General Public License for more details. | |
20 | * | |
21 | * BSD LICENSE | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * | |
27 | * - Redistributions of source code must retain the above copyright | |
28 | * notice, this list of conditions and the following disclaimer. | |
29 | * - Redistributions in binary form must reproduce the above copyright | |
30 | * notice, this list of conditions and the following disclaimer in | |
31 | * the documentation and/or other materials provided with the | |
32 | * distribution. | |
33 | * - Neither the name of Intel Corporation nor the names of its | |
34 | * contributors may be used to endorse or promote products derived | |
35 | * from this software without specific prior written permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
38 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
39 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
40 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
41 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
44 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
45 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 | * | |
49 | */ | |
50 | ||
5a9cf6f2 | 51 | #include <rdma/rdma_vt.h> |
050eb7fb DD |
52 | #include <rdma/ib_pack.h> |
53 | /* | |
54 | * Atomic bit definitions for r_aflags. These values are bit positions, not masks (presumably for the atomic bitops on the unsigned long r_aflags field; confirm against callers). | |
55 | */ | |
56 | #define RVT_R_WRID_VALID 0 | |
57 | #define RVT_R_REWIND_SGE 1 | |
58 | ||
59 | /* | |
60 | * Bit mask definitions for r_flags. r_flags is a u8 in struct rvt_qp, so only 0x01..0x80 are available. | |
61 | */ | |
62 | #define RVT_R_REUSE_SGE 0x01 | |
63 | #define RVT_R_RDMAR_SEQ 0x02 | |
64 | #define RVT_R_RSP_NAK 0x04 | |
65 | #define RVT_R_RSP_SEND 0x08 | |
66 | #define RVT_R_COMM_EST 0x10 | |
67 | ||
68 | /* | |
69 | * Bit mask definitions for s_flags (a u32 in struct rvt_qp). | |
70 | * | |
71 | * RVT_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled | |
72 | * RVT_S_BUSY - send tasklet is processing the QP | |
73 | * RVT_S_TIMER - the RC retry timer is active | |
74 | * RVT_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics | |
75 | * RVT_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs | |
76 | * before processing the next SWQE | |
77 | * RVT_S_WAIT_RDMAR - waiting for an RDMA read or atomic SWQE to complete | |
78 | * before processing the next SWQE | |
79 | * RVT_S_WAIT_RNR - waiting for RNR timeout | |
80 | * RVT_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE | |
81 | * RVT_S_WAIT_DMA - waiting for send DMA queue to drain before generating | |
82 | * next send completion entry not via send DMA | |
83 | * RVT_S_WAIT_PIO - waiting for a send buffer to be available | |
84 | * RVT_S_WAIT_TX - waiting for a struct verbs_txreq to be available | |
85 | * RVT_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available | |
86 | * RVT_S_WAIT_KMEM - waiting for kernel memory to be available | |
87 | * RVT_S_WAIT_PSN - waiting for a packet to exit the send DMA queue | |
88 | * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests | |
89 | * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK | |
90 | * RVT_S_ECN - a BECN was queued to the send engine | |
91 | */ | |
92 | #define RVT_S_SIGNAL_REQ_WR 0x0001 | |
93 | #define RVT_S_BUSY 0x0002 | |
94 | #define RVT_S_TIMER 0x0004 | |
95 | #define RVT_S_RESP_PENDING 0x0008 /* NOTE(review): not described in the list above */ | |
96 | #define RVT_S_ACK_PENDING 0x0010 | |
97 | #define RVT_S_WAIT_FENCE 0x0020 | |
98 | #define RVT_S_WAIT_RDMAR 0x0040 | |
99 | #define RVT_S_WAIT_RNR 0x0080 | |
100 | #define RVT_S_WAIT_SSN_CREDIT 0x0100 | |
101 | #define RVT_S_WAIT_DMA 0x0200 | |
102 | #define RVT_S_WAIT_PIO 0x0400 | |
103 | #define RVT_S_WAIT_TX 0x0800 | |
104 | #define RVT_S_WAIT_DMA_DESC 0x1000 | |
105 | #define RVT_S_WAIT_KMEM 0x2000 | |
106 | #define RVT_S_WAIT_PSN 0x4000 | |
107 | #define RVT_S_WAIT_ACK 0x8000 | |
108 | #define RVT_S_SEND_ONE 0x10000 | |
109 | #define RVT_S_UNLIMITED_CREDIT 0x20000 /* NOTE(review): not described in the list above */ | |
110 | #define RVT_S_AHG_VALID 0x40000 /* NOTE(review): not described in the list above */ | |
111 | #define RVT_S_AHG_CLEAR 0x80000 /* NOTE(review): not described in the list above */ | |
112 | #define RVT_S_ECN 0x100000 | |
113 | ||
114 | /* | |
115 | * Wait flags that would prevent any packet type from being sent: the QP is waiting on a send buffer, a txreq, DMA descriptors or kernel memory. | |
116 | */ | |
117 | #define RVT_S_ANY_WAIT_IO (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \ | |
118 | RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM) | |
119 | ||
120 | /* | |
121 | * Wait flags that would prevent send work requests from making progress. | |
122 | */ | |
123 | #define RVT_S_ANY_WAIT_SEND (RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR | \ | |
124 | RVT_S_WAIT_RNR | RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_DMA | \ | |
125 | RVT_S_WAIT_PSN | RVT_S_WAIT_ACK) | |
126 | ||
127 | #define RVT_S_ANY_WAIT (RVT_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND) /* union of both wait groups above */ | |
128 | ||
129 | /* Mask of the opcode bits (bits 7:5) to pay attention to when checking the qp type */ | |
130 | #define RVT_OPCODE_QP_MASK 0xE0 | |
131 | ||
b4e64397 DD |
132 | /* |
133 | * Send work request queue entry. | |
134 | * The size of the sg_list is determined when the QP is created and stored | |
135 | * in qp->s_max_sge. Because sg_list is a trailing variable-length array, entries are not sizeof(struct rvt_swqe) apart. | |
136 | */ | |
137 | struct rvt_swqe { | |
138 | union { | |
139 | struct ib_send_wr wr; /* don't use wr.sg_list */ | |
140 | struct ib_ud_wr ud_wr; | |
141 | struct ib_reg_wr reg_wr; | |
142 | struct ib_rdma_wr rdma_wr; | |
143 | struct ib_atomic_wr atomic_wr; | |
144 | }; | |
145 | u32 psn; /* first packet sequence number */ | |
146 | u32 lpsn; /* last packet sequence number */ | |
147 | u32 ssn; /* send sequence number */ | |
148 | u32 length; /* total length of data in sg_list */ | |
149 | struct rvt_sge sg_list[0]; /* variable length (see s_max_sge); must remain the last member */ | |
150 | }; | |
151 | ||
152 | /* | |
153 | * Receive work request queue entry. | |
154 | * The size of the sg_list is determined when the QP (or SRQ) is created | |
155 | * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). | |
156 | */ | |
157 | struct rvt_rwqe { | |
158 | u64 wr_id; | |
159 | u8 num_sge; /* presumably the number of sg_list entries in use -- TODO confirm */ | |
160 | struct ib_sge sg_list[0]; /* variable length (see max_sge); must remain the last member */ | |
161 | }; | |
162 | ||
163 | /* | |
164 | * This structure is used to contain the head pointer, tail pointer, | |
165 | * and receive work queue entries as a single memory allocation so | |
166 | * it can be mmap'ed into user space. | |
167 | * Note that the wq array elements are variable size so you can't | |
168 | * just index into the array to get the N'th element; | |
169 | * use get_rwqe_ptr() instead. | |
170 | */ | |
171 | struct rvt_rwq { | |
172 | u32 head; /* new work requests posted to the head */ | |
173 | u32 tail; /* receives pull requests from here. */ | |
174 | struct rvt_rwqe wq[0]; /* variable-size entries; do not index directly */ | |
175 | }; | |
176 | ||
177 | struct rvt_rq { /* receive queue: the RWQ allocation plus its sizing and lock */ | |
178 | struct rvt_rwq *wq; | |
179 | u32 size; /* size of RWQE array */ | |
180 | u8 max_sge; /* sizes the sg_list of each RWQE in wq */ | |
181 | /* protect changes in this struct */ | |
182 | spinlock_t lock ____cacheline_aligned_in_smp; | |
183 | }; | |
184 | ||
185 | /* | |
186 | * This structure is used by rvt_mmap() to validate an offset | |
187 | * when an mmap() request is made. The vm_area_struct then uses | |
188 | * this as its vm_private_data. | |
189 | */ | |
190 | struct rvt_mmap_info { | |
191 | struct list_head pending_mmaps; | |
192 | struct ib_ucontext *context; | |
193 | void *obj; /* presumably the memory to be mapped -- TODO confirm */ | |
194 | __u64 offset; /* the offset that rvt_mmap() validates */ | |
195 | struct kref ref; /* reference count for this structure */ | |
196 | unsigned size; /* presumably the size of obj in bytes -- TODO confirm */ | |
197 | }; | |
198 | ||
199 | #define RVT_MAX_RDMA_ATOMIC 16 /* s_ack_queue[] in struct rvt_qp holds RVT_MAX_RDMA_ATOMIC + 1 entries */ | |
200 | ||
201 | /* | |
202 | * This structure holds the information that the send tasklet needs | |
203 | * to send an RDMA read response or atomic operation. | |
204 | */ | |
205 | struct rvt_ack_entry { | |
206 | u8 opcode; | |
207 | u8 sent; | |
208 | u32 psn; | |
209 | u32 lpsn; | |
210 | union { /* presumably selected by opcode: rdma_sge for a read, atomic_data for an atomic -- TODO confirm */ | |
211 | struct rvt_sge rdma_sge; | |
212 | u64 atomic_data; | |
213 | }; | |
214 | }; | |
215 | ||
216 | /* | |
217 | * Variables prefixed with s_ are for the requester (sender). | |
218 | * Variables prefixed with r_ are for the responder (receiver). | |
219 | * Variables prefixed with ack_ are for responder replies. | |
220 | * | |
221 | * Common variables are protected by both r_rq.lock and s_lock, taken in that order, | |
222 | * which only happens in modify_qp() or when changing the QP 'state'. | |
223 | */ | |
224 | struct rvt_qp { | |
225 | struct ib_qp ibqp; | |
226 | void *priv; /* Driver private data */ | |
227 | /* read mostly fields above and below */ | |
228 | struct ib_ah_attr remote_ah_attr; | |
229 | struct ib_ah_attr alt_ah_attr; | |
230 | struct rvt_qp __rcu *next; /* link list for QPN hash table */ | |
231 | struct rvt_swqe *s_wq; /* send work queue */ | |
232 | struct rvt_mmap_info *ip; | |
233 | ||
234 | unsigned long timeout_jiffies; /* computed from timeout */ | |
235 | ||
236 | enum ib_mtu path_mtu; | |
237 | int srate_mbps; /* s_srate (below) converted to Mbit/s */ | |
238 | u32 remote_qpn; | |
239 | u32 pmtu; /* decoded from path_mtu */ | |
240 | u32 qkey; /* QKEY for this QP (for UD or RD) */ | |
241 | u32 s_size; /* send work queue size */ | |
242 | u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ | |
243 | u32 s_ahgpsn; /* set to the psn in the copy of the header */ | |
244 | ||
245 | u8 state; /* QP state */ | |
246 | u8 allowed_ops; /* high order bits of allowed opcodes */ | |
247 | u8 qp_access_flags; | |
248 | u8 alt_timeout; /* Alternate path timeout for this QP */ | |
249 | u8 timeout; /* Timeout for this QP */ | |
250 | u8 s_srate; | |
251 | u8 s_mig_state; | |
252 | u8 port_num; | |
253 | u8 s_pkey_index; /* PKEY index to use */ | |
254 | u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ | |
255 | u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ | |
256 | u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ | |
257 | u8 s_retry_cnt; /* number of times to retry */ | |
258 | u8 s_rnr_retry_cnt; | |
259 | u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ | |
260 | u8 s_max_sge; /* size of s_wq->sg_list */ | |
261 | u8 s_draining; | |
262 | ||
263 | /* start of read/write fields */ | |
264 | atomic_t refcount ____cacheline_aligned_in_smp; | |
265 | wait_queue_head_t wait; | |
266 | ||
267 | struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] | |
268 | ____cacheline_aligned_in_smp; | |
269 | struct rvt_sge_state s_rdma_read_sge; | |
270 | ||
271 | spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ | |
272 | unsigned long r_aflags; /* RVT_R_WRID_VALID / RVT_R_REWIND_SGE bit numbers */ | |
273 | u64 r_wr_id; /* ID for current receive WQE */ | |
274 | u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ | |
275 | u32 r_len; /* total length of r_sge */ | |
276 | u32 r_rcv_len; /* receive data len processed */ | |
277 | u32 r_psn; /* expected rcv packet sequence number */ | |
278 | u32 r_msn; /* message sequence number */ | |
279 | ||
280 | u8 r_state; /* opcode of last packet received */ | |
281 | u8 r_flags; /* RVT_R_* bit masks (RVT_R_REUSE_SGE .. RVT_R_COMM_EST) */ | |
282 | u8 r_head_ack_queue; /* index into s_ack_queue[] */ | |
283 | ||
284 | struct list_head rspwait; /* link for waiting to respond */ | |
285 | ||
286 | struct rvt_sge_state r_sge; /* current receive data */ | |
287 | struct rvt_rq r_rq; /* receive work queue */ | |
288 | ||
289 | spinlock_t s_lock ____cacheline_aligned_in_smp; | |
290 | struct rvt_sge_state *s_cur_sge; | |
291 | u32 s_flags; /* RVT_S_* bit masks */ | |
292 | struct rvt_swqe *s_wqe; | |
293 | struct rvt_sge_state s_sge; /* current send request data */ | |
294 | struct rvt_mregion *s_rdma_mr; | |
295 | struct sdma_engine *s_sde; /* current sde */ | |
296 | u32 s_cur_size; /* size of send packet in bytes */ | |
297 | u32 s_len; /* total length of s_sge */ | |
298 | u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ | |
299 | u32 s_next_psn; /* PSN for next request */ | |
300 | u32 s_last_psn; /* last response PSN processed */ | |
301 | u32 s_sending_psn; /* lowest PSN that is being sent */ | |
302 | u32 s_sending_hpsn; /* highest PSN that is being sent */ | |
303 | u32 s_psn; /* current packet sequence number */ | |
304 | u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ | |
305 | u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ | |
306 | u32 s_head; /* new entries added here */ | |
307 | u32 s_tail; /* next entry to process */ | |
308 | u32 s_cur; /* current work queue entry */ | |
309 | u32 s_acked; /* last un-ACK'ed entry */ | |
310 | u32 s_last; /* last completed entry */ | |
311 | u32 s_ssn; /* SSN of tail entry */ | |
312 | u32 s_lsn; /* limit sequence number (credit) */ | |
313 | u16 s_hdrwords; /* size of s_hdr in 32 bit words */ | |
314 | u16 s_rdma_ack_cnt; | |
315 | s8 s_ahgidx; | |
316 | u8 s_state; /* opcode of last packet sent */ | |
317 | u8 s_ack_state; /* opcode of packet to ACK */ | |
318 | u8 s_nak_state; /* non-zero if NAK is pending */ | |
319 | u8 r_nak_state; /* non-zero if NAK is pending */ | |
320 | u8 s_retry; /* requester retry counter */ | |
321 | u8 s_rnr_retry; /* requester RNR retry counter */ | |
322 | u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ | |
323 | u8 s_tail_ack_queue; /* index into s_ack_queue[] */ | |
324 | ||
325 | struct rvt_sge_state s_ack_rdma_sge; | |
326 | struct timer_list s_timer; | |
327 | ||
328 | /* | |
329 | * This sge list MUST be last. Do not add anything below here. It is a zero-length trailing array, so its storage must come from extra space allocated past the end of the struct. | |
330 | */ | |
331 | struct rvt_sge r_sg_list[0] /* verified SGEs */ | |
332 | ____cacheline_aligned_in_smp; | |
333 | }; | |
334 | ||
335 | struct rvt_srq { /* SRQ object: the ib_srq plus its receive queue and mmap info */ | |
336 | struct ib_srq ibsrq; | |
337 | struct rvt_rq rq; | |
338 | struct rvt_mmap_info *ip; | |
339 | /* send signal when number of RWQEs < limit */ | |
340 | u32 limit; | |
341 | }; | |
342 | ||
0acb0cc7 DD |
343 | #define RVT_QPN_MAX BIT(24) /* 2^24: size of the QP number space covered by the bitmap */ |
344 | #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) /* pages needed to hold one bit per QPN */ | |
345 | #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) /* QPN bits stored in one map page */ | |
346 | #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) /* mask for a bit offset within one map page */ | |
347 | ||
348 | /* | |
349 | * QPN-map pages start out as NULL, they get allocated upon | |
350 | * first use and are never deallocated. This way, | |
351 | * large bitmaps are not allocated unless large numbers of QPs are used. | |
352 | */ | |
353 | struct rvt_qpn_map { | |
354 | void *page; /* one page of the QPN bitmap; NULL until first use */ | |
355 | }; | |
356 | ||
357 | struct rvt_qpn_table { /* QP number allocator state: a lock, allocation cursor and the bitmap pages */ | |
358 | spinlock_t lock; /* protect changes to the qp table */ | |
359 | unsigned flags; /* flags for QP0/1 allocated for each port */ | |
360 | u32 last; /* last QP number allocated */ | |
361 | u32 nmaps; /* size of the map table */ | |
362 | u16 limit; /* NOTE(review): meaning not documented here -- confirm against the allocator */ | |
363 | u8 incr; /* NOTE(review): meaning not documented here -- confirm against the allocator */ | |
364 | /* bit map of free QP numbers other than 0/1 */ | |
365 | struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; | |
366 | }; | |
367 | ||
368 | struct rvt_qp_ibdev { /* QP lookup table together with the QPN allocation table */ | |
369 | u32 qp_table_size; | |
370 | u32 qp_table_bits; /* presumably log2 of qp_table_size -- TODO confirm */ | |
371 | struct rvt_qp __rcu **qp_table; /* array of RCU-protected rvt_qp pointers; entries chain through rvt_qp.next */ | |
372 | spinlock_t qpt_lock; /* qptable lock */ | |
373 | struct rvt_qpn_table qpn_table; | |
374 | }; | |
375 | ||
b4e64397 | 376 | #endif /* DEF_RDMAVT_INCQP_H */ |