]>
Commit | Line | Data |
---|---|---|
0194621b DD |
1 | #ifndef DEF_RDMA_VT_H |
2 | #define DEF_RDMA_VT_H | |
3 | ||
4 | /* | |
5 | * Copyright(c) 2015 Intel Corporation. | |
6 | * | |
7 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
8 | * redistributing this file, you may do so under either license. | |
9 | * | |
10 | * GPL LICENSE SUMMARY | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify | |
13 | * it under the terms of version 2 of the GNU General Public License as | |
14 | * published by the Free Software Foundation. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, but | |
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * General Public License for more details. | |
20 | * | |
21 | * BSD LICENSE | |
22 | * | |
23 | * Redistribution and use in source and binary forms, with or without | |
24 | * modification, are permitted provided that the following conditions | |
25 | * are met: | |
26 | * | |
27 | * - Redistributions of source code must retain the above copyright | |
28 | * notice, this list of conditions and the following disclaimer. | |
29 | * - Redistributions in binary form must reproduce the above copyright | |
30 | * notice, this list of conditions and the following disclaimer in | |
31 | * the documentation and/or other materials provided with the | |
32 | * distribution. | |
33 | * - Neither the name of Intel Corporation nor the names of its | |
34 | * contributors may be used to endorse or promote products derived | |
35 | * from this software without specific prior written permission. | |
36 | * | |
37 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
38 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
39 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
40 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
41 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
42 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
43 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
44 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
45 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
46 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
47 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
48 | * | |
49 | */ | |
50 | ||
51 | /* | |
52 | * Structure that low level drivers will populate in order to register with the | |
53 | * rdmavt layer. | |
54 | */ | |
55 | ||
56 | #include "ib_verbs.h" | |
8afd32eb | 57 | |
0b8a8aae DD |
58 | /* |
59 | * For some of the IBTA objects there will likely be some | |
60 | * initializations required. We need flags to determine whether it is OK | |
61 | * for rdmavt to do this or not. This does not imply any functions of a | |
62 | * particular IBTA object are overridden. Only BIT(1)-BIT(3) are defined here. | |
63 | */ | |
64 | #define RVT_FLAG_MR_INIT_DRIVER BIT(1) | |
65 | #define RVT_FLAG_QP_INIT_DRIVER BIT(2) | |
66 | #define RVT_FLAG_CQ_INIT_DRIVER BIT(3) | |
67 | ||
b92a7568 DD |
68 | /* |
69 | * For Memory Regions. This stuff should probably be moved into rdmavt/mr.h once | |
70 | * drivers no longer need access to the MR directly. | |
71 | */ | |
72 | ||
73 | /* | |
74 | * A segment is a linear region of low physical memory. | |
75 | * Used by the verbs layer. | |
76 | */ | |
77 | struct rvt_seg { | |
78 | void *vaddr; /* start of the segment (NOTE(review): presumably a kernel virtual address; confirm) */ | |
79 | size_t length; /* length of the segment (presumably in bytes; confirm) */ | |
80 | }; | |
81 | ||
82 | /* The number of rvt_segs that fit in a page. */ | |
83 | #define RVT_SEGSZ (PAGE_SIZE / sizeof(struct rvt_seg)) | |
84 | ||
85 | struct rvt_segarray { | |
86 | struct rvt_seg segs[RVT_SEGSZ]; /* RVT_SEGSZ entries, so at most PAGE_SIZE bytes */ | |
87 | }; | |
88 | ||
89 | struct rvt_mregion { | |
90 | struct ib_pd *pd; /* shares refcnt of ibmr.pd */ | |
91 | u64 user_base; /* User's address for this region */ | |
92 | u64 iova; /* IB start address of this region */ | |
93 | size_t length; /* length of the region (presumably in bytes; confirm) */ | |
94 | u32 lkey; | |
95 | u32 offset; /* offset (bytes) to start of region */ | |
96 | int access_flags; | |
97 | u32 max_segs; /* number of rvt_segs in all the arrays */ | |
98 | u32 mapsz; /* size of the map array */ | |
99 | u8 page_shift; /* 0 - non-uniform/non-power-of-2 sizes */ | |
100 | u8 lkey_published; /* in global table */ | |
101 | struct completion comp; /* complete when refcount goes to zero */ | |
102 | atomic_t refcount; /* comp is completed when this reaches zero */ | |
103 | struct rvt_segarray *map[0]; /* the segments; trailing array, keep last */ | |
104 | }; | |
105 | ||
106 | #define RVT_MAX_LKEY_TABLE_BITS 23 /* NOTE(review): presumably an upper bound on log2 of the lkey table size; confirm */ | |
107 | ||
108 | struct rvt_lkey_table { | |
109 | spinlock_t lock; /* protect changes in this struct */ | |
110 | u32 next; /* next unused index (speeds search) */ | |
111 | u32 gen; /* generation count */ | |
112 | u32 max; /* size of the table */ | |
113 | struct rvt_mregion __rcu **table; /* __rcu-annotated array of MR pointers, sized by max */ | |
114 | }; | |
115 | ||
116 | /* End Memory Region */ | |
117 | ||
ca889e8a DD |
118 | /* |
119 | * Things needed for the Queue Pair definition. Like the MR stuff above the | |
120 | * following should probably get moved to qp.h once drivers stop trying to make | |
121 | * and manipulate their own QPs. For the few instances where a driver may need | |
122 | * to look into a queue pair there should be a pointer to a driver private data | |
123 | * structure that they can look at. | |
124 | */ | |
125 | ||
126 | /* | |
127 | * These keep track of the copy progress within a memory region. | |
128 | * Used by the verbs layer. | |
129 | */ | |
130 | struct rvt_sge { | |
131 | struct rvt_mregion *mr; /* memory region indexed by m and n below */ | |
132 | void *vaddr; /* kernel virtual address of segment */ | |
133 | u32 sge_length; /* length of the SGE */ | |
134 | u32 length; /* remaining length of the segment */ | |
135 | u16 m; /* current index: mr->map[m] */ | |
136 | u16 n; /* current index: mr->map[m]->segs[n] */ | |
137 | }; | |
138 | ||
139 | /* | |
140 | * Send work request queue entry. | |
141 | * The size of the sg_list is determined when the QP is created and stored | |
142 | * in qp->s_max_sge. | |
143 | */ | |
144 | struct rvt_swqe { | |
145 | union { /* overlapping views of one work request */ | |
146 | struct ib_send_wr wr; /* don't use wr.sg_list */ | |
147 | struct ib_ud_wr ud_wr; | |
148 | struct ib_reg_wr reg_wr; | |
149 | struct ib_rdma_wr rdma_wr; | |
150 | struct ib_atomic_wr atomic_wr; | |
151 | }; | |
152 | u32 psn; /* first packet sequence number */ | |
153 | u32 lpsn; /* last packet sequence number */ | |
154 | u32 ssn; /* send sequence number */ | |
155 | u32 length; /* total length of data in sg_list */ | |
156 | struct rvt_sge sg_list[0]; /* trailing array; entry count is qp->s_max_sge */ | |
157 | }; | |
158 | ||
159 | /* | |
160 | * Receive work request queue entry. | |
161 | * The size of the sg_list is determined when the QP (or SRQ) is created | |
162 | * and stored in qp->r_rq.max_sge (or srq->rq.max_sge). | |
163 | */ | |
164 | struct rvt_rwqe { | |
165 | u64 wr_id; /* NOTE(review): presumably the caller's work request ID; confirm */ | |
166 | u8 num_sge; /* NOTE(review): presumably the number of valid sg_list entries; confirm */ | |
167 | struct ib_sge sg_list[0]; /* trailing array; capacity is the queue's max_sge */ | |
168 | }; | |
169 | ||
170 | /* | |
171 | * This structure is used to contain the head pointer, tail pointer, | |
172 | * and receive work queue entries as a single memory allocation so | |
173 | * it can be mmap'ed into user space. | |
174 | * Note that the wq array elements are variable size so you can't | |
175 | * just index into the array to get the N'th element; | |
176 | * use get_rwqe_ptr() instead. | |
177 | */ | |
178 | struct rvt_rwq { | |
179 | u32 head; /* new work requests posted to the head */ | |
180 | u32 tail; /* receives pull requests from here. */ | |
181 | struct rvt_rwqe wq[0]; /* variable-size entries: do not index directly, use get_rwqe_ptr() */ | |
182 | }; | |
183 | ||
184 | struct rvt_rq { | |
185 | struct rvt_rwq *wq; /* head, tail and the RWQE array (struct rvt_rwq) */ | |
186 | u32 size; /* size of RWQE array */ | |
187 | u8 max_sge; /* size of each RWQE's sg_list */ | |
188 | /* protect changes in this struct */ | |
189 | spinlock_t lock ____cacheline_aligned_in_smp; | |
190 | }; | |
191 | ||
192 | /* | |
193 | * This structure is used by rvt_mmap() to validate an offset | |
194 | * when an mmap() request is made. The vm_area_struct then uses | |
195 | * this as its vm_private_data. | |
196 | */ | |
197 | struct rvt_mmap_info { | |
198 | struct list_head pending_mmaps; /* list linkage (NOTE(review): presumably a list of not-yet-mapped entries; confirm) */ | |
199 | struct ib_ucontext *context; | |
200 | void *obj; /* NOTE(review): presumably the kernel object being mapped; confirm */ | |
201 | __u64 offset; /* offset that an mmap() request is validated against */ | |
202 | struct kref ref; /* reference count for this structure */ | |
203 | unsigned size; /* NOTE(review): presumably the size of the mapping in bytes; confirm */ | |
204 | }; | |
205 | ||
206 | #define RVT_MAX_RDMA_ATOMIC 16 /* struct rvt_qp's s_ack_queue[] has this many entries plus one */ | |
207 | ||
208 | /* | |
209 | * This structure holds the information that the send tasklet needs | |
210 | * to send a RDMA read response or atomic operation. | |
211 | */ | |
212 | struct rvt_ack_entry { | |
213 | u8 opcode; | |
214 | u8 sent; | |
215 | u32 psn; /* NOTE(review): presumably first PSN, as in struct rvt_swqe; confirm */ | |
216 | u32 lpsn; /* NOTE(review): presumably last PSN, as in struct rvt_swqe; confirm */ | |
217 | union { /* NOTE(review): presumably rdma_sge for read responses, atomic_data for atomics; confirm */ | |
218 | struct rvt_sge rdma_sge; | |
219 | u64 atomic_data; | |
220 | }; | |
221 | }; | |
222 | ||
223 | struct rvt_sge_state { | |
224 | struct rvt_sge *sg_list; /* next SGE to be used if any */ | |
225 | struct rvt_sge sge; /* progress state for the current SGE */ | |
226 | u32 total_len; /* NOTE(review): presumably total bytes described by the SGE list; confirm */ | |
227 | u8 num_sge; /* NOTE(review): presumably the number of SGEs tracked by this state; confirm */ | |
228 | }; | |
229 | ||
230 | /* | |
231 | * Variables prefixed with s_ are for the requester (sender). | |
232 | * Variables prefixed with r_ are for the responder (receiver). | |
233 | * Variables prefixed with ack_ are for responder replies. | |
234 | * | |
235 | * Common variables are protected by both r_rq.lock and s_lock in that order | |
236 | * which only happens in modify_qp() or changing the QP 'state'. | |
237 | */ | |
238 | struct rvt_qp { | |
239 | struct ib_qp ibqp; | |
240 | void *priv; /* Driver private data */ | |
241 | /* read mostly fields above and below */ | |
242 | struct ib_ah_attr remote_ah_attr; | |
243 | struct ib_ah_attr alt_ah_attr; | |
244 | struct rvt_qp __rcu *next; /* link list for QPN hash table */ | |
245 | struct rvt_swqe *s_wq; /* send work queue */ | |
246 | struct rvt_mmap_info *ip; /* NOTE(review): presumably mmap info for user-mapped queues; confirm */ | |
247 | ||
248 | unsigned long timeout_jiffies; /* computed from timeout */ | |
249 | ||
250 | enum ib_mtu path_mtu; | |
251 | int srate_mbps; /* s_srate (below) converted to Mbit/s */ | |
252 | u32 remote_qpn; | |
253 | u32 pmtu; /* decoded from path_mtu */ | |
254 | u32 qkey; /* QKEY for this QP (for UD or RD) */ | |
255 | u32 s_size; /* send work queue size */ | |
256 | u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ | |
257 | u32 s_ahgpsn; /* set to the psn in the copy of the header */ | |
258 | ||
259 | u8 state; /* QP state */ | |
260 | u8 allowed_ops; /* high order bits of allowed opcodes */ | |
261 | u8 qp_access_flags; | |
262 | u8 alt_timeout; /* Alternate path timeout for this QP */ | |
263 | u8 timeout; /* Timeout for this QP */ | |
264 | u8 s_srate; /* converted to Mbit/s in srate_mbps */ | |
265 | u8 s_mig_state; | |
266 | u8 port_num; | |
267 | u8 s_pkey_index; /* PKEY index to use */ | |
268 | u8 s_alt_pkey_index; /* Alternate path PKEY index to use */ | |
269 | u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ | |
270 | u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ | |
271 | u8 s_retry_cnt; /* number of times to retry */ | |
272 | u8 s_rnr_retry_cnt; /* NOTE(review): presumably the configured RNR retry limit (cf. s_rnr_retry); confirm */ | |
273 | u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ | |
274 | u8 s_max_sge; /* size of s_wq->sg_list */ | |
275 | u8 s_draining; | |
276 | ||
277 | /* start of read/write fields */ | |
278 | atomic_t refcount ____cacheline_aligned_in_smp; | |
279 | wait_queue_head_t wait; | |
280 | ||
281 | struct rvt_ack_entry s_ack_queue[RVT_MAX_RDMA_ATOMIC + 1] | |
282 | ____cacheline_aligned_in_smp; | |
283 | struct rvt_sge_state s_rdma_read_sge; /* its total length is kept in s_rdma_read_len */ | |
284 | ||
285 | spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */ | |
286 | unsigned long r_aflags; | |
287 | u64 r_wr_id; /* ID for current receive WQE */ | |
288 | u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ | |
289 | u32 r_len; /* total length of r_sge */ | |
290 | u32 r_rcv_len; /* receive data len processed */ | |
291 | u32 r_psn; /* expected rcv packet sequence number */ | |
292 | u32 r_msn; /* message sequence number */ | |
293 | ||
294 | u8 r_state; /* opcode of last packet received */ | |
295 | u8 r_flags; | |
296 | u8 r_head_ack_queue; /* index into s_ack_queue[] */ | |
297 | ||
298 | struct list_head rspwait; /* link for waiting to respond */ | |
299 | ||
300 | struct rvt_sge_state r_sge; /* current receive data */ | |
301 | struct rvt_rq r_rq; /* receive work queue */ | |
302 | ||
303 | spinlock_t s_lock ____cacheline_aligned_in_smp; | |
304 | struct rvt_sge_state *s_cur_sge; | |
305 | u32 s_flags; | |
306 | struct rvt_swqe *s_wqe; | |
307 | struct rvt_sge_state s_sge; /* current send request data */ | |
308 | struct rvt_mregion *s_rdma_mr; | |
309 | struct sdma_engine *s_sde; /* current sde */ | |
310 | u32 s_cur_size; /* size of send packet in bytes */ | |
311 | u32 s_len; /* total length of s_sge */ | |
312 | u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ | |
313 | u32 s_next_psn; /* PSN for next request */ | |
314 | u32 s_last_psn; /* last response PSN processed */ | |
315 | u32 s_sending_psn; /* lowest PSN that is being sent */ | |
316 | u32 s_sending_hpsn; /* highest PSN that is being sent */ | |
317 | u32 s_psn; /* current packet sequence number */ | |
318 | u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ | |
319 | u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ | |
320 | u32 s_head; /* new entries added here */ | |
321 | u32 s_tail; /* next entry to process */ | |
322 | u32 s_cur; /* current work queue entry */ | |
323 | u32 s_acked; /* last un-ACK'ed entry */ | |
324 | u32 s_last; /* last completed entry */ | |
325 | u32 s_ssn; /* SSN of tail entry */ | |
326 | u32 s_lsn; /* limit sequence number (credit) */ | |
327 | u16 s_hdrwords; /* size of s_hdr in 32 bit words */ | |
328 | u16 s_rdma_ack_cnt; | |
329 | s8 s_ahgidx; | |
330 | u8 s_state; /* opcode of last packet sent */ | |
331 | u8 s_ack_state; /* opcode of packet to ACK */ | |
332 | u8 s_nak_state; /* non-zero if NAK is pending */ | |
333 | u8 r_nak_state; /* non-zero if NAK is pending */ | |
334 | u8 s_retry; /* requester retry counter */ | |
335 | u8 s_rnr_retry; /* requester RNR retry counter */ | |
336 | u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ | |
337 | u8 s_tail_ack_queue; /* index into s_ack_queue[] */ | |
338 | ||
339 | struct rvt_sge_state s_ack_rdma_sge; | |
340 | struct timer_list s_timer; | |
341 | ||
342 | /* | |
343 | * This sge list MUST be last. Do not add anything below here. | |
344 | */ | |
345 | struct rvt_sge r_sg_list[0] /* verified SGEs */ | |
346 | ____cacheline_aligned_in_smp; | |
347 | }; | |
348 | ||
349 | /* End QP section */ | |
350 | ||
8afd32eb DD |
351 | /* |
352 | * Things that are driver specific, module parameters in hfi1 and qib | |
353 | */ | |
354 | struct rvt_driver_params { | |
b1070a7a DD |
355 | /* |
356 | * driver required fields: | |
357 | * node_guid | |
358 | * phys_port_cnt | |
359 | * dma_device | |
360 | * owner | |
361 | * driver optional fields (rvt will provide generic value if blank): | |
362 | * name | |
363 | * node_desc | |
364 | * rvt fields, driver value ignored: | |
365 | * uverbs_abi_ver | |
366 | * node_type | |
367 | * num_comp_vectors | |
368 | * uverbs_cmd_mask | |
369 | */ | |
370 | struct ib_device_attr props; | |
371 | ||
372 | /* | |
373 | * Drivers will need to support a number of notifications to rvt in | |
374 | * accordance with certain events. This structure should contain a mask | |
375 | * of the supported events. Such events that the rvt may need to know | |
376 | * about include: | |
377 | * port errors | |
378 | * port active | |
379 | * lid change | |
380 | * sm change | |
381 | * client reregister | |
382 | * pkey change | |
383 | * | |
384 | * There may also be other events that the rvt layer needs to know | |
385 | * about; this is not an exhaustive list. For some events, such as | |
386 | * completion queue errors, rvt does not need to rely on the driver. | |
387 | */ | |
388 | int rvt_signal_supported; | |
389 | ||
390 | /* | |
391 | * Anything driver specific that is not covered by props, for | |
392 | * instance special module parameters, goes here. | |
393 | */ | |
8afd32eb DD |
394 | }; |
395 | ||
aec57787 DD |
396 | /* |
397 | * Functions that drivers are required to support | |
398 | */ | |
b534875d | 399 | struct rvt_dev_info; /* forward declaration for the callbacks below */ |
aec57787 DD |
400 | struct rvt_driver_provided { |
401 | /* | |
402 | * The work to create port files in /sys/class Infiniband is different | |
403 | * depending on the driver. This should not be abstracted away and | |
404 | * instead drivers are responsible for setting the correct callback for | |
405 | * this. | |
406 | */ | |
407 | int (*port_callback)(struct ib_device *, u8, struct kobject *); | |
b534875d DD |
408 | const char * (*get_card_name)(struct rvt_dev_info *rdi); /* NOTE(review): presumably returns the driver's card name for rdi; confirm */ |
409 | struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); /* NOTE(review): presumably returns the PCI device backing rdi; confirm */ | |
aec57787 DD |
410 | }; |
411 | ||
8afd32eb DD |
412 | /* Protection domain */ |
413 | struct rvt_pd { | |
414 | struct ib_pd ibpd; /* embedded ib_pd; ibpd_to_rvtpd() recovers the rvt_pd from it */ | |
415 | int user; /* non-zero if created from user space */ | |
416 | }; | |
417 | ||
0194621b | 418 | struct rvt_dev_info { |
b1070a7a DD |
419 | /* |
420 | * Prior to calling for registration the driver will be responsible for | |
421 | * allocating space for this structure. | |
422 | * | |
423 | * The driver will also be responsible for filling in certain members of | |
424 | * dparms.props | |
425 | */ | |
0194621b | 426 | struct ib_device ibdev; /* embedded ib_device; ib_to_rvt() recovers the rvt_dev_info from it */ |
8afd32eb | 427 | |
b1070a7a | 428 | /* Driver specific properties */ |
8afd32eb | 429 | struct rvt_driver_params dparms; |
b1070a7a | 430 | |
b92a7568 DD |
431 | struct rvt_mregion __rcu *dma_mr; |
432 | struct rvt_lkey_table lkey_table; | |
433 | ||
30588643 DD |
434 | /* PKey Table goes here */ |
435 | ||
aec57787 DD |
436 | /* Driver specific helper functions */ |
437 | struct rvt_driver_provided driver_f; | |
0194621b | 438 | |
8afd32eb DD |
439 | /* Internal use */ |
440 | int n_pds_allocated; | |
441 | spinlock_t n_pds_lock; /* Protect pd allocated count */ | |
0b8a8aae DD |
442 | |
443 | int flags; /* NOTE(review): presumably a mask of RVT_FLAG_* bits; confirm */ | |
0194621b DD |
444 | }; |
445 | ||
8afd32eb DD |
446 | static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) |
447 | { | |
448 | return container_of(ibpd, struct rvt_pd, ibpd); /* ibpd must be the member embedded in a struct rvt_pd */ | |
449 | } | |
450 | ||
451 | static inline struct rvt_dev_info *ib_to_rvt(struct ib_device *ibdev) | |
452 | { | |
453 | return container_of(ibdev, struct rvt_dev_info, ibdev); /* ibdev must be the member embedded in a struct rvt_dev_info */ | |
454 | } | |
455 | ||
0194621b DD |
456 | int rvt_register_device(struct rvt_dev_info *rvd); /* the driver allocates *rvd before registering */ |
457 | void rvt_unregister_device(struct rvt_dev_info *rvd); /* NOTE(review): presumably undoes rvt_register_device(); confirm */ | |
458 | ||
459 | #endif /* DEF_RDMA_VT_H */ |