]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/smc/smc_wr.c
Merge branch 'next-general' of git://git.kernel.org:/pub/scm/linux/kernel/git/jmorris...
[mirror_ubuntu-jammy-kernel.git] / net / smc / smc_wr.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
f38ba179
UB
2/*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * Work Requests exploiting Infiniband API
6 *
7 * Work requests (WR) of type ib_post_send or ib_post_recv respectively
8 * are submitted to either RC SQ or RC RQ respectively
9 * (reliably connected send/receive queue)
10 * and become work queue entries (WQEs).
11 * While an SQ WR/WQE is pending, we track it until transmission completion.
12 * Through a send or receive completion queue (CQ) respectively,
13 * we get completion queue entries (CQEs) [aka work completions (WCs)].
14 * Since the CQ callback is called from IRQ context, we split work by using
15 * bottom halves implemented by tasklets.
16 *
17 * SMC uses this to exchange LLC (link layer control)
18 * and CDC (connection data control) messages.
19 *
20 * Copyright IBM Corp. 2016
21 *
22 * Author(s): Steffen Maier <maier@linux.vnet.ibm.com>
23 */
24
25#include <linux/atomic.h>
26#include <linux/hashtable.h>
27#include <linux/wait.h>
28#include <rdma/ib_verbs.h>
29#include <asm/div64.h>
30
31#include "smc.h"
32#include "smc_wr.h"
33
34#define SMC_WR_MAX_POLL_CQE 10 /* max. # of compl. queue elements in 1 poll */
35
36#define SMC_WR_RX_HASH_BITS 4
37static DEFINE_HASHTABLE(smc_wr_rx_hash, SMC_WR_RX_HASH_BITS);
38static DEFINE_SPINLOCK(smc_wr_rx_hash_lock);
39
40struct smc_wr_tx_pend { /* control data for a pending send request */
41 u64 wr_id; /* work request id sent */
42 smc_wr_tx_handler handler;
43 enum ib_wc_status wc_status; /* CQE status */
44 struct smc_link *link;
45 u32 idx;
46 struct smc_wr_tx_pend_priv priv;
47};
48
49/******************************** send queue *********************************/
50
51/*------------------------------- completion --------------------------------*/
52
6a37ad3d
UB
53/* returns true if at least one tx work request is pending on the given link */
54static inline bool smc_wr_is_tx_pend(struct smc_link *link)
55{
56 if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) !=
57 link->wr_tx_cnt) {
58 return true;
59 }
60 return false;
61}
62
63/* wait till all pending tx work requests on the given link are completed */
64static inline int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
65{
66 if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
67 SMC_WR_TX_WAIT_PENDING_TIME))
68 return 0;
69 else /* timeout */
70 return -EPIPE;
71}
72
f38ba179
UB
73static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id)
74{
75 u32 i;
76
77 for (i = 0; i < link->wr_tx_cnt; i++) {
78 if (link->wr_tx_pends[i].wr_id == wr_id)
79 return i;
80 }
81 return link->wr_tx_cnt;
82}
83
84static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
85{
86 struct smc_wr_tx_pend pnd_snd;
87 struct smc_link *link;
88 u32 pnd_snd_idx;
89 int i;
90
91 link = wc->qp->qp_context;
652a1e41
UB
92
93 if (wc->opcode == IB_WC_REG_MR) {
94 if (wc->status)
95 link->wr_reg_state = FAILED;
96 else
97 link->wr_reg_state = CONFIRMED;
15e1b99a 98 smc_wr_wakeup_reg_wait(link);
652a1e41
UB
99 return;
100 }
101
f38ba179
UB
102 pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id);
103 if (pnd_snd_idx == link->wr_tx_cnt)
104 return;
105 link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
106 memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
107 /* clear the full struct smc_wr_tx_pend including .priv */
108 memset(&link->wr_tx_pends[pnd_snd_idx], 0,
109 sizeof(link->wr_tx_pends[pnd_snd_idx]));
110 memset(&link->wr_tx_bufs[pnd_snd_idx], 0,
111 sizeof(link->wr_tx_bufs[pnd_snd_idx]));
112 if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask))
113 return;
114 if (wc->status) {
115 for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
116 /* clear full struct smc_wr_tx_pend including .priv */
117 memset(&link->wr_tx_pends[i], 0,
118 sizeof(link->wr_tx_pends[i]));
119 memset(&link->wr_tx_bufs[i], 0,
120 sizeof(link->wr_tx_bufs[i]));
121 clear_bit(i, link->wr_tx_mask);
122 }
b38d7324 123 /* terminate connections of this link group abnormally */
f528ba24 124 smc_lgr_terminate_sched(smc_get_lgr(link));
f38ba179
UB
125 }
126 if (pnd_snd.handler)
127 pnd_snd.handler(&pnd_snd.priv, link, wc->status);
128 wake_up(&link->wr_tx_wait);
129}
130
131static void smc_wr_tx_tasklet_fn(unsigned long data)
132{
133 struct smc_ib_device *dev = (struct smc_ib_device *)data;
134 struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
135 int i = 0, rc;
136 int polled = 0;
137
138again:
139 polled++;
140 do {
86e780d3 141 memset(&wc, 0, sizeof(wc));
f38ba179
UB
142 rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
143 if (polled == 1) {
144 ib_req_notify_cq(dev->roce_cq_send,
145 IB_CQ_NEXT_COMP |
146 IB_CQ_REPORT_MISSED_EVENTS);
147 }
148 if (!rc)
149 break;
150 for (i = 0; i < rc; i++)
151 smc_wr_tx_process_cqe(&wc[i]);
152 } while (rc > 0);
153 if (polled == 1)
154 goto again;
155}
156
157void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
158{
159 struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;
160
161 tasklet_schedule(&dev->send_tasklet);
162}
163
164/*---------------------------- request submission ---------------------------*/
165
166static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx)
167{
168 *idx = link->wr_tx_cnt;
169 for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) {
170 if (!test_and_set_bit(*idx, link->wr_tx_mask))
171 return 0;
172 }
173 *idx = link->wr_tx_cnt;
174 return -EBUSY;
175}
176
177/**
178 * smc_wr_tx_get_free_slot() - returns buffer for message assembly,
179 * and sets info for pending transmit tracking
180 * @link: Pointer to smc_link used to later send the message.
181 * @handler: Send completion handler function pointer.
182 * @wr_buf: Out value returns pointer to message buffer.
ad6f317f 183 * @wr_rdma_buf: Out value returns pointer to rdma work request.
f38ba179
UB
184 * @wr_pend_priv: Out value returns pointer serving as handler context.
185 *
186 * Return: 0 on success, or -errno on error.
187 */
188int smc_wr_tx_get_free_slot(struct smc_link *link,
189 smc_wr_tx_handler handler,
190 struct smc_wr_buf **wr_buf,
ad6f317f 191 struct smc_rdma_wr **wr_rdma_buf,
f38ba179
UB
192 struct smc_wr_tx_pend_priv **wr_pend_priv)
193{
15e1b99a 194 struct smc_link_group *lgr = smc_get_lgr(link);
f38ba179 195 struct smc_wr_tx_pend *wr_pend;
1a0a04c7 196 u32 idx = link->wr_tx_cnt;
f38ba179
UB
197 struct ib_send_wr *wr_ib;
198 u64 wr_id;
f38ba179
UB
199 int rc;
200
201 *wr_buf = NULL;
202 *wr_pend_priv = NULL;
15e1b99a 203 if (in_softirq() || lgr->terminating) {
f38ba179
UB
204 rc = smc_wr_tx_get_free_slot_index(link, &idx);
205 if (rc)
206 return rc;
207 } else {
15e1b99a 208 rc = wait_event_interruptible_timeout(
f38ba179 209 link->wr_tx_wait,
0d18a0cb 210 link->state == SMC_LNK_INACTIVE ||
15e1b99a 211 lgr->terminating ||
f38ba179
UB
212 (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
213 SMC_WR_TX_WAIT_FREE_SLOT_TIME);
214 if (!rc) {
b38d7324 215 /* timeout - terminate connections */
15e1b99a 216 smc_lgr_terminate_sched(lgr);
f38ba179
UB
217 return -EPIPE;
218 }
f38ba179
UB
219 if (idx == link->wr_tx_cnt)
220 return -EPIPE;
221 }
222 wr_id = smc_wr_tx_get_next_wr_id(link);
223 wr_pend = &link->wr_tx_pends[idx];
224 wr_pend->wr_id = wr_id;
225 wr_pend->handler = handler;
226 wr_pend->link = link;
227 wr_pend->idx = idx;
228 wr_ib = &link->wr_tx_ibs[idx];
229 wr_ib->wr_id = wr_id;
230 *wr_buf = &link->wr_tx_bufs[idx];
ad6f317f
UB
231 if (wr_rdma_buf)
232 *wr_rdma_buf = &link->wr_tx_rdmas[idx];
f38ba179
UB
233 *wr_pend_priv = &wr_pend->priv;
234 return 0;
235}
236
237int smc_wr_tx_put_slot(struct smc_link *link,
238 struct smc_wr_tx_pend_priv *wr_pend_priv)
239{
240 struct smc_wr_tx_pend *pend;
241
242 pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
243 if (pend->idx < link->wr_tx_cnt) {
e438bae4
UB
244 u32 idx = pend->idx;
245
f38ba179 246 /* clear the full struct smc_wr_tx_pend including .priv */
46ad0222
KG
247 memset(&link->wr_tx_pends[idx], 0,
248 sizeof(link->wr_tx_pends[idx]));
249 memset(&link->wr_tx_bufs[idx], 0,
250 sizeof(link->wr_tx_bufs[idx]));
e438bae4 251 test_and_clear_bit(idx, link->wr_tx_mask);
6a37ad3d 252 wake_up(&link->wr_tx_wait);
f38ba179
UB
253 return 1;
254 }
255
256 return 0;
257}
258
259/* Send prepared WR slot via ib_post_send.
260 * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
261 */
262int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
263{
f38ba179
UB
264 struct smc_wr_tx_pend *pend;
265 int rc;
266
267 ib_req_notify_cq(link->smcibdev->roce_cq_send,
8301fa44 268 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
f38ba179 269 pend = container_of(priv, struct smc_wr_tx_pend, priv);
2e3bbe46 270 rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
b4772b3a 271 if (rc) {
f38ba179 272 smc_wr_tx_put_slot(link, priv);
f528ba24 273 smc_lgr_terminate_sched(smc_get_lgr(link));
b4772b3a 274 }
f38ba179
UB
275 return rc;
276}
277
652a1e41
UB
278/* Register a memory region and wait for result. */
279int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
280{
652a1e41
UB
281 int rc;
282
283 ib_req_notify_cq(link->smcibdev->roce_cq_send,
284 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
285 link->wr_reg_state = POSTED;
286 link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
287 link->wr_reg.mr = mr;
288 link->wr_reg.key = mr->rkey;
2e3bbe46 289 rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
652a1e41
UB
290 if (rc)
291 return rc;
292
293 rc = wait_event_interruptible_timeout(link->wr_reg_wait,
294 (link->wr_reg_state != POSTED),
295 SMC_WR_REG_MR_WAIT_TIME);
296 if (!rc) {
297 /* timeout - terminate connections */
f528ba24 298 smc_lgr_terminate_sched(smc_get_lgr(link));
652a1e41
UB
299 return -EPIPE;
300 }
301 if (rc == -ERESTARTSYS)
302 return -EINTR;
303 switch (link->wr_reg_state) {
304 case CONFIRMED:
305 rc = 0;
306 break;
307 case FAILED:
308 rc = -EIO;
309 break;
310 case POSTED:
311 rc = -EPIPE;
312 break;
313 }
314 return rc;
315}
316
86e780d3 317void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
5f08318f
UB
318 smc_wr_tx_filter filter,
319 smc_wr_tx_dismisser dismisser,
320 unsigned long data)
321{
322 struct smc_wr_tx_pend_priv *tx_pend;
86e780d3 323 struct smc_wr_rx_hdr *wr_tx;
5f08318f
UB
324 int i;
325
326 for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
86e780d3
UB
327 wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
328 if (wr_tx->type != wr_tx_hdr_type)
5f08318f
UB
329 continue;
330 tx_pend = &link->wr_tx_pends[i].priv;
331 if (filter(tx_pend, data))
332 dismisser(tx_pend);
333 }
334}
335
f38ba179
UB
336/****************************** receive queue ********************************/
337
338int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
339{
340 struct smc_wr_rx_handler *h_iter;
341 int rc = 0;
342
343 spin_lock(&smc_wr_rx_hash_lock);
344 hash_for_each_possible(smc_wr_rx_hash, h_iter, list, handler->type) {
345 if (h_iter->type == handler->type) {
346 rc = -EEXIST;
347 goto out_unlock;
348 }
349 }
350 hash_add(smc_wr_rx_hash, &handler->list, handler->type);
351out_unlock:
352 spin_unlock(&smc_wr_rx_hash_lock);
353 return rc;
354}
355
356/* Demultiplex a received work request based on the message type to its handler.
357 * Relies on smc_wr_rx_hash having been completely filled before any IB WRs,
358 * and not being modified any more afterwards so we don't need to lock it.
359 */
360static inline void smc_wr_rx_demultiplex(struct ib_wc *wc)
361{
362 struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
363 struct smc_wr_rx_handler *handler;
364 struct smc_wr_rx_hdr *wr_rx;
365 u64 temp_wr_id;
366 u32 index;
367
368 if (wc->byte_len < sizeof(*wr_rx))
369 return; /* short message */
370 temp_wr_id = wc->wr_id;
371 index = do_div(temp_wr_id, link->wr_rx_cnt);
372 wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index];
373 hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) {
374 if (handler->type == wr_rx->type)
375 handler->handler(wc, wr_rx);
376 }
377}
378
379static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
380{
381 struct smc_link *link;
382 int i;
383
384 for (i = 0; i < num; i++) {
385 link = wc[i].qp->qp_context;
386 if (wc[i].status == IB_WC_SUCCESS) {
877ae5be 387 link->wr_rx_tstamp = jiffies;
f38ba179
UB
388 smc_wr_rx_demultiplex(&wc[i]);
389 smc_wr_rx_post(link); /* refill WR RX */
390 } else {
391 /* handle status errors */
392 switch (wc[i].status) {
393 case IB_WC_RETRY_EXC_ERR:
394 case IB_WC_RNR_RETRY_EXC_ERR:
395 case IB_WC_WR_FLUSH_ERR:
b38d7324
UB
396 /* terminate connections of this link group
397 * abnormally
398 */
f528ba24 399 smc_lgr_terminate_sched(smc_get_lgr(link));
f38ba179
UB
400 break;
401 default:
402 smc_wr_rx_post(link); /* refill WR RX */
403 break;
404 }
405 }
406 }
407}
408
409static void smc_wr_rx_tasklet_fn(unsigned long data)
410{
411 struct smc_ib_device *dev = (struct smc_ib_device *)data;
412 struct ib_wc wc[SMC_WR_MAX_POLL_CQE];
413 int polled = 0;
414 int rc;
415
416again:
417 polled++;
418 do {
419 memset(&wc, 0, sizeof(wc));
420 rc = ib_poll_cq(dev->roce_cq_recv, SMC_WR_MAX_POLL_CQE, wc);
421 if (polled == 1) {
422 ib_req_notify_cq(dev->roce_cq_recv,
423 IB_CQ_SOLICITED_MASK
424 | IB_CQ_REPORT_MISSED_EVENTS);
425 }
426 if (!rc)
427 break;
428 smc_wr_rx_process_cqes(&wc[0], rc);
429 } while (rc > 0);
430 if (polled == 1)
431 goto again;
432}
433
434void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context)
435{
436 struct smc_ib_device *dev = (struct smc_ib_device *)cq_context;
437
438 tasklet_schedule(&dev->recv_tasklet);
439}
440
441int smc_wr_rx_post_init(struct smc_link *link)
442{
443 u32 i;
444 int rc = 0;
445
446 for (i = 0; i < link->wr_rx_cnt; i++)
447 rc = smc_wr_rx_post(link);
448 return rc;
449}
450
451/***************************** init, exit, misc ******************************/
452
453void smc_wr_remember_qp_attr(struct smc_link *lnk)
454{
455 struct ib_qp_attr *attr = &lnk->qp_attr;
456 struct ib_qp_init_attr init_attr;
457
458 memset(attr, 0, sizeof(*attr));
459 memset(&init_attr, 0, sizeof(init_attr));
460 ib_query_qp(lnk->roce_qp, attr,
461 IB_QP_STATE |
462 IB_QP_CUR_STATE |
463 IB_QP_PKEY_INDEX |
464 IB_QP_PORT |
465 IB_QP_QKEY |
466 IB_QP_AV |
467 IB_QP_PATH_MTU |
468 IB_QP_TIMEOUT |
469 IB_QP_RETRY_CNT |
470 IB_QP_RNR_RETRY |
471 IB_QP_RQ_PSN |
472 IB_QP_ALT_PATH |
473 IB_QP_MIN_RNR_TIMER |
474 IB_QP_SQ_PSN |
475 IB_QP_PATH_MIG_STATE |
476 IB_QP_CAP |
477 IB_QP_DEST_QPN,
478 &init_attr);
479
480 lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
481 lnk->qp_attr.cap.max_send_wr);
482 lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
483 lnk->qp_attr.cap.max_recv_wr);
484}
485
486static void smc_wr_init_sge(struct smc_link *lnk)
487{
488 u32 i;
489
490 for (i = 0; i < lnk->wr_tx_cnt; i++) {
491 lnk->wr_tx_sges[i].addr =
492 lnk->wr_tx_dma_addr + i * SMC_WR_BUF_SIZE;
493 lnk->wr_tx_sges[i].length = SMC_WR_TX_SIZE;
bd4ad577 494 lnk->wr_tx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
ad6f317f
UB
495 lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[0].lkey =
496 lnk->roce_pd->local_dma_lkey;
497 lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge[1].lkey =
498 lnk->roce_pd->local_dma_lkey;
499 lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[0].lkey =
500 lnk->roce_pd->local_dma_lkey;
501 lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge[1].lkey =
502 lnk->roce_pd->local_dma_lkey;
f38ba179
UB
503 lnk->wr_tx_ibs[i].next = NULL;
504 lnk->wr_tx_ibs[i].sg_list = &lnk->wr_tx_sges[i];
505 lnk->wr_tx_ibs[i].num_sge = 1;
506 lnk->wr_tx_ibs[i].opcode = IB_WR_SEND;
507 lnk->wr_tx_ibs[i].send_flags =
2c9c1682 508 IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ad6f317f
UB
509 lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.opcode = IB_WR_RDMA_WRITE;
510 lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.opcode = IB_WR_RDMA_WRITE;
511 lnk->wr_tx_rdmas[i].wr_tx_rdma[0].wr.sg_list =
512 lnk->wr_tx_rdma_sges[i].tx_rdma_sge[0].wr_tx_rdma_sge;
513 lnk->wr_tx_rdmas[i].wr_tx_rdma[1].wr.sg_list =
514 lnk->wr_tx_rdma_sges[i].tx_rdma_sge[1].wr_tx_rdma_sge;
f38ba179
UB
515 }
516 for (i = 0; i < lnk->wr_rx_cnt; i++) {
517 lnk->wr_rx_sges[i].addr =
518 lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE;
519 lnk->wr_rx_sges[i].length = SMC_WR_BUF_SIZE;
bd4ad577 520 lnk->wr_rx_sges[i].lkey = lnk->roce_pd->local_dma_lkey;
f38ba179
UB
521 lnk->wr_rx_ibs[i].next = NULL;
522 lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i];
523 lnk->wr_rx_ibs[i].num_sge = 1;
524 }
652a1e41
UB
525 lnk->wr_reg.wr.next = NULL;
526 lnk->wr_reg.wr.num_sge = 0;
527 lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED;
528 lnk->wr_reg.wr.opcode = IB_WR_REG_MR;
529 lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE;
f38ba179
UB
530}
531
532void smc_wr_free_link(struct smc_link *lnk)
533{
534 struct ib_device *ibdev;
535
6a37ad3d
UB
536 if (smc_wr_tx_wait_no_pending_sends(lnk))
537 memset(lnk->wr_tx_mask, 0,
538 BITS_TO_LONGS(SMC_WR_BUF_CNT) *
539 sizeof(*lnk->wr_tx_mask));
f38ba179
UB
540
541 if (!lnk->smcibdev)
542 return;
543 ibdev = lnk->smcibdev->ibdev;
544
545 if (lnk->wr_rx_dma_addr) {
546 ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
547 SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
548 DMA_FROM_DEVICE);
549 lnk->wr_rx_dma_addr = 0;
550 }
551 if (lnk->wr_tx_dma_addr) {
552 ib_dma_unmap_single(ibdev, lnk->wr_tx_dma_addr,
553 SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
554 DMA_TO_DEVICE);
555 lnk->wr_tx_dma_addr = 0;
556 }
557}
558
559void smc_wr_free_link_mem(struct smc_link *lnk)
560{
561 kfree(lnk->wr_tx_pends);
562 lnk->wr_tx_pends = NULL;
563 kfree(lnk->wr_tx_mask);
564 lnk->wr_tx_mask = NULL;
565 kfree(lnk->wr_tx_sges);
566 lnk->wr_tx_sges = NULL;
ad6f317f
UB
567 kfree(lnk->wr_tx_rdma_sges);
568 lnk->wr_tx_rdma_sges = NULL;
f38ba179
UB
569 kfree(lnk->wr_rx_sges);
570 lnk->wr_rx_sges = NULL;
ad6f317f
UB
571 kfree(lnk->wr_tx_rdmas);
572 lnk->wr_tx_rdmas = NULL;
f38ba179
UB
573 kfree(lnk->wr_rx_ibs);
574 lnk->wr_rx_ibs = NULL;
575 kfree(lnk->wr_tx_ibs);
576 lnk->wr_tx_ibs = NULL;
577 kfree(lnk->wr_tx_bufs);
578 lnk->wr_tx_bufs = NULL;
579 kfree(lnk->wr_rx_bufs);
580 lnk->wr_rx_bufs = NULL;
581}
582
583int smc_wr_alloc_link_mem(struct smc_link *link)
584{
585 /* allocate link related memory */
586 link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
587 if (!link->wr_tx_bufs)
588 goto no_mem;
589 link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE,
590 GFP_KERNEL);
591 if (!link->wr_rx_bufs)
592 goto no_mem_wr_tx_bufs;
593 link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
594 GFP_KERNEL);
595 if (!link->wr_tx_ibs)
596 goto no_mem_wr_rx_bufs;
597 link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
598 sizeof(link->wr_rx_ibs[0]),
599 GFP_KERNEL);
600 if (!link->wr_rx_ibs)
601 goto no_mem_wr_tx_ibs;
ad6f317f
UB
602 link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
603 sizeof(link->wr_tx_rdmas[0]),
604 GFP_KERNEL);
605 if (!link->wr_tx_rdmas)
606 goto no_mem_wr_rx_ibs;
607 link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
608 sizeof(link->wr_tx_rdma_sges[0]),
609 GFP_KERNEL);
610 if (!link->wr_tx_rdma_sges)
611 goto no_mem_wr_tx_rdmas;
f38ba179
UB
612 link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
613 GFP_KERNEL);
614 if (!link->wr_tx_sges)
ad6f317f 615 goto no_mem_wr_tx_rdma_sges;
f38ba179
UB
616 link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
617 sizeof(link->wr_rx_sges[0]),
618 GFP_KERNEL);
619 if (!link->wr_rx_sges)
620 goto no_mem_wr_tx_sges;
6396bb22
KC
621 link->wr_tx_mask = kcalloc(BITS_TO_LONGS(SMC_WR_BUF_CNT),
622 sizeof(*link->wr_tx_mask),
623 GFP_KERNEL);
f38ba179
UB
624 if (!link->wr_tx_mask)
625 goto no_mem_wr_rx_sges;
626 link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
627 sizeof(link->wr_tx_pends[0]),
628 GFP_KERNEL);
629 if (!link->wr_tx_pends)
630 goto no_mem_wr_tx_mask;
631 return 0;
632
633no_mem_wr_tx_mask:
634 kfree(link->wr_tx_mask);
635no_mem_wr_rx_sges:
636 kfree(link->wr_rx_sges);
637no_mem_wr_tx_sges:
638 kfree(link->wr_tx_sges);
ad6f317f
UB
639no_mem_wr_tx_rdma_sges:
640 kfree(link->wr_tx_rdma_sges);
641no_mem_wr_tx_rdmas:
642 kfree(link->wr_tx_rdmas);
f38ba179
UB
643no_mem_wr_rx_ibs:
644 kfree(link->wr_rx_ibs);
645no_mem_wr_tx_ibs:
646 kfree(link->wr_tx_ibs);
647no_mem_wr_rx_bufs:
648 kfree(link->wr_rx_bufs);
649no_mem_wr_tx_bufs:
650 kfree(link->wr_tx_bufs);
651no_mem:
652 return -ENOMEM;
653}
654
655void smc_wr_remove_dev(struct smc_ib_device *smcibdev)
656{
657 tasklet_kill(&smcibdev->recv_tasklet);
658 tasklet_kill(&smcibdev->send_tasklet);
659}
660
661void smc_wr_add_dev(struct smc_ib_device *smcibdev)
662{
663 tasklet_init(&smcibdev->recv_tasklet, smc_wr_rx_tasklet_fn,
664 (unsigned long)smcibdev);
665 tasklet_init(&smcibdev->send_tasklet, smc_wr_tx_tasklet_fn,
666 (unsigned long)smcibdev);
667}
668
669int smc_wr_create_link(struct smc_link *lnk)
670{
671 struct ib_device *ibdev = lnk->smcibdev->ibdev;
672 int rc = 0;
673
674 smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0);
675 lnk->wr_rx_id = 0;
676 lnk->wr_rx_dma_addr = ib_dma_map_single(
677 ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
678 DMA_FROM_DEVICE);
679 if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) {
680 lnk->wr_rx_dma_addr = 0;
681 rc = -EIO;
682 goto out;
683 }
684 lnk->wr_tx_dma_addr = ib_dma_map_single(
685 ibdev, lnk->wr_tx_bufs, SMC_WR_BUF_SIZE * lnk->wr_tx_cnt,
686 DMA_TO_DEVICE);
687 if (ib_dma_mapping_error(ibdev, lnk->wr_tx_dma_addr)) {
688 rc = -EIO;
689 goto dma_unmap;
690 }
691 smc_wr_init_sge(lnk);
692 memset(lnk->wr_tx_mask, 0,
693 BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask));
652a1e41
UB
694 init_waitqueue_head(&lnk->wr_tx_wait);
695 init_waitqueue_head(&lnk->wr_reg_wait);
f38ba179
UB
696 return rc;
697
698dma_unmap:
699 ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr,
700 SMC_WR_BUF_SIZE * lnk->wr_rx_cnt,
701 DMA_FROM_DEVICE);
702 lnk->wr_rx_dma_addr = 0;
703out:
704 return rc;
705}