1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Basic Transport Functions exploiting Infiniband API
7 * Copyright IBM Corp. 2016
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
20 #include <rdma/ib_verbs.h>
21 #include <rdma/ib_cache.h>
30 #include "smc_close.h"
33 #define SMC_LGR_NUM_INCR 256
34 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
35 #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
36 #define SMC_LGR_FREE_DELAY_FAST (8 * HZ)
38 static struct smc_lgr_list smc_lgr_list
= { /* established link groups */
39 .lock
= __SPIN_LOCK_UNLOCKED(smc_lgr_list
.lock
),
40 .list
= LIST_HEAD_INIT(smc_lgr_list
.list
),
44 static atomic_t lgr_cnt
= ATOMIC_INIT(0); /* number of existing link groups */
45 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted
);
47 struct smc_ib_up_work
{
48 struct work_struct work
;
49 struct smc_link_group
*lgr
;
50 struct smc_ib_device
*smcibdev
;
54 static void smc_buf_free(struct smc_link_group
*lgr
, bool is_rmb
,
55 struct smc_buf_desc
*buf_desc
);
56 static void __smc_lgr_terminate(struct smc_link_group
*lgr
, bool soft
);
58 static void smc_link_up_work(struct work_struct
*work
);
59 static void smc_link_down_work(struct work_struct
*work
);
61 /* return head of link group list and its lock for a given link group */
62 static inline struct list_head
*smc_lgr_list_head(struct smc_link_group
*lgr
,
63 spinlock_t
**lgr_lock
)
66 *lgr_lock
= &lgr
->smcd
->lgr_lock
;
67 return &lgr
->smcd
->lgr_list
;
70 *lgr_lock
= &smc_lgr_list
.lock
;
71 return &smc_lgr_list
.list
;
74 static void smc_lgr_schedule_free_work(struct smc_link_group
*lgr
)
76 /* client link group creation always follows the server link group
77 * creation. For client use a somewhat higher removal delay time,
78 * otherwise there is a risk of out-of-sync link groups.
80 if (!lgr
->freeing
&& !lgr
->freefast
) {
81 mod_delayed_work(system_wq
, &lgr
->free_work
,
82 (!lgr
->is_smcd
&& lgr
->role
== SMC_CLNT
) ?
83 SMC_LGR_FREE_DELAY_CLNT
:
84 SMC_LGR_FREE_DELAY_SERV
);
88 void smc_lgr_schedule_free_work_fast(struct smc_link_group
*lgr
)
90 if (!lgr
->freeing
&& !lgr
->freefast
) {
92 mod_delayed_work(system_wq
, &lgr
->free_work
,
93 SMC_LGR_FREE_DELAY_FAST
);
97 /* Register connection's alert token in our lookup structure.
98 * To use rbtrees we have to implement our own insert core.
99 * Requires @conns_lock
100 * @smc connection to register
101 * Returns 0 on success, != otherwise.
103 static void smc_lgr_add_alert_token(struct smc_connection
*conn
)
105 struct rb_node
**link
, *parent
= NULL
;
106 u32 token
= conn
->alert_token_local
;
108 link
= &conn
->lgr
->conns_all
.rb_node
;
110 struct smc_connection
*cur
= rb_entry(*link
,
111 struct smc_connection
, alert_node
);
114 if (cur
->alert_token_local
> token
)
115 link
= &parent
->rb_left
;
117 link
= &parent
->rb_right
;
119 /* Put the new node there */
120 rb_link_node(&conn
->alert_node
, parent
, link
);
121 rb_insert_color(&conn
->alert_node
, &conn
->lgr
->conns_all
);
124 /* Register connection in link group by assigning an alert token
125 * registered in a search tree.
126 * Requires @conns_lock
127 * Note that '0' is a reserved value and not assigned.
129 static int smc_lgr_register_conn(struct smc_connection
*conn
)
131 struct smc_sock
*smc
= container_of(conn
, struct smc_sock
, conn
);
132 static atomic_t nexttoken
= ATOMIC_INIT(0);
134 /* find a new alert_token_local value not yet used by some connection
137 sock_hold(&smc
->sk
); /* sock_put in smc_lgr_unregister_conn() */
138 while (!conn
->alert_token_local
) {
139 conn
->alert_token_local
= atomic_inc_return(&nexttoken
);
140 if (smc_lgr_find_conn(conn
->alert_token_local
, conn
->lgr
))
141 conn
->alert_token_local
= 0;
143 smc_lgr_add_alert_token(conn
);
145 /* assign the new connection to a link */
146 if (!conn
->lgr
->is_smcd
) {
147 struct smc_link
*lnk
;
150 /* tbd - link balancing */
151 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
152 lnk
= &conn
->lgr
->lnk
[i
];
153 if (lnk
->state
== SMC_LNK_ACTIVATING
||
154 lnk
->state
== SMC_LNK_ACTIVE
)
158 return SMC_CLC_DECL_NOACTLINK
;
160 conn
->lgr
->conns_num
++;
164 /* Unregister connection and reset the alert token of the given connection<
166 static void __smc_lgr_unregister_conn(struct smc_connection
*conn
)
168 struct smc_sock
*smc
= container_of(conn
, struct smc_sock
, conn
);
169 struct smc_link_group
*lgr
= conn
->lgr
;
171 rb_erase(&conn
->alert_node
, &lgr
->conns_all
);
173 conn
->alert_token_local
= 0;
174 sock_put(&smc
->sk
); /* sock_hold in smc_lgr_register_conn() */
177 /* Unregister connection from lgr
179 static void smc_lgr_unregister_conn(struct smc_connection
*conn
)
181 struct smc_link_group
*lgr
= conn
->lgr
;
185 write_lock_bh(&lgr
->conns_lock
);
186 if (conn
->alert_token_local
) {
187 __smc_lgr_unregister_conn(conn
);
189 write_unlock_bh(&lgr
->conns_lock
);
193 void smc_lgr_cleanup_early(struct smc_connection
*conn
)
195 struct smc_link_group
*lgr
= conn
->lgr
;
202 smc_lgr_schedule_free_work_fast(lgr
);
205 /* Send delete link, either as client to request the initiation
206 * of the DELETE LINK sequence from server; or as server to
207 * initiate the delete processing. See smc_llc_rx_delete_link().
209 static int smcr_link_send_delete(struct smc_link
*lnk
, bool orderly
)
211 if (lnk
->state
== SMC_LNK_ACTIVE
&&
212 !smc_llc_send_delete_link(lnk
, 0, SMC_LLC_REQ
, orderly
,
213 SMC_LLC_DEL_PROG_INIT_TERM
)) {
219 static void smc_lgr_free(struct smc_link_group
*lgr
);
221 static void smc_lgr_free_work(struct work_struct
*work
)
223 struct smc_link_group
*lgr
= container_of(to_delayed_work(work
),
224 struct smc_link_group
,
226 spinlock_t
*lgr_lock
;
230 smc_lgr_list_head(lgr
, &lgr_lock
);
231 spin_lock_bh(lgr_lock
);
233 spin_unlock_bh(lgr_lock
);
236 read_lock_bh(&lgr
->conns_lock
);
237 conns
= RB_EMPTY_ROOT(&lgr
->conns_all
);
238 read_unlock_bh(&lgr
->conns_lock
);
239 if (!conns
) { /* number of lgr connections is no longer zero */
240 spin_unlock_bh(lgr_lock
);
243 list_del_init(&lgr
->list
); /* remove from smc_lgr_list */
245 if (!lgr
->is_smcd
&& !lgr
->terminating
) {
246 bool do_wait
= false;
248 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
249 struct smc_link
*lnk
= &lgr
->lnk
[i
];
250 /* try to send del link msg, on err free immediately */
251 if (lnk
->state
== SMC_LNK_ACTIVE
&&
252 !smcr_link_send_delete(lnk
, true)) {
253 /* reschedule in case we never receive a resp */
254 smc_lgr_schedule_free_work(lgr
);
259 spin_unlock_bh(lgr_lock
);
260 return; /* wait for resp, see smc_llc_rx_delete_link */
263 lgr
->freeing
= 1; /* this instance does the freeing, no new schedule */
264 spin_unlock_bh(lgr_lock
);
265 cancel_delayed_work(&lgr
->free_work
);
267 if (lgr
->is_smcd
&& !lgr
->terminating
)
268 smc_ism_signal_shutdown(lgr
);
270 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
271 struct smc_link
*lnk
= &lgr
->lnk
[i
];
273 if (smc_link_usable(lnk
))
274 lnk
->state
= SMC_LNK_INACTIVE
;
276 wake_up_interruptible_all(&lgr
->llc_waiter
);
281 static void smc_lgr_terminate_work(struct work_struct
*work
)
283 struct smc_link_group
*lgr
= container_of(work
, struct smc_link_group
,
286 __smc_lgr_terminate(lgr
, true);
289 /* return next unique link id for the lgr */
290 static u8
smcr_next_link_id(struct smc_link_group
*lgr
)
296 link_id
= ++lgr
->next_link_id
;
297 if (!link_id
) /* skip zero as link_id */
298 link_id
= ++lgr
->next_link_id
;
299 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
300 if (smc_link_usable(&lgr
->lnk
[i
]) &&
301 lgr
->lnk
[i
].link_id
== link_id
)
309 static int smcr_link_init(struct smc_link_group
*lgr
, struct smc_link
*lnk
,
310 u8 link_idx
, struct smc_init_info
*ini
)
315 get_device(&ini
->ib_dev
->ibdev
->dev
);
316 atomic_inc(&ini
->ib_dev
->lnk_cnt
);
317 lnk
->state
= SMC_LNK_ACTIVATING
;
318 lnk
->link_id
= smcr_next_link_id(lgr
);
320 lnk
->link_idx
= link_idx
;
321 lnk
->smcibdev
= ini
->ib_dev
;
322 lnk
->ibport
= ini
->ib_port
;
323 lnk
->path_mtu
= ini
->ib_dev
->pattr
[ini
->ib_port
- 1].active_mtu
;
324 INIT_WORK(&lnk
->link_down_wrk
, smc_link_down_work
);
325 if (!ini
->ib_dev
->initialized
) {
326 rc
= (int)smc_ib_setup_per_ibdev(ini
->ib_dev
);
330 get_random_bytes(rndvec
, sizeof(rndvec
));
331 lnk
->psn_initial
= rndvec
[0] + (rndvec
[1] << 8) +
333 rc
= smc_ib_determine_gid(lnk
->smcibdev
, lnk
->ibport
,
334 ini
->vlan_id
, lnk
->gid
, &lnk
->sgid_index
);
337 rc
= smc_llc_link_init(lnk
);
340 rc
= smc_wr_alloc_link_mem(lnk
);
343 rc
= smc_ib_create_protection_domain(lnk
);
346 rc
= smc_ib_create_queue_pair(lnk
);
349 rc
= smc_wr_create_link(lnk
);
355 smc_ib_destroy_queue_pair(lnk
);
357 smc_ib_dealloc_protection_domain(lnk
);
359 smc_wr_free_link_mem(lnk
);
361 smc_llc_link_clear(lnk
);
363 put_device(&ini
->ib_dev
->ibdev
->dev
);
364 memset(lnk
, 0, sizeof(struct smc_link
));
365 lnk
->state
= SMC_LNK_UNUSED
;
366 if (!atomic_dec_return(&ini
->ib_dev
->lnk_cnt
))
367 wake_up(&ini
->ib_dev
->lnks_deleted
);
371 /* create a new SMC link group */
372 static int smc_lgr_create(struct smc_sock
*smc
, struct smc_init_info
*ini
)
374 struct smc_link_group
*lgr
;
375 struct list_head
*lgr_list
;
376 struct smc_link
*lnk
;
377 spinlock_t
*lgr_lock
;
382 if (ini
->is_smcd
&& ini
->vlan_id
) {
383 if (smc_ism_get_vlan(ini
->ism_dev
, ini
->vlan_id
)) {
384 rc
= SMC_CLC_DECL_ISMVLANERR
;
389 lgr
= kzalloc(sizeof(*lgr
), GFP_KERNEL
);
391 rc
= SMC_CLC_DECL_MEM
;
394 lgr
->is_smcd
= ini
->is_smcd
;
396 lgr
->terminating
= 0;
399 lgr
->vlan_id
= ini
->vlan_id
;
400 mutex_init(&lgr
->sndbufs_lock
);
401 mutex_init(&lgr
->rmbs_lock
);
402 rwlock_init(&lgr
->conns_lock
);
403 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
404 INIT_LIST_HEAD(&lgr
->sndbufs
[i
]);
405 INIT_LIST_HEAD(&lgr
->rmbs
[i
]);
407 lgr
->next_link_id
= 0;
408 smc_lgr_list
.num
+= SMC_LGR_NUM_INCR
;
409 memcpy(&lgr
->id
, (u8
*)&smc_lgr_list
.num
, SMC_LGR_ID_SIZE
);
410 INIT_DELAYED_WORK(&lgr
->free_work
, smc_lgr_free_work
);
411 INIT_WORK(&lgr
->terminate_work
, smc_lgr_terminate_work
);
412 lgr
->conns_all
= RB_ROOT
;
414 /* SMC-D specific settings */
415 get_device(&ini
->ism_dev
->dev
);
416 lgr
->peer_gid
= ini
->ism_gid
;
417 lgr
->smcd
= ini
->ism_dev
;
418 lgr_list
= &ini
->ism_dev
->lgr_list
;
419 lgr_lock
= &lgr
->smcd
->lgr_lock
;
420 lgr
->peer_shutdown
= 0;
421 atomic_inc(&ini
->ism_dev
->lgr_cnt
);
423 /* SMC-R specific settings */
424 lgr
->role
= smc
->listen_smc
? SMC_SERV
: SMC_CLNT
;
425 memcpy(lgr
->peer_systemid
, ini
->ib_lcl
->id_for_peer
,
427 memcpy(lgr
->pnet_id
, ini
->ib_dev
->pnetid
[ini
->ib_port
- 1],
429 smc_llc_lgr_init(lgr
, smc
);
431 link_idx
= SMC_SINGLE_LINK
;
432 lnk
= &lgr
->lnk
[link_idx
];
433 rc
= smcr_link_init(lgr
, lnk
, link_idx
, ini
);
436 lgr_list
= &smc_lgr_list
.list
;
437 lgr_lock
= &smc_lgr_list
.lock
;
438 atomic_inc(&lgr_cnt
);
441 spin_lock_bh(lgr_lock
);
442 list_add(&lgr
->list
, lgr_list
);
443 spin_unlock_bh(lgr_lock
);
449 if (ini
->is_smcd
&& ini
->vlan_id
)
450 smc_ism_put_vlan(ini
->ism_dev
, ini
->vlan_id
);
454 rc
= SMC_CLC_DECL_MEM
;
456 rc
= SMC_CLC_DECL_INTERR
;
461 static void smcr_buf_unuse(struct smc_buf_desc
*rmb_desc
,
462 struct smc_link_group
*lgr
)
466 if (rmb_desc
->is_conf_rkey
&& !list_empty(&lgr
->list
)) {
467 /* unregister rmb with peer */
468 rc
= smc_llc_flow_initiate(lgr
, SMC_LLC_FLOW_RKEY
);
470 /* protect against smc_llc_cli_rkey_exchange() */
471 mutex_lock(&lgr
->llc_conf_mutex
);
472 smc_llc_do_delete_rkey(lgr
, rmb_desc
);
473 rmb_desc
->is_conf_rkey
= false;
474 mutex_unlock(&lgr
->llc_conf_mutex
);
475 smc_llc_flow_stop(lgr
, &lgr
->llc_flow_lcl
);
479 if (rmb_desc
->is_reg_err
) {
480 /* buf registration failed, reuse not possible */
481 mutex_lock(&lgr
->rmbs_lock
);
482 list_del(&rmb_desc
->list
);
483 mutex_unlock(&lgr
->rmbs_lock
);
485 smc_buf_free(lgr
, true, rmb_desc
);
491 static void smc_buf_unuse(struct smc_connection
*conn
,
492 struct smc_link_group
*lgr
)
494 if (conn
->sndbuf_desc
)
495 conn
->sndbuf_desc
->used
= 0;
496 if (conn
->rmb_desc
&& lgr
->is_smcd
)
497 conn
->rmb_desc
->used
= 0;
498 else if (conn
->rmb_desc
)
499 smcr_buf_unuse(conn
->rmb_desc
, lgr
);
502 /* remove a finished connection from its link group */
503 void smc_conn_free(struct smc_connection
*conn
)
505 struct smc_link_group
*lgr
= conn
->lgr
;
510 if (!list_empty(&lgr
->list
))
511 smc_ism_unset_conn(conn
);
512 tasklet_kill(&conn
->rx_tsklet
);
514 smc_cdc_tx_dismiss_slots(conn
);
516 if (!list_empty(&lgr
->list
)) {
517 smc_lgr_unregister_conn(conn
);
518 smc_buf_unuse(conn
, lgr
); /* allow buffer reuse */
522 smc_lgr_schedule_free_work(lgr
);
525 /* unregister a link from a buf_desc */
526 static void smcr_buf_unmap_link(struct smc_buf_desc
*buf_desc
, bool is_rmb
,
527 struct smc_link
*lnk
)
530 buf_desc
->is_reg_mr
[lnk
->link_idx
] = false;
531 if (!buf_desc
->is_map_ib
[lnk
->link_idx
])
534 if (buf_desc
->mr_rx
[lnk
->link_idx
]) {
535 smc_ib_put_memory_region(
536 buf_desc
->mr_rx
[lnk
->link_idx
]);
537 buf_desc
->mr_rx
[lnk
->link_idx
] = NULL
;
539 smc_ib_buf_unmap_sg(lnk
, buf_desc
, DMA_FROM_DEVICE
);
541 smc_ib_buf_unmap_sg(lnk
, buf_desc
, DMA_TO_DEVICE
);
543 sg_free_table(&buf_desc
->sgt
[lnk
->link_idx
]);
544 buf_desc
->is_map_ib
[lnk
->link_idx
] = false;
547 /* unmap all buffers of lgr for a deleted link */
548 static void smcr_buf_unmap_lgr(struct smc_link
*lnk
)
550 struct smc_link_group
*lgr
= lnk
->lgr
;
551 struct smc_buf_desc
*buf_desc
, *bf
;
554 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
555 mutex_lock(&lgr
->rmbs_lock
);
556 list_for_each_entry_safe(buf_desc
, bf
, &lgr
->rmbs
[i
], list
)
557 smcr_buf_unmap_link(buf_desc
, true, lnk
);
558 mutex_unlock(&lgr
->rmbs_lock
);
559 mutex_lock(&lgr
->sndbufs_lock
);
560 list_for_each_entry_safe(buf_desc
, bf
, &lgr
->sndbufs
[i
],
562 smcr_buf_unmap_link(buf_desc
, false, lnk
);
563 mutex_unlock(&lgr
->sndbufs_lock
);
567 static void smcr_rtoken_clear_link(struct smc_link
*lnk
)
569 struct smc_link_group
*lgr
= lnk
->lgr
;
572 for (i
= 0; i
< SMC_RMBS_PER_LGR_MAX
; i
++) {
573 lgr
->rtokens
[i
][lnk
->link_idx
].rkey
= 0;
574 lgr
->rtokens
[i
][lnk
->link_idx
].dma_addr
= 0;
578 /* must be called under lgr->llc_conf_mutex lock */
579 void smcr_link_clear(struct smc_link
*lnk
)
581 struct smc_ib_device
*smcibdev
;
583 if (!lnk
->lgr
|| lnk
->state
== SMC_LNK_UNUSED
)
586 smc_llc_link_clear(lnk
);
587 smcr_buf_unmap_lgr(lnk
);
588 smcr_rtoken_clear_link(lnk
);
589 smc_ib_modify_qp_reset(lnk
);
590 smc_wr_free_link(lnk
);
591 smc_ib_destroy_queue_pair(lnk
);
592 smc_ib_dealloc_protection_domain(lnk
);
593 smc_wr_free_link_mem(lnk
);
594 put_device(&lnk
->smcibdev
->ibdev
->dev
);
595 smcibdev
= lnk
->smcibdev
;
596 memset(lnk
, 0, sizeof(struct smc_link
));
597 lnk
->state
= SMC_LNK_UNUSED
;
598 if (!atomic_dec_return(&smcibdev
->lnk_cnt
))
599 wake_up(&smcibdev
->lnks_deleted
);
602 static void smcr_buf_free(struct smc_link_group
*lgr
, bool is_rmb
,
603 struct smc_buf_desc
*buf_desc
)
607 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++)
608 smcr_buf_unmap_link(buf_desc
, is_rmb
, &lgr
->lnk
[i
]);
611 __free_pages(buf_desc
->pages
, buf_desc
->order
);
615 static void smcd_buf_free(struct smc_link_group
*lgr
, bool is_dmb
,
616 struct smc_buf_desc
*buf_desc
)
619 /* restore original buf len */
620 buf_desc
->len
+= sizeof(struct smcd_cdc_msg
);
621 smc_ism_unregister_dmb(lgr
->smcd
, buf_desc
);
623 kfree(buf_desc
->cpu_addr
);
628 static void smc_buf_free(struct smc_link_group
*lgr
, bool is_rmb
,
629 struct smc_buf_desc
*buf_desc
)
632 smcd_buf_free(lgr
, is_rmb
, buf_desc
);
634 smcr_buf_free(lgr
, is_rmb
, buf_desc
);
637 static void __smc_lgr_free_bufs(struct smc_link_group
*lgr
, bool is_rmb
)
639 struct smc_buf_desc
*buf_desc
, *bf_desc
;
640 struct list_head
*buf_list
;
643 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
645 buf_list
= &lgr
->rmbs
[i
];
647 buf_list
= &lgr
->sndbufs
[i
];
648 list_for_each_entry_safe(buf_desc
, bf_desc
, buf_list
,
650 list_del(&buf_desc
->list
);
651 smc_buf_free(lgr
, is_rmb
, buf_desc
);
656 static void smc_lgr_free_bufs(struct smc_link_group
*lgr
)
658 /* free send buffers */
659 __smc_lgr_free_bufs(lgr
, false);
661 __smc_lgr_free_bufs(lgr
, true);
664 /* remove a link group */
665 static void smc_lgr_free(struct smc_link_group
*lgr
)
669 smc_lgr_free_bufs(lgr
);
671 if (!lgr
->terminating
) {
672 smc_ism_put_vlan(lgr
->smcd
, lgr
->vlan_id
);
673 put_device(&lgr
->smcd
->dev
);
675 if (!atomic_dec_return(&lgr
->smcd
->lgr_cnt
))
676 wake_up(&lgr
->smcd
->lgrs_deleted
);
678 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
679 if (lgr
->lnk
[i
].state
!= SMC_LNK_UNUSED
)
680 smcr_link_clear(&lgr
->lnk
[i
]);
682 smc_llc_lgr_clear(lgr
);
683 if (!atomic_dec_return(&lgr_cnt
))
684 wake_up(&lgrs_deleted
);
689 void smc_lgr_forget(struct smc_link_group
*lgr
)
691 struct list_head
*lgr_list
;
692 spinlock_t
*lgr_lock
;
694 lgr_list
= smc_lgr_list_head(lgr
, &lgr_lock
);
695 spin_lock_bh(lgr_lock
);
696 /* do not use this link group for new connections */
697 if (!list_empty(lgr_list
))
698 list_del_init(lgr_list
);
699 spin_unlock_bh(lgr_lock
);
702 static void smcd_unregister_all_dmbs(struct smc_link_group
*lgr
)
706 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
707 struct smc_buf_desc
*buf_desc
;
709 list_for_each_entry(buf_desc
, &lgr
->rmbs
[i
], list
) {
710 buf_desc
->len
+= sizeof(struct smcd_cdc_msg
);
711 smc_ism_unregister_dmb(lgr
->smcd
, buf_desc
);
716 static void smc_sk_wake_ups(struct smc_sock
*smc
)
718 smc
->sk
.sk_write_space(&smc
->sk
);
719 smc
->sk
.sk_data_ready(&smc
->sk
);
720 smc
->sk
.sk_state_change(&smc
->sk
);
723 /* kill a connection */
724 static void smc_conn_kill(struct smc_connection
*conn
, bool soft
)
726 struct smc_sock
*smc
= container_of(conn
, struct smc_sock
, conn
);
728 if (conn
->lgr
->is_smcd
&& conn
->lgr
->peer_shutdown
)
729 conn
->local_tx_ctrl
.conn_state_flags
.peer_conn_abort
= 1;
731 smc_close_abort(conn
);
733 smc
->sk
.sk_err
= ECONNABORTED
;
734 smc_sk_wake_ups(smc
);
735 if (conn
->lgr
->is_smcd
) {
736 smc_ism_unset_conn(conn
);
738 tasklet_kill(&conn
->rx_tsklet
);
740 tasklet_unlock_wait(&conn
->rx_tsklet
);
742 smc_cdc_tx_dismiss_slots(conn
);
744 smc_lgr_unregister_conn(conn
);
745 smc_close_active_abort(smc
);
748 static void smc_lgr_cleanup(struct smc_link_group
*lgr
)
753 smc_ism_signal_shutdown(lgr
);
754 smcd_unregister_all_dmbs(lgr
);
755 smc_ism_put_vlan(lgr
->smcd
, lgr
->vlan_id
);
756 put_device(&lgr
->smcd
->dev
);
758 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
759 struct smc_link
*lnk
= &lgr
->lnk
[i
];
761 if (smc_link_usable(lnk
))
762 lnk
->state
= SMC_LNK_INACTIVE
;
764 wake_up_interruptible_all(&lgr
->llc_waiter
);
768 /* terminate link group
769 * @soft: true if link group shutdown can take its time
770 * false if immediate link group shutdown is required
772 static void __smc_lgr_terminate(struct smc_link_group
*lgr
, bool soft
)
774 struct smc_connection
*conn
;
775 struct smc_sock
*smc
;
776 struct rb_node
*node
;
778 if (lgr
->terminating
)
779 return; /* lgr already terminating */
781 cancel_delayed_work_sync(&lgr
->free_work
);
782 lgr
->terminating
= 1;
784 /* kill remaining link group connections */
785 read_lock_bh(&lgr
->conns_lock
);
786 node
= rb_first(&lgr
->conns_all
);
788 read_unlock_bh(&lgr
->conns_lock
);
789 conn
= rb_entry(node
, struct smc_connection
, alert_node
);
790 smc
= container_of(conn
, struct smc_sock
, conn
);
791 sock_hold(&smc
->sk
); /* sock_put below */
793 smc_conn_kill(conn
, soft
);
794 release_sock(&smc
->sk
);
795 sock_put(&smc
->sk
); /* sock_hold above */
796 read_lock_bh(&lgr
->conns_lock
);
797 node
= rb_first(&lgr
->conns_all
);
799 read_unlock_bh(&lgr
->conns_lock
);
800 smc_lgr_cleanup(lgr
);
802 smc_lgr_schedule_free_work_fast(lgr
);
807 /* unlink link group and schedule termination */
808 void smc_lgr_terminate_sched(struct smc_link_group
*lgr
)
810 spinlock_t
*lgr_lock
;
812 smc_lgr_list_head(lgr
, &lgr_lock
);
813 spin_lock_bh(lgr_lock
);
814 if (list_empty(&lgr
->list
) || lgr
->terminating
|| lgr
->freeing
) {
815 spin_unlock_bh(lgr_lock
);
816 return; /* lgr already terminating */
818 list_del_init(&lgr
->list
);
819 spin_unlock_bh(lgr_lock
);
820 schedule_work(&lgr
->terminate_work
);
823 /* Called when peer lgr shutdown (regularly or abnormally) is received */
824 void smc_smcd_terminate(struct smcd_dev
*dev
, u64 peer_gid
, unsigned short vlan
)
826 struct smc_link_group
*lgr
, *l
;
827 LIST_HEAD(lgr_free_list
);
829 /* run common cleanup function and build free list */
830 spin_lock_bh(&dev
->lgr_lock
);
831 list_for_each_entry_safe(lgr
, l
, &dev
->lgr_list
, list
) {
832 if ((!peer_gid
|| lgr
->peer_gid
== peer_gid
) &&
833 (vlan
== VLAN_VID_MASK
|| lgr
->vlan_id
== vlan
)) {
834 if (peer_gid
) /* peer triggered termination */
835 lgr
->peer_shutdown
= 1;
836 list_move(&lgr
->list
, &lgr_free_list
);
839 spin_unlock_bh(&dev
->lgr_lock
);
841 /* cancel the regular free workers and actually free lgrs */
842 list_for_each_entry_safe(lgr
, l
, &lgr_free_list
, list
) {
843 list_del_init(&lgr
->list
);
844 schedule_work(&lgr
->terminate_work
);
848 /* Called when an SMCD device is removed or the smc module is unloaded */
849 void smc_smcd_terminate_all(struct smcd_dev
*smcd
)
851 struct smc_link_group
*lgr
, *lg
;
852 LIST_HEAD(lgr_free_list
);
854 spin_lock_bh(&smcd
->lgr_lock
);
855 list_splice_init(&smcd
->lgr_list
, &lgr_free_list
);
856 list_for_each_entry(lgr
, &lgr_free_list
, list
)
858 spin_unlock_bh(&smcd
->lgr_lock
);
860 list_for_each_entry_safe(lgr
, lg
, &lgr_free_list
, list
) {
861 list_del_init(&lgr
->list
);
862 __smc_lgr_terminate(lgr
, false);
865 if (atomic_read(&smcd
->lgr_cnt
))
866 wait_event(smcd
->lgrs_deleted
, !atomic_read(&smcd
->lgr_cnt
));
869 /* Called when an SMCR device is removed or the smc module is unloaded.
870 * If smcibdev is given, all SMCR link groups using this device are terminated.
871 * If smcibdev is NULL, all SMCR link groups are terminated.
873 void smc_smcr_terminate_all(struct smc_ib_device
*smcibdev
)
875 struct smc_link_group
*lgr
, *lg
;
876 LIST_HEAD(lgr_free_list
);
879 spin_lock_bh(&smc_lgr_list
.lock
);
881 list_splice_init(&smc_lgr_list
.list
, &lgr_free_list
);
882 list_for_each_entry(lgr
, &lgr_free_list
, list
)
885 list_for_each_entry_safe(lgr
, lg
, &smc_lgr_list
.list
, list
) {
886 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
887 if (lgr
->lnk
[i
].smcibdev
== smcibdev
)
888 smcr_link_down_cond_sched(&lgr
->lnk
[i
]);
892 spin_unlock_bh(&smc_lgr_list
.lock
);
894 list_for_each_entry_safe(lgr
, lg
, &lgr_free_list
, list
) {
895 list_del_init(&lgr
->list
);
896 __smc_lgr_terminate(lgr
, false);
900 if (atomic_read(&smcibdev
->lnk_cnt
))
901 wait_event(smcibdev
->lnks_deleted
,
902 !atomic_read(&smcibdev
->lnk_cnt
));
904 if (atomic_read(&lgr_cnt
))
905 wait_event(lgrs_deleted
, !atomic_read(&lgr_cnt
));
909 /* link is up - establish alternate link if applicable */
910 static void smcr_link_up(struct smc_link_group
*lgr
,
911 struct smc_ib_device
*smcibdev
, u8 ibport
)
913 struct smc_link
*link
= NULL
;
915 if (list_empty(&lgr
->list
) ||
916 lgr
->type
== SMC_LGR_SYMMETRIC
||
917 lgr
->type
== SMC_LGR_ASYMMETRIC_PEER
)
920 if (lgr
->role
== SMC_SERV
) {
921 /* trigger local add link processing */
922 link
= smc_llc_usable_link(lgr
);
925 /* tbd: call smc_llc_srv_add_link_local(link); */
927 /* invite server to start add link processing */
928 u8 gid
[SMC_GID_SIZE
];
930 if (smc_ib_determine_gid(smcibdev
, ibport
, lgr
->vlan_id
, gid
,
933 if (lgr
->llc_flow_lcl
.type
!= SMC_LLC_FLOW_NONE
) {
934 /* some other llc task is ongoing */
935 wait_event_interruptible_timeout(lgr
->llc_waiter
,
936 (lgr
->llc_flow_lcl
.type
== SMC_LLC_FLOW_NONE
),
939 if (list_empty(&lgr
->list
) ||
940 !smc_ib_port_active(smcibdev
, ibport
))
941 return; /* lgr or device no longer active */
942 link
= smc_llc_usable_link(lgr
);
945 smc_llc_send_add_link(link
, smcibdev
->mac
[ibport
- 1], gid
,
950 void smcr_port_add(struct smc_ib_device
*smcibdev
, u8 ibport
)
952 struct smc_ib_up_work
*ib_work
;
953 struct smc_link_group
*lgr
, *n
;
955 list_for_each_entry_safe(lgr
, n
, &smc_lgr_list
.list
, list
) {
956 if (strncmp(smcibdev
->pnetid
[ibport
- 1], lgr
->pnet_id
,
957 SMC_MAX_PNETID_LEN
) ||
958 lgr
->type
== SMC_LGR_SYMMETRIC
||
959 lgr
->type
== SMC_LGR_ASYMMETRIC_PEER
)
961 ib_work
= kmalloc(sizeof(*ib_work
), GFP_KERNEL
);
964 INIT_WORK(&ib_work
->work
, smc_link_up_work
);
966 ib_work
->smcibdev
= smcibdev
;
967 ib_work
->ibport
= ibport
;
968 schedule_work(&ib_work
->work
);
972 /* link is down - switch connections to alternate link,
973 * must be called under lgr->llc_conf_mutex lock
975 static void smcr_link_down(struct smc_link
*lnk
)
977 struct smc_link_group
*lgr
= lnk
->lgr
;
978 struct smc_link
*to_lnk
;
981 if (!lgr
|| lnk
->state
== SMC_LNK_UNUSED
|| list_empty(&lgr
->list
))
984 smc_ib_modify_qp_reset(lnk
);
986 /* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
987 if (!to_lnk
) { /* no backup link available */
988 smcr_link_clear(lnk
);
991 lgr
->type
= SMC_LGR_SINGLE
;
992 del_link_id
= lnk
->link_id
;
994 if (lgr
->role
== SMC_SERV
) {
995 /* trigger local delete link processing */
997 if (lgr
->llc_flow_lcl
.type
!= SMC_LLC_FLOW_NONE
) {
998 /* another llc task is ongoing */
999 mutex_unlock(&lgr
->llc_conf_mutex
);
1000 wait_event_interruptible_timeout(lgr
->llc_waiter
,
1001 (lgr
->llc_flow_lcl
.type
== SMC_LLC_FLOW_NONE
),
1003 mutex_lock(&lgr
->llc_conf_mutex
);
1005 smc_llc_send_delete_link(to_lnk
, del_link_id
, SMC_LLC_REQ
, true,
1006 SMC_LLC_DEL_LOST_PATH
);
1010 /* must be called under lgr->llc_conf_mutex lock */
1011 void smcr_link_down_cond(struct smc_link
*lnk
)
1013 if (smc_link_downing(&lnk
->state
))
1014 smcr_link_down(lnk
);
1017 /* will get the lgr->llc_conf_mutex lock */
1018 void smcr_link_down_cond_sched(struct smc_link
*lnk
)
1020 if (smc_link_downing(&lnk
->state
))
1021 schedule_work(&lnk
->link_down_wrk
);
1024 void smcr_port_err(struct smc_ib_device
*smcibdev
, u8 ibport
)
1026 struct smc_link_group
*lgr
, *n
;
1029 list_for_each_entry_safe(lgr
, n
, &smc_lgr_list
.list
, list
) {
1030 if (strncmp(smcibdev
->pnetid
[ibport
- 1], lgr
->pnet_id
,
1031 SMC_MAX_PNETID_LEN
))
1032 continue; /* lgr is not affected */
1033 if (list_empty(&lgr
->list
))
1035 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1036 struct smc_link
*lnk
= &lgr
->lnk
[i
];
1038 if (smc_link_usable(lnk
) &&
1039 lnk
->smcibdev
== smcibdev
&& lnk
->ibport
== ibport
)
1040 smcr_link_down_cond_sched(lnk
);
1045 static void smc_link_up_work(struct work_struct
*work
)
1047 struct smc_ib_up_work
*ib_work
= container_of(work
,
1048 struct smc_ib_up_work
,
1050 struct smc_link_group
*lgr
= ib_work
->lgr
;
1052 if (list_empty(&lgr
->list
))
1054 smcr_link_up(lgr
, ib_work
->smcibdev
, ib_work
->ibport
);
1059 static void smc_link_down_work(struct work_struct
*work
)
1061 struct smc_link
*link
= container_of(work
, struct smc_link
,
1063 struct smc_link_group
*lgr
= link
->lgr
;
1065 if (list_empty(&lgr
->list
))
1067 wake_up_interruptible_all(&lgr
->llc_waiter
);
1068 mutex_lock(&lgr
->llc_conf_mutex
);
1069 smcr_link_down(link
);
1070 mutex_unlock(&lgr
->llc_conf_mutex
);
1073 /* Determine vlan of internal TCP socket.
1074 * @vlan_id: address to store the determined vlan id into
1076 int smc_vlan_by_tcpsk(struct socket
*clcsock
, struct smc_init_info
*ini
)
1078 struct dst_entry
*dst
= sk_dst_get(clcsock
->sk
);
1079 struct net_device
*ndev
;
1080 int i
, nest_lvl
, rc
= 0;
1093 if (is_vlan_dev(ndev
)) {
1094 ini
->vlan_id
= vlan_dev_vlan_id(ndev
);
1099 nest_lvl
= ndev
->lower_level
;
1100 for (i
= 0; i
< nest_lvl
; i
++) {
1101 struct list_head
*lower
= &ndev
->adj_list
.lower
;
1103 if (list_empty(lower
))
1105 lower
= lower
->next
;
1106 ndev
= (struct net_device
*)netdev_lower_get_next(ndev
, &lower
);
1107 if (is_vlan_dev(ndev
)) {
1108 ini
->vlan_id
= vlan_dev_vlan_id(ndev
);
1120 static bool smcr_lgr_match(struct smc_link_group
*lgr
,
1121 struct smc_clc_msg_local
*lcl
,
1122 enum smc_lgr_role role
, u32 clcqpn
)
1126 if (memcmp(lgr
->peer_systemid
, lcl
->id_for_peer
, SMC_SYSTEMID_LEN
) ||
1130 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1131 if (lgr
->lnk
[i
].state
!= SMC_LNK_ACTIVE
)
1133 if ((lgr
->role
== SMC_SERV
|| lgr
->lnk
[i
].peer_qpn
== clcqpn
) &&
1134 !memcmp(lgr
->lnk
[i
].peer_gid
, &lcl
->gid
, SMC_GID_SIZE
) &&
1135 !memcmp(lgr
->lnk
[i
].peer_mac
, lcl
->mac
, sizeof(lcl
->mac
)))
1141 static bool smcd_lgr_match(struct smc_link_group
*lgr
,
1142 struct smcd_dev
*smcismdev
, u64 peer_gid
)
1144 return lgr
->peer_gid
== peer_gid
&& lgr
->smcd
== smcismdev
;
1147 /* create a new SMC connection (and a new link group if necessary) */
1148 int smc_conn_create(struct smc_sock
*smc
, struct smc_init_info
*ini
)
1150 struct smc_connection
*conn
= &smc
->conn
;
1151 struct list_head
*lgr_list
;
1152 struct smc_link_group
*lgr
;
1153 enum smc_lgr_role role
;
1154 spinlock_t
*lgr_lock
;
1157 lgr_list
= ini
->is_smcd
? &ini
->ism_dev
->lgr_list
: &smc_lgr_list
.list
;
1158 lgr_lock
= ini
->is_smcd
? &ini
->ism_dev
->lgr_lock
: &smc_lgr_list
.lock
;
1159 ini
->cln_first_contact
= SMC_FIRST_CONTACT
;
1160 role
= smc
->listen_smc
? SMC_SERV
: SMC_CLNT
;
1161 if (role
== SMC_CLNT
&& ini
->srv_first_contact
)
1162 /* create new link group as well */
1165 /* determine if an existing link group can be reused */
1166 spin_lock_bh(lgr_lock
);
1167 list_for_each_entry(lgr
, lgr_list
, list
) {
1168 write_lock_bh(&lgr
->conns_lock
);
1170 smcd_lgr_match(lgr
, ini
->ism_dev
, ini
->ism_gid
) :
1171 smcr_lgr_match(lgr
, ini
->ib_lcl
, role
, ini
->ib_clcqpn
)) &&
1173 lgr
->vlan_id
== ini
->vlan_id
&&
1174 (role
== SMC_CLNT
||
1175 lgr
->conns_num
< SMC_RMBS_PER_LGR_MAX
)) {
1176 /* link group found */
1177 ini
->cln_first_contact
= SMC_REUSE_CONTACT
;
1179 rc
= smc_lgr_register_conn(conn
); /* add conn to lgr */
1180 write_unlock_bh(&lgr
->conns_lock
);
1181 if (!rc
&& delayed_work_pending(&lgr
->free_work
))
1182 cancel_delayed_work(&lgr
->free_work
);
1185 write_unlock_bh(&lgr
->conns_lock
);
1187 spin_unlock_bh(lgr_lock
);
1191 if (role
== SMC_CLNT
&& !ini
->srv_first_contact
&&
1192 ini
->cln_first_contact
== SMC_FIRST_CONTACT
) {
1193 /* Server reuses a link group, but Client wants to start
1195 * send out_of_sync decline, reason synchr. error
1197 return SMC_CLC_DECL_SYNCERR
;
1201 if (ini
->cln_first_contact
== SMC_FIRST_CONTACT
) {
1202 rc
= smc_lgr_create(smc
, ini
);
1206 write_lock_bh(&lgr
->conns_lock
);
1207 rc
= smc_lgr_register_conn(conn
); /* add smc conn to lgr */
1208 write_unlock_bh(&lgr
->conns_lock
);
1212 conn
->local_tx_ctrl
.common
.type
= SMC_CDC_MSG_TYPE
;
1213 conn
->local_tx_ctrl
.len
= SMC_WR_TX_SIZE
;
1214 conn
->urg_state
= SMC_URG_READ
;
1216 conn
->rx_off
= sizeof(struct smcd_cdc_msg
);
1217 smcd_cdc_rx_init(conn
); /* init tasklet for this conn */
1219 #ifndef KERNEL_HAS_ATOMIC64
1220 spin_lock_init(&conn
->acurs_lock
);
1227 /* convert the RMB size into the compressed notation - minimum 16K.
1228 * In contrast to plain ilog2, this rounds towards the next power of 2,
1229 * so the socket application gets at least its desired sndbuf / rcvbuf size.
1231 static u8
smc_compress_bufsize(int size
)
1235 if (size
<= SMC_BUF_MIN_SIZE
)
1238 size
= (size
- 1) >> 14;
1239 compressed
= ilog2(size
) + 1;
1240 if (compressed
>= SMC_RMBE_SIZES
)
1241 compressed
= SMC_RMBE_SIZES
- 1;
1245 /* convert the RMB size from compressed notation into integer */
1246 int smc_uncompress_bufsize(u8 compressed
)
1250 size
= 0x00000001 << (((int)compressed
) + 14);
1254 /* try to reuse a sndbuf or rmb description slot for a certain
1255 * buffer size; if not available, return NULL
1257 static struct smc_buf_desc
*smc_buf_get_slot(int compressed_bufsize
,
1259 struct list_head
*buf_list
)
1261 struct smc_buf_desc
*buf_slot
;
1264 list_for_each_entry(buf_slot
, buf_list
, list
) {
1265 if (cmpxchg(&buf_slot
->used
, 0, 1) == 0) {
1274 /* one of the conditions for announcing a receiver's current window size is
1275 * that it "results in a minimum increase in the window size of 10% of the
1276 * receive buffer space" [RFC7609]
1278 static inline int smc_rmb_wnd_update_limit(int rmbe_size
)
1280 return min_t(int, rmbe_size
/ 10, SOCK_MIN_SNDBUF
/ 2);
1283 /* map an rmb buf to a link */
1284 static int smcr_buf_map_link(struct smc_buf_desc
*buf_desc
, bool is_rmb
,
1285 struct smc_link
*lnk
)
1289 if (buf_desc
->is_map_ib
[lnk
->link_idx
])
1292 rc
= sg_alloc_table(&buf_desc
->sgt
[lnk
->link_idx
], 1, GFP_KERNEL
);
1295 sg_set_buf(buf_desc
->sgt
[lnk
->link_idx
].sgl
,
1296 buf_desc
->cpu_addr
, buf_desc
->len
);
1298 /* map sg table to DMA address */
1299 rc
= smc_ib_buf_map_sg(lnk
, buf_desc
,
1300 is_rmb
? DMA_FROM_DEVICE
: DMA_TO_DEVICE
);
1301 /* SMC protocol depends on mapping to one DMA address only */
1307 /* create a new memory region for the RMB */
1309 rc
= smc_ib_get_memory_region(lnk
->roce_pd
,
1310 IB_ACCESS_REMOTE_WRITE
|
1311 IB_ACCESS_LOCAL_WRITE
,
1312 buf_desc
, lnk
->link_idx
);
1315 smc_ib_sync_sg_for_device(lnk
, buf_desc
, DMA_FROM_DEVICE
);
1317 buf_desc
->is_map_ib
[lnk
->link_idx
] = true;
1321 smc_ib_buf_unmap_sg(lnk
, buf_desc
,
1322 is_rmb
? DMA_FROM_DEVICE
: DMA_TO_DEVICE
);
1324 sg_free_table(&buf_desc
->sgt
[lnk
->link_idx
]);
1328 /* register a new rmb on IB device,
1329 * must be called under lgr->llc_conf_mutex lock
1331 int smcr_link_reg_rmb(struct smc_link
*link
, struct smc_buf_desc
*rmb_desc
)
1333 if (list_empty(&link
->lgr
->list
))
1335 if (!rmb_desc
->is_reg_mr
[link
->link_idx
]) {
1336 /* register memory region for new rmb */
1337 if (smc_wr_reg_send(link
, rmb_desc
->mr_rx
[link
->link_idx
])) {
1338 rmb_desc
->is_reg_err
= true;
1341 rmb_desc
->is_reg_mr
[link
->link_idx
] = true;
1346 static int _smcr_buf_map_lgr(struct smc_link
*lnk
, struct mutex
*lock
,
1347 struct list_head
*lst
, bool is_rmb
)
1349 struct smc_buf_desc
*buf_desc
, *bf
;
1353 list_for_each_entry_safe(buf_desc
, bf
, lst
, list
) {
1354 if (!buf_desc
->used
)
1356 rc
= smcr_buf_map_link(buf_desc
, is_rmb
, lnk
);
1365 /* map all used buffers of lgr for a new link */
1366 int smcr_buf_map_lgr(struct smc_link
*lnk
)
1368 struct smc_link_group
*lgr
= lnk
->lgr
;
1371 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
1372 rc
= _smcr_buf_map_lgr(lnk
, &lgr
->rmbs_lock
,
1373 &lgr
->rmbs
[i
], true);
1376 rc
= _smcr_buf_map_lgr(lnk
, &lgr
->sndbufs_lock
,
1377 &lgr
->sndbufs
[i
], false);
1384 /* register all used buffers of lgr for a new link,
1385 * must be called under lgr->llc_conf_mutex lock
1387 int smcr_buf_reg_lgr(struct smc_link
*lnk
)
1389 struct smc_link_group
*lgr
= lnk
->lgr
;
1390 struct smc_buf_desc
*buf_desc
, *bf
;
1393 mutex_lock(&lgr
->rmbs_lock
);
1394 for (i
= 0; i
< SMC_RMBE_SIZES
; i
++) {
1395 list_for_each_entry_safe(buf_desc
, bf
, &lgr
->rmbs
[i
], list
) {
1396 if (!buf_desc
->used
)
1398 rc
= smcr_link_reg_rmb(lnk
, buf_desc
);
1404 mutex_unlock(&lgr
->rmbs_lock
);
1408 static struct smc_buf_desc
*smcr_new_buf_create(struct smc_link_group
*lgr
,
1409 bool is_rmb
, int bufsize
)
1411 struct smc_buf_desc
*buf_desc
;
1413 /* try to alloc a new buffer */
1414 buf_desc
= kzalloc(sizeof(*buf_desc
), GFP_KERNEL
);
1416 return ERR_PTR(-ENOMEM
);
1418 buf_desc
->order
= get_order(bufsize
);
1419 buf_desc
->pages
= alloc_pages(GFP_KERNEL
| __GFP_NOWARN
|
1420 __GFP_NOMEMALLOC
| __GFP_COMP
|
1421 __GFP_NORETRY
| __GFP_ZERO
,
1423 if (!buf_desc
->pages
) {
1425 return ERR_PTR(-EAGAIN
);
1427 buf_desc
->cpu_addr
= (void *)page_address(buf_desc
->pages
);
1428 buf_desc
->len
= bufsize
;
1432 /* map buf_desc on all usable links,
1433 * unused buffers stay mapped as long as the link is up
1435 static int smcr_buf_map_usable_links(struct smc_link_group
*lgr
,
1436 struct smc_buf_desc
*buf_desc
, bool is_rmb
)
1440 /* protect against parallel link reconfiguration */
1441 mutex_lock(&lgr
->llc_conf_mutex
);
1442 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1443 struct smc_link
*lnk
= &lgr
->lnk
[i
];
1445 if (!smc_link_usable(lnk
))
1447 if (smcr_buf_map_link(buf_desc
, is_rmb
, lnk
)) {
1453 mutex_unlock(&lgr
->llc_conf_mutex
);
1457 #define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1459 static struct smc_buf_desc
*smcd_new_buf_create(struct smc_link_group
*lgr
,
1460 bool is_dmb
, int bufsize
)
1462 struct smc_buf_desc
*buf_desc
;
1465 if (smc_compress_bufsize(bufsize
) > SMCD_DMBE_SIZES
)
1466 return ERR_PTR(-EAGAIN
);
1468 /* try to alloc a new DMB */
1469 buf_desc
= kzalloc(sizeof(*buf_desc
), GFP_KERNEL
);
1471 return ERR_PTR(-ENOMEM
);
1473 rc
= smc_ism_register_dmb(lgr
, bufsize
, buf_desc
);
1476 return ERR_PTR(-EAGAIN
);
1478 buf_desc
->pages
= virt_to_page(buf_desc
->cpu_addr
);
1479 /* CDC header stored in buf. So, pretend it was smaller */
1480 buf_desc
->len
= bufsize
- sizeof(struct smcd_cdc_msg
);
1482 buf_desc
->cpu_addr
= kzalloc(bufsize
, GFP_KERNEL
|
1483 __GFP_NOWARN
| __GFP_NORETRY
|
1485 if (!buf_desc
->cpu_addr
) {
1487 return ERR_PTR(-EAGAIN
);
1489 buf_desc
->len
= bufsize
;
1494 static int __smc_buf_create(struct smc_sock
*smc
, bool is_smcd
, bool is_rmb
)
1496 struct smc_buf_desc
*buf_desc
= ERR_PTR(-ENOMEM
);
1497 struct smc_connection
*conn
= &smc
->conn
;
1498 struct smc_link_group
*lgr
= conn
->lgr
;
1499 struct list_head
*buf_list
;
1500 int bufsize
, bufsize_short
;
1501 struct mutex
*lock
; /* lock buffer list */
1505 /* use socket recv buffer size (w/o overhead) as start value */
1506 sk_buf_size
= smc
->sk
.sk_rcvbuf
/ 2;
1508 /* use socket send buffer size (w/o overhead) as start value */
1509 sk_buf_size
= smc
->sk
.sk_sndbuf
/ 2;
1511 for (bufsize_short
= smc_compress_bufsize(sk_buf_size
);
1512 bufsize_short
>= 0; bufsize_short
--) {
1515 lock
= &lgr
->rmbs_lock
;
1516 buf_list
= &lgr
->rmbs
[bufsize_short
];
1518 lock
= &lgr
->sndbufs_lock
;
1519 buf_list
= &lgr
->sndbufs
[bufsize_short
];
1521 bufsize
= smc_uncompress_bufsize(bufsize_short
);
1522 if ((1 << get_order(bufsize
)) > SG_MAX_SINGLE_ALLOC
)
1525 /* check for reusable slot in the link group */
1526 buf_desc
= smc_buf_get_slot(bufsize_short
, lock
, buf_list
);
1528 memset(buf_desc
->cpu_addr
, 0, bufsize
);
1529 break; /* found reusable slot */
1533 buf_desc
= smcd_new_buf_create(lgr
, is_rmb
, bufsize
);
1535 buf_desc
= smcr_new_buf_create(lgr
, is_rmb
, bufsize
);
1537 if (PTR_ERR(buf_desc
) == -ENOMEM
)
1539 if (IS_ERR(buf_desc
))
1544 list_add(&buf_desc
->list
, buf_list
);
1549 if (IS_ERR(buf_desc
))
1553 if (smcr_buf_map_usable_links(lgr
, buf_desc
, is_rmb
)) {
1554 smcr_buf_unuse(buf_desc
, lgr
);
1560 conn
->rmb_desc
= buf_desc
;
1561 conn
->rmbe_size_short
= bufsize_short
;
1562 smc
->sk
.sk_rcvbuf
= bufsize
* 2;
1563 atomic_set(&conn
->bytes_to_rcv
, 0);
1564 conn
->rmbe_update_limit
=
1565 smc_rmb_wnd_update_limit(buf_desc
->len
);
1567 smc_ism_set_conn(conn
); /* map RMB/smcd_dev to conn */
1569 conn
->sndbuf_desc
= buf_desc
;
1570 smc
->sk
.sk_sndbuf
= bufsize
* 2;
1571 atomic_set(&conn
->sndbuf_space
, bufsize
);
1576 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection
*conn
)
1578 if (!conn
->lgr
|| conn
->lgr
->is_smcd
|| !smc_link_usable(conn
->lnk
))
1580 smc_ib_sync_sg_for_cpu(conn
->lnk
, conn
->sndbuf_desc
, DMA_TO_DEVICE
);
1583 void smc_sndbuf_sync_sg_for_device(struct smc_connection
*conn
)
1585 if (!conn
->lgr
|| conn
->lgr
->is_smcd
|| !smc_link_usable(conn
->lnk
))
1587 smc_ib_sync_sg_for_device(conn
->lnk
, conn
->sndbuf_desc
, DMA_TO_DEVICE
);
1590 void smc_rmb_sync_sg_for_cpu(struct smc_connection
*conn
)
1594 if (!conn
->lgr
|| conn
->lgr
->is_smcd
)
1596 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1597 if (!smc_link_usable(&conn
->lgr
->lnk
[i
]))
1599 smc_ib_sync_sg_for_cpu(&conn
->lgr
->lnk
[i
], conn
->rmb_desc
,
1604 void smc_rmb_sync_sg_for_device(struct smc_connection
*conn
)
1608 if (!conn
->lgr
|| conn
->lgr
->is_smcd
)
1610 for (i
= 0; i
< SMC_LINKS_PER_LGR_MAX
; i
++) {
1611 if (!smc_link_usable(&conn
->lgr
->lnk
[i
]))
1613 smc_ib_sync_sg_for_device(&conn
->lgr
->lnk
[i
], conn
->rmb_desc
,
1618 /* create the send and receive buffer for an SMC socket;
1619 * receive buffers are called RMBs;
1620 * (even though the SMC protocol allows more than one RMB-element per RMB,
1621 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
1622 * extra RMB for every connection in a link group
1624 int smc_buf_create(struct smc_sock
*smc
, bool is_smcd
)
1628 /* create send buffer */
1629 rc
= __smc_buf_create(smc
, is_smcd
, false);
1633 rc
= __smc_buf_create(smc
, is_smcd
, true);
1635 smc_buf_free(smc
->conn
.lgr
, false, smc
->conn
.sndbuf_desc
);
1639 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group
*lgr
)
1643 for_each_clear_bit(i
, lgr
->rtokens_used_mask
, SMC_RMBS_PER_LGR_MAX
) {
1644 if (!test_and_set_bit(i
, lgr
->rtokens_used_mask
))
1650 static int smc_rtoken_find_by_link(struct smc_link_group
*lgr
, int lnk_idx
,
1655 for (i
= 0; i
< SMC_RMBS_PER_LGR_MAX
; i
++) {
1656 if (test_bit(i
, lgr
->rtokens_used_mask
) &&
1657 lgr
->rtokens
[i
][lnk_idx
].rkey
== rkey
)
1663 /* set rtoken for a new link to an existing rmb */
1664 void smc_rtoken_set(struct smc_link_group
*lgr
, int link_idx
, int link_idx_new
,
1665 __be32 nw_rkey_known
, __be64 nw_vaddr
, __be32 nw_rkey
)
1669 rtok_idx
= smc_rtoken_find_by_link(lgr
, link_idx
, ntohl(nw_rkey_known
));
1670 if (rtok_idx
== -ENOENT
)
1672 lgr
->rtokens
[rtok_idx
][link_idx_new
].rkey
= ntohl(nw_rkey
);
1673 lgr
->rtokens
[rtok_idx
][link_idx_new
].dma_addr
= be64_to_cpu(nw_vaddr
);
1676 /* set rtoken for a new link whose link_id is given */
1677 void smc_rtoken_set2(struct smc_link_group
*lgr
, int rtok_idx
, int link_id
,
1678 __be64 nw_vaddr
, __be32 nw_rkey
)
1680 u64 dma_addr
= be64_to_cpu(nw_vaddr
);
1681 u32 rkey
= ntohl(nw_rkey
);
1685 for (link_idx
= 0; link_idx
< SMC_LINKS_PER_LGR_MAX
; link_idx
++) {
1686 if (lgr
->lnk
[link_idx
].link_id
== link_id
) {
1693 lgr
->rtokens
[rtok_idx
][link_idx
].rkey
= rkey
;
1694 lgr
->rtokens
[rtok_idx
][link_idx
].dma_addr
= dma_addr
;
1697 /* add a new rtoken from peer */
1698 int smc_rtoken_add(struct smc_link
*lnk
, __be64 nw_vaddr
, __be32 nw_rkey
)
1700 struct smc_link_group
*lgr
= smc_get_lgr(lnk
);
1701 u64 dma_addr
= be64_to_cpu(nw_vaddr
);
1702 u32 rkey
= ntohl(nw_rkey
);
1705 for (i
= 0; i
< SMC_RMBS_PER_LGR_MAX
; i
++) {
1706 if (lgr
->rtokens
[i
][lnk
->link_idx
].rkey
== rkey
&&
1707 lgr
->rtokens
[i
][lnk
->link_idx
].dma_addr
== dma_addr
&&
1708 test_bit(i
, lgr
->rtokens_used_mask
)) {
1709 /* already in list */
1713 i
= smc_rmb_reserve_rtoken_idx(lgr
);
1716 lgr
->rtokens
[i
][lnk
->link_idx
].rkey
= rkey
;
1717 lgr
->rtokens
[i
][lnk
->link_idx
].dma_addr
= dma_addr
;
1721 /* delete an rtoken from all links */
1722 int smc_rtoken_delete(struct smc_link
*lnk
, __be32 nw_rkey
)
1724 struct smc_link_group
*lgr
= smc_get_lgr(lnk
);
1725 u32 rkey
= ntohl(nw_rkey
);
1728 for (i
= 0; i
< SMC_RMBS_PER_LGR_MAX
; i
++) {
1729 if (lgr
->rtokens
[i
][lnk
->link_idx
].rkey
== rkey
&&
1730 test_bit(i
, lgr
->rtokens_used_mask
)) {
1731 for (j
= 0; j
< SMC_LINKS_PER_LGR_MAX
; j
++) {
1732 lgr
->rtokens
[i
][j
].rkey
= 0;
1733 lgr
->rtokens
[i
][j
].dma_addr
= 0;
1735 clear_bit(i
, lgr
->rtokens_used_mask
);
1742 /* save rkey and dma_addr received from peer during clc handshake */
1743 int smc_rmb_rtoken_handling(struct smc_connection
*conn
,
1744 struct smc_link
*lnk
,
1745 struct smc_clc_msg_accept_confirm
*clc
)
1747 conn
->rtoken_idx
= smc_rtoken_add(lnk
, clc
->rmb_dma_addr
,
1749 if (conn
->rtoken_idx
< 0)
1750 return conn
->rtoken_idx
;
1754 static void smc_core_going_away(void)
1756 struct smc_ib_device
*smcibdev
;
1757 struct smcd_dev
*smcd
;
1759 spin_lock(&smc_ib_devices
.lock
);
1760 list_for_each_entry(smcibdev
, &smc_ib_devices
.list
, list
) {
1763 for (i
= 0; i
< SMC_MAX_PORTS
; i
++)
1764 set_bit(i
, smcibdev
->ports_going_away
);
1766 spin_unlock(&smc_ib_devices
.lock
);
1768 spin_lock(&smcd_dev_list
.lock
);
1769 list_for_each_entry(smcd
, &smcd_dev_list
.list
, list
) {
1770 smcd
->going_away
= 1;
1772 spin_unlock(&smcd_dev_list
.lock
);
1775 /* Clean up all SMC link groups */
1776 static void smc_lgrs_shutdown(void)
1778 struct smcd_dev
*smcd
;
1780 smc_core_going_away();
1782 smc_smcr_terminate_all(NULL
);
1784 spin_lock(&smcd_dev_list
.lock
);
1785 list_for_each_entry(smcd
, &smcd_dev_list
.list
, list
)
1786 smc_smcd_terminate_all(smcd
);
1787 spin_unlock(&smcd_dev_list
.lock
);
/* reboot notifier: tear down all link groups before the system goes down */
static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	return 0;
}
1798 static struct notifier_block smc_reboot_notifier
= {
1799 .notifier_call
= smc_core_reboot_event
,
1802 int __init
smc_core_init(void)
1804 return register_reboot_notifier(&smc_reboot_notifier
);
1807 /* Called (from smc_exit) when module is removed */
1808 void smc_core_exit(void)
1810 unregister_reboot_notifier(&smc_reboot_notifier
);
1811 smc_lgrs_shutdown();