1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 *
5 * Basic Transport Functions exploiting Infiniband API
6 *
7 * Copyright IBM Corp. 2016
8 *
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
10 */
11
12 #include <linux/socket.h>
13 #include <linux/if_vlan.h>
14 #include <linux/random.h>
15 #include <linux/workqueue.h>
16 #include <linux/wait.h>
17 #include <linux/reboot.h>
18 #include <net/tcp.h>
19 #include <net/sock.h>
20 #include <rdma/ib_verbs.h>
21 #include <rdma/ib_cache.h>
22
23 #include "smc.h"
24 #include "smc_clc.h"
25 #include "smc_core.h"
26 #include "smc_ib.h"
27 #include "smc_wr.h"
28 #include "smc_llc.h"
29 #include "smc_cdc.h"
30 #include "smc_close.h"
31 #include "smc_ism.h"
32
33 #define SMC_LGR_NUM_INCR 256
34 #define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
35 #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
36 #define SMC_LGR_FREE_DELAY_FAST (8 * HZ)
37
38 static struct smc_lgr_list smc_lgr_list = { /* established link groups */
39 .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
40 .list = LIST_HEAD_INIT(smc_lgr_list.list),
41 .num = 0,
42 };
43
44 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
45 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
46
47 struct smc_ib_up_work {
48 struct work_struct work;
49 struct smc_link_group *lgr;
50 struct smc_ib_device *smcibdev;
51 u8 ibport;
52 };
53
54 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
55 struct smc_buf_desc *buf_desc);
56 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
57
58 static void smc_link_up_work(struct work_struct *work);
59 static void smc_link_down_work(struct work_struct *work);
60
61 /* return head of link group list and its lock for a given link group */
62 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
63 spinlock_t **lgr_lock)
64 {
65 if (lgr->is_smcd) {
66 *lgr_lock = &lgr->smcd->lgr_lock;
67 return &lgr->smcd->lgr_list;
68 }
69
70 *lgr_lock = &smc_lgr_list.lock;
71 return &smc_lgr_list.list;
72 }
73
74 static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
75 {
76 /* client link group creation always follows the server link group
77 * creation. For client use a somewhat higher removal delay time,
78 * otherwise there is a risk of out-of-sync link groups.
79 */
80 if (!lgr->freeing && !lgr->freefast) {
81 mod_delayed_work(system_wq, &lgr->free_work,
82 (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
83 SMC_LGR_FREE_DELAY_CLNT :
84 SMC_LGR_FREE_DELAY_SERV);
85 }
86 }
87
88 void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
89 {
90 if (!lgr->freeing && !lgr->freefast) {
91 lgr->freefast = 1;
92 mod_delayed_work(system_wq, &lgr->free_work,
93 SMC_LGR_FREE_DELAY_FAST);
94 }
95 }
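/* Illustrative note on the delay values defined above: with HZ jiffies per
 * second, SMC_LGR_FREE_DELAY_SERV corresponds to 10 minutes of an unused
 * link group, the client delay is 10 seconds longer than the server delay,
 * and the "fast" delay used by smc_lgr_schedule_free_work_fast() is
 * 8 seconds.
 */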
96
97 /* Register connection's alert token in our lookup structure.
98 * To use rbtrees we have to implement our own insert core.
99 * Requires @conns_lock
100  * @conn connection to register
101  * The connection's alert_token_local must already be set and unique.
102 */
103 static void smc_lgr_add_alert_token(struct smc_connection *conn)
104 {
105 struct rb_node **link, *parent = NULL;
106 u32 token = conn->alert_token_local;
107
108 link = &conn->lgr->conns_all.rb_node;
109 while (*link) {
110 struct smc_connection *cur = rb_entry(*link,
111 struct smc_connection, alert_node);
112
113 parent = *link;
114 if (cur->alert_token_local > token)
115 link = &parent->rb_left;
116 else
117 link = &parent->rb_right;
118 }
119 /* Put the new node there */
120 rb_link_node(&conn->alert_node, parent, link);
121 rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
122 }
123
124 /* Register connection in link group by assigning an alert token
125 * registered in a search tree.
126 * Requires @conns_lock
127 * Note that '0' is a reserved value and not assigned.
128 */
129 static int smc_lgr_register_conn(struct smc_connection *conn)
130 {
131 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
132 static atomic_t nexttoken = ATOMIC_INIT(0);
133
134 /* find a new alert_token_local value not yet used by some connection
135 * in this link group
136 */
137 sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
138 while (!conn->alert_token_local) {
139 conn->alert_token_local = atomic_inc_return(&nexttoken);
140 if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
141 conn->alert_token_local = 0;
142 }
143 smc_lgr_add_alert_token(conn);
144
145 /* assign the new connection to a link */
146 if (!conn->lgr->is_smcd) {
147 struct smc_link *lnk;
148 int i;
149
150 /* tbd - link balancing */
151 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
152 lnk = &conn->lgr->lnk[i];
153 if (lnk->state == SMC_LNK_ACTIVATING ||
154 lnk->state == SMC_LNK_ACTIVE)
155 conn->lnk = lnk;
156 }
157 if (!conn->lnk)
158 return SMC_CLC_DECL_NOACTLINK;
159 }
160 conn->lgr->conns_num++;
161 return 0;
162 }
163
164 /* Unregister connection and reset the alert token of the given connection
165 */
166 static void __smc_lgr_unregister_conn(struct smc_connection *conn)
167 {
168 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
169 struct smc_link_group *lgr = conn->lgr;
170
171 rb_erase(&conn->alert_node, &lgr->conns_all);
172 lgr->conns_num--;
173 conn->alert_token_local = 0;
174 sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
175 }
176
177 /* Unregister connection from lgr
178 */
179 static void smc_lgr_unregister_conn(struct smc_connection *conn)
180 {
181 struct smc_link_group *lgr = conn->lgr;
182
183 if (!lgr)
184 return;
185 write_lock_bh(&lgr->conns_lock);
186 if (conn->alert_token_local) {
187 __smc_lgr_unregister_conn(conn);
188 }
189 write_unlock_bh(&lgr->conns_lock);
190 conn->lgr = NULL;
191 }
192
193 void smc_lgr_cleanup_early(struct smc_connection *conn)
194 {
195 struct smc_link_group *lgr = conn->lgr;
196
197 if (!lgr)
198 return;
199
200 smc_conn_free(conn);
201 smc_lgr_forget(lgr);
202 smc_lgr_schedule_free_work_fast(lgr);
203 }
204
205 /* Send delete link, either as client to request the initiation
206 * of the DELETE LINK sequence from server; or as server to
207 * initiate the delete processing. See smc_llc_rx_delete_link().
208 */
209 static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
210 {
211 if (lnk->state == SMC_LNK_ACTIVE &&
212 !smc_llc_send_delete_link(lnk, 0, SMC_LLC_REQ, orderly,
213 SMC_LLC_DEL_PROG_INIT_TERM)) {
214 return 0;
215 }
216 return -ENOTCONN;
217 }
218
219 static void smc_lgr_free(struct smc_link_group *lgr);
220
221 static void smc_lgr_free_work(struct work_struct *work)
222 {
223 struct smc_link_group *lgr = container_of(to_delayed_work(work),
224 struct smc_link_group,
225 free_work);
226 spinlock_t *lgr_lock;
227 bool conns;
228 int i;
229
230 smc_lgr_list_head(lgr, &lgr_lock);
231 spin_lock_bh(lgr_lock);
232 if (lgr->freeing) {
233 spin_unlock_bh(lgr_lock);
234 return;
235 }
236 read_lock_bh(&lgr->conns_lock);
237 conns = RB_EMPTY_ROOT(&lgr->conns_all);
238 read_unlock_bh(&lgr->conns_lock);
239 if (!conns) { /* number of lgr connections is no longer zero */
240 spin_unlock_bh(lgr_lock);
241 return;
242 }
243 list_del_init(&lgr->list); /* remove from smc_lgr_list */
244
245 if (!lgr->is_smcd && !lgr->terminating) {
246 bool do_wait = false;
247
248 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
249 struct smc_link *lnk = &lgr->lnk[i];
250 /* try to send del link msg, on err free immediately */
251 if (lnk->state == SMC_LNK_ACTIVE &&
252 !smcr_link_send_delete(lnk, true)) {
253 /* reschedule in case we never receive a resp */
254 smc_lgr_schedule_free_work(lgr);
255 do_wait = true;
256 }
257 }
258 if (do_wait) {
259 spin_unlock_bh(lgr_lock);
260 return; /* wait for resp, see smc_llc_rx_delete_link */
261 }
262 }
263 lgr->freeing = 1; /* this instance does the freeing, no new schedule */
264 spin_unlock_bh(lgr_lock);
265 cancel_delayed_work(&lgr->free_work);
266
267 if (lgr->is_smcd && !lgr->terminating)
268 smc_ism_signal_shutdown(lgr);
269 if (!lgr->is_smcd) {
270 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
271 struct smc_link *lnk = &lgr->lnk[i];
272
273 if (smc_link_usable(lnk))
274 lnk->state = SMC_LNK_INACTIVE;
275 }
276 wake_up_interruptible_all(&lgr->llc_waiter);
277 }
278 smc_lgr_free(lgr);
279 }
280
281 static void smc_lgr_terminate_work(struct work_struct *work)
282 {
283 struct smc_link_group *lgr = container_of(work, struct smc_link_group,
284 terminate_work);
285
286 __smc_lgr_terminate(lgr, true);
287 }
288
289 /* return next unique link id for the lgr */
290 static u8 smcr_next_link_id(struct smc_link_group *lgr)
291 {
292 u8 link_id;
293 int i;
294
295 while (1) {
296 link_id = ++lgr->next_link_id;
297 if (!link_id) /* skip zero as link_id */
298 link_id = ++lgr->next_link_id;
299 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
300 if (smc_link_usable(&lgr->lnk[i]) &&
301 lgr->lnk[i].link_id == link_id)
302 continue;
303 }
304 break;
305 }
306 return link_id;
307 }
308
309 static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
310 u8 link_idx, struct smc_init_info *ini)
311 {
312 u8 rndvec[3];
313 int rc;
314
315 get_device(&ini->ib_dev->ibdev->dev);
316 atomic_inc(&ini->ib_dev->lnk_cnt);
317 lnk->state = SMC_LNK_ACTIVATING;
318 lnk->link_id = smcr_next_link_id(lgr);
319 lnk->lgr = lgr;
320 lnk->link_idx = link_idx;
321 lnk->smcibdev = ini->ib_dev;
322 lnk->ibport = ini->ib_port;
323 lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
324 INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
325 if (!ini->ib_dev->initialized) {
326 rc = smc_ib_setup_per_ibdev(ini->ib_dev);
327 if (rc)
328 goto out;
329 }
330 get_random_bytes(rndvec, sizeof(rndvec));
331 lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
332 (rndvec[2] << 16);
333 rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
334 ini->vlan_id, lnk->gid, &lnk->sgid_index);
335 if (rc)
336 goto out;
337 rc = smc_llc_link_init(lnk);
338 if (rc)
339 goto out;
340 rc = smc_wr_alloc_link_mem(lnk);
341 if (rc)
342 goto clear_llc_lnk;
343 rc = smc_ib_create_protection_domain(lnk);
344 if (rc)
345 goto free_link_mem;
346 rc = smc_ib_create_queue_pair(lnk);
347 if (rc)
348 goto dealloc_pd;
349 rc = smc_wr_create_link(lnk);
350 if (rc)
351 goto destroy_qp;
352 return 0;
353
354 destroy_qp:
355 smc_ib_destroy_queue_pair(lnk);
356 dealloc_pd:
357 smc_ib_dealloc_protection_domain(lnk);
358 free_link_mem:
359 smc_wr_free_link_mem(lnk);
360 clear_llc_lnk:
361 smc_llc_link_clear(lnk);
362 out:
363 put_device(&ini->ib_dev->ibdev->dev);
364 memset(lnk, 0, sizeof(struct smc_link));
365 lnk->state = SMC_LNK_UNUSED;
366 if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
367 wake_up(&ini->ib_dev->lnks_deleted);
368 return rc;
369 }
370
371 /* create a new SMC link group */
372 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
373 {
374 struct smc_link_group *lgr;
375 struct list_head *lgr_list;
376 struct smc_link *lnk;
377 spinlock_t *lgr_lock;
378 u8 link_idx;
379 int rc = 0;
380 int i;
381
382 if (ini->is_smcd && ini->vlan_id) {
383 if (smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) {
384 rc = SMC_CLC_DECL_ISMVLANERR;
385 goto out;
386 }
387 }
388
389 lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
390 if (!lgr) {
391 rc = SMC_CLC_DECL_MEM;
392 goto ism_put_vlan;
393 }
394 lgr->is_smcd = ini->is_smcd;
395 lgr->sync_err = 0;
396 lgr->terminating = 0;
397 lgr->freefast = 0;
398 lgr->freeing = 0;
399 lgr->vlan_id = ini->vlan_id;
400 mutex_init(&lgr->sndbufs_lock);
401 mutex_init(&lgr->rmbs_lock);
402 rwlock_init(&lgr->conns_lock);
403 for (i = 0; i < SMC_RMBE_SIZES; i++) {
404 INIT_LIST_HEAD(&lgr->sndbufs[i]);
405 INIT_LIST_HEAD(&lgr->rmbs[i]);
406 }
407 lgr->next_link_id = 0;
408 smc_lgr_list.num += SMC_LGR_NUM_INCR;
409 memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
410 INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
411 INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
412 lgr->conns_all = RB_ROOT;
413 if (ini->is_smcd) {
414 /* SMC-D specific settings */
415 get_device(&ini->ism_dev->dev);
416 lgr->peer_gid = ini->ism_gid;
417 lgr->smcd = ini->ism_dev;
418 lgr_list = &ini->ism_dev->lgr_list;
419 lgr_lock = &lgr->smcd->lgr_lock;
420 lgr->peer_shutdown = 0;
421 atomic_inc(&ini->ism_dev->lgr_cnt);
422 } else {
423 /* SMC-R specific settings */
424 lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
425 memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
426 SMC_SYSTEMID_LEN);
427 memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
428 SMC_MAX_PNETID_LEN);
429 smc_llc_lgr_init(lgr, smc);
430
431 link_idx = SMC_SINGLE_LINK;
432 lnk = &lgr->lnk[link_idx];
433 rc = smcr_link_init(lgr, lnk, link_idx, ini);
434 if (rc)
435 goto free_lgr;
436 lgr_list = &smc_lgr_list.list;
437 lgr_lock = &smc_lgr_list.lock;
438 atomic_inc(&lgr_cnt);
439 }
440 smc->conn.lgr = lgr;
441 spin_lock_bh(lgr_lock);
442 list_add(&lgr->list, lgr_list);
443 spin_unlock_bh(lgr_lock);
444 return 0;
445
446 free_lgr:
447 kfree(lgr);
448 ism_put_vlan:
449 if (ini->is_smcd && ini->vlan_id)
450 smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
451 out:
452 if (rc < 0) {
453 if (rc == -ENOMEM)
454 rc = SMC_CLC_DECL_MEM;
455 else
456 rc = SMC_CLC_DECL_INTERR;
457 }
458 return rc;
459 }
460
461 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
462 struct smc_link_group *lgr)
463 {
464 int rc;
465
466 if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
467 /* unregister rmb with peer */
468 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
469 if (!rc) {
470 /* protect against smc_llc_cli_rkey_exchange() */
471 mutex_lock(&lgr->llc_conf_mutex);
472 smc_llc_do_delete_rkey(lgr, rmb_desc);
473 rmb_desc->is_conf_rkey = false;
474 mutex_unlock(&lgr->llc_conf_mutex);
475 smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
476 }
477 }
478
479 if (rmb_desc->is_reg_err) {
480 /* buf registration failed, reuse not possible */
481 mutex_lock(&lgr->rmbs_lock);
482 list_del(&rmb_desc->list);
483 mutex_unlock(&lgr->rmbs_lock);
484
485 smc_buf_free(lgr, true, rmb_desc);
486 } else {
487 rmb_desc->used = 0;
488 }
489 }
490
491 static void smc_buf_unuse(struct smc_connection *conn,
492 struct smc_link_group *lgr)
493 {
494 if (conn->sndbuf_desc)
495 conn->sndbuf_desc->used = 0;
496 if (conn->rmb_desc && lgr->is_smcd)
497 conn->rmb_desc->used = 0;
498 else if (conn->rmb_desc)
499 smcr_buf_unuse(conn->rmb_desc, lgr);
500 }
501
502 /* remove a finished connection from its link group */
503 void smc_conn_free(struct smc_connection *conn)
504 {
505 struct smc_link_group *lgr = conn->lgr;
506
507 if (!lgr)
508 return;
509 if (lgr->is_smcd) {
510 if (!list_empty(&lgr->list))
511 smc_ism_unset_conn(conn);
512 tasklet_kill(&conn->rx_tsklet);
513 } else {
514 smc_cdc_tx_dismiss_slots(conn);
515 }
516 if (!list_empty(&lgr->list)) {
517 smc_lgr_unregister_conn(conn);
518 smc_buf_unuse(conn, lgr); /* allow buffer reuse */
519 }
520
521 if (!lgr->conns_num)
522 smc_lgr_schedule_free_work(lgr);
523 }
524
525 /* unregister a link from a buf_desc */
526 static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
527 struct smc_link *lnk)
528 {
529 if (is_rmb)
530 buf_desc->is_reg_mr[lnk->link_idx] = false;
531 if (!buf_desc->is_map_ib[lnk->link_idx])
532 return;
533 if (is_rmb) {
534 if (buf_desc->mr_rx[lnk->link_idx]) {
535 smc_ib_put_memory_region(
536 buf_desc->mr_rx[lnk->link_idx]);
537 buf_desc->mr_rx[lnk->link_idx] = NULL;
538 }
539 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
540 } else {
541 smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
542 }
543 sg_free_table(&buf_desc->sgt[lnk->link_idx]);
544 buf_desc->is_map_ib[lnk->link_idx] = false;
545 }
546
547 /* unmap all buffers of lgr for a deleted link */
548 static void smcr_buf_unmap_lgr(struct smc_link *lnk)
549 {
550 struct smc_link_group *lgr = lnk->lgr;
551 struct smc_buf_desc *buf_desc, *bf;
552 int i;
553
554 for (i = 0; i < SMC_RMBE_SIZES; i++) {
555 mutex_lock(&lgr->rmbs_lock);
556 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
557 smcr_buf_unmap_link(buf_desc, true, lnk);
558 mutex_unlock(&lgr->rmbs_lock);
559 mutex_lock(&lgr->sndbufs_lock);
560 list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
561 list)
562 smcr_buf_unmap_link(buf_desc, false, lnk);
563 mutex_unlock(&lgr->sndbufs_lock);
564 }
565 }
566
567 static void smcr_rtoken_clear_link(struct smc_link *lnk)
568 {
569 struct smc_link_group *lgr = lnk->lgr;
570 int i;
571
572 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
573 lgr->rtokens[i][lnk->link_idx].rkey = 0;
574 lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
575 }
576 }
577
578 /* must be called under lgr->llc_conf_mutex lock */
579 void smcr_link_clear(struct smc_link *lnk)
580 {
581 struct smc_ib_device *smcibdev;
582
583 if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
584 return;
585 lnk->peer_qpn = 0;
586 smc_llc_link_clear(lnk);
587 smcr_buf_unmap_lgr(lnk);
588 smcr_rtoken_clear_link(lnk);
589 smc_ib_modify_qp_reset(lnk);
590 smc_wr_free_link(lnk);
591 smc_ib_destroy_queue_pair(lnk);
592 smc_ib_dealloc_protection_domain(lnk);
593 smc_wr_free_link_mem(lnk);
594 put_device(&lnk->smcibdev->ibdev->dev);
595 smcibdev = lnk->smcibdev;
596 memset(lnk, 0, sizeof(struct smc_link));
597 lnk->state = SMC_LNK_UNUSED;
598 if (!atomic_dec_return(&smcibdev->lnk_cnt))
599 wake_up(&smcibdev->lnks_deleted);
600 }
601
602 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
603 struct smc_buf_desc *buf_desc)
604 {
605 int i;
606
607 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
608 smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
609
610 if (buf_desc->pages)
611 __free_pages(buf_desc->pages, buf_desc->order);
612 kfree(buf_desc);
613 }
614
615 static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
616 struct smc_buf_desc *buf_desc)
617 {
618 if (is_dmb) {
619 /* restore original buf len */
620 buf_desc->len += sizeof(struct smcd_cdc_msg);
621 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
622 } else {
623 kfree(buf_desc->cpu_addr);
624 }
625 kfree(buf_desc);
626 }
627
628 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
629 struct smc_buf_desc *buf_desc)
630 {
631 if (lgr->is_smcd)
632 smcd_buf_free(lgr, is_rmb, buf_desc);
633 else
634 smcr_buf_free(lgr, is_rmb, buf_desc);
635 }
636
637 static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
638 {
639 struct smc_buf_desc *buf_desc, *bf_desc;
640 struct list_head *buf_list;
641 int i;
642
643 for (i = 0; i < SMC_RMBE_SIZES; i++) {
644 if (is_rmb)
645 buf_list = &lgr->rmbs[i];
646 else
647 buf_list = &lgr->sndbufs[i];
648 list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
649 list) {
650 list_del(&buf_desc->list);
651 smc_buf_free(lgr, is_rmb, buf_desc);
652 }
653 }
654 }
655
656 static void smc_lgr_free_bufs(struct smc_link_group *lgr)
657 {
658 /* free send buffers */
659 __smc_lgr_free_bufs(lgr, false);
660 /* free rmbs */
661 __smc_lgr_free_bufs(lgr, true);
662 }
663
664 /* remove a link group */
665 static void smc_lgr_free(struct smc_link_group *lgr)
666 {
667 int i;
668
669 smc_lgr_free_bufs(lgr);
670 if (lgr->is_smcd) {
671 if (!lgr->terminating) {
672 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
673 put_device(&lgr->smcd->dev);
674 }
675 if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
676 wake_up(&lgr->smcd->lgrs_deleted);
677 } else {
678 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
679 if (lgr->lnk[i].state != SMC_LNK_UNUSED)
680 smcr_link_clear(&lgr->lnk[i]);
681 }
682 smc_llc_lgr_clear(lgr);
683 if (!atomic_dec_return(&lgr_cnt))
684 wake_up(&lgrs_deleted);
685 }
686 kfree(lgr);
687 }
688
689 void smc_lgr_forget(struct smc_link_group *lgr)
690 {
691 struct list_head *lgr_list;
692 spinlock_t *lgr_lock;
693
694 lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
695 spin_lock_bh(lgr_lock);
696 /* do not use this link group for new connections */
697 if (!list_empty(lgr_list))
698 list_del_init(lgr_list);
699 spin_unlock_bh(lgr_lock);
700 }
701
702 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
703 {
704 int i;
705
706 for (i = 0; i < SMC_RMBE_SIZES; i++) {
707 struct smc_buf_desc *buf_desc;
708
709 list_for_each_entry(buf_desc, &lgr->rmbs[i], list) {
710 buf_desc->len += sizeof(struct smcd_cdc_msg);
711 smc_ism_unregister_dmb(lgr->smcd, buf_desc);
712 }
713 }
714 }
715
716 static void smc_sk_wake_ups(struct smc_sock *smc)
717 {
718 smc->sk.sk_write_space(&smc->sk);
719 smc->sk.sk_data_ready(&smc->sk);
720 smc->sk.sk_state_change(&smc->sk);
721 }
722
723 /* kill a connection */
724 static void smc_conn_kill(struct smc_connection *conn, bool soft)
725 {
726 struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
727
728 if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
729 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
730 else
731 smc_close_abort(conn);
732 conn->killed = 1;
733 smc->sk.sk_err = ECONNABORTED;
734 smc_sk_wake_ups(smc);
735 if (conn->lgr->is_smcd) {
736 smc_ism_unset_conn(conn);
737 if (soft)
738 tasklet_kill(&conn->rx_tsklet);
739 else
740 tasklet_unlock_wait(&conn->rx_tsklet);
741 } else {
742 smc_cdc_tx_dismiss_slots(conn);
743 }
744 smc_lgr_unregister_conn(conn);
745 smc_close_active_abort(smc);
746 }
747
748 static void smc_lgr_cleanup(struct smc_link_group *lgr)
749 {
750 int i;
751
752 if (lgr->is_smcd) {
753 smc_ism_signal_shutdown(lgr);
754 smcd_unregister_all_dmbs(lgr);
755 smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
756 put_device(&lgr->smcd->dev);
757 } else {
758 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
759 struct smc_link *lnk = &lgr->lnk[i];
760
761 if (smc_link_usable(lnk))
762 lnk->state = SMC_LNK_INACTIVE;
763 }
764 wake_up_interruptible_all(&lgr->llc_waiter);
765 }
766 }
767
768 /* terminate link group
769 * @soft: true if link group shutdown can take its time
770 * false if immediate link group shutdown is required
771 */
772 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
773 {
774 struct smc_connection *conn;
775 struct smc_sock *smc;
776 struct rb_node *node;
777
778 if (lgr->terminating)
779 return; /* lgr already terminating */
780 if (!soft)
781 cancel_delayed_work_sync(&lgr->free_work);
782 lgr->terminating = 1;
783
784 /* kill remaining link group connections */
785 read_lock_bh(&lgr->conns_lock);
786 node = rb_first(&lgr->conns_all);
787 while (node) {
788 read_unlock_bh(&lgr->conns_lock);
789 conn = rb_entry(node, struct smc_connection, alert_node);
790 smc = container_of(conn, struct smc_sock, conn);
791 sock_hold(&smc->sk); /* sock_put below */
792 lock_sock(&smc->sk);
793 smc_conn_kill(conn, soft);
794 release_sock(&smc->sk);
795 sock_put(&smc->sk); /* sock_hold above */
796 read_lock_bh(&lgr->conns_lock);
797 node = rb_first(&lgr->conns_all);
798 }
799 read_unlock_bh(&lgr->conns_lock);
800 smc_lgr_cleanup(lgr);
801 if (soft)
802 smc_lgr_schedule_free_work_fast(lgr);
803 else
804 smc_lgr_free(lgr);
805 }
806
807 /* unlink link group and schedule termination */
808 void smc_lgr_terminate_sched(struct smc_link_group *lgr)
809 {
810 spinlock_t *lgr_lock;
811
812 smc_lgr_list_head(lgr, &lgr_lock);
813 spin_lock_bh(lgr_lock);
814 if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
815 spin_unlock_bh(lgr_lock);
816 return; /* lgr already terminating */
817 }
818 list_del_init(&lgr->list);
819 spin_unlock_bh(lgr_lock);
820 schedule_work(&lgr->terminate_work);
821 }
822
823 /* Called when peer lgr shutdown (regularly or abnormally) is received */
824 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
825 {
826 struct smc_link_group *lgr, *l;
827 LIST_HEAD(lgr_free_list);
828
829 /* run common cleanup function and build free list */
830 spin_lock_bh(&dev->lgr_lock);
831 list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
832 if ((!peer_gid || lgr->peer_gid == peer_gid) &&
833 (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
834 if (peer_gid) /* peer triggered termination */
835 lgr->peer_shutdown = 1;
836 list_move(&lgr->list, &lgr_free_list);
837 }
838 }
839 spin_unlock_bh(&dev->lgr_lock);
840
841 /* schedule termination of the affected lgrs */
842 list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
843 list_del_init(&lgr->list);
844 schedule_work(&lgr->terminate_work);
845 }
846 }
847
848 /* Called when an SMCD device is removed or the smc module is unloaded */
849 void smc_smcd_terminate_all(struct smcd_dev *smcd)
850 {
851 struct smc_link_group *lgr, *lg;
852 LIST_HEAD(lgr_free_list);
853
854 spin_lock_bh(&smcd->lgr_lock);
855 list_splice_init(&smcd->lgr_list, &lgr_free_list);
856 list_for_each_entry(lgr, &lgr_free_list, list)
857 lgr->freeing = 1;
858 spin_unlock_bh(&smcd->lgr_lock);
859
860 list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
861 list_del_init(&lgr->list);
862 __smc_lgr_terminate(lgr, false);
863 }
864
865 if (atomic_read(&smcd->lgr_cnt))
866 wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
867 }
868
869 /* Called when an SMCR device is removed or the smc module is unloaded.
870 * If smcibdev is given, all SMCR link groups using this device are terminated.
871 * If smcibdev is NULL, all SMCR link groups are terminated.
872 */
873 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
874 {
875 struct smc_link_group *lgr, *lg;
876 LIST_HEAD(lgr_free_list);
877 int i;
878
879 spin_lock_bh(&smc_lgr_list.lock);
880 if (!smcibdev) {
881 list_splice_init(&smc_lgr_list.list, &lgr_free_list);
882 list_for_each_entry(lgr, &lgr_free_list, list)
883 lgr->freeing = 1;
884 } else {
885 list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
886 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
887 if (lgr->lnk[i].smcibdev == smcibdev)
888 smcr_link_down_cond_sched(&lgr->lnk[i]);
889 }
890 }
891 }
892 spin_unlock_bh(&smc_lgr_list.lock);
893
894 list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
895 list_del_init(&lgr->list);
896 __smc_lgr_terminate(lgr, false);
897 }
898
899 if (smcibdev) {
900 if (atomic_read(&smcibdev->lnk_cnt))
901 wait_event(smcibdev->lnks_deleted,
902 !atomic_read(&smcibdev->lnk_cnt));
903 } else {
904 if (atomic_read(&lgr_cnt))
905 wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
906 }
907 }
908
909 /* link is up - establish alternate link if applicable */
910 static void smcr_link_up(struct smc_link_group *lgr,
911 struct smc_ib_device *smcibdev, u8 ibport)
912 {
913 struct smc_link *link = NULL;
914
915 if (list_empty(&lgr->list) ||
916 lgr->type == SMC_LGR_SYMMETRIC ||
917 lgr->type == SMC_LGR_ASYMMETRIC_PEER)
918 return;
919
920 if (lgr->role == SMC_SERV) {
921 /* trigger local add link processing */
922 link = smc_llc_usable_link(lgr);
923 if (!link)
924 return;
925 /* tbd: call smc_llc_srv_add_link_local(link); */
926 } else {
927 /* invite server to start add link processing */
928 u8 gid[SMC_GID_SIZE];
929
930 if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
931 NULL))
932 return;
933 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
934 /* some other llc task is ongoing */
935 wait_event_interruptible_timeout(lgr->llc_waiter,
936 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
937 SMC_LLC_WAIT_TIME);
938 }
939 if (list_empty(&lgr->list) ||
940 !smc_ib_port_active(smcibdev, ibport))
941 return; /* lgr or device no longer active */
942 link = smc_llc_usable_link(lgr);
943 if (!link)
944 return;
945 smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
946 NULL, SMC_LLC_REQ);
947 }
948 }
949
950 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
951 {
952 struct smc_ib_up_work *ib_work;
953 struct smc_link_group *lgr, *n;
954
955 list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
956 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
957 SMC_MAX_PNETID_LEN) ||
958 lgr->type == SMC_LGR_SYMMETRIC ||
959 lgr->type == SMC_LGR_ASYMMETRIC_PEER)
960 continue;
961 ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
962 if (!ib_work)
963 continue;
964 INIT_WORK(&ib_work->work, smc_link_up_work);
965 ib_work->lgr = lgr;
966 ib_work->smcibdev = smcibdev;
967 ib_work->ibport = ibport;
968 schedule_work(&ib_work->work);
969 }
970 }
971
972 /* link is down - switch connections to alternate link,
973 * must be called under lgr->llc_conf_mutex lock
974 */
975 static void smcr_link_down(struct smc_link *lnk)
976 {
977 struct smc_link_group *lgr = lnk->lgr;
978 struct smc_link *to_lnk;
979 int del_link_id;
980
981 if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
982 return;
983
984 smc_ib_modify_qp_reset(lnk);
985 to_lnk = NULL;
986 /* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
987 if (!to_lnk) { /* no backup link available */
988 smcr_link_clear(lnk);
989 return;
990 }
991 lgr->type = SMC_LGR_SINGLE;
992 del_link_id = lnk->link_id;
993
994 if (lgr->role == SMC_SERV) {
995 /* trigger local delete link processing */
996 } else {
997 if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
998 /* another llc task is ongoing */
999 mutex_unlock(&lgr->llc_conf_mutex);
1000 wait_event_interruptible_timeout(lgr->llc_waiter,
1001 (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
1002 SMC_LLC_WAIT_TIME);
1003 mutex_lock(&lgr->llc_conf_mutex);
1004 }
1005 smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
1006 SMC_LLC_DEL_LOST_PATH);
1007 }
1008 }
1009
1010 /* must be called under lgr->llc_conf_mutex lock */
1011 void smcr_link_down_cond(struct smc_link *lnk)
1012 {
1013 if (smc_link_downing(&lnk->state))
1014 smcr_link_down(lnk);
1015 }
1016
1017 /* will get the lgr->llc_conf_mutex lock */
1018 void smcr_link_down_cond_sched(struct smc_link *lnk)
1019 {
1020 if (smc_link_downing(&lnk->state))
1021 schedule_work(&lnk->link_down_wrk);
1022 }
1023
1024 void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
1025 {
1026 struct smc_link_group *lgr, *n;
1027 int i;
1028
1029 list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
1030 if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
1031 SMC_MAX_PNETID_LEN))
1032 continue; /* lgr is not affected */
1033 if (list_empty(&lgr->list))
1034 continue;
1035 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1036 struct smc_link *lnk = &lgr->lnk[i];
1037
1038 if (smc_link_usable(lnk) &&
1039 lnk->smcibdev == smcibdev && lnk->ibport == ibport)
1040 smcr_link_down_cond_sched(lnk);
1041 }
1042 }
1043 }
1044
1045 static void smc_link_up_work(struct work_struct *work)
1046 {
1047 struct smc_ib_up_work *ib_work = container_of(work,
1048 struct smc_ib_up_work,
1049 work);
1050 struct smc_link_group *lgr = ib_work->lgr;
1051
1052 if (list_empty(&lgr->list))
1053 goto out;
1054 smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
1055 out:
1056 kfree(ib_work);
1057 }
1058
1059 static void smc_link_down_work(struct work_struct *work)
1060 {
1061 struct smc_link *link = container_of(work, struct smc_link,
1062 link_down_wrk);
1063 struct smc_link_group *lgr = link->lgr;
1064
1065 if (list_empty(&lgr->list))
1066 return;
1067 wake_up_interruptible_all(&lgr->llc_waiter);
1068 mutex_lock(&lgr->llc_conf_mutex);
1069 smcr_link_down(link);
1070 mutex_unlock(&lgr->llc_conf_mutex);
1071 }
1072
1073 /* Determine vlan of internal TCP socket.
1074 * @ini: the determined vlan id is stored into ini->vlan_id
1075 */
1076 int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
1077 {
1078 struct dst_entry *dst = sk_dst_get(clcsock->sk);
1079 struct net_device *ndev;
1080 int i, nest_lvl, rc = 0;
1081
1082 ini->vlan_id = 0;
1083 if (!dst) {
1084 rc = -ENOTCONN;
1085 goto out;
1086 }
1087 if (!dst->dev) {
1088 rc = -ENODEV;
1089 goto out_rel;
1090 }
1091
1092 ndev = dst->dev;
1093 if (is_vlan_dev(ndev)) {
1094 ini->vlan_id = vlan_dev_vlan_id(ndev);
1095 goto out_rel;
1096 }
1097
1098 rtnl_lock();
1099 nest_lvl = ndev->lower_level;
1100 for (i = 0; i < nest_lvl; i++) {
1101 struct list_head *lower = &ndev->adj_list.lower;
1102
1103 if (list_empty(lower))
1104 break;
1105 lower = lower->next;
1106 ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower);
1107 if (is_vlan_dev(ndev)) {
1108 ini->vlan_id = vlan_dev_vlan_id(ndev);
1109 break;
1110 }
1111 }
1112 rtnl_unlock();
1113
1114 out_rel:
1115 dst_release(dst);
1116 out:
1117 return rc;
1118 }
1119
1120 static bool smcr_lgr_match(struct smc_link_group *lgr,
1121 struct smc_clc_msg_local *lcl,
1122 enum smc_lgr_role role, u32 clcqpn)
1123 {
1124 int i;
1125
1126 if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
1127 lgr->role != role)
1128 return false;
1129
1130 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1131 if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
1132 continue;
1133 if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
1134 !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
1135 !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
1136 return true;
1137 }
1138 return false;
1139 }
1140
1141 static bool smcd_lgr_match(struct smc_link_group *lgr,
1142 struct smcd_dev *smcismdev, u64 peer_gid)
1143 {
1144 return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
1145 }
1146
1147 /* create a new SMC connection (and a new link group if necessary) */
1148 int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
1149 {
1150 struct smc_connection *conn = &smc->conn;
1151 struct list_head *lgr_list;
1152 struct smc_link_group *lgr;
1153 enum smc_lgr_role role;
1154 spinlock_t *lgr_lock;
1155 int rc = 0;
1156
1157 lgr_list = ini->is_smcd ? &ini->ism_dev->lgr_list : &smc_lgr_list.list;
1158 lgr_lock = ini->is_smcd ? &ini->ism_dev->lgr_lock : &smc_lgr_list.lock;
1159 ini->cln_first_contact = SMC_FIRST_CONTACT;
1160 role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
1161 if (role == SMC_CLNT && ini->srv_first_contact)
1162 /* create new link group as well */
1163 goto create;
1164
1165 /* determine if an existing link group can be reused */
1166 spin_lock_bh(lgr_lock);
1167 list_for_each_entry(lgr, lgr_list, list) {
1168 write_lock_bh(&lgr->conns_lock);
1169 if ((ini->is_smcd ?
1170 smcd_lgr_match(lgr, ini->ism_dev, ini->ism_gid) :
1171 smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) &&
1172 !lgr->sync_err &&
1173 lgr->vlan_id == ini->vlan_id &&
1174 (role == SMC_CLNT ||
1175 lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
1176 /* link group found */
1177 ini->cln_first_contact = SMC_REUSE_CONTACT;
1178 conn->lgr = lgr;
1179 rc = smc_lgr_register_conn(conn); /* add conn to lgr */
1180 write_unlock_bh(&lgr->conns_lock);
1181 if (!rc && delayed_work_pending(&lgr->free_work))
1182 cancel_delayed_work(&lgr->free_work);
1183 break;
1184 }
1185 write_unlock_bh(&lgr->conns_lock);
1186 }
1187 spin_unlock_bh(lgr_lock);
1188 if (rc)
1189 return rc;
1190
1191 if (role == SMC_CLNT && !ini->srv_first_contact &&
1192 ini->cln_first_contact == SMC_FIRST_CONTACT) {
1193 /* Server reuses a link group, but Client wants to start
1194 * a new one
1195 * send out_of_sync decline, reason synchr. error
1196 */
1197 return SMC_CLC_DECL_SYNCERR;
1198 }
1199
1200 create:
1201 if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
1202 rc = smc_lgr_create(smc, ini);
1203 if (rc)
1204 goto out;
1205 lgr = conn->lgr;
1206 write_lock_bh(&lgr->conns_lock);
1207 rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
1208 write_unlock_bh(&lgr->conns_lock);
1209 if (rc)
1210 goto out;
1211 }
1212 conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
1213 conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
1214 conn->urg_state = SMC_URG_READ;
1215 if (ini->is_smcd) {
1216 conn->rx_off = sizeof(struct smcd_cdc_msg);
1217 smcd_cdc_rx_init(conn); /* init tasklet for this conn */
1218 }
1219 #ifndef KERNEL_HAS_ATOMIC64
1220 spin_lock_init(&conn->acurs_lock);
1221 #endif
1222
1223 out:
1224 return rc;
1225 }
1226
1227 /* convert the RMB size into the compressed notation - minimum 16K.
1228 * In contrast to plain ilog2, this rounds towards the next power of 2,
1229 * so the socket application gets at least its desired sndbuf / rcvbuf size.
1230 */
1231 static u8 smc_compress_bufsize(int size)
1232 {
1233 u8 compressed;
1234
1235 if (size <= SMC_BUF_MIN_SIZE)
1236 return 0;
1237
1238 size = (size - 1) >> 14;
1239 compressed = ilog2(size) + 1;
1240 if (compressed >= SMC_RMBE_SIZES)
1241 compressed = SMC_RMBE_SIZES - 1;
1242 return compressed;
1243 }
1244
1245 /* convert the RMB size from compressed notation into integer */
1246 int smc_uncompress_bufsize(u8 compressed)
1247 {
1248 u32 size;
1249
1250 size = 0x00000001 << (((int)compressed) + 14);
1251 return (int)size;
1252 }
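/* Worked example for the compressed buffer-size notation above (this comment
 * is illustrative only): a request of 100000 bytes gives
 * (100000 - 1) >> 14 = 6 and ilog2(6) + 1 = 3, so smc_compress_bufsize()
 * returns 3; smc_uncompress_bufsize(3) then yields 1 << (3 + 14) = 131072
 * bytes (128KB), i.e. the request is rounded up to the next power of 2.
 */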
1253
1254 /* try to reuse a sndbuf or rmb description slot for a certain
1255 * buffer size; if not available, return NULL
1256 */
1257 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
1258 struct mutex *lock,
1259 struct list_head *buf_list)
1260 {
1261 struct smc_buf_desc *buf_slot;
1262
1263 mutex_lock(lock);
1264 list_for_each_entry(buf_slot, buf_list, list) {
1265 if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
1266 mutex_unlock(lock);
1267 return buf_slot;
1268 }
1269 }
1270 mutex_unlock(lock);
1271 return NULL;
1272 }
1273
1274 /* one of the conditions for announcing a receiver's current window size is
1275 * that it "results in a minimum increase in the window size of 10% of the
1276 * receive buffer space" [RFC7609]
1277 */
1278 static inline int smc_rmb_wnd_update_limit(int rmbe_size)
1279 {
1280 return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
1281 }
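/* Example for the 10% rule above: a 16384 byte RMB gives 16384 / 10 = 1638,
 * so a window update is only announced once at least 1638 bytes of receive
 * buffer space have become free again (assuming SOCK_MIN_SNDBUF / 2 is
 * larger than that on the build in question, so the 10% term is the minimum).
 */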
1282
1283 /* map an rmb buf to a link */
1284 static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
1285 struct smc_link *lnk)
1286 {
1287 int rc;
1288
1289 if (buf_desc->is_map_ib[lnk->link_idx])
1290 return 0;
1291
1292 rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
1293 if (rc)
1294 return rc;
1295 sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
1296 buf_desc->cpu_addr, buf_desc->len);
1297
1298 /* map sg table to DMA address */
1299 rc = smc_ib_buf_map_sg(lnk, buf_desc,
1300 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1301 /* SMC protocol depends on mapping to one DMA address only */
1302 if (rc != 1) {
1303 rc = -EAGAIN;
1304 goto free_table;
1305 }
1306
1307 /* create a new memory region for the RMB */
1308 if (is_rmb) {
1309 rc = smc_ib_get_memory_region(lnk->roce_pd,
1310 IB_ACCESS_REMOTE_WRITE |
1311 IB_ACCESS_LOCAL_WRITE,
1312 buf_desc, lnk->link_idx);
1313 if (rc)
1314 goto buf_unmap;
1315 smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
1316 }
1317 buf_desc->is_map_ib[lnk->link_idx] = true;
1318 return 0;
1319
1320 buf_unmap:
1321 smc_ib_buf_unmap_sg(lnk, buf_desc,
1322 is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1323 free_table:
1324 sg_free_table(&buf_desc->sgt[lnk->link_idx]);
1325 return rc;
1326 }
1327
1328 /* register a new rmb on IB device,
1329 * must be called under lgr->llc_conf_mutex lock
1330 */
1331 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
1332 {
1333 if (list_empty(&link->lgr->list))
1334 return -ENOLINK;
1335 if (!rmb_desc->is_reg_mr[link->link_idx]) {
1336 /* register memory region for new rmb */
1337 if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
1338 rmb_desc->is_reg_err = true;
1339 return -EFAULT;
1340 }
1341 rmb_desc->is_reg_mr[link->link_idx] = true;
1342 }
1343 return 0;
1344 }
1345
1346 static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
1347 struct list_head *lst, bool is_rmb)
1348 {
1349 struct smc_buf_desc *buf_desc, *bf;
1350 int rc = 0;
1351
1352 mutex_lock(lock);
1353 list_for_each_entry_safe(buf_desc, bf, lst, list) {
1354 if (!buf_desc->used)
1355 continue;
1356 rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
1357 if (rc)
1358 goto out;
1359 }
1360 out:
1361 mutex_unlock(lock);
1362 return rc;
1363 }
1364
1365 /* map all used buffers of lgr for a new link */
1366 int smcr_buf_map_lgr(struct smc_link *lnk)
1367 {
1368 struct smc_link_group *lgr = lnk->lgr;
1369 int i, rc = 0;
1370
1371 for (i = 0; i < SMC_RMBE_SIZES; i++) {
1372 rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
1373 &lgr->rmbs[i], true);
1374 if (rc)
1375 return rc;
1376 rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
1377 &lgr->sndbufs[i], false);
1378 if (rc)
1379 return rc;
1380 }
1381 return 0;
1382 }
1383
1384 /* register all used buffers of lgr for a new link,
1385 * must be called under lgr->llc_conf_mutex lock
1386 */
1387 int smcr_buf_reg_lgr(struct smc_link *lnk)
1388 {
1389 struct smc_link_group *lgr = lnk->lgr;
1390 struct smc_buf_desc *buf_desc, *bf;
1391 int i, rc = 0;
1392
1393 mutex_lock(&lgr->rmbs_lock);
1394 for (i = 0; i < SMC_RMBE_SIZES; i++) {
1395 list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
1396 if (!buf_desc->used)
1397 continue;
1398 rc = smcr_link_reg_rmb(lnk, buf_desc);
1399 if (rc)
1400 goto out;
1401 }
1402 }
1403 out:
1404 mutex_unlock(&lgr->rmbs_lock);
1405 return rc;
1406 }
1407
1408 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
1409 bool is_rmb, int bufsize)
1410 {
1411 struct smc_buf_desc *buf_desc;
1412
1413 /* try to alloc a new buffer */
1414 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1415 if (!buf_desc)
1416 return ERR_PTR(-ENOMEM);
1417
1418 buf_desc->order = get_order(bufsize);
1419 buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
1420 __GFP_NOMEMALLOC | __GFP_COMP |
1421 __GFP_NORETRY | __GFP_ZERO,
1422 buf_desc->order);
1423 if (!buf_desc->pages) {
1424 kfree(buf_desc);
1425 return ERR_PTR(-EAGAIN);
1426 }
1427 buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
1428 buf_desc->len = bufsize;
1429 return buf_desc;
1430 }
1431
1432 /* map buf_desc on all usable links,
1433 * unused buffers stay mapped as long as the link is up
1434 */
1435 static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
1436 struct smc_buf_desc *buf_desc, bool is_rmb)
1437 {
1438 int i, rc = 0;
1439
1440 /* protect against parallel link reconfiguration */
1441 mutex_lock(&lgr->llc_conf_mutex);
1442 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1443 struct smc_link *lnk = &lgr->lnk[i];
1444
1445 if (!smc_link_usable(lnk))
1446 continue;
1447 if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
1448 rc = -ENOMEM;
1449 goto out;
1450 }
1451 }
1452 out:
1453 mutex_unlock(&lgr->llc_conf_mutex);
1454 return rc;
1455 }
1456
1457 #define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
1458
1459 static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
1460 bool is_dmb, int bufsize)
1461 {
1462 struct smc_buf_desc *buf_desc;
1463 int rc;
1464
1465 if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES)
1466 return ERR_PTR(-EAGAIN);
1467
1468 /* try to alloc a new DMB */
1469 buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
1470 if (!buf_desc)
1471 return ERR_PTR(-ENOMEM);
1472 if (is_dmb) {
1473 rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
1474 if (rc) {
1475 kfree(buf_desc);
1476 return ERR_PTR(-EAGAIN);
1477 }
1478 buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
1479 /* CDC header stored in buf. So, pretend it was smaller */
1480 buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
1481 } else {
1482 buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
1483 __GFP_NOWARN | __GFP_NORETRY |
1484 __GFP_NOMEMALLOC);
1485 if (!buf_desc->cpu_addr) {
1486 kfree(buf_desc);
1487 return ERR_PTR(-EAGAIN);
1488 }
1489 buf_desc->len = bufsize;
1490 }
1491 return buf_desc;
1492 }
1493
1494 static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
1495 {
1496 struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
1497 struct smc_connection *conn = &smc->conn;
1498 struct smc_link_group *lgr = conn->lgr;
1499 struct list_head *buf_list;
1500 int bufsize, bufsize_short;
1501 struct mutex *lock; /* lock buffer list */
1502 int sk_buf_size;
1503
1504 if (is_rmb)
1505 /* use socket recv buffer size (w/o overhead) as start value */
1506 sk_buf_size = smc->sk.sk_rcvbuf / 2;
1507 else
1508 /* use socket send buffer size (w/o overhead) as start value */
1509 sk_buf_size = smc->sk.sk_sndbuf / 2;
1510
1511 for (bufsize_short = smc_compress_bufsize(sk_buf_size);
1512 bufsize_short >= 0; bufsize_short--) {
1513
1514 if (is_rmb) {
1515 lock = &lgr->rmbs_lock;
1516 buf_list = &lgr->rmbs[bufsize_short];
1517 } else {
1518 lock = &lgr->sndbufs_lock;
1519 buf_list = &lgr->sndbufs[bufsize_short];
1520 }
1521 bufsize = smc_uncompress_bufsize(bufsize_short);
1522 if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC)
1523 continue;
1524
1525 /* check for reusable slot in the link group */
1526 buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
1527 if (buf_desc) {
1528 memset(buf_desc->cpu_addr, 0, bufsize);
1529 break; /* found reusable slot */
1530 }
1531
1532 if (is_smcd)
1533 buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
1534 else
1535 buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize);
1536
1537 if (PTR_ERR(buf_desc) == -ENOMEM)
1538 break;
1539 if (IS_ERR(buf_desc))
1540 continue;
1541
1542 buf_desc->used = 1;
1543 mutex_lock(lock);
1544 list_add(&buf_desc->list, buf_list);
1545 mutex_unlock(lock);
1546 break; /* found */
1547 }
1548
1549 if (IS_ERR(buf_desc))
1550 return -ENOMEM;
1551
1552 if (!is_smcd) {
1553 if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
1554 smcr_buf_unuse(buf_desc, lgr);
1555 return -ENOMEM;
1556 }
1557 }
1558
1559 if (is_rmb) {
1560 conn->rmb_desc = buf_desc;
1561 conn->rmbe_size_short = bufsize_short;
1562 smc->sk.sk_rcvbuf = bufsize * 2;
1563 atomic_set(&conn->bytes_to_rcv, 0);
1564 conn->rmbe_update_limit =
1565 smc_rmb_wnd_update_limit(buf_desc->len);
1566 if (is_smcd)
1567 smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
1568 } else {
1569 conn->sndbuf_desc = buf_desc;
1570 smc->sk.sk_sndbuf = bufsize * 2;
1571 atomic_set(&conn->sndbuf_space, bufsize);
1572 }
1573 return 0;
1574 }
1575
1576 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
1577 {
1578 if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1579 return;
1580 smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1581 }
1582
1583 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
1584 {
1585 if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
1586 return;
1587 smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
1588 }
1589
1590 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
1591 {
1592 int i;
1593
1594 if (!conn->lgr || conn->lgr->is_smcd)
1595 return;
1596 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1597 if (!smc_link_usable(&conn->lgr->lnk[i]))
1598 continue;
1599 smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
1600 DMA_FROM_DEVICE);
1601 }
1602 }
1603
1604 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
1605 {
1606 int i;
1607
1608 if (!conn->lgr || conn->lgr->is_smcd)
1609 return;
1610 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
1611 if (!smc_link_usable(&conn->lgr->lnk[i]))
1612 continue;
1613 smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
1614 DMA_FROM_DEVICE);
1615 }
1616 }
1617
1618 /* create the send and receive buffer for an SMC socket;
1619 * receive buffers are called RMBs;
1620 * (even though the SMC protocol allows more than one RMB-element per RMB,
1621 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
1622  * extra RMB for every connection in a link group)
1623 */
1624 int smc_buf_create(struct smc_sock *smc, bool is_smcd)
1625 {
1626 int rc;
1627
1628 /* create send buffer */
1629 rc = __smc_buf_create(smc, is_smcd, false);
1630 if (rc)
1631 return rc;
1632 /* create rmb */
1633 rc = __smc_buf_create(smc, is_smcd, true);
1634 if (rc)
1635 smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
1636 return rc;
1637 }
1638
1639 static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
1640 {
1641 int i;
1642
1643 for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
1644 if (!test_and_set_bit(i, lgr->rtokens_used_mask))
1645 return i;
1646 }
1647 return -ENOSPC;
1648 }
1649
1650 static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
1651 u32 rkey)
1652 {
1653 int i;
1654
1655 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1656 if (test_bit(i, lgr->rtokens_used_mask) &&
1657 lgr->rtokens[i][lnk_idx].rkey == rkey)
1658 return i;
1659 }
1660 return -ENOENT;
1661 }
1662
1663 /* set rtoken for a new link to an existing rmb */
1664 void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
1665 __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
1666 {
1667 int rtok_idx;
1668
1669 rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
1670 if (rtok_idx == -ENOENT)
1671 return;
1672 lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
1673 lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
1674 }
1675
1676 /* set rtoken for a new link whose link_id is given */
1677 void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
1678 __be64 nw_vaddr, __be32 nw_rkey)
1679 {
1680 u64 dma_addr = be64_to_cpu(nw_vaddr);
1681 u32 rkey = ntohl(nw_rkey);
1682 bool found = false;
1683 int link_idx;
1684
1685 for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
1686 if (lgr->lnk[link_idx].link_id == link_id) {
1687 found = true;
1688 break;
1689 }
1690 }
1691 if (!found)
1692 return;
1693 lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
1694 lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
1695 }
1696
1697 /* add a new rtoken from peer */
1698 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
1699 {
1700 struct smc_link_group *lgr = smc_get_lgr(lnk);
1701 u64 dma_addr = be64_to_cpu(nw_vaddr);
1702 u32 rkey = ntohl(nw_rkey);
1703 int i;
1704
1705 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1706 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1707 lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
1708 test_bit(i, lgr->rtokens_used_mask)) {
1709 /* already in list */
1710 return i;
1711 }
1712 }
1713 i = smc_rmb_reserve_rtoken_idx(lgr);
1714 if (i < 0)
1715 return i;
1716 lgr->rtokens[i][lnk->link_idx].rkey = rkey;
1717 lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
1718 return i;
1719 }
1720
1721 /* delete an rtoken from all links */
1722 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
1723 {
1724 struct smc_link_group *lgr = smc_get_lgr(lnk);
1725 u32 rkey = ntohl(nw_rkey);
1726 int i, j;
1727
1728 for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
1729 if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
1730 test_bit(i, lgr->rtokens_used_mask)) {
1731 for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
1732 lgr->rtokens[i][j].rkey = 0;
1733 lgr->rtokens[i][j].dma_addr = 0;
1734 }
1735 clear_bit(i, lgr->rtokens_used_mask);
1736 return 0;
1737 }
1738 }
1739 return -ENOENT;
1740 }
1741
1742 /* save rkey and dma_addr received from peer during clc handshake */
1743 int smc_rmb_rtoken_handling(struct smc_connection *conn,
1744 struct smc_link *lnk,
1745 struct smc_clc_msg_accept_confirm *clc)
1746 {
1747 conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
1748 clc->rmb_rkey);
1749 if (conn->rtoken_idx < 0)
1750 return conn->rtoken_idx;
1751 return 0;
1752 }
1753
1754 static void smc_core_going_away(void)
1755 {
1756 struct smc_ib_device *smcibdev;
1757 struct smcd_dev *smcd;
1758
1759 spin_lock(&smc_ib_devices.lock);
1760 list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
1761 int i;
1762
1763 for (i = 0; i < SMC_MAX_PORTS; i++)
1764 set_bit(i, smcibdev->ports_going_away);
1765 }
1766 spin_unlock(&smc_ib_devices.lock);
1767
1768 spin_lock(&smcd_dev_list.lock);
1769 list_for_each_entry(smcd, &smcd_dev_list.list, list) {
1770 smcd->going_away = 1;
1771 }
1772 spin_unlock(&smcd_dev_list.lock);
1773 }
1774
1775 /* Clean up all SMC link groups */
1776 static void smc_lgrs_shutdown(void)
1777 {
1778 struct smcd_dev *smcd;
1779
1780 smc_core_going_away();
1781
1782 smc_smcr_terminate_all(NULL);
1783
1784 spin_lock(&smcd_dev_list.lock);
1785 list_for_each_entry(smcd, &smcd_dev_list.list, list)
1786 smc_smcd_terminate_all(smcd);
1787 spin_unlock(&smcd_dev_list.lock);
1788 }
1789
1790 static int smc_core_reboot_event(struct notifier_block *this,
1791 unsigned long event, void *ptr)
1792 {
1793 smc_lgrs_shutdown();
1794 smc_ib_unregister_client();
1795 return 0;
1796 }
1797
1798 static struct notifier_block smc_reboot_notifier = {
1799 .notifier_call = smc_core_reboot_event,
1800 };
1801
1802 int __init smc_core_init(void)
1803 {
1804 return register_reboot_notifier(&smc_reboot_notifier);
1805 }
1806
1807 /* Called (from smc_exit) when module is removed */
1808 void smc_core_exit(void)
1809 {
1810 unregister_reboot_notifier(&smc_reboot_notifier);
1811 smc_lgrs_shutdown();
1812 }