1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * CLC (connection layer control) handshake over initial TCP socket to
6 * prepare for RDMA traffic
8 * Copyright IBM Corp. 2016
10 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
11 */
14 #include <linux/if_ether.h>
15 #include <linux/sched/signal.h>
25 /* Wait for data on the tcp-socket, analyze received data
26 * Returns:
27 * 0 if success and it was not a decline that we received.
28 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
29 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
30 */
// Receive one CLC message from the internal TCP socket (smc->clcsock) into
// buf and validate it.
//
// Visible flow: the receive timeout of the clcsock is saved and replaced by
// CLC_WAIT_TIME, the message header is peeked, the announced length and the
// header fields are validated, the complete message is received, a decline
// is mapped to SMC_CLC_DECL_REPLY, and the saved timeout is written back.
//
// Parameters:
//   smc    - SMC socket; its clcsock is read from and smc->sk.sk_err is set
//            on the visible failure paths
//   buf    - destination buffer, also interpreted as struct smc_clc_msg_hdr
//   buflen - not referenced by any line visible in this listing
//   The rest of the parameter list is cut off here; expected_type is used
//   in the header validation below.
//
// Values assigned to reason_code in the visible lines: the negated
// clc_sk->sk_err, -ECONNRESET, -EPROTO, SMC_CLC_DECL_REPLY.
//
// NOTE(review): this listing is lossy. The embedded original line numbers
// skip (for example 31 -> 34, 56 -> 60, 66 -> 70), so braces, several
// declarations, some guards and the return statement are not shown.
31 int smc_clc_wait_msg(struct smc_sock
*smc
, void *buf
, int buflen
,
// Remember the current receive timeout of the clcsock; it is written back
// at the end of this function.
34 long rcvtimeo
= smc
->clcsock
->sk
->sk_rcvtimeo
;
35 struct sock
*clc_sk
= smc
->clcsock
->sk
;
// The caller buffer doubles as the CLC header view used for validation.
36 struct smc_clc_msg_hdr
*clcm
= buf
;
37 struct msghdr msg
= {NULL
, 0};
43 /* peek the first few bytes to determine length of data to receive
44 * so we don't consume any subsequent CLC message or payload data
45 * in the TCP byte stream
// NOTE(review): the closing delimiter of the comment above is not present
// in this listing.
// First receive: peek exactly sizeof(struct smc_clc_msg_hdr) bytes while
// the clcsock receive timeout is set to CLC_WAIT_TIME.
49 krflags
= MSG_PEEK
| MSG_WAITALL
;
50 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
51 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1,
52 sizeof(struct smc_clc_msg_hdr
), krflags
);
// A pending signal is reported as EINTR on both the clcsock and the SMC
// socket.
53 if (signal_pending(current
)) {
55 clc_sk
->sk_err
= EINTR
;
56 smc
->sk
.sk_err
= EINTR
;
// Propagate an error recorded on the clcsock to the SMC socket and return
// it negated. NOTE(review): the guard for these two assignments is not
// shown in this listing.
60 reason_code
= -clc_sk
->sk_err
;
61 smc
->sk
.sk_err
= clc_sk
->sk_err
;
64 if (!len
) { /* peer has performed orderly shutdown */
65 smc
->sk
.sk_err
= ECONNRESET
;
66 reason_code
= -ECONNRESET
;
// NOTE(review): guard not shown; storing -len as sk_err suggests this runs
// for a negative len - confirm against the full source.
70 smc
->sk
.sk_err
= -len
;
// Validate the peeked header: enough bytes received, announced length
// between the decline and accept/confirm message sizes, matching
// eyecatcher, and a type that is either DECLINE or the expected one.
// Any violation is reported as EPROTO.
// NOTE(review): no visible line compares datlen with buflen. Confirm that
// every caller buffer can hold sizeof(struct smc_clc_msg_accept_confirm)
// bytes or that such a check exists in the lines missing here.
74 datlen
= ntohs(clcm
->length
);
75 if ((len
< sizeof(struct smc_clc_msg_hdr
)) ||
76 (datlen
< sizeof(struct smc_clc_msg_decline
)) ||
77 (datlen
> sizeof(struct smc_clc_msg_accept_confirm
)) ||
78 memcmp(clcm
->eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
)) ||
79 ((clcm
->type
!= SMC_CLC_DECLINE
) &&
80 (clcm
->type
!= expected_type
))) {
81 smc
->sk
.sk_err
= EPROTO
;
82 reason_code
= -EPROTO
;
86 /* receive the complete CLC message */
// Second receive: no MSG_PEEK this time, datlen bytes are requested.
89 memset(&msg
, 0, sizeof(struct msghdr
));
90 krflags
= MSG_WAITALL
;
91 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1, datlen
, krflags
);
// NOTE(review): the guard for this EPROTO path is not shown; presumably it
// covers a receive that returned fewer than datlen bytes - confirm.
93 smc
->sk
.sk_err
= EPROTO
;
94 reason_code
= -EPROTO
;
// A received decline yields SMC_CLC_DECL_REPLY. If the decline header flag
// is set, the link group is marked with sync_err and terminated.
97 if (clcm
->type
== SMC_CLC_DECLINE
) {
98 reason_code
= SMC_CLC_DECL_REPLY
;
99 if (((struct smc_clc_msg_decline
*)buf
)->hdr
.flag
) {
100 smc
->conn
.lgr
->sync_err
= true;
101 smc_lgr_terminate(smc
->conn
.lgr
);
// Write back the receive timeout saved at function entry.
106 smc
->clcsock
->sk
->sk_rcvtimeo
= rcvtimeo
;
110 /* send CLC DECLINE message across internal TCP socket */
// Builds a struct smc_clc_msg_decline on the stack and sends it with
// kernel_sendmsg on smc->clcsock.
//
// Parameters:
//   smc            - SMC socket whose clcsock carries the message;
//                    smc->sk.sk_err is set when the send falls short
//   peer_diag_info - diagnosis code, stored in network byte order in
//                    peer_diagnosis; the value SMC_CLC_DECL_SYNCERR also
//                    sets the header flag to 1
//
// NOTE(review): this listing is lossy. The declarations of msg, vec and
// len, the braces and the return statement are not shown.
111 int smc_clc_send_decline(struct smc_sock
*smc
, u32 peer_diag_info
)
113 struct smc_clc_msg_decline dclc
;
// Start from an all-zero message, then fill header, sender id, diagnosis
// and trailer. The eyecatcher is written to both header and trailer.
118 memset(&dclc
, 0, sizeof(dclc
));
119 memcpy(dclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
120 dclc
.hdr
.type
= SMC_CLC_DECLINE
;
121 dclc
.hdr
.length
= htons(sizeof(struct smc_clc_msg_decline
));
122 dclc
.hdr
.version
= SMC_CLC_V1
;
123 dclc
.hdr
.flag
= (peer_diag_info
== SMC_CLC_DECL_SYNCERR
) ? 1 : 0;
124 memcpy(dclc
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
125 dclc
.peer_diagnosis
= htonl(peer_diag_info
);
126 memcpy(dclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
// Send the whole message as a single kvec.
128 memset(&msg
, 0, sizeof(msg
));
129 vec
.iov_base
= &dclc
;
130 vec
.iov_len
= sizeof(struct smc_clc_msg_decline
);
131 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1,
132 sizeof(struct smc_clc_msg_decline
));
// A send shorter than the message is recorded as EPROTO.
// NOTE(review): if len is a signed int (declaration not shown), a negative
// len is converted to a large unsigned value in this comparison, so the
// test is false for send errors; the -len assignment below presumably
// handles that case under a guard that is missing from this listing.
133 if (len
< sizeof(struct smc_clc_msg_decline
))
134 smc
->sk
.sk_err
= EPROTO
;
136 smc
->sk
.sk_err
= -len
;
140 /* send CLC PROPOSAL message across internal TCP socket */
// Builds a struct smc_clc_msg_proposal on the stack and sends it with
// kernel_sendmsg on smc->clcsock.
//
// Parameters:
//   smc      - SMC socket whose clcsock carries the message;
//              smc->sk.sk_err is set on the visible send failure paths
//   smcibdev - IB device whose gid and mac entries are copied into the
//              message
//   The rest of the parameter list is cut off in this listing; ibport is
//   used below as an index (ibport - 1) into the gid and mac arrays.
//
// Values visible for the result: SMC_CLC_DECL_CNFERR, the negated
// smc->sk.sk_err, and -ENETUNREACH for a short send.
//
// NOTE(review): this listing is lossy. Braces, the declarations of msg,
// vec, len, rc and reason_code, some guards and the final return are not
// shown.
141 int smc_clc_send_proposal(struct smc_sock
*smc
,
142 struct smc_ib_device
*smcibdev
,
145 struct smc_clc_msg_proposal pclc
;
151 /* send SMC Proposal CLC message */
// Zeroed message; header with eyecatcher, type, length in network byte
// order and version.
152 memset(&pclc
, 0, sizeof(pclc
));
153 memcpy(pclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
154 pclc
.hdr
.type
= SMC_CLC_PROPOSAL
;
155 pclc
.hdr
.length
= htons(sizeof(pclc
));
156 pclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
// Local identification: system id plus the gid and mac of the selected
// port. The index ibport - 1 suggests port numbers start at 1 - TODO
// confirm.
157 memcpy(pclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
158 memcpy(&pclc
.lcl
.gid
, &smcibdev
->gid
[ibport
- 1], SMC_GID_SIZE
);
159 memcpy(&pclc
.lcl
.mac
, &smcibdev
->mac
[ibport
- 1], ETH_ALEN
);
161 /* determine subnet and mask from internal TCP socket */
// NOTE(review): the remaining arguments of this call and the guard for the
// SMC_CLC_DECL_CNFERR return are not shown in this listing.
162 rc
= smc_netinfo_by_tcpsk(smc
->clcsock
, &pclc
.outgoing_subnet
,
165 return SMC_CLC_DECL_CNFERR
; /* configuration error */
166 memcpy(pclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
// Send the whole message as a single kvec.
167 memset(&msg
, 0, sizeof(msg
));
168 vec
.iov_base
= &pclc
;
169 vec
.iov_len
= sizeof(pclc
);
170 /* due to the few bytes needed for clc-handshake this cannot block */
171 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(pclc
));
// Error mapping: the first branch (guard not shown) copies the clcsock
// error to the SMC socket and returns it negated; a short send is reported
// as ENETUNREACH. The short-send test casts sizeof to int, which keeps the
// comparison with len signed.
173 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
174 reason_code
= -smc
->sk
.sk_err
;
175 } else if (len
< (int)sizeof(pclc
)) {
176 reason_code
= -ENETUNREACH
;
177 smc
->sk
.sk_err
= -reason_code
;
183 /* send CLC CONFIRM message across internal TCP socket */
// Builds a struct smc_clc_msg_accept_confirm of type SMC_CLC_CONFIRM on
// the stack from the connection and its single link, and sends it with
// kernel_sendmsg on smc->clcsock.
//
// Parameters:
//   smc - SMC socket; smc->conn supplies the link group, RMB descriptor
//         and alert token, and smc->sk.sk_err is set on the visible send
//         failure paths
//
// Values visible for reason_code: -ENETUNREACH and the negated
// smc->sk.sk_err.
//
// NOTE(review): this listing is lossy. Braces, the declarations of msg,
// vec, len and reason_code, some guards and the return are not shown.
184 int smc_clc_send_confirm(struct smc_sock
*smc
)
186 struct smc_connection
*conn
= &smc
->conn
;
187 struct smc_clc_msg_accept_confirm cclc
;
188 struct smc_link
*link
;
// All link related fields below come from the SMC_SINGLE_LINK entry of the
// link group.
194 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
195 /* send SMC Confirm CLC msg */
196 memset(&cclc
, 0, sizeof(cclc
));
197 memcpy(cclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
198 cclc
.hdr
.type
= SMC_CLC_CONFIRM
;
199 cclc
.hdr
.length
= htons(sizeof(cclc
));
200 cclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
201 memcpy(cclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
// NOTE(review): the final argument line of this memcpy is missing from the
// listing.
202 memcpy(&cclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
204 memcpy(&cclc
.lcl
.mac
, &link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
205 hton24(cclc
.qpn
, link
->roce_qp
->qp_num
);
// NOTE(review): the left-hand side of this htonl expression is on a line
// missing from the listing; the value is the rkey of the receive memory
// region for the single link.
207 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
208 cclc
.conn_idx
= 1; /* for now: 1 RMB = 1 RMBE */
209 cclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
// The confirm message carries the smaller of the local path MTU and the
// peer MTU.
210 cclc
.qp_mtu
= min(link
->path_mtu
, link
->peer_mtu
);
211 cclc
.rmbe_size
= conn
->rmbe_size_short
;
212 cclc
.rmb_dma_addr
= cpu_to_be64(
213 (u64
)sg_dma_address(conn
->rmb_desc
->sgt
[SMC_SINGLE_LINK
].sgl
));
214 hton24(cclc
.psn
, link
->psn_initial
);
216 memcpy(cclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
// Send the whole message as a single kvec.
218 memset(&msg
, 0, sizeof(msg
));
219 vec
.iov_base
= &cclc
;
220 vec
.iov_len
= sizeof(cclc
);
221 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(cclc
));
// NOTE(review): unlike the short-send test in smc_clc_send_proposal, this
// comparison has no (int) cast on sizeof. If len is a signed int
// (declaration not shown), a negative len is converted to a large unsigned
// value and the test is false, so a send error would skip the whole error
// mapping below - confirm and consider the cast.
222 if (len
< sizeof(cclc
)) {
// Two error mappings follow; the guards that select between them are not
// shown. The first reports ENETUNREACH, the second copies the clcsock
// error to the SMC socket and returns it negated.
224 reason_code
= -ENETUNREACH
;
225 smc
->sk
.sk_err
= -reason_code
;
227 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
228 reason_code
= -smc
->sk
.sk_err
;
234 /* send CLC ACCEPT message across internal TCP socket */
// Builds a struct smc_clc_msg_accept_confirm of type SMC_CLC_ACCEPT on the
// stack from the connection and its single link, and sends it with
// kernel_sendmsg on new_smc->clcsock.
//
// Parameters:
//   new_smc           - SMC socket; new_smc->conn supplies the link group,
//                       RMB descriptor and alert token, and
//                       new_smc->sk.sk_err is set on the visible send
//                       failure paths
//   srv_first_contact - tested below; the statement it guards is missing
//                       from this listing
//
// On a failed send rc is taken from sock_error(&new_smc->sk).
//
// NOTE(review): this listing is lossy and ends before the function does.
// Braces, the declarations of msg, vec, len and rc, some guards and the
// return are not shown.
235 int smc_clc_send_accept(struct smc_sock
*new_smc
, int srv_first_contact
)
237 struct smc_connection
*conn
= &new_smc
->conn
;
238 struct smc_clc_msg_accept_confirm aclc
;
239 struct smc_link
*link
;
// All link related fields below come from the SMC_SINGLE_LINK entry of the
// link group.
245 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
246 memset(&aclc
, 0, sizeof(aclc
));
247 memcpy(aclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
248 aclc
.hdr
.type
= SMC_CLC_ACCEPT
;
249 aclc
.hdr
.length
= htons(sizeof(aclc
));
250 aclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
// NOTE(review): the body of this if is on a line missing from the listing;
// presumably it marks the message as a first contact - confirm.
251 if (srv_first_contact
)
253 memcpy(aclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
// NOTE(review): the final argument line of this memcpy is missing from the
// listing.
254 memcpy(&aclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
256 memcpy(&aclc
.lcl
.mac
, link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
257 hton24(aclc
.qpn
, link
->roce_qp
->qp_num
);
// NOTE(review): the left-hand side of this htonl expression is on a line
// missing from the listing; the value is the rkey of the receive memory
// region for the single link.
259 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
260 aclc
.conn_idx
= 1; /* as long as 1 RMB = 1 RMBE */
261 aclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
// The accept message carries the local path MTU as is.
262 aclc
.qp_mtu
= link
->path_mtu
;
// NOTE(review): the next statement ends with a comma, not a semicolon, so
// it is joined to the rmb_dma_addr assignment by the comma operator. Both
// assignments still execute in order, but a semicolon is presumably what
// was intended.
263 aclc
.rmbe_size
= conn
->rmbe_size_short
,
264 aclc
.rmb_dma_addr
= cpu_to_be64(
265 (u64
)sg_dma_address(conn
->rmb_desc
->sgt
[SMC_SINGLE_LINK
].sgl
));
266 hton24(aclc
.psn
, link
->psn_initial
);
267 memcpy(aclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
// Send the whole message as a single kvec.
269 memset(&msg
, 0, sizeof(msg
));
270 vec
.iov_base
= &aclc
;
271 vec
.iov_len
= sizeof(aclc
);
272 len
= kernel_sendmsg(new_smc
->clcsock
, &msg
, &vec
, 1, sizeof(aclc
));
// NOTE(review): unlike the short-send test in smc_clc_send_proposal, this
// comparison has no (int) cast on sizeof. If len is a signed int
// (declaration not shown), a negative len is converted to a large unsigned
// value and the test is false, so a send error would skip the error
// handling below - confirm and consider the cast.
273 if (len
< sizeof(aclc
)) {
// Two error mappings follow; the guards that select between them are not
// shown. The first records EPROTO, the second copies the clcsock error to
// the SMC socket. rc is then fetched with sock_error.
275 new_smc
->sk
.sk_err
= EPROTO
;
277 new_smc
->sk
.sk_err
= new_smc
->clcsock
->sk
->sk_err
;
278 rc
= sock_error(&new_smc
->sk
);