2 * Shared Memory Communications over RDMA (SMC-R) and RoCE
4 * CLC (connection layer control) handshake over initial TCP socket to
5 * prepare for RDMA traffic
7 * Copyright IBM Corp. 2016
9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
13 #include <linux/if_ether.h>
/*
 * smc_clc_wait_msg() - receive one CLC message from the internal TCP socket
 * into buf.
 *
 * NOTE(review): several source lines of this function are not visible in
 * this excerpt (local declarations, branch conditions, closing braces and
 * the terminator of the comment that follows), so the summary below covers
 * only the statements that are visible.
 *
 * Visible flow:
 *  - peek sizeof(struct smc_clc_msg_hdr) bytes with MSG_PEEK | MSG_WAITALL
 *    after setting the clcsock receive timeout to CLC_WAIT_TIME;
 *  - record EINTR, the clcsock sk_err, ECONNRESET (zero-length read) or
 *    -len in smc->sk.sk_err;
 *  - check the peeked length, the announced message length, the eyecatcher
 *    and the message type, recording EPROTO on a mismatch;
 *  - receive the complete message (datlen bytes, MSG_WAITALL);
 *  - for a DECLINE message set reason_code to SMC_CLC_DECL_REPLY and, when
 *    the peer diagnosis is SMC_CLC_DECL_SYNCERR, set sync_err on the
 *    connection's link group.
 */
22 /* Wait for data on the tcp-socket, analyze received data
24 * 0 if success and it was not a decline that we received.
25 * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
26 * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
28 int smc_clc_wait_msg(struct smc_sock
*smc
, void *buf
, int buflen
,
31 struct sock
*clc_sk
= smc
->clcsock
->sk
;
32 struct smc_clc_msg_hdr
*clcm
= buf
;
33 struct msghdr msg
= {NULL
, 0};
39 /* peek the first few bytes to determine length of data to receive
40 * so we don't consume any subsequent CLC message or payload data
41 * in the TCP byte stream
45 krflags
= MSG_PEEK
| MSG_WAITALL
;
46 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
47 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1,
48 sizeof(struct smc_clc_msg_hdr
), krflags
);
49 if (signal_pending(current
)) {
51 clc_sk
->sk_err
= EINTR
;
52 smc
->sk
.sk_err
= EINTR
;
56 reason_code
= -clc_sk
->sk_err
;
57 smc
->sk
.sk_err
= clc_sk
->sk_err
;
60 if (!len
) { /* peer has performed orderly shutdown */
61 smc
->sk
.sk_err
= ECONNRESET
;
62 reason_code
= -ECONNRESET
;
/* presumably the len < 0 case (guard not visible here): sk_err receives
 * the negated, i.e. positive, value
 */
66 smc
->sk
.sk_err
= -len
;
/* message length announced in the peeked header, in host byte order */
70 datlen
= ntohs(clcm
->length
);
/* reject a short peek, an announced length outside the range
 * [sizeof decline message, sizeof accept/confirm message], a wrong
 * eyecatcher, or a type that is neither DECLINE nor expected_type
 */
71 if ((len
< sizeof(struct smc_clc_msg_hdr
)) ||
72 (datlen
< sizeof(struct smc_clc_msg_decline
)) ||
73 (datlen
> sizeof(struct smc_clc_msg_accept_confirm
)) ||
74 memcmp(clcm
->eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
)) ||
75 ((clcm
->type
!= SMC_CLC_DECLINE
) &&
76 (clcm
->type
!= expected_type
))) {
77 smc
->sk
.sk_err
= EPROTO
;
78 reason_code
= -EPROTO
;
82 /* receive the complete CLC message */
85 memset(&msg
, 0, sizeof(struct msghdr
));
/* no MSG_PEEK this time: the datlen bytes are consumed from the socket */
86 krflags
= MSG_WAITALL
;
87 smc
->clcsock
->sk
->sk_rcvtimeo
= CLC_WAIT_TIME
;
88 len
= kernel_recvmsg(smc
->clcsock
, &msg
, &vec
, 1, datlen
, krflags
);
/* NOTE(review): the check guarding this EPROTO path is not visible in
 * this excerpt
 */
90 smc
->sk
.sk_err
= EPROTO
;
91 reason_code
= -EPROTO
;
/* a received DECLINE is reported through reason_code as SMC_CLC_DECL_REPLY */
94 if (clcm
->type
== SMC_CLC_DECLINE
) {
95 reason_code
= SMC_CLC_DECL_REPLY
;
/* peer diagnosis SMC_CLC_DECL_SYNCERR: set sync_err on the link group */
96 if (ntohl(((struct smc_clc_msg_decline
*)buf
)->peer_diagnosis
)
97 == SMC_CLC_DECL_SYNCERR
)
98 smc
->conn
.lgr
->sync_err
= true;
/*
 * smc_clc_send_decline() - build a struct smc_clc_msg_decline and send it on
 * smc->clcsock.
 *
 * The message is zeroed, then filled with the SMC eyecatcher in header and
 * trailer, type SMC_CLC_DECLINE, its own size as length (network byte
 * order), version SMC_CLC_V1, the local system id and peer_diag_info
 * (network byte order). hdr.flag is 1 when out_of_sync is non-zero.
 *
 * NOTE(review): parts of the signature, the local declarations and the
 * return statement are not visible in this excerpt.
 */
105 /* send CLC DECLINE message across internal TCP socket */
106 int smc_clc_send_decline(struct smc_sock
*smc
, u32 peer_diag_info
,
109 struct smc_clc_msg_decline dclc
;
114 memset(&dclc
, 0, sizeof(dclc
));
115 memcpy(dclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
116 dclc
.hdr
.type
= SMC_CLC_DECLINE
;
117 dclc
.hdr
.length
= htons(sizeof(struct smc_clc_msg_decline
));
118 dclc
.hdr
.version
= SMC_CLC_V1
;
119 dclc
.hdr
.flag
= out_of_sync
? 1 : 0;
120 memcpy(dclc
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
121 dclc
.peer_diagnosis
= htonl(peer_diag_info
);
122 memcpy(dclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
/* single-element vector covering the whole decline message */
124 memset(&msg
, 0, sizeof(msg
));
125 vec
.iov_base
= &dclc
;
126 vec
.iov_len
= sizeof(struct smc_clc_msg_decline
);
127 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1,
128 sizeof(struct smc_clc_msg_decline
));
/* NOTE(review): if len is a signed int (its declaration is not visible
 * here), a negative len is converted to the unsigned type of sizeof for
 * this comparison and does not compare as smaller, so EPROTO is only set
 * for short, non-negative results - confirm.
 */
129 if (len
< sizeof(struct smc_clc_msg_decline
))
130 smc
->sk
.sk_err
= EPROTO
;
/* guard not visible here; presumably the len < 0 case, storing the
 * negated value in sk_err
 */
132 smc
->sk
.sk_err
= -len
;
/*
 * smc_clc_send_proposal() - build a struct smc_clc_msg_proposal and send it
 * on smc->clcsock.
 *
 * The message is zeroed, then filled with the SMC eyecatcher in header and
 * trailer, type SMC_CLC_PROPOSAL, its own size as length (network byte
 * order), version SMC_CLC_V1, the local system id, and the gid and mac
 * entries of smcibdev selected by ibport - 1. The outgoing subnet is
 * obtained through smc_netinfo_by_tcpsk() on the clcsock.
 *
 * NOTE(review): parts of the signature, the local declarations, some branch
 * conditions and the final return are not visible in this excerpt.
 */
136 /* send CLC PROPOSAL message across internal TCP socket */
137 int smc_clc_send_proposal(struct smc_sock
*smc
,
138 struct smc_ib_device
*smcibdev
,
141 struct smc_clc_msg_proposal pclc
;
147 /* send SMC Proposal CLC message */
148 memset(&pclc
, 0, sizeof(pclc
));
149 memcpy(pclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
150 pclc
.hdr
.type
= SMC_CLC_PROPOSAL
;
151 pclc
.hdr
.length
= htons(sizeof(pclc
));
152 pclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
153 memcpy(pclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
/* the per-port gid and mac arrays are indexed with ibport - 1 */
154 memcpy(&pclc
.lcl
.gid
, &smcibdev
->gid
[ibport
- 1], SMC_GID_SIZE
);
155 memcpy(&pclc
.lcl
.mac
, &smcibdev
->mac
[ibport
- 1], ETH_ALEN
);
157 /* determine subnet and mask from internal TCP socket */
/* NOTE(review): the remaining arguments of this call and the test on rc
 * that guards the return below are not visible in this excerpt
 */
158 rc
= smc_netinfo_by_tcpsk(smc
->clcsock
, &pclc
.outgoing_subnet
,
161 return SMC_CLC_DECL_CNFERR
; /* configuration error */
162 memcpy(pclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
163 memset(&msg
, 0, sizeof(msg
));
164 vec
.iov_base
= &pclc
;
165 vec
.iov_len
= sizeof(pclc
);
166 /* due to the few bytes needed for clc-handshake this cannot block */
167 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(pclc
));
/* NOTE(review): if len is a signed int (its declaration is not visible
 * here), a negative len is converted to the unsigned type of sizeof for
 * this comparison and does not compare as smaller; the assignments below
 * would then be skipped for negative send results - confirm.
 */
168 if (len
< sizeof(pclc
)) {
/* two visible outcomes (selecting conditions not shown): a fixed
 * ENETUNREACH, or the error currently stored on the clcsock
 */
170 reason_code
= -ENETUNREACH
;
171 smc
->sk
.sk_err
= -reason_code
;
173 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
174 reason_code
= -smc
->sk
.sk_err
;
/*
 * smc_clc_send_confirm() - build a struct smc_clc_msg_accept_confirm of type
 * SMC_CLC_CONFIRM and send it on smc->clcsock.
 *
 * Link data is taken from conn->lgr->lnk[SMC_SINGLE_LINK]; buffer data is
 * taken from conn->rmb_desc and conn itself. Multi-byte fields are stored
 * through htons/htonl/hton24/cpu_to_be64.
 *
 * NOTE(review): the local declarations, the left-hand sides of two
 * assignments, some branch conditions and the final return are not visible
 * in this excerpt.
 */
181 /* send CLC CONFIRM message across internal TCP socket */
182 int smc_clc_send_confirm(struct smc_sock
*smc
)
184 struct smc_connection
*conn
= &smc
->conn
;
185 struct smc_clc_msg_accept_confirm cclc
;
186 struct smc_link
*link
;
192 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
193 /* send SMC Confirm CLC msg */
194 memset(&cclc
, 0, sizeof(cclc
));
195 memcpy(cclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
196 cclc
.hdr
.type
= SMC_CLC_CONFIRM
;
197 cclc
.hdr
.length
= htons(sizeof(cclc
));
198 cclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
199 memcpy(cclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
/* the per-port gid and mac arrays are indexed with link->ibport - 1;
 * the size argument of the gid copy is on a line not visible here
 */
200 memcpy(&cclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
202 memcpy(&cclc
.lcl
.mac
, &link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
203 hton24(cclc
.qpn
, link
->roce_qp
->qp_num
);
/* NOTE(review): the field receiving this rkey value is on a line not
 * visible in this excerpt
 */
205 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
206 cclc
.conn_idx
= 1; /* for now: 1 RMB = 1 RMBE */
207 cclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
/* the smaller of the link's path_mtu and peer_mtu is sent */
208 cclc
.qp_mtu
= min(link
->path_mtu
, link
->peer_mtu
);
209 cclc
.rmbe_size
= conn
->rmbe_size_short
;
/* NOTE(review): the field receiving this DMA address is on a line not
 * visible in this excerpt
 */
211 cpu_to_be64((u64
)conn
->rmb_desc
->dma_addr
[SMC_SINGLE_LINK
]);
212 hton24(cclc
.psn
, link
->psn_initial
);
214 memcpy(cclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
/* single-element vector covering the whole confirm message */
216 memset(&msg
, 0, sizeof(msg
));
217 vec
.iov_base
= &cclc
;
218 vec
.iov_len
= sizeof(cclc
);
219 len
= kernel_sendmsg(smc
->clcsock
, &msg
, &vec
, 1, sizeof(cclc
));
/* NOTE(review): if len is a signed int (its declaration is not visible
 * here), a negative len is converted to the unsigned type of sizeof for
 * this comparison and does not compare as smaller; the assignments below
 * would then be skipped for negative send results - confirm.
 */
220 if (len
< sizeof(cclc
)) {
/* two visible outcomes (selecting conditions not shown): a fixed
 * ENETUNREACH, or the error currently stored on the clcsock
 */
222 reason_code
= -ENETUNREACH
;
223 smc
->sk
.sk_err
= -reason_code
;
225 smc
->sk
.sk_err
= smc
->clcsock
->sk
->sk_err
;
226 reason_code
= -smc
->sk
.sk_err
;
/*
 * smc_clc_send_accept() - build a struct smc_clc_msg_accept_confirm of type
 * SMC_CLC_ACCEPT and send it on new_smc->clcsock.
 *
 * Link data is taken from conn->lgr->lnk[SMC_SINGLE_LINK]; buffer data is
 * taken from conn->rmb_desc and conn itself. Unlike the confirm message,
 * qp_mtu is set from link->path_mtu alone. After the send, an error is
 * stored in new_smc->sk.sk_err and rc is taken from sock_error().
 *
 * NOTE(review): the local declarations, the statement controlled by
 * srv_first_contact, the left-hand sides of two assignments, some branch
 * conditions and the final return are not visible in this excerpt.
 */
232 /* send CLC ACCEPT message across internal TCP socket */
233 int smc_clc_send_accept(struct smc_sock
*new_smc
, int srv_first_contact
)
235 struct smc_connection
*conn
= &new_smc
->conn
;
236 struct smc_clc_msg_accept_confirm aclc
;
237 struct smc_link
*link
;
243 link
= &conn
->lgr
->lnk
[SMC_SINGLE_LINK
];
244 memset(&aclc
, 0, sizeof(aclc
));
245 memcpy(aclc
.hdr
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
246 aclc
.hdr
.type
= SMC_CLC_ACCEPT
;
247 aclc
.hdr
.length
= htons(sizeof(aclc
));
248 aclc
.hdr
.version
= SMC_CLC_V1
; /* SMC version */
/* NOTE(review): the statement controlled by this test is on a line not
 * visible in this excerpt
 */
249 if (srv_first_contact
)
251 memcpy(aclc
.lcl
.id_for_peer
, local_systemid
, sizeof(local_systemid
));
/* the per-port gid and mac arrays are indexed with link->ibport - 1;
 * the size argument of the gid copy is on a line not visible here
 */
252 memcpy(&aclc
.lcl
.gid
, &link
->smcibdev
->gid
[link
->ibport
- 1],
/* NOTE(review): the mac source is passed without '&' here, whereas the
 * confirm path passes &link->smcibdev->mac[...]; the two are equivalent
 * only if each mac entry is itself an array - confirm against the
 * definition of struct smc_ib_device.
 */
254 memcpy(&aclc
.lcl
.mac
, link
->smcibdev
->mac
[link
->ibport
- 1], ETH_ALEN
);
255 hton24(aclc
.qpn
, link
->roce_qp
->qp_num
);
/* NOTE(review): the field receiving this rkey value is on a line not
 * visible in this excerpt
 */
257 htonl(conn
->rmb_desc
->mr_rx
[SMC_SINGLE_LINK
]->rkey
);
258 aclc
.conn_idx
= 1; /* as long as 1 RMB = 1 RMBE */
259 aclc
.rmbe_alert_token
= htonl(conn
->alert_token_local
);
260 aclc
.qp_mtu
= link
->path_mtu
;
/* NOTE(review): this statement ends with ',' rather than ';', so the comma
 * operator chains it to the expression that follows (whose first line is
 * not visible here). This looks like a typo for ';' - confirm.
 */
261 aclc
.rmbe_size
= conn
->rmbe_size_short
,
263 cpu_to_be64((u64
)conn
->rmb_desc
->dma_addr
[SMC_SINGLE_LINK
]);
264 hton24(aclc
.psn
, link
->psn_initial
);
265 memcpy(aclc
.trl
.eyecatcher
, SMC_EYECATCHER
, sizeof(SMC_EYECATCHER
));
/* single-element vector covering the whole accept message */
267 memset(&msg
, 0, sizeof(msg
));
268 vec
.iov_base
= &aclc
;
269 vec
.iov_len
= sizeof(aclc
);
270 len
= kernel_sendmsg(new_smc
->clcsock
, &msg
, &vec
, 1, sizeof(aclc
));
/* NOTE(review): if len is a signed int (its declaration is not visible
 * here), a negative len is converted to the unsigned type of sizeof for
 * this comparison and does not compare as smaller; the assignments below
 * would then be skipped for negative send results - confirm.
 */
271 if (len
< sizeof(aclc
)) {
/* two visible outcomes (selecting conditions not shown): a fixed EPROTO,
 * or the error currently stored on the clcsock
 */
273 new_smc
->sk
.sk_err
= EPROTO
;
275 new_smc
->sk
.sk_err
= new_smc
->clcsock
->sk
->sk_err
;
276 rc
= sock_error(&new_smc
->sk
);