/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
11 #include <linux/workqueue.h>
17 #include "smc_close.h"
19 #define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
21 static void smc_close_cleanup_listen(struct sock
*parent
)
25 /* Close non-accepted connections */
26 while ((sk
= smc_accept_dequeue(parent
, NULL
)))
27 smc_close_non_accepted(sk
);
30 static void smc_close_wait_tx_pends(struct smc_sock
*smc
)
32 DEFINE_WAIT_FUNC(wait
, woken_wake_function
);
33 struct sock
*sk
= &smc
->sk
;
36 timeout
= SMC_CLOSE_WAIT_TX_PENDS_TIME
;
37 add_wait_queue(sk_sleep(sk
), &wait
);
38 while (!signal_pending(current
) && timeout
) {
41 rc
= sk_wait_event(sk
, &timeout
,
42 !smc_cdc_tx_has_pending(&smc
->conn
),
47 remove_wait_queue(sk_sleep(sk
), &wait
);
50 /* wait for sndbuf data being transmitted */
51 static void smc_close_stream_wait(struct smc_sock
*smc
, long timeout
)
53 DEFINE_WAIT_FUNC(wait
, woken_wake_function
);
54 struct sock
*sk
= &smc
->sk
;
59 if (!smc_tx_prepared_sends(&smc
->conn
))
62 smc
->wait_close_tx_prepared
= 1;
63 add_wait_queue(sk_sleep(sk
), &wait
);
64 while (!signal_pending(current
) && timeout
) {
67 rc
= sk_wait_event(sk
, &timeout
,
68 !smc_tx_prepared_sends(&smc
->conn
) ||
69 (sk
->sk_err
== ECONNABORTED
) ||
70 (sk
->sk_err
== ECONNRESET
),
75 remove_wait_queue(sk_sleep(sk
), &wait
);
76 smc
->wait_close_tx_prepared
= 0;
79 void smc_close_wake_tx_prepared(struct smc_sock
*smc
)
81 if (smc
->wait_close_tx_prepared
)
82 /* wake up socket closing */
83 smc
->sk
.sk_state_change(&smc
->sk
);
86 static int smc_close_wr(struct smc_connection
*conn
)
88 conn
->local_tx_ctrl
.conn_state_flags
.peer_done_writing
= 1;
90 return smc_cdc_get_slot_and_msg_send(conn
);
93 static int smc_close_final(struct smc_connection
*conn
)
95 if (atomic_read(&conn
->bytes_to_rcv
))
96 conn
->local_tx_ctrl
.conn_state_flags
.peer_conn_abort
= 1;
98 conn
->local_tx_ctrl
.conn_state_flags
.peer_conn_closed
= 1;
100 return smc_cdc_get_slot_and_msg_send(conn
);
103 static int smc_close_abort(struct smc_connection
*conn
)
105 conn
->local_tx_ctrl
.conn_state_flags
.peer_conn_abort
= 1;
107 return smc_cdc_get_slot_and_msg_send(conn
);
110 /* terminate smc socket abnormally - active abort
111 * RDMA communication no longer possible
113 void smc_close_active_abort(struct smc_sock
*smc
)
115 struct smc_cdc_conn_state_flags
*txflags
=
116 &smc
->conn
.local_tx_ctrl
.conn_state_flags
;
118 bh_lock_sock(&smc
->sk
);
119 smc
->sk
.sk_err
= ECONNABORTED
;
120 if (smc
->clcsock
&& smc
->clcsock
->sk
) {
121 smc
->clcsock
->sk
->sk_err
= ECONNABORTED
;
122 smc
->clcsock
->sk
->sk_state_change(smc
->clcsock
->sk
);
124 switch (smc
->sk
.sk_state
) {
126 smc
->sk
.sk_state
= SMC_PEERABORTWAIT
;
128 case SMC_APPCLOSEWAIT1
:
129 case SMC_APPCLOSEWAIT2
:
130 txflags
->peer_conn_abort
= 1;
131 sock_release(smc
->clcsock
);
132 if (!smc_cdc_rxed_any_close(&smc
->conn
))
133 smc
->sk
.sk_state
= SMC_PEERABORTWAIT
;
135 smc
->sk
.sk_state
= SMC_CLOSED
;
137 case SMC_PEERCLOSEWAIT1
:
138 case SMC_PEERCLOSEWAIT2
:
139 if (!txflags
->peer_conn_closed
) {
140 smc
->sk
.sk_state
= SMC_PEERABORTWAIT
;
141 txflags
->peer_conn_abort
= 1;
142 sock_release(smc
->clcsock
);
144 smc
->sk
.sk_state
= SMC_CLOSED
;
147 case SMC_PROCESSABORT
:
148 case SMC_APPFINCLOSEWAIT
:
149 if (!txflags
->peer_conn_closed
) {
150 txflags
->peer_conn_abort
= 1;
151 sock_release(smc
->clcsock
);
153 smc
->sk
.sk_state
= SMC_CLOSED
;
155 case SMC_PEERFINCLOSEWAIT
:
156 case SMC_PEERABORTWAIT
:
161 sock_set_flag(&smc
->sk
, SOCK_DEAD
);
162 bh_unlock_sock(&smc
->sk
);
163 smc
->sk
.sk_state_change(&smc
->sk
);
166 int smc_close_active(struct smc_sock
*smc
)
168 struct smc_cdc_conn_state_flags
*txflags
=
169 &smc
->conn
.local_tx_ctrl
.conn_state_flags
;
170 long timeout
= SMC_MAX_STREAM_WAIT_TIMEOUT
;
171 struct smc_connection
*conn
= &smc
->conn
;
172 struct sock
*sk
= &smc
->sk
;
176 if (sock_flag(sk
, SOCK_LINGER
) &&
177 !(current
->flags
& PF_EXITING
))
178 timeout
= sk
->sk_lingertime
;
181 old_state
= sk
->sk_state
;
184 sk
->sk_state
= SMC_CLOSED
;
185 if (smc
->smc_listen_work
.func
)
186 flush_work(&smc
->smc_listen_work
);
190 sk
->sk_state
= SMC_CLOSED
;
191 sk
->sk_state_change(sk
); /* wake up accept */
192 if (smc
->clcsock
&& smc
->clcsock
->sk
) {
193 rc
= kernel_sock_shutdown(smc
->clcsock
, SHUT_RDWR
);
194 /* wake up kernel_accept of smc_tcp_listen_worker */
195 smc
->clcsock
->sk
->sk_data_ready(smc
->clcsock
->sk
);
198 smc_close_cleanup_listen(sk
);
199 flush_work(&smc
->tcp_listen_work
);
203 smc_close_stream_wait(smc
, timeout
);
205 cancel_work_sync(&conn
->tx_work
);
207 if (sk
->sk_state
== SMC_ACTIVE
) {
208 /* send close request */
209 rc
= smc_close_final(conn
);
210 sk
->sk_state
= SMC_PEERCLOSEWAIT1
;
212 /* peer event has changed the state */
216 case SMC_APPFINCLOSEWAIT
:
217 /* socket already shutdown wr or both (active close) */
218 if (txflags
->peer_done_writing
&&
219 !txflags
->peer_conn_closed
) {
220 /* just shutdown wr done, send close request */
221 rc
= smc_close_final(conn
);
223 sk
->sk_state
= SMC_CLOSED
;
224 smc_close_wait_tx_pends(smc
);
226 case SMC_APPCLOSEWAIT1
:
227 case SMC_APPCLOSEWAIT2
:
228 if (!smc_cdc_rxed_any_close(conn
))
229 smc_close_stream_wait(smc
, timeout
);
231 cancel_work_sync(&conn
->tx_work
);
233 if (sk
->sk_err
!= ECONNABORTED
) {
234 /* confirm close from peer */
235 rc
= smc_close_final(conn
);
239 if (smc_cdc_rxed_any_close(conn
))
240 /* peer has closed the socket already */
241 sk
->sk_state
= SMC_CLOSED
;
243 /* peer has just issued a shutdown write */
244 sk
->sk_state
= SMC_PEERFINCLOSEWAIT
;
245 smc_close_wait_tx_pends(smc
);
247 case SMC_PEERCLOSEWAIT1
:
248 case SMC_PEERCLOSEWAIT2
:
249 case SMC_PEERFINCLOSEWAIT
:
250 /* peer sending PeerConnectionClosed will cause transition */
252 case SMC_PROCESSABORT
:
253 cancel_work_sync(&conn
->tx_work
);
254 smc_close_abort(conn
);
255 sk
->sk_state
= SMC_CLOSED
;
256 smc_close_wait_tx_pends(smc
);
258 case SMC_PEERABORTWAIT
:
260 /* nothing to do, add tracing in future patch */
264 if (old_state
!= sk
->sk_state
)
265 sk
->sk_state_change(&smc
->sk
);
269 static void smc_close_passive_abort_received(struct smc_sock
*smc
)
271 struct smc_cdc_conn_state_flags
*txflags
=
272 &smc
->conn
.local_tx_ctrl
.conn_state_flags
;
273 struct sock
*sk
= &smc
->sk
;
275 switch (sk
->sk_state
) {
277 case SMC_APPFINCLOSEWAIT
:
278 case SMC_APPCLOSEWAIT1
:
279 case SMC_APPCLOSEWAIT2
:
280 smc_close_abort(&smc
->conn
);
281 sk
->sk_state
= SMC_PROCESSABORT
;
283 case SMC_PEERCLOSEWAIT1
:
284 case SMC_PEERCLOSEWAIT2
:
285 if (txflags
->peer_done_writing
&&
286 !txflags
->peer_conn_closed
) {
287 /* just shutdown, but not yet closed locally */
288 smc_close_abort(&smc
->conn
);
289 sk
->sk_state
= SMC_PROCESSABORT
;
291 sk
->sk_state
= SMC_CLOSED
;
294 case SMC_PEERFINCLOSEWAIT
:
295 case SMC_PEERABORTWAIT
:
296 sk
->sk_state
= SMC_CLOSED
;
299 case SMC_PROCESSABORT
:
300 /* nothing to do, add tracing in future patch */
305 /* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
306 * or peer_done_writing.
307 * Called under tasklet context.
309 void smc_close_passive_received(struct smc_sock
*smc
)
311 struct smc_cdc_conn_state_flags
*rxflags
=
312 &smc
->conn
.local_rx_ctrl
.conn_state_flags
;
313 struct sock
*sk
= &smc
->sk
;
316 sk
->sk_shutdown
|= RCV_SHUTDOWN
;
317 if (smc
->clcsock
&& smc
->clcsock
->sk
)
318 smc
->clcsock
->sk
->sk_shutdown
|= RCV_SHUTDOWN
;
319 sock_set_flag(&smc
->sk
, SOCK_DONE
);
321 old_state
= sk
->sk_state
;
323 if (rxflags
->peer_conn_abort
) {
324 smc_close_passive_abort_received(smc
);
328 switch (sk
->sk_state
) {
330 if (atomic_read(&smc
->conn
.bytes_to_rcv
) ||
331 (rxflags
->peer_done_writing
&&
332 !rxflags
->peer_conn_closed
))
333 sk
->sk_state
= SMC_APPCLOSEWAIT1
;
335 sk
->sk_state
= SMC_CLOSED
;
338 sk
->sk_state
= SMC_APPCLOSEWAIT1
;
340 case SMC_PEERCLOSEWAIT1
:
341 if (rxflags
->peer_done_writing
)
342 sk
->sk_state
= SMC_PEERCLOSEWAIT2
;
343 /* fall through to check for closing */
344 case SMC_PEERCLOSEWAIT2
:
345 case SMC_PEERFINCLOSEWAIT
:
346 if (!smc_cdc_rxed_any_close(&smc
->conn
))
348 if (sock_flag(sk
, SOCK_DEAD
) &&
349 (sk
->sk_shutdown
== SHUTDOWN_MASK
)) {
350 /* smc_release has already been called locally */
351 sk
->sk_state
= SMC_CLOSED
;
353 /* just shutdown, but not yet closed locally */
354 sk
->sk_state
= SMC_APPFINCLOSEWAIT
;
357 case SMC_APPCLOSEWAIT1
:
358 case SMC_APPCLOSEWAIT2
:
359 case SMC_APPFINCLOSEWAIT
:
360 case SMC_PEERABORTWAIT
:
361 case SMC_PROCESSABORT
:
363 /* nothing to do, add tracing in future patch */
368 if (old_state
!= sk
->sk_state
)
369 sk
->sk_state_change(sk
);
370 sk
->sk_data_ready(sk
); /* wakeup blocked rcvbuf consumers */
371 sk
->sk_write_space(sk
); /* wakeup blocked sndbuf producers */
373 if ((sk
->sk_state
== SMC_CLOSED
) &&
374 (sock_flag(sk
, SOCK_DEAD
) || (old_state
== SMC_INIT
))) {
375 smc_conn_free(&smc
->conn
);
376 schedule_delayed_work(&smc
->sock_put_work
,
377 SMC_CLOSE_SOCK_PUT_DELAY
);
381 void smc_close_sock_put_work(struct work_struct
*work
)
383 struct smc_sock
*smc
= container_of(to_delayed_work(work
),
387 smc
->sk
.sk_prot
->unhash(&smc
->sk
);
391 int smc_close_shutdown_write(struct smc_sock
*smc
)
393 struct smc_connection
*conn
= &smc
->conn
;
394 long timeout
= SMC_MAX_STREAM_WAIT_TIMEOUT
;
395 struct sock
*sk
= &smc
->sk
;
399 if (sock_flag(sk
, SOCK_LINGER
))
400 timeout
= sk
->sk_lingertime
;
403 old_state
= sk
->sk_state
;
406 smc_close_stream_wait(smc
, timeout
);
408 cancel_work_sync(&conn
->tx_work
);
410 /* send close wr request */
411 rc
= smc_close_wr(conn
);
412 if (sk
->sk_state
== SMC_ACTIVE
)
413 sk
->sk_state
= SMC_PEERCLOSEWAIT1
;
417 case SMC_APPCLOSEWAIT1
:
419 if (!smc_cdc_rxed_any_close(conn
))
420 smc_close_stream_wait(smc
, timeout
);
422 cancel_work_sync(&conn
->tx_work
);
424 /* confirm close from peer */
425 rc
= smc_close_wr(conn
);
426 sk
->sk_state
= SMC_APPCLOSEWAIT2
;
428 case SMC_APPCLOSEWAIT2
:
429 case SMC_PEERFINCLOSEWAIT
:
430 case SMC_PEERCLOSEWAIT1
:
431 case SMC_PEERCLOSEWAIT2
:
432 case SMC_APPFINCLOSEWAIT
:
433 case SMC_PROCESSABORT
:
434 case SMC_PEERABORTWAIT
:
435 /* nothing to do, add tracing in future patch */
439 if (old_state
!= sk
->sk_state
)
440 sk
->sk_state_change(&smc
->sk
);