/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

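/* Illustrative note (not part of the original source): from user space an
 * AF_SMC socket is created like any other stream socket, only the address
 * family and protocol constants differ, e.g.
 *
 *	sd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);	   (IPv4 peers)
 *	sd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);   (IPv6 peers)
 *
 * smc_sock_alloc() below selects smc_proto or smc_proto6 based on that
 * protocol value; bind/connect/listen/accept/send/recv then follow the
 * normal sockets API, with transparent fallback to TCP when SMC cannot be
 * used for a connection.
 */
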
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			mutex_lock(&smc->clcsock_release_lock);
			sock_release(smc->clcsock);
			smc->clcsock = NULL;
			mutex_unlock(&smc->clcsock_release_lock);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register a new rmb, send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
{
	if (!rmb_desc->wr_reg) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
			rmb_desc->regerr = 1;
			return -EFAULT;
		}
		rmb_desc->wr_reg = 1;
	}
	if (!conf_rkey)
		return 0;
	/* exchange confirm_rkey msg with peer */
	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	return 0;
}

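/* Note (illustrative, not part of the original source): smc_reg_rmb() is
 * called with conf_rkey == false on the first-contact paths below and with
 * conf_rkey == true when an existing link group is reused, in which case
 * the rkey must additionally be confirmed with the peer through the LLC
 * exchange in smc_llc_do_confirm_rkey().
 */
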
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

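/* Worked example (illustrative, not part of the original source): if the
 * peer announces an RMB element of 65536 bytes and peer_rmbe_idx is 3,
 * smcr_conn_save_peer_info() above computes
 * tx_off = 65536 * (3 - 1) = 131072, i.e. the offset of the third element
 * in the peer's RMB; for SMC-D the index is not decremented and
 * sizeof(struct smcd_cdc_msg) is subtracted from the usable buffer size.
 */
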
static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc->use_fallback = true;
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	if (smc->conn.lgr->is_smcd)
		/* there is only one lgr role for SMC-D; use server lock */
		mutex_unlock(&smc_server_lgr_pending);
	else
		mutex_unlock(&smc_client_lgr_pending);

	smc_conn_free(&smc->conn);
	smc->connect_nonblock = 0;
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_check_rdma(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!(ini->ib_dev))
		return SMC_CLC_DECL_CNFERR; /* configuration error */
	return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_check_ism(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev)
		return SMC_CLC_DECL_CNFERR; /* configuration error */
	return 0;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
					struct smc_init_info *ini)
{
	if (!is_smcd)
		return 0;
	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, smc_type, ini);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
				CLC_WAIT_TIME);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link *link;
	int reason_code = 0;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->lcl;
	ini->ib_clcqpn = ntoh24(aclc->qpn);
	ini->srv_first_contact = aclc->hdr.flag;

	mutex_lock(&smc_client_lgr_pending);
	local_contact = smc_conn_create(smc, ini);
	if (local_contact < 0) {
		if (local_contact == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (local_contact == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		else
			reason_code = SMC_CLC_DECL_INTERR; /* other error */
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, aclc);

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 local_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 local_contact);
	} else {
		if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 local_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code, local_contact);

	smc_tx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 local_contact);
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int local_contact = SMC_FIRST_CONTACT;
	int rc = 0;

	ini->is_smcd = true;
	ini->ism_gid = aclc->gid;
	ini->srv_first_contact = aclc->hdr.flag;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	local_contact = smc_conn_create(smc, ini);
	if (local_contact < 0) {
		mutex_unlock(&smc_server_lgr_pending);
		return SMC_CLC_DECL_MEM;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(smc, true))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, local_contact);
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_init_info ini = {0};
	int smc_type;
	int rc = 0;

	sock_hold(&smc->sk); /* sock put in passive closing */

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* check for VLAN ID */
	if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);

	/* check if there is an ism device available */
	if (!smc_check_ism(smc, &ini) &&
	    !smc_connect_ism_vlan_setup(smc, &ini)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is a rdma device available */
	if (!smc_check_rdma(smc, &ini)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA are supported, fallback */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, &ini);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, &ini);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
	return 0;
}

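/* Summary (illustrative, derived from __smc_connect() above): the client
 * proposes SMC_TYPE_D (ISM only), SMC_TYPE_R (RDMA only) or SMC_TYPE_B
 * (both), depending on which devices were found; the server's CLC accept
 * message then selects the path actually used, and failures along the way
 * degrade to a decline and fallback to plain TCP whenever possible.
 */
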
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = smc->sk.sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCP_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
		     (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (smc->sk.sk_err)
		smc->sk.sk_state_change(&smc->sk);
	else
		smc->sk.sk_write_space(&smc->sk);
	release_sock(&smc->sk);
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;
	if (flags & O_NONBLOCK) {
		if (schedule_work(&smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}

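/* Usage note (illustrative, not part of the original source): a
 * non-blocking connect() on an AF_SMC socket returns -EINPROGRESS once the
 * TCP-level connect has been started; the CLC handshake then completes
 * asynchronously in smc_connect_work(), which signals the result via
 * sk_write_space() or sk_state_change(), so poll()/select() behaves as it
 * does for plain TCP.
 */
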
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		new_sk->sk_prot->unhash(new_sk);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink () */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (!smc->use_fallback) {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
	} else {
		if (sk->sk_state == SMC_CLOSED)
			smc_conn_free(&smc->conn);
	}
	release_sock(sk);
	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_resp_rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* send ADD LINK request to client over the RoCE fabric */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	/* receive ADD LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

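/* Note (illustrative, not part of the original source):
 * smc_serv_conf_first_link() mirrors smc_clnt_conf_first_link() above with
 * the roles reversed - the server sends the CONFIRM LINK and ADD LINK
 * requests and waits for the client's responses, declining with
 * SMC_CLC_DECL_TIMEOUT_CL/_AL on timeout.
 */
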
/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	new_smc->fallback_rsn = reason_code;
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_DIFFPREFIX;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_init_info *ini, int *local_contact)
{
	/* allocate connection / link group */
	*local_contact = smc_conn_create(new_smc, ini);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false))
		return SMC_CLC_DECL_MEM;

	return 0;
}

/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smc_init_info *ini,
			       int *local_contact)
{
	struct smc_clc_msg_smcd *pclc_smcd;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	ini->ism_gid = pclc_smcd->gid;
	*local_contact = smc_conn_create(new_smc, ini);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* Check if peer can be reached via ISM device */
	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
			    new_smc->conn.lgr->vlan_id,
			    new_smc->conn.lgr->smcd)) {
		if (*local_contact == SMC_FIRST_CONTACT)
			smc_lgr_forget(new_smc->conn.lgr);
		smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_CNFERR;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(new_smc, true)) {
		if (*local_contact == SMC_FIRST_CONTACT)
			smc_lgr_forget(new_smc->conn.lgr);
		smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	if (local_contact != SMC_FIRST_CONTACT) {
		if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
			return SMC_CLC_DECL_ERR_REGRMB;
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code)
			goto decline;
	}
	return 0;

decline:
	smc_listen_decline(new_smc, reason_code, local_contact);
	return reason_code;
}

/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info ini = {0};
	bool ism_supported = false;
	u8 buf[SMC_CLC_MAX_LEN];
	int local_contact = 0;
	int reason_code = 0;
	int rc = 0;

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		new_smc->use_fallback = true;
		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
				       SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (reason_code) {
		smc_listen_decline(new_smc, reason_code, 0);
		return;
	}

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
		return;
	}

	/* check for matching IP prefix and subnet length */
	rc = smc_listen_prfx_check(new_smc, pclc);
	if (rc) {
		smc_listen_decline(new_smc, rc, 0);
		return;
	}

	mutex_lock(&smc_server_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* prepare ISM check */
	ini.is_smcd = true;
	/* check if ISM is available */
	if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
	    !smc_check_ism(new_smc, &ini) &&
	    !smc_listen_ism_init(new_smc, pclc, &ini, &local_contact)) {
		ism_supported = true;
	} else {
		/* prepare RDMA check */
		memset(&ini, 0, sizeof(ini));
		ini.is_smcd = false;
		ini.ib_lcl = &pclc->lcl;
	}

	/* check if RDMA is available */
	if (!ism_supported &&
	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
	     smc_vlan_by_tcpsk(new_smc->clcsock, &ini) ||
	     smc_check_rdma(new_smc, &ini) ||
	     smc_listen_rdma_init(new_smc, &ini, &local_contact) ||
	     smc_listen_rdma_reg(new_smc, local_contact))) {
		/* SMC not supported, decline */
		mutex_unlock(&smc_server_lgr_pending);
		smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
				   local_contact);
		return;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		smc_listen_decline(new_smc, rc, local_contact);
		return;
	}

	/* SMC-D does not need this lock any more */
	if (ism_supported)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (reason_code) {
		if (!ism_supported)
			mutex_unlock(&smc_server_lgr_pending);
		smc_listen_decline(new_smc, reason_code, local_contact);
		return;
	}

	/* finish worker */
	if (!ism_supported) {
		rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact);
		mutex_unlock(&smc_server_lgr_pending);
		if (rc)
			return;
	}
	smc_conn_save_peer_info(new_smc, &cclc);
	smc_listen_out_connected(new_smc);
}

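/* Summary (illustrative, derived from smc_listen_work() above): the listen
 * worker receives the CLC proposal, checks the IP prefix and ISM/RDMA
 * availability, sends the CLC accept, waits for the CLC confirm and, for
 * SMC-R, finishes the link setup; smc_server_lgr_pending serializes link
 * group creation and is released early for SMC-D, which needs no
 * CONFIRM LINK exchange.
 */
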
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	sock_hold(sk); /* sock_hold in tcp_listen_worker */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}

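/* Note (illustrative, not part of the original source): with
 * TCP_DEFER_ACCEPT set on the listening socket, smc_accept() additionally
 * waits up to sockopt_defer_accept seconds for data to arrive on the
 * accepted socket - on the clcsock receive queue if the connection fell
 * back to TCP, otherwise via smc_rx_wait().
 */
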
static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		} else if (smc->use_fallback) { /* as result of connect_work()*/
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}

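/* Note (illustrative, not part of the original source): once a connection
 * has fallen back to TCP, smc_poll() simply delegates to the clcsock's
 * poll; otherwise readiness is derived from the SMC connection state,
 * including EPOLLPRI while urgent (out-of-band) data is pending
 * (conn.urg_state == SMC_URG_VALID).
 */
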
static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

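/* Worked example (illustrative, not part of the original source): the
 * "how + 1" mapping above turns SHUT_RD (0) into RCV_SHUTDOWN (1),
 * SHUT_WR (1) into SEND_SHUTDOWN (2) and SHUT_RDWR (2) into
 * SHUTDOWN_MASK (3), matching what the fallback path copies back from the
 * clcsock.
 */
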
1679static int smc_setsockopt(struct socket *sock, int level, int optname,
1680 char __user *optval, unsigned int optlen)
1681{
1682 struct sock *sk = sock->sk;
1683 struct smc_sock *smc;
01d2f7e2 1684 int val, rc;
ac713874
UB
1685
1686 smc = smc_sk(sk);
1687
1688 /* generic setsockopts reaching us here always apply to the
1689 * CLC socket
1690 */
ee9dfbef
UB
1691 rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
1692 optval, optlen);
1693 if (smc->clcsock->sk->sk_err) {
1694 sk->sk_err = smc->clcsock->sk->sk_err;
1695 sk->sk_error_report(sk);
1696 }
1697 if (rc)
1698 return rc;
1699
01d2f7e2 1700 if (optlen < sizeof(int))
3dc9f558 1701 return -EINVAL;
ac0107ed
UB
1702 if (get_user(val, (int __user *)optval))
1703 return -EFAULT;
01d2f7e2 1704
ee9dfbef
UB
1705 lock_sock(sk);
1706 switch (optname) {
1707 case TCP_ULP:
1708 case TCP_FASTOPEN:
1709 case TCP_FASTOPEN_CONNECT:
1710 case TCP_FASTOPEN_KEY:
1711 case TCP_FASTOPEN_NO_COOKIE:
1712 /* option not supported by SMC */
1713 if (sk->sk_state == SMC_INIT) {
1714 smc->use_fallback = true;
603cc149 1715 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
ee9dfbef
UB
1716 } else {
1717 if (!smc->use_fallback)
1718 rc = -EINVAL;
1719 }
1720 break;
01d2f7e2
UB
1721 case TCP_NODELAY:
1722 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
569bc643 1723 if (val && !smc->use_fallback)
01d2f7e2
UB
1724 mod_delayed_work(system_wq, &smc->conn.tx_work,
1725 0);
1726 }
1727 break;
1728 case TCP_CORK:
1729 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
569bc643 1730 if (!val && !smc->use_fallback)
01d2f7e2
UB
1731 mod_delayed_work(system_wq, &smc->conn.tx_work,
1732 0);
1733 }
1734 break;
abb190f1
UB
1735 case TCP_DEFER_ACCEPT:
1736 smc->sockopt_defer_accept = val;
1737 break;
ee9dfbef
UB
1738 default:
1739 break;
1740 }
1741 release_sock(sk);
1742
1743 return rc;
ac713874
UB
1744}
1745
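To illustrate the pass-through above: TCP-level options set on an SMC socket are first applied to the internal CLC TCP socket, a few of them (TCP_ULP and the TCP_FASTOPEN family) force a fallback to plain TCP when set before the connection is established, and TCP_NODELAY/TCP_CORK additionally kick the SMC tx worker. A hedged userspace sketch (editor's illustration, not part of this file):

/* Hypothetical example: TCP_NODELAY on an SMC socket is forwarded to the
 * CLC socket; for a non-fallback SMC connection it also triggers the tx
 * worker so pending data is flushed promptly (see smc_setsockopt above).
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

static int set_nodelay(int smc_fd)
{
	int one = 1;

	return setsockopt(smc_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
}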
1746static int smc_getsockopt(struct socket *sock, int level, int optname,
1747 char __user *optval, int __user *optlen)
1748{
1749 struct smc_sock *smc;
1750
1751 smc = smc_sk(sock->sk);
1752 /* socket options apply to the CLC socket */
1753 return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
1754 optval, optlen);
1755}
1756
1757static int smc_ioctl(struct socket *sock, unsigned int cmd,
1758 unsigned long arg)
1759{
de8474eb
SR
1760 union smc_host_cursor cons, urg;
1761 struct smc_connection *conn;
ac713874 1762 struct smc_sock *smc;
9b67e26f 1763 int answ;
ac713874
UB
1764
1765 smc = smc_sk(sock->sk);
de8474eb 1766 conn = &smc->conn;
7311d665 1767 lock_sock(&smc->sk);
9b67e26f 1768 if (smc->use_fallback) {
7311d665
UB
1769 if (!smc->clcsock) {
1770 release_sock(&smc->sk);
9b67e26f 1771 return -EBADF;
7311d665
UB
1772 }
1773 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
1774 release_sock(&smc->sk);
1775 return answ;
9b67e26f
UB
1776 }
1777 switch (cmd) {
1778 case SIOCINQ: /* same as FIONREAD */
1992d998
UB
1779 if (smc->sk.sk_state == SMC_LISTEN) {
1780 release_sock(&smc->sk);
9b67e26f 1781 return -EINVAL;
1992d998 1782 }
2351abe6
UB
1783 if (smc->sk.sk_state == SMC_INIT ||
1784 smc->sk.sk_state == SMC_CLOSED)
1785 answ = 0;
1786 else
1787 answ = atomic_read(&smc->conn.bytes_to_rcv);
9b67e26f
UB
1788 break;
1789 case SIOCOUTQ:
1790		/* output queue size (not sent + not acked) */
1992d998
UB
1791 if (smc->sk.sk_state == SMC_LISTEN) {
1792 release_sock(&smc->sk);
9b67e26f 1793 return -EINVAL;
1992d998 1794 }
2351abe6
UB
1795 if (smc->sk.sk_state == SMC_INIT ||
1796 smc->sk.sk_state == SMC_CLOSED)
1797 answ = 0;
1798 else
1799 answ = smc->conn.sndbuf_desc->len -
9b67e26f
UB
1800 atomic_read(&smc->conn.sndbuf_space);
1801 break;
1802 case SIOCOUTQNSD:
1803		/* output queue size (not sent only) */
1992d998
UB
1804 if (smc->sk.sk_state == SMC_LISTEN) {
1805 release_sock(&smc->sk);
9b67e26f 1806 return -EINVAL;
1992d998 1807 }
2351abe6
UB
1808 if (smc->sk.sk_state == SMC_INIT ||
1809 smc->sk.sk_state == SMC_CLOSED)
1810 answ = 0;
1811 else
1812 answ = smc_tx_prepared_sends(&smc->conn);
9b67e26f 1813 break;
de8474eb 1814 case SIOCATMARK:
1992d998
UB
1815 if (smc->sk.sk_state == SMC_LISTEN) {
1816 release_sock(&smc->sk);
de8474eb 1817 return -EINVAL;
1992d998 1818 }
de8474eb
SR
1819 if (smc->sk.sk_state == SMC_INIT ||
1820 smc->sk.sk_state == SMC_CLOSED) {
1821 answ = 0;
1822 } else {
bac6de7b
SR
1823 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
1824 smc_curs_copy(&urg, &conn->urg_curs, conn);
de8474eb
SR
1825 answ = smc_curs_diff(conn->rmb_desc->len,
1826 &cons, &urg) == 1;
1827 }
1828 break;
9b67e26f 1829 default:
1992d998 1830 release_sock(&smc->sk);
9b67e26f
UB
1831 return -ENOIOCTLCMD;
1832 }
1992d998 1833 release_sock(&smc->sk);
9b67e26f
UB
1834
1835 return put_user(answ, (int __user *)arg);
ac713874
UB
1836}
1837
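For reference, the byte counts computed by smc_ioctl() above are reachable from userspace with the standard socket ioctls. A hedged sketch (editor's illustration, not part of this file):

/* Hypothetical example: query the counters exposed by smc_ioctl().
 * SIOCINQ returns unread receive data, SIOCOUTQ unsent plus unacknowledged
 * send data, and SIOCOUTQNSD only the not-yet-sent portion.
 */
#include <sys/ioctl.h>
#include <linux/sockios.h>

static int unread_rx_bytes(int smc_fd, int *bytes)
{
	return ioctl(smc_fd, SIOCINQ, bytes);
}

static int outstanding_tx_bytes(int smc_fd, int *bytes)
{
	return ioctl(smc_fd, SIOCOUTQ, bytes);
}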
1838static ssize_t smc_sendpage(struct socket *sock, struct page *page,
1839 int offset, size_t size, int flags)
1840{
1841 struct sock *sk = sock->sk;
1842 struct smc_sock *smc;
1843 int rc = -EPIPE;
1844
1845 smc = smc_sk(sk);
1846 lock_sock(sk);
bda27ff5
SR
1847 if (sk->sk_state != SMC_ACTIVE) {
1848 release_sock(sk);
ac713874 1849 goto out;
bda27ff5
SR
1850 }
1851 release_sock(sk);
ac713874
UB
1852 if (smc->use_fallback)
1853 rc = kernel_sendpage(smc->clcsock, page, offset,
1854 size, flags);
1855 else
1856 rc = sock_no_sendpage(sock, page, offset, size, flags);
1857
1858out:
ac713874
UB
1859 return rc;
1860}
1861
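A sendfile(2) from a file to an SMC socket typically ends up in the ->sendpage callback above; for a non-fallback SMC connection smc_sendpage() degrades to sock_no_sendpage(), i.e. an ordinary in-kernel sendmsg. A hedged userspace sketch (not part of this file):

/* Hypothetical example: copy file contents to an SMC socket with
 * sendfile(2), exercising the sendpage path shown above.
 */
#include <sys/sendfile.h>
#include <sys/types.h>

static ssize_t send_file(int smc_fd, int file_fd, size_t count)
{
	off_t off = 0;

	return sendfile(smc_fd, file_fd, &off, count);
}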
9014db20
SR
1862/* Map the affected portions of the rmbe into an spd, note the number of bytes
1863 * to splice in conn->splice_pending, and press 'go'. Consumer cursor updates
1864 * are delayed until the respective page has been fully processed.
1865 * Note that subsequent recv() calls have to wait until all splice() processing
1866 * has completed.
1867 */
ac713874
UB
1868static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
1869 struct pipe_inode_info *pipe, size_t len,
9014db20 1870 unsigned int flags)
ac713874
UB
1871{
1872 struct sock *sk = sock->sk;
1873 struct smc_sock *smc;
1874 int rc = -ENOTCONN;
1875
1876 smc = smc_sk(sk);
1877 lock_sock(sk);
51c5aba3
KG
1878 if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
1879 /* socket was connected before, no more data to read */
1880 rc = 0;
1881 goto out;
1882 }
9014db20
SR
1883 if (sk->sk_state == SMC_INIT ||
1884 sk->sk_state == SMC_LISTEN ||
1885 sk->sk_state == SMC_CLOSED)
1886 goto out;
1887
1888 if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
1889 rc = 0;
ac713874 1890 goto out;
9014db20
SR
1891 }
1892
ac713874
UB
1893 if (smc->use_fallback) {
1894 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
1895 pipe, len, flags);
1896 } else {
9014db20
SR
1897 if (*ppos) {
1898 rc = -ESPIPE;
1899 goto out;
1900 }
1901 if (flags & SPLICE_F_NONBLOCK)
1902 flags = MSG_DONTWAIT;
1903 else
1904 flags = 0;
1905 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
ac713874
UB
1906 }
1907out:
1908 release_sock(sk);
9014db20 1909
ac713874
UB
1910 return rc;
1911}
1912
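As a usage note for smc_splice_read() above: a nonzero *ppos is rejected with -ESPIPE, and SPLICE_F_NONBLOCK is translated into MSG_DONTWAIT for the non-fallback (RDMA) path. A hedged userspace sketch (editor's illustration, not part of this file):

/* Hypothetical example: drain received SMC data into a pipe without copying
 * it through userspace, using non-blocking splice semantics.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

static ssize_t drain_smc_to_pipe(int smc_fd, int pipe_wr_fd, size_t len)
{
	return splice(smc_fd, NULL, pipe_wr_fd, NULL, len, SPLICE_F_NONBLOCK);
}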
1913/* must look like tcp */
1914static const struct proto_ops smc_sock_ops = {
1915 .family = PF_SMC,
1916 .owner = THIS_MODULE,
1917 .release = smc_release,
1918 .bind = smc_bind,
1919 .connect = smc_connect,
1920 .socketpair = sock_no_socketpair,
1921 .accept = smc_accept,
1922 .getname = smc_getname,
a11e1d43 1923 .poll = smc_poll,
ac713874
UB
1924 .ioctl = smc_ioctl,
1925 .listen = smc_listen,
1926 .shutdown = smc_shutdown,
1927 .setsockopt = smc_setsockopt,
1928 .getsockopt = smc_getsockopt,
1929 .sendmsg = smc_sendmsg,
1930 .recvmsg = smc_recvmsg,
1931 .mmap = sock_no_mmap,
1932 .sendpage = smc_sendpage,
1933 .splice_read = smc_splice_read,
1934};
1935
1936static int smc_create(struct net *net, struct socket *sock, int protocol,
1937 int kern)
1938{
aaa4d33f 1939 int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
ac713874
UB
1940 struct smc_sock *smc;
1941 struct sock *sk;
1942 int rc;
1943
1944 rc = -ESOCKTNOSUPPORT;
1945 if (sock->type != SOCK_STREAM)
1946 goto out;
1947
1948 rc = -EPROTONOSUPPORT;
aaa4d33f 1949 if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
ac713874
UB
1950 goto out;
1951
1952 rc = -ENOBUFS;
1953 sock->ops = &smc_sock_ops;
aaa4d33f 1954 sk = smc_sock_alloc(net, sock, protocol);
ac713874
UB
1955 if (!sk)
1956 goto out;
1957
1958 /* create internal TCP socket for CLC handshake and fallback */
1959 smc = smc_sk(sk);
a046d57d 1960 smc->use_fallback = false; /* assume rdma capability first */
603cc149 1961 smc->fallback_rsn = 0;
aaa4d33f
KG
1962 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
1963 &smc->clcsock);
a5dcb73b 1964 if (rc) {
ac713874 1965 sk_common_release(sk);
a5dcb73b
DC
1966 goto out;
1967 }
cd6851f3
UB
1968 smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
1969 smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);
ac713874
UB
1970
1971out:
1972 return rc;
1973}
1974
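smc_create() above accepts only SOCK_STREAM and chooses the family of the internal CLC TCP socket from the protocol argument (SMCPROTO_SMC for IPv4, SMCPROTO_SMC6 for IPv6). A hedged userspace sketch (editor's illustration; the fallback #defines are assumptions mirroring the kernel values and should be taken from system headers when available):

/* Hypothetical example: open an SMC socket directly from userspace. */
#include <sys/socket.h>

#ifndef AF_SMC
#define AF_SMC		43	/* assumed: kernel value of AF_SMC */
#endif
#ifndef SMCPROTO_SMC
#define SMCPROTO_SMC	0	/* CLC handshake over an IPv4 TCP socket */
#define SMCPROTO_SMC6	1	/* CLC handshake over an IPv6 TCP socket */
#endif

static int open_smc_socket(int use_ipv6)
{
	return socket(AF_SMC, SOCK_STREAM,
		      use_ipv6 ? SMCPROTO_SMC6 : SMCPROTO_SMC);
}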
1975static const struct net_proto_family smc_sock_family_ops = {
1976 .family = PF_SMC,
1977 .owner = THIS_MODULE,
1978 .create = smc_create,
1979};
1980
64e28b52
HW
1981unsigned int smc_net_id;
1982
1983static __net_init int smc_net_init(struct net *net)
1984{
1985 return smc_pnet_net_init(net);
1986}
1987
1988static void __net_exit smc_net_exit(struct net *net)
1989{
1990 smc_pnet_net_exit(net);
1991}
1992
1993static struct pernet_operations smc_net_ops = {
1994 .init = smc_net_init,
1995 .exit = smc_net_exit,
1996 .id = &smc_net_id,
1997 .size = sizeof(struct smc_net),
1998};
1999
ac713874
UB
2000static int __init smc_init(void)
2001{
2002 int rc;
2003
64e28b52
HW
2004 rc = register_pernet_subsys(&smc_net_ops);
2005 if (rc)
2006 return rc;
2007
6812baab
TR
2008 rc = smc_pnet_init();
2009 if (rc)
2010 return rc;
2011
9bf9abea
UB
2012 rc = smc_llc_init();
2013 if (rc) {
2014 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
2015 goto out_pnet;
2016 }
2017
5f08318f
UB
2018 rc = smc_cdc_init();
2019 if (rc) {
2020 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
2021 goto out_pnet;
2022 }
2023
ac713874
UB
2024 rc = proto_register(&smc_proto, 1);
2025 if (rc) {
aaa4d33f 2026 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
6812baab 2027 goto out_pnet;
ac713874
UB
2028 }
2029
aaa4d33f
KG
2030 rc = proto_register(&smc_proto6, 1);
2031 if (rc) {
2032 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
2033 goto out_proto;
2034 }
2035
ac713874
UB
2036 rc = sock_register(&smc_sock_family_ops);
2037 if (rc) {
2038 pr_err("%s: sock_register fails with %d\n", __func__, rc);
aaa4d33f 2039 goto out_proto6;
ac713874 2040 }
f16a7dd5 2041 INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
aaa4d33f 2042 INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);
ac713874 2043
a4cf0443
UB
2044 rc = smc_ib_register_client();
2045 if (rc) {
2046 pr_err("%s: ib_register fails with %d\n", __func__, rc);
2047 goto out_sock;
2048 }
2049
c5c1cc9c 2050 static_branch_enable(&tcp_have_smc);
ac713874
UB
2051 return 0;
2052
a4cf0443
UB
2053out_sock:
2054 sock_unregister(PF_SMC);
aaa4d33f
KG
2055out_proto6:
2056 proto_unregister(&smc_proto6);
ac713874
UB
2057out_proto:
2058 proto_unregister(&smc_proto);
6812baab
TR
2059out_pnet:
2060 smc_pnet_exit();
ac713874
UB
2061 return rc;
2062}
2063
2064static void __exit smc_exit(void)
2065{
9fda3510 2066 smc_core_exit();
c5c1cc9c 2067 static_branch_disable(&tcp_have_smc);
a4cf0443 2068 smc_ib_unregister_client();
ac713874 2069 sock_unregister(PF_SMC);
aaa4d33f 2070 proto_unregister(&smc_proto6);
ac713874 2071 proto_unregister(&smc_proto);
6812baab 2072 smc_pnet_exit();
64e28b52 2073 unregister_pernet_subsys(&smc_net_ops);
ac713874
UB
2074}
2075
2076module_init(smc_init);
2077module_exit(smc_exit);
2078
2079MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
2080MODULE_DESCRIPTION("smc socket address family");
2081MODULE_LICENSE("GPL");
2082MODULE_ALIAS_NETPROTO(PF_SMC);