]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/smc/af_smc.c
smc: establish new socket family
[mirror_ubuntu-jammy-kernel.git] / net / smc / af_smc.c
CommitLineData
ac713874
UB
1/*
2 * Shared Memory Communications over RDMA (SMC-R) and RoCE
3 *
4 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
5 * applies to SOCK_STREAM sockets only
6 * offers an alternative communication option for TCP-protocol sockets
7 * applicable with RoCE-cards only
8 *
9 * Copyright IBM Corp. 2016
10 *
11 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
12 * based on prototype from Frank Blaschka
13 */
14
15#define KMSG_COMPONENT "smc"
16#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18#include <linux/module.h>
19#include <linux/socket.h>
20#include <net/sock.h>
21
22#include "smc.h"
23
24static void smc_set_keepalive(struct sock *sk, int val)
25{
26 struct smc_sock *smc = smc_sk(sk);
27
28 smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
29}
30
31static struct proto smc_proto = {
32 .name = "SMC",
33 .owner = THIS_MODULE,
34 .keepalive = smc_set_keepalive,
35 .obj_size = sizeof(struct smc_sock),
36 .slab_flags = SLAB_DESTROY_BY_RCU,
37};
38
39static int smc_release(struct socket *sock)
40{
41 struct sock *sk = sock->sk;
42 struct smc_sock *smc;
43
44 if (!sk)
45 goto out;
46
47 smc = smc_sk(sk);
48 lock_sock(sk);
49
50 sk->sk_state = SMC_CLOSED;
51 if (smc->clcsock) {
52 sock_release(smc->clcsock);
53 smc->clcsock = NULL;
54 }
55
56 /* detach socket */
57 sock_orphan(sk);
58 sock->sk = NULL;
59 release_sock(sk);
60
61 sock_put(sk);
62out:
63 return 0;
64}
65
66static void smc_destruct(struct sock *sk)
67{
68 if (sk->sk_state != SMC_CLOSED)
69 return;
70 if (!sock_flag(sk, SOCK_DEAD))
71 return;
72
73 sk_refcnt_debug_dec(sk);
74}
75
76static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
77{
78 struct smc_sock *smc;
79 struct sock *sk;
80
81 sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
82 if (!sk)
83 return NULL;
84
85 sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
86 sk->sk_state = SMC_INIT;
87 sk->sk_destruct = smc_destruct;
88 sk->sk_protocol = SMCPROTO_SMC;
89 sk_refcnt_debug_inc(sk);
90
91 smc = smc_sk(sk);
92
93 return sk;
94}
95
96static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
97 int addr_len)
98{
99 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
100 struct sock *sk = sock->sk;
101 struct smc_sock *smc;
102 int rc;
103
104 smc = smc_sk(sk);
105
106 /* replicate tests from inet_bind(), to be safe wrt. future changes */
107 rc = -EINVAL;
108 if (addr_len < sizeof(struct sockaddr_in))
109 goto out;
110
111 rc = -EAFNOSUPPORT;
112 /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
113 if ((addr->sin_family != AF_INET) &&
114 ((addr->sin_family != AF_UNSPEC) ||
115 (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
116 goto out;
117
118 lock_sock(sk);
119
120 /* Check if socket is already active */
121 rc = -EINVAL;
122 if (sk->sk_state != SMC_INIT)
123 goto out_rel;
124
125 smc->clcsock->sk->sk_reuse = sk->sk_reuse;
126 rc = kernel_bind(smc->clcsock, uaddr, addr_len);
127
128out_rel:
129 release_sock(sk);
130out:
131 return rc;
132}
133
134static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
135 unsigned long mask)
136{
137 /* options we don't get control via setsockopt for */
138 nsk->sk_type = osk->sk_type;
139 nsk->sk_sndbuf = osk->sk_sndbuf;
140 nsk->sk_rcvbuf = osk->sk_rcvbuf;
141 nsk->sk_sndtimeo = osk->sk_sndtimeo;
142 nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
143 nsk->sk_mark = osk->sk_mark;
144 nsk->sk_priority = osk->sk_priority;
145 nsk->sk_rcvlowat = osk->sk_rcvlowat;
146 nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
147 nsk->sk_err = osk->sk_err;
148
149 nsk->sk_flags &= ~mask;
150 nsk->sk_flags |= osk->sk_flags & mask;
151}
152
153#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
154 (1UL << SOCK_KEEPOPEN) | \
155 (1UL << SOCK_LINGER) | \
156 (1UL << SOCK_BROADCAST) | \
157 (1UL << SOCK_TIMESTAMP) | \
158 (1UL << SOCK_DBG) | \
159 (1UL << SOCK_RCVTSTAMP) | \
160 (1UL << SOCK_RCVTSTAMPNS) | \
161 (1UL << SOCK_LOCALROUTE) | \
162 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
163 (1UL << SOCK_RXQ_OVFL) | \
164 (1UL << SOCK_WIFI_STATUS) | \
165 (1UL << SOCK_NOFCS) | \
166 (1UL << SOCK_FILTER_LOCKED))
167/* copy only relevant settings and flags of SOL_SOCKET level from smc to
168 * clc socket (since smc is not called for these options from net/core)
169 */
170static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
171{
172 smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
173}
174
175#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
176 (1UL << SOCK_KEEPOPEN) | \
177 (1UL << SOCK_LINGER) | \
178 (1UL << SOCK_DBG))
179/* copy only settings and flags relevant for smc from clc to smc socket */
180static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
181{
182 smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
183}
184
185static int smc_connect(struct socket *sock, struct sockaddr *addr,
186 int alen, int flags)
187{
188 struct sock *sk = sock->sk;
189 struct smc_sock *smc;
190 int rc = -EINVAL;
191
192 smc = smc_sk(sk);
193
194 /* separate smc parameter checking to be safe */
195 if (alen < sizeof(addr->sa_family))
196 goto out_err;
197 if (addr->sa_family != AF_INET)
198 goto out_err;
199
200 lock_sock(sk);
201 switch (sk->sk_state) {
202 default:
203 goto out;
204 case SMC_ACTIVE:
205 rc = -EISCONN;
206 goto out;
207 case SMC_INIT:
208 rc = 0;
209 break;
210 }
211
212 smc_copy_sock_settings_to_clc(smc);
213 rc = kernel_connect(smc->clcsock, addr, alen, flags);
214 if (rc)
215 goto out;
216
217 sk->sk_state = SMC_ACTIVE;
218
219 /* always use TCP fallback as transport mechanism for now;
220 * This will change once RDMA transport is implemented
221 */
222 smc->use_fallback = true;
223
224out:
225 release_sock(sk);
226out_err:
227 return rc;
228}
229
230static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
231{
232 struct sock *sk = &lsmc->sk;
233 struct socket *new_clcsock;
234 struct sock *new_sk;
235 int rc;
236
237 new_sk = smc_sock_alloc(sock_net(sk), NULL);
238 if (!new_sk) {
239 rc = -ENOMEM;
240 lsmc->sk.sk_err = ENOMEM;
241 *new_smc = NULL;
242 goto out;
243 }
244 *new_smc = smc_sk(new_sk);
245
246 rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
247 if (rc) {
248 sock_put(new_sk);
249 *new_smc = NULL;
250 goto out;
251 }
252
253 (*new_smc)->clcsock = new_clcsock;
254out:
255 return rc;
256}
257
258static int smc_listen(struct socket *sock, int backlog)
259{
260 struct sock *sk = sock->sk;
261 struct smc_sock *smc;
262 int rc;
263
264 smc = smc_sk(sk);
265 lock_sock(sk);
266
267 rc = -EINVAL;
268 if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
269 goto out;
270
271 rc = 0;
272 if (sk->sk_state == SMC_LISTEN) {
273 sk->sk_max_ack_backlog = backlog;
274 goto out;
275 }
276 /* some socket options are handled in core, so we could not apply
277 * them to the clc socket -- copy smc socket options to clc socket
278 */
279 smc_copy_sock_settings_to_clc(smc);
280
281 rc = kernel_listen(smc->clcsock, backlog);
282 if (rc)
283 goto out;
284 sk->sk_max_ack_backlog = backlog;
285 sk->sk_ack_backlog = 0;
286 sk->sk_state = SMC_LISTEN;
287
288out:
289 release_sock(sk);
290 return rc;
291}
292
293static int smc_accept(struct socket *sock, struct socket *new_sock,
294 int flags)
295{
296 struct smc_sock *new_smc;
297 struct sock *sk = sock->sk;
298 struct smc_sock *lsmc;
299 int rc;
300
301 lsmc = smc_sk(sk);
302 lock_sock(sk);
303
304 if (lsmc->sk.sk_state != SMC_LISTEN) {
305 rc = -EINVAL;
306 goto out;
307 }
308
309 rc = smc_clcsock_accept(lsmc, &new_smc);
310 if (rc)
311 goto out;
312 sock_graft(&new_smc->sk, new_sock);
313 new_smc->sk.sk_state = SMC_ACTIVE;
314
315 smc_copy_sock_settings_to_smc(new_smc);
316
317 /* always use TCP fallback as transport mechanism for now;
318 * This will change once RDMA transport is implemented
319 */
320 new_smc->use_fallback = true;
321
322out:
323 release_sock(sk);
324 return rc;
325}
326
327static int smc_getname(struct socket *sock, struct sockaddr *addr,
328 int *len, int peer)
329{
330 struct smc_sock *smc;
331
332 if (peer && (sock->sk->sk_state != SMC_ACTIVE))
333 return -ENOTCONN;
334
335 smc = smc_sk(sock->sk);
336
337 return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
338}
339
340static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
341{
342 struct sock *sk = sock->sk;
343 struct smc_sock *smc;
344 int rc = -EPIPE;
345
346 smc = smc_sk(sk);
347 lock_sock(sk);
348 if (sk->sk_state != SMC_ACTIVE)
349 goto out;
350 if (smc->use_fallback)
351 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
352 else
353 rc = sock_no_sendmsg(sock, msg, len);
354out:
355 release_sock(sk);
356 return rc;
357}
358
359static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
360 int flags)
361{
362 struct sock *sk = sock->sk;
363 struct smc_sock *smc;
364 int rc = -ENOTCONN;
365
366 smc = smc_sk(sk);
367 lock_sock(sk);
368 if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
369 goto out;
370
371 if (smc->use_fallback)
372 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
373 else
374 rc = sock_no_recvmsg(sock, msg, len, flags);
375out:
376 release_sock(sk);
377 return rc;
378}
379
380static unsigned int smc_poll(struct file *file, struct socket *sock,
381 poll_table *wait)
382{
383 struct sock *sk = sock->sk;
384 unsigned int mask = 0;
385 struct smc_sock *smc;
386
387 smc = smc_sk(sock->sk);
388 if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) ||
389 smc->use_fallback) {
390 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
391 /* if non-blocking connect finished ... */
392 lock_sock(sk);
393 if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
394 sk->sk_state = SMC_ACTIVE;
395 /* always use TCP fallback as transport mechanism;
396 * This will change once RDMA transport is implemented
397 */
398 smc->use_fallback = true;
399 }
400 release_sock(sk);
401 } else {
402 mask = sock_no_poll(file, sock, wait);
403 }
404
405 return mask;
406}
407
408static int smc_shutdown(struct socket *sock, int how)
409{
410 struct sock *sk = sock->sk;
411 struct smc_sock *smc;
412 int rc = -EINVAL;
413
414 smc = smc_sk(sk);
415
416 if ((how < SHUT_RD) || (how > SHUT_RDWR))
417 goto out_err;
418
419 lock_sock(sk);
420
421 rc = -ENOTCONN;
422 if (sk->sk_state == SMC_CLOSED)
423 goto out;
424 if (smc->use_fallback) {
425 rc = kernel_sock_shutdown(smc->clcsock, how);
426 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
427 if (sk->sk_shutdown == SHUTDOWN_MASK)
428 sk->sk_state = SMC_CLOSED;
429 } else {
430 rc = sock_no_shutdown(sock, how);
431 }
432
433out:
434 release_sock(sk);
435
436out_err:
437 return rc;
438}
439
440static int smc_setsockopt(struct socket *sock, int level, int optname,
441 char __user *optval, unsigned int optlen)
442{
443 struct sock *sk = sock->sk;
444 struct smc_sock *smc;
445
446 smc = smc_sk(sk);
447
448 /* generic setsockopts reaching us here always apply to the
449 * CLC socket
450 */
451 return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
452 optval, optlen);
453}
454
455static int smc_getsockopt(struct socket *sock, int level, int optname,
456 char __user *optval, int __user *optlen)
457{
458 struct smc_sock *smc;
459
460 smc = smc_sk(sock->sk);
461 /* socket options apply to the CLC socket */
462 return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
463 optval, optlen);
464}
465
466static int smc_ioctl(struct socket *sock, unsigned int cmd,
467 unsigned long arg)
468{
469 struct smc_sock *smc;
470
471 smc = smc_sk(sock->sk);
472 if (smc->use_fallback)
473 return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
474 else
475 return sock_no_ioctl(sock, cmd, arg);
476}
477
478static ssize_t smc_sendpage(struct socket *sock, struct page *page,
479 int offset, size_t size, int flags)
480{
481 struct sock *sk = sock->sk;
482 struct smc_sock *smc;
483 int rc = -EPIPE;
484
485 smc = smc_sk(sk);
486 lock_sock(sk);
487 if (sk->sk_state != SMC_ACTIVE)
488 goto out;
489 if (smc->use_fallback)
490 rc = kernel_sendpage(smc->clcsock, page, offset,
491 size, flags);
492 else
493 rc = sock_no_sendpage(sock, page, offset, size, flags);
494
495out:
496 release_sock(sk);
497 return rc;
498}
499
500static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
501 struct pipe_inode_info *pipe, size_t len,
502 unsigned int flags)
503{
504 struct sock *sk = sock->sk;
505 struct smc_sock *smc;
506 int rc = -ENOTCONN;
507
508 smc = smc_sk(sk);
509 lock_sock(sk);
510 if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
511 goto out;
512 if (smc->use_fallback) {
513 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
514 pipe, len, flags);
515 } else {
516 rc = -EOPNOTSUPP;
517 }
518out:
519 release_sock(sk);
520 return rc;
521}
522
523/* must look like tcp */
524static const struct proto_ops smc_sock_ops = {
525 .family = PF_SMC,
526 .owner = THIS_MODULE,
527 .release = smc_release,
528 .bind = smc_bind,
529 .connect = smc_connect,
530 .socketpair = sock_no_socketpair,
531 .accept = smc_accept,
532 .getname = smc_getname,
533 .poll = smc_poll,
534 .ioctl = smc_ioctl,
535 .listen = smc_listen,
536 .shutdown = smc_shutdown,
537 .setsockopt = smc_setsockopt,
538 .getsockopt = smc_getsockopt,
539 .sendmsg = smc_sendmsg,
540 .recvmsg = smc_recvmsg,
541 .mmap = sock_no_mmap,
542 .sendpage = smc_sendpage,
543 .splice_read = smc_splice_read,
544};
545
546static int smc_create(struct net *net, struct socket *sock, int protocol,
547 int kern)
548{
549 struct smc_sock *smc;
550 struct sock *sk;
551 int rc;
552
553 rc = -ESOCKTNOSUPPORT;
554 if (sock->type != SOCK_STREAM)
555 goto out;
556
557 rc = -EPROTONOSUPPORT;
558 if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
559 goto out;
560
561 rc = -ENOBUFS;
562 sock->ops = &smc_sock_ops;
563 sk = smc_sock_alloc(net, sock);
564 if (!sk)
565 goto out;
566
567 /* create internal TCP socket for CLC handshake and fallback */
568 smc = smc_sk(sk);
569 rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
570 IPPROTO_TCP, &smc->clcsock);
571 if (rc)
572 sk_common_release(sk);
573
574out:
575 return rc;
576}
577
578static const struct net_proto_family smc_sock_family_ops = {
579 .family = PF_SMC,
580 .owner = THIS_MODULE,
581 .create = smc_create,
582};
583
584static int __init smc_init(void)
585{
586 int rc;
587
588 rc = proto_register(&smc_proto, 1);
589 if (rc) {
590 pr_err("%s: proto_register fails with %d\n", __func__, rc);
591 goto out;
592 }
593
594 rc = sock_register(&smc_sock_family_ops);
595 if (rc) {
596 pr_err("%s: sock_register fails with %d\n", __func__, rc);
597 goto out_proto;
598 }
599
600 return 0;
601
602out_proto:
603 proto_unregister(&smc_proto);
604out:
605 return rc;
606}
607
608static void __exit smc_exit(void)
609{
610 sock_unregister(PF_SMC);
611 proto_unregister(&smc_proto);
612}
613
614module_init(smc_init);
615module_exit(smc_exit);
616
617MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
618MODULE_DESCRIPTION("smc socket address family");
619MODULE_LICENSE("GPL");
620MODULE_ALIAS_NETPROTO(PF_SMC);