1 // SPDX-License-Identifier: GPL-2.0
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 only,
9 * as published by the Free Software Foundation.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License version 2 for more details (a copy is included
15 * in the LICENSE file that accompanied this code).
17 * You should have received a copy of the GNU General Public License
18 * version 2 along with this program; If not, see
19 * http://www.gnu.org/licenses/gpl-2.0.html
24 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Use is subject to license terms.
27 * Copyright (c) 2012, 2015, Intel Corporation.
30 * This file is part of Lustre, http://www.lustre.org/
31 * Lustre is a trademark of Seagate, Inc.
33 #define DEBUG_SUBSYSTEM S_LNET
37 #include <linux/net.h>
38 #include <linux/file.h>
39 #include <linux/pagemap.h>
40 /* For sys_open & sys_close */
41 #include <linux/syscalls.h>
44 #include <linux/libcfs/libcfs.h>
45 #include <linux/lnet/lib-lnet.h>
/*
 * kernel_sock_unlocked_ioctl() - call a file's unlocked_ioctl handler.
 * @filp: file whose f_op->unlocked_ioctl method is invoked
 * @cmd:  ioctl command, forwarded unchanged
 * @arg:  ioctl argument, forwarded unchanged
 *
 * The current address limit is read with get_fs() into oldfs before the
 * handler runs; the handler's result is stored in err.
 * NOTE(review): the statements that change/restore the address limit and
 * the return statement are not visible in this extract - confirm against
 * the full source.
 */
48 kernel_sock_unlocked_ioctl(struct file
*filp
, int cmd
, unsigned long arg
)
50 mm_segment_t oldfs
= get_fs();
54 err
= filp
->f_op
->unlocked_ioctl(filp
, cmd
, arg
);
/*
 * lnet_sock_ioctl() - run an ioctl against a temporary PF_INET stream socket.
 * @cmd: ioctl command
 * @arg: ioctl argument (callers in this file pass the address of a
 *       struct ifreq / struct ifconf cast to unsigned long)
 *
 * Creates a socket with sock_create(), wraps it in a struct file with
 * sock_alloc_file(), and forwards the request through
 * kernel_sock_unlocked_ioctl().  If sock_alloc_file() fails, its error is
 * returned via PTR_ERR().
 * NOTE(review): cleanup of the file/socket and the final return are not
 * visible in this extract.
 */
61 lnet_sock_ioctl(int cmd
, unsigned long arg
)
63 struct file
*sock_filp
;
67 rc
= sock_create(PF_INET
, SOCK_STREAM
, 0, &sock
);
69 CERROR("Can't create socket: %d\n", rc
);
/* The ioctl path below needs a struct file, so give the socket one. */
73 sock_filp
= sock_alloc_file(sock
, 0, NULL
);
74 if (IS_ERR(sock_filp
))
75 return PTR_ERR(sock_filp
);
77 rc
= kernel_sock_unlocked_ioctl(sock_filp
, cmd
, arg
);
/*
 * lnet_ipif_query() - query flags, IPv4 address and netmask of an interface.
 * @name: interface name; rejected (with a CERROR) when strnlen() shows it
 *        fills all IFNAMSIZ bytes
 * @up:   NOTE(review): presumably receives the IFF_UP state - the store is
 *        not visible in this extract
 * @ip:   NOTE(review): presumably receives the interface address - confirm
 * @mask: NOTE(review): presumably receives the interface netmask - confirm
 *
 * Performs three requests through lnet_sock_ioctl(), in this order:
 * SIOCGIFFLAGS, SIOCGIFADDR and SIOCGIFNETMASK.  Before each request the
 * name is re-checked against sizeof(ifr.ifr_name) - 1 and copied into
 * ifr.ifr_name with strncpy(); the length check is what keeps that copy
 * NUL-terminated.  A failure of any request is logged with CERROR; an
 * interface without IFF_UP is reported with CDEBUG.
 * NOTE(review): the return values and the handling of val (byte order,
 * destination) are not visible here.
 */
84 lnet_ipif_query(char *name
, int *up
, __u32
*ip
, __u32
*mask
)
91 nob
= strnlen(name
, IFNAMSIZ
);
92 if (nob
== IFNAMSIZ
) {
93 CERROR("Interface name %s too long\n", name
);
/* Compile-time guarantee that ifr_name can hold an IFNAMSIZ-byte name. */
97 BUILD_BUG_ON(sizeof(ifr
.ifr_name
) < IFNAMSIZ
);
99 if (strlen(name
) > sizeof(ifr
.ifr_name
) - 1)
101 strncpy(ifr
.ifr_name
, name
, sizeof(ifr
.ifr_name
));
/* Step 1: interface flags. */
103 rc
= lnet_sock_ioctl(SIOCGIFFLAGS
, (unsigned long)&ifr
);
105 CERROR("Can't get flags for interface %s\n", name
);
109 if (!(ifr
.ifr_flags
& IFF_UP
)) {
110 CDEBUG(D_NET
, "Interface %s down\n", name
);
117 if (strlen(name
) > sizeof(ifr
.ifr_name
) - 1)
119 strncpy(ifr
.ifr_name
, name
, sizeof(ifr
.ifr_name
));
/* Step 2: IPv4 address. */
121 ifr
.ifr_addr
.sa_family
= AF_INET
;
122 rc
= lnet_sock_ioctl(SIOCGIFADDR
, (unsigned long)&ifr
);
124 CERROR("Can't get IP address for interface %s\n", name
);
128 val
= ((struct sockaddr_in
*)&ifr
.ifr_addr
)->sin_addr
.s_addr
;
131 if (strlen(name
) > sizeof(ifr
.ifr_name
) - 1)
133 strncpy(ifr
.ifr_name
, name
, sizeof(ifr
.ifr_name
));
/* Step 3: IPv4 netmask. */
135 ifr
.ifr_addr
.sa_family
= AF_INET
;
136 rc
= lnet_sock_ioctl(SIOCGIFNETMASK
, (unsigned long)&ifr
);
138 CERROR("Can't get netmask for interface %s\n", name
);
142 val
= ((struct sockaddr_in
*)&ifr
.ifr_netmask
)->sin_addr
.s_addr
;
147 EXPORT_SYMBOL(lnet_ipif_query
);
/*
 * lnet_ipif_enumerate() - build an array of interface names.
 * @namesp: NOTE(review): presumably receives the allocated name array - the
 *          store is not visible in this extract
 *
 * Starts with room for 16 struct ifreq entries and asks for the interface
 * list with SIOCGIFCONF via lnet_sock_ioctl().  The request buffer is capped
 * at PAGE_SIZE bytes; when the cap is hit a CWARN is emitted.  The number of
 * entries found is ifc.ifc_len / sizeof(*ifr) and is asserted not to exceed
 * the number allocated.  When the buffer was not filled (nfound < nalloc) or
 * toobig is set, the visible condition ends the sizing step; otherwise the
 * buffer is freed.
 * NOTE(review): the enclosing loop and the growth of nalloc are not visible.
 *
 * Each name is then copied with memcpy() into its own IFNAMSIZ-byte
 * allocation; a name that fills all IFNAMSIZ bytes is reported with CERROR.
 * The ifreq buffer is released with LIBCFS_FREE, and
 * lnet_ipif_free_enumeration() is called on what is presumably the error
 * path (NOTE(review): branch structure not visible).
 */
150 lnet_ipif_enumerate(char ***namesp
)
152 /* Allocate and fill in 'names', returning # interfaces/error */
163 nalloc
= 16; /* first guess at max interfaces */
166 if (nalloc
* sizeof(*ifr
) > PAGE_SIZE
) {
168 nalloc
= PAGE_SIZE
/ sizeof(*ifr
);
169 CWARN("Too many interfaces: only enumerating first %d\n",
173 LIBCFS_ALLOC(ifr
, nalloc
* sizeof(*ifr
));
175 CERROR("ENOMEM enumerating up to %d interfaces\n",
181 ifc
.ifc_buf
= (char *)ifr
;
182 ifc
.ifc_len
= nalloc
* sizeof(*ifr
);
184 rc
= lnet_sock_ioctl(SIOCGIFCONF
, (unsigned long)&ifc
);
186 CERROR("Error %d enumerating interfaces\n", rc
);
192 nfound
= ifc
.ifc_len
/ sizeof(*ifr
);
193 LASSERT(nfound
<= nalloc
);
195 if (nfound
< nalloc
|| toobig
)
198 LIBCFS_FREE(ifr
, nalloc
* sizeof(*ifr
));
/* One char pointer per interface found. */
205 LIBCFS_ALLOC(names
, nfound
* sizeof(*names
));
211 for (i
= 0; i
< nfound
; i
++) {
212 nob
= strnlen(ifr
[i
].ifr_name
, IFNAMSIZ
);
213 if (nob
== IFNAMSIZ
) {
214 /* no space for terminating NULL */
215 CERROR("interface name %.*s too long (%d max)\n",
216 nob
, ifr
[i
].ifr_name
, IFNAMSIZ
);
/* Every name buffer is exactly IFNAMSIZ bytes; the free routine relies on it. */
221 LIBCFS_ALLOC(names
[i
], IFNAMSIZ
);
227 memcpy(names
[i
], ifr
[i
].ifr_name
, nob
);
236 lnet_ipif_free_enumeration(names
, nfound
);
238 LIBCFS_FREE(ifr
, nalloc
* sizeof(*ifr
));
242 EXPORT_SYMBOL(lnet_ipif_enumerate
);
/*
 * lnet_ipif_free_enumeration() - release a name array.
 * @names: array of n name pointers
 * @n:     number of slots in @names
 *
 * Frees each name as an IFNAMSIZ-byte buffer, stopping at the first NULL
 * slot or after n entries, then frees the pointer array itself
 * (n * sizeof(*names) bytes).
 */
245 lnet_ipif_free_enumeration(char **names
, int n
)
251 for (i
= 0; i
< n
&& names
[i
]; i
++)
252 LIBCFS_FREE(names
[i
], IFNAMSIZ
);
254 LIBCFS_FREE(names
, n
* sizeof(*names
));
256 EXPORT_SYMBOL(lnet_ipif_free_enumeration
);
/*
 * lnet_sock_write() - send nob bytes from buffer on a socket with a deadline.
 * @sock:    socket to send on
 * @buffer:  data to send
 * @nob:     number of bytes to send
 * @timeout: multiplied by the number of jiffies in one second
 *           (msecs_to_jiffies(MSEC_PER_SEC)) to form the time budget, i.e.
 *           treated as seconds; zero selects a non-blocking send
 *
 * A single kvec covering the whole buffer is attached to msg.msg_iter.
 * With a zero timeout MSG_DONTWAIT is set in msg.msg_flags.  The remaining
 * budget is converted to a timeval and installed as SO_SNDTIMEO, the data
 * is sent with kernel_sendmsg(), and the jiffies elapsed since "then" are
 * subtracted from the budget.  A zero result from the send is logged and
 * reported as -ECONNABORTED.  The function then checks msg_data_left() and
 * whether the budget is exhausted.
 * NOTE(review): the loop structure, the branch guarding the SO_SNDTIMEO
 * setup, and the values returned on completion/timeout are not visible in
 * this extract.
 */
259 lnet_sock_write(struct socket
*sock
, void *buffer
, int nob
, int timeout
)
262 long jiffies_left
= timeout
* msecs_to_jiffies(MSEC_PER_SEC
);
265 struct kvec iov
= { .iov_base
= buffer
, .iov_len
= nob
};
266 struct msghdr msg
= {NULL
,};
270 * Caller may pass a zero timeout if she thinks the socket buffer is
271 * empty enough to take the whole message immediately
273 iov_iter_kvec(&msg
.msg_iter
, WRITE
| ITER_KVEC
, &iov
, 1, nob
);
275 msg
.msg_flags
= !timeout
? MSG_DONTWAIT
: 0;
277 /* Set send timeout to remaining time */
278 jiffies_to_timeval(jiffies_left
, &tv
);
279 rc
= kernel_setsockopt(sock
, SOL_SOCKET
, SO_SNDTIMEO
,
280 (char *)&tv
, sizeof(tv
));
282 CERROR("Can't set socket send timeout %ld.%06d: %d\n",
283 (long)tv
.tv_sec
, (int)tv
.tv_usec
, rc
);
289 rc
= kernel_sendmsg(sock
, &msg
, &iov
, 1, nob
);
/* Charge the time spent in the send against the remaining budget. */
290 jiffies_left
-= jiffies
- then
;
296 CERROR("Unexpected zero rc\n");
297 return -ECONNABORTED
;
300 if (!msg_data_left(&msg
))
303 if (jiffies_left
<= 0)
308 EXPORT_SYMBOL(lnet_sock_write
);
/*
 * lnet_sock_read() - receive into buffer from a socket with a deadline.
 * @sock:    socket to receive from
 * @buffer:  destination; advanced by the byte count after each receive
 * @nob:     number of bytes requested
 * @timeout: multiplied by the number of jiffies in one second
 *           (msecs_to_jiffies(MSEC_PER_SEC)) to form the time budget, i.e.
 *           treated as seconds; the resulting budget is asserted to be
 *           positive
 *
 * The remaining budget is converted to a timeval and installed as
 * SO_RCVTIMEO, data is received with kernel_recvmsg() (flags 0), and the
 * jiffies elapsed since "then" are subtracted from the budget, which is
 * then checked for exhaustion.
 * NOTE(review): the loop structure, the iov setup, the update of nob and
 * the return values are not visible in this extract.
 */
311 lnet_sock_read(struct socket
*sock
, void *buffer
, int nob
, int timeout
)
314 long jiffies_left
= timeout
* msecs_to_jiffies(MSEC_PER_SEC
);
319 LASSERT(jiffies_left
> 0);
326 struct msghdr msg
= {
330 /* Set receive timeout to remaining time */
331 jiffies_to_timeval(jiffies_left
, &tv
);
332 rc
= kernel_setsockopt(sock
, SOL_SOCKET
, SO_RCVTIMEO
,
333 (char *)&tv
, sizeof(tv
));
335 CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
336 (long)tv
.tv_sec
, (int)tv
.tv_usec
, rc
);
341 rc
= kernel_recvmsg(sock
, &msg
, &iov
, 1, nob
, 0);
/* Charge the time spent in the receive against the remaining budget. */
342 jiffies_left
-= jiffies
- then
;
/* Step past the bytes just received. */
350 buffer
= ((char *)buffer
) + rc
;
356 if (jiffies_left
<= 0)
360 EXPORT_SYMBOL(lnet_sock_read
);
/*
 * lnet_sock_create() - create a PF_INET stream socket, optionally bound.
 * @sockp:    NOTE(review): presumably receives the new socket - the store
 *            is not visible in this extract
 * @fatal:    per the comment in the body, every error is fatal except a
 *            bind failure caused by the port being in use
 * @local_ip: local address, converted with htonl() for the bind
 * (the rest of the parameter list is cut off in this extract; local_port is
 * used in the body and converted with htons())
 *
 * Creates the socket with sock_create(), sets SO_REUSEADDR, and, when
 * local_ip or local_port is non-zero, binds it with kernel_bind().  The
 * bind address is either INADDR_ANY or local_ip.
 * NOTE(review): the condition selecting between the two is not visible.
 * -EADDRINUSE from the bind is reported with CDEBUG, other bind errors with
 * CERROR.
 */
363 lnet_sock_create(struct socket
**sockp
, int *fatal
, __u32 local_ip
,
366 struct sockaddr_in locaddr
;
371 /* All errors are fatal except bind failure if the port is in use */
374 rc
= sock_create(PF_INET
, SOCK_STREAM
, 0, &sock
);
377 CERROR("Can't create socket: %d\n", rc
);
382 rc
= kernel_setsockopt(sock
, SOL_SOCKET
, SO_REUSEADDR
,
383 (char *)&option
, sizeof(option
));
385 CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc
);
/* Bind only when the caller asked for a specific local address or port. */
389 if (local_ip
|| local_port
) {
390 memset(&locaddr
, 0, sizeof(locaddr
));
391 locaddr
.sin_family
= AF_INET
;
392 locaddr
.sin_port
= htons(local_port
);
394 locaddr
.sin_addr
.s_addr
= htonl(INADDR_ANY
);
396 locaddr
.sin_addr
.s_addr
= htonl(local_ip
);
398 rc
= kernel_bind(sock
, (struct sockaddr
*)&locaddr
,
400 if (rc
== -EADDRINUSE
) {
401 CDEBUG(D_NET
, "Port %d already in use\n", local_port
);
406 CERROR("Error trying to bind to port %d: %d\n",
/*
 * lnet_sock_setbuf() - set a socket's send and receive buffer sizes.
 * @sock:      socket to configure
 * @txbufsize: NOTE(review): presumably the value applied via SO_SNDBUF - the
 *             assignment to option is not visible in this extract
 * @rxbufsize: NOTE(review): presumably the value applied via SO_RCVBUF -
 *             confirm
 *
 * Issues kernel_setsockopt() for SO_SNDBUF and then SO_RCVBUF, each with
 * the local variable option, and logs a CERROR when a call fails.
 * NOTE(review): any guards around the two calls and the return values are
 * not visible.
 */
419 lnet_sock_setbuf(struct socket
*sock
, int txbufsize
, int rxbufsize
)
426 rc
= kernel_setsockopt(sock
, SOL_SOCKET
, SO_SNDBUF
,
427 (char *)&option
, sizeof(option
));
429 CERROR("Can't set send buffer %d: %d\n",
437 rc
= kernel_setsockopt(sock
, SOL_SOCKET
, SO_RCVBUF
,
438 (char *)&option
, sizeof(option
));
440 CERROR("Can't set receive buffer %d: %d\n",
447 EXPORT_SYMBOL(lnet_sock_setbuf
);
/*
 * lnet_sock_getaddr() - fetch the peer or local IPv4 address/port of a socket.
 * @sock:   socket to query
 * @remote: selects the "peer" or "local" wording of the error message;
 *          NOTE(review): presumably also selects kernel_getpeername() vs
 *          kernel_getsockname() - the branch is not visible in this extract
 * @ip:     receives the address in host byte order (ntohl)
 * @port:   receives the port in host byte order (ntohs)
 *
 * NOTE(review): whether @ip / @port may be NULL, and the return values, are
 * not visible here.
 */
450 lnet_sock_getaddr(struct socket
*sock
, bool remote
, __u32
*ip
, int *port
)
452 struct sockaddr_in sin
;
453 int len
= sizeof(sin
);
457 rc
= kernel_getpeername(sock
, (struct sockaddr
*)&sin
, &len
);
459 rc
= kernel_getsockname(sock
, (struct sockaddr
*)&sin
, &len
);
461 CERROR("Error %d getting sock %s IP/port\n",
462 rc
, remote
? "peer" : "local");
467 *ip
= ntohl(sin
.sin_addr
.s_addr
);
470 *port
= ntohs(sin
.sin_port
);
474 EXPORT_SYMBOL(lnet_sock_getaddr
);
/*
 * lnet_sock_getbuf() - read a socket's current buffer sizes.
 * @sock:      socket to inspect
 * @txbufsize: receives sock->sk->sk_sndbuf
 * @rxbufsize: receives sock->sk->sk_rcvbuf
 *
 * NOTE(review): any NULL checks on the output pointers and the return value
 * are not visible in this extract.
 */
477 lnet_sock_getbuf(struct socket
*sock
, int *txbufsize
, int *rxbufsize
)
480 *txbufsize
= sock
->sk
->sk_sndbuf
;
483 *rxbufsize
= sock
->sk
->sk_rcvbuf
;
487 EXPORT_SYMBOL(lnet_sock_getbuf
);
/*
 * lnet_sock_listen() - create a bound socket and put it into listening state.
 * @sockp:      passed to lnet_sock_create(); the socket it refers to is then
 *              given to kernel_listen()
 * @local_ip:   local address passed to lnet_sock_create()
 * @local_port: local port passed to lnet_sock_create()
 * (the rest of the parameter list is cut off in this extract; backlog is
 * used in the body as the kernel_listen() backlog)
 *
 * A failure to create the socket because the port is in use is logged with
 * CERROR.  When kernel_listen() fails, the error is logged and the socket is
 * released with sock_release().
 * NOTE(review): the conditions around these calls and the return values are
 * not visible here.
 */
490 lnet_sock_listen(struct socket
**sockp
, __u32 local_ip
, int local_port
,
496 rc
= lnet_sock_create(sockp
, &fatal
, local_ip
, local_port
);
499 CERROR("Can't create socket: port %d already in use\n",
504 rc
= kernel_listen(*sockp
, backlog
);
508 CERROR("Can't set listen backlog %d: %d\n", backlog
, rc
);
509 sock_release(*sockp
);
/*
 * lnet_sock_accept() - accept one connection on a listening socket.
 * @newsockp: NOTE(review): presumably receives the accepted socket - the
 *            store is not visible in this extract
 * @sock:     listening socket
 *
 * Allocates a bare socket with sock_create_lite() (family PF_PACKET, the
 * listener's type, IPPROTO_TCP) and gives it the listener's ops table, then
 * calls sock->ops->accept() with O_NONBLOCK.  When nothing is ready the
 * task queues itself on the listener's wait queue in TASK_INTERRUPTIBLE
 * state, and after being removed from the queue retries the accept.
 * NOTE(review): the condition that triggers the wait, the scheduling call
 * between add/remove, and the error path ending in sock_release(newsock)
 * are only partly visible here.
 */
514 lnet_sock_accept(struct socket
**newsockp
, struct socket
*sock
)
516 wait_queue_entry_t wait
;
517 struct socket
*newsock
;
521 * XXX this should add a ref to sock->ops->owner, if
522 * TCP could be a module
524 rc
= sock_create_lite(PF_PACKET
, sock
->type
, IPPROTO_TCP
, &newsock
);
526 CERROR("Can't allocate socket\n");
/* The new socket shares the listener's protocol operations. */
530 newsock
->ops
= sock
->ops
;
532 rc
= sock
->ops
->accept(sock
, newsock
, O_NONBLOCK
, false);
534 /* Nothing ready, so wait for activity */
535 init_waitqueue_entry(&wait
, current
);
536 add_wait_queue(sk_sleep(sock
->sk
), &wait
);
537 set_current_state(TASK_INTERRUPTIBLE
);
539 remove_wait_queue(sk_sleep(sock
->sk
), &wait
);
540 rc
= sock
->ops
->accept(sock
, newsock
, O_NONBLOCK
, false);
550 sock_release(newsock
);
/*
 * lnet_sock_connect() - create a socket and connect it to an IPv4 peer.
 * @sockp:      passed to lnet_sock_create(); the socket it refers to is then
 *              connected
 * @fatal:      passed to lnet_sock_create(); on the visible connect-failure
 *              path it is set to 0 when rc is -EADDRNOTAVAIL and to 1 for
 *              any other rc
 * @local_ip:   local address passed to lnet_sock_create()
 * @local_port: local port passed to lnet_sock_create()
 * @peer_ip:    peer address, converted with htonl()
 * @peer_port:  peer port, converted with htons()
 *
 * The failure is logged with CDEBUG_LIMIT at D_NETERROR when fatal and at
 * D_NET otherwise, and the socket is released with sock_release().
 * NOTE(review): the checks on rc after lnet_sock_create()/kernel_connect()
 * and the return statements are not visible in this extract.
 */
555 lnet_sock_connect(struct socket
**sockp
, int *fatal
, __u32 local_ip
,
556 int local_port
, __u32 peer_ip
, int peer_port
)
558 struct sockaddr_in srvaddr
;
561 rc
= lnet_sock_create(sockp
, fatal
, local_ip
, local_port
);
565 memset(&srvaddr
, 0, sizeof(srvaddr
));
566 srvaddr
.sin_family
= AF_INET
;
567 srvaddr
.sin_port
= htons(peer_port
);
568 srvaddr
.sin_addr
.s_addr
= htonl(peer_ip
);
570 rc
= kernel_connect(*sockp
, (struct sockaddr
*)&srvaddr
,
576 * EADDRNOTAVAIL probably means we're already connected to the same
577 * peer/port on the same local port on a differently typed
578 * connection. Let our caller retry with a different local
581 *fatal
= !(rc
== -EADDRNOTAVAIL
);
583 CDEBUG_LIMIT(*fatal
? D_NETERROR
: D_NET
,
584 "Error %d connecting %pI4h/%d -> %pI4h/%d\n", rc
,
585 &local_ip
, local_port
, &peer_ip
, peer_port
);
587 sock_release(*sockp
);