4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/socklnd/socklnd.c
38 * Author: Zach Brown <zab@zabbo.net>
39 * Author: Peter J. Braam <braam@clusterfs.com>
40 * Author: Phil Schwan <phil@clusterfs.com>
41 * Author: Eric Barton <eric@bartonsoftware.com>
47 ksock_nal_data_t ksocknal_data
;
50 ksocknal_ip2iface(lnet_ni_t
*ni
, __u32 ip
)
52 ksock_net_t
*net
= ni
->ni_data
;
54 ksock_interface_t
*iface
;
56 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
57 LASSERT(i
< LNET_MAX_INTERFACES
);
58 iface
= &net
->ksnn_interfaces
[i
];
60 if (iface
->ksni_ipaddr
== ip
)
68 ksocknal_create_route (__u32 ipaddr
, int port
)
72 LIBCFS_ALLOC (route
, sizeof (*route
));
76 atomic_set (&route
->ksnr_refcount
, 1);
77 route
->ksnr_peer
= NULL
;
78 route
->ksnr_retry_interval
= 0; /* OK to connect at any time */
79 route
->ksnr_ipaddr
= ipaddr
;
80 route
->ksnr_port
= port
;
81 route
->ksnr_scheduled
= 0;
82 route
->ksnr_connecting
= 0;
83 route
->ksnr_connected
= 0;
84 route
->ksnr_deleted
= 0;
85 route
->ksnr_conn_count
= 0;
86 route
->ksnr_share_count
= 0;
92 ksocknal_destroy_route (ksock_route_t
*route
)
94 LASSERT (atomic_read(&route
->ksnr_refcount
) == 0);
96 if (route
->ksnr_peer
!= NULL
)
97 ksocknal_peer_decref(route
->ksnr_peer
);
99 LIBCFS_FREE (route
, sizeof (*route
));
103 ksocknal_create_peer (ksock_peer_t
**peerp
, lnet_ni_t
*ni
, lnet_process_id_t id
)
105 ksock_net_t
*net
= ni
->ni_data
;
108 LASSERT (id
.nid
!= LNET_NID_ANY
);
109 LASSERT (id
.pid
!= LNET_PID_ANY
);
110 LASSERT (!in_interrupt());
112 LIBCFS_ALLOC (peer
, sizeof (*peer
));
116 memset (peer
, 0, sizeof (*peer
)); /* NULL pointers/clear flags etc */
120 atomic_set (&peer
->ksnp_refcount
, 1); /* 1 ref for caller */
121 peer
->ksnp_closing
= 0;
122 peer
->ksnp_accepting
= 0;
123 peer
->ksnp_proto
= NULL
;
124 peer
->ksnp_last_alive
= 0;
125 peer
->ksnp_zc_next_cookie
= SOCKNAL_KEEPALIVE_PING
+ 1;
127 INIT_LIST_HEAD (&peer
->ksnp_conns
);
128 INIT_LIST_HEAD (&peer
->ksnp_routes
);
129 INIT_LIST_HEAD (&peer
->ksnp_tx_queue
);
130 INIT_LIST_HEAD (&peer
->ksnp_zc_req_list
);
131 spin_lock_init(&peer
->ksnp_lock
);
133 spin_lock_bh(&net
->ksnn_lock
);
135 if (net
->ksnn_shutdown
) {
136 spin_unlock_bh(&net
->ksnn_lock
);
138 LIBCFS_FREE(peer
, sizeof(*peer
));
139 CERROR("Can't create peer: network shutdown\n");
145 spin_unlock_bh(&net
->ksnn_lock
);
152 ksocknal_destroy_peer (ksock_peer_t
*peer
)
154 ksock_net_t
*net
= peer
->ksnp_ni
->ni_data
;
156 CDEBUG (D_NET
, "peer %s %p deleted\n",
157 libcfs_id2str(peer
->ksnp_id
), peer
);
159 LASSERT (atomic_read (&peer
->ksnp_refcount
) == 0);
160 LASSERT (peer
->ksnp_accepting
== 0);
161 LASSERT (list_empty (&peer
->ksnp_conns
));
162 LASSERT (list_empty (&peer
->ksnp_routes
));
163 LASSERT (list_empty (&peer
->ksnp_tx_queue
));
164 LASSERT (list_empty (&peer
->ksnp_zc_req_list
));
166 LIBCFS_FREE (peer
, sizeof (*peer
));
168 /* NB a peer's connections and routes keep a reference on their peer
169 * until they are destroyed, so we can be assured that _all_ state to
170 * do with this peer has been cleaned up when its refcount drops to
172 spin_lock_bh(&net
->ksnn_lock
);
174 spin_unlock_bh(&net
->ksnn_lock
);
178 ksocknal_find_peer_locked (lnet_ni_t
*ni
, lnet_process_id_t id
)
180 struct list_head
*peer_list
= ksocknal_nid2peerlist(id
.nid
);
181 struct list_head
*tmp
;
184 list_for_each (tmp
, peer_list
) {
186 peer
= list_entry (tmp
, ksock_peer_t
, ksnp_list
);
188 LASSERT (!peer
->ksnp_closing
);
190 if (peer
->ksnp_ni
!= ni
)
193 if (peer
->ksnp_id
.nid
!= id
.nid
||
194 peer
->ksnp_id
.pid
!= id
.pid
)
197 CDEBUG(D_NET
, "got peer [%p] -> %s (%d)\n",
198 peer
, libcfs_id2str(id
),
199 atomic_read(&peer
->ksnp_refcount
));
206 ksocknal_find_peer (lnet_ni_t
*ni
, lnet_process_id_t id
)
210 read_lock(&ksocknal_data
.ksnd_global_lock
);
211 peer
= ksocknal_find_peer_locked(ni
, id
);
212 if (peer
!= NULL
) /* +1 ref for caller? */
213 ksocknal_peer_addref(peer
);
214 read_unlock(&ksocknal_data
.ksnd_global_lock
);
220 ksocknal_unlink_peer_locked (ksock_peer_t
*peer
)
224 ksock_interface_t
*iface
;
226 for (i
= 0; i
< peer
->ksnp_n_passive_ips
; i
++) {
227 LASSERT (i
< LNET_MAX_INTERFACES
);
228 ip
= peer
->ksnp_passive_ips
[i
];
230 iface
= ksocknal_ip2iface(peer
->ksnp_ni
, ip
);
231 /* All IPs in peer->ksnp_passive_ips[] come from the
232 * interface list, therefore the call must succeed. */
233 LASSERT (iface
!= NULL
);
235 CDEBUG(D_NET
, "peer=%p iface=%p ksni_nroutes=%d\n",
236 peer
, iface
, iface
->ksni_nroutes
);
237 iface
->ksni_npeers
--;
240 LASSERT (list_empty(&peer
->ksnp_conns
));
241 LASSERT (list_empty(&peer
->ksnp_routes
));
242 LASSERT (!peer
->ksnp_closing
);
243 peer
->ksnp_closing
= 1;
244 list_del (&peer
->ksnp_list
);
245 /* lose peerlist's ref */
246 ksocknal_peer_decref(peer
);
250 ksocknal_get_peer_info (lnet_ni_t
*ni
, int index
,
251 lnet_process_id_t
*id
, __u32
*myip
, __u32
*peer_ip
,
252 int *port
, int *conn_count
, int *share_count
)
255 struct list_head
*ptmp
;
256 ksock_route_t
*route
;
257 struct list_head
*rtmp
;
262 read_lock(&ksocknal_data
.ksnd_global_lock
);
264 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
266 list_for_each (ptmp
, &ksocknal_data
.ksnd_peers
[i
]) {
267 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
269 if (peer
->ksnp_ni
!= ni
)
272 if (peer
->ksnp_n_passive_ips
== 0 &&
273 list_empty(&peer
->ksnp_routes
)) {
287 for (j
= 0; j
< peer
->ksnp_n_passive_ips
; j
++) {
292 *myip
= peer
->ksnp_passive_ips
[j
];
301 list_for_each (rtmp
, &peer
->ksnp_routes
) {
305 route
= list_entry(rtmp
, ksock_route_t
,
309 *myip
= route
->ksnr_myipaddr
;
310 *peer_ip
= route
->ksnr_ipaddr
;
311 *port
= route
->ksnr_port
;
312 *conn_count
= route
->ksnr_conn_count
;
313 *share_count
= route
->ksnr_share_count
;
320 read_unlock(&ksocknal_data
.ksnd_global_lock
);
325 ksocknal_associate_route_conn_locked(ksock_route_t
*route
, ksock_conn_t
*conn
)
327 ksock_peer_t
*peer
= route
->ksnr_peer
;
328 int type
= conn
->ksnc_type
;
329 ksock_interface_t
*iface
;
331 conn
->ksnc_route
= route
;
332 ksocknal_route_addref(route
);
334 if (route
->ksnr_myipaddr
!= conn
->ksnc_myipaddr
) {
335 if (route
->ksnr_myipaddr
== 0) {
336 /* route wasn't bound locally yet (the initial route) */
337 CDEBUG(D_NET
, "Binding %s %pI4h to %pI4h\n",
338 libcfs_id2str(peer
->ksnp_id
),
340 &conn
->ksnc_myipaddr
);
342 CDEBUG(D_NET
, "Rebinding %s %pI4h from "
344 libcfs_id2str(peer
->ksnp_id
),
346 &route
->ksnr_myipaddr
,
347 &conn
->ksnc_myipaddr
);
349 iface
= ksocknal_ip2iface(route
->ksnr_peer
->ksnp_ni
,
350 route
->ksnr_myipaddr
);
352 iface
->ksni_nroutes
--;
354 route
->ksnr_myipaddr
= conn
->ksnc_myipaddr
;
355 iface
= ksocknal_ip2iface(route
->ksnr_peer
->ksnp_ni
,
356 route
->ksnr_myipaddr
);
358 iface
->ksni_nroutes
++;
361 route
->ksnr_connected
|= (1<<type
);
362 route
->ksnr_conn_count
++;
364 /* Successful connection => further attempts can
365 * proceed immediately */
366 route
->ksnr_retry_interval
= 0;
370 ksocknal_add_route_locked (ksock_peer_t
*peer
, ksock_route_t
*route
)
372 struct list_head
*tmp
;
374 ksock_route_t
*route2
;
376 LASSERT (!peer
->ksnp_closing
);
377 LASSERT (route
->ksnr_peer
== NULL
);
378 LASSERT (!route
->ksnr_scheduled
);
379 LASSERT (!route
->ksnr_connecting
);
380 LASSERT (route
->ksnr_connected
== 0);
382 /* LASSERT(unique) */
383 list_for_each(tmp
, &peer
->ksnp_routes
) {
384 route2
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
386 if (route2
->ksnr_ipaddr
== route
->ksnr_ipaddr
) {
387 CERROR("Duplicate route %s %pI4h\n",
388 libcfs_id2str(peer
->ksnp_id
),
389 &route
->ksnr_ipaddr
);
394 route
->ksnr_peer
= peer
;
395 ksocknal_peer_addref(peer
);
396 /* peer's routelist takes over my ref on 'route' */
397 list_add_tail(&route
->ksnr_list
, &peer
->ksnp_routes
);
399 list_for_each(tmp
, &peer
->ksnp_conns
) {
400 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
402 if (conn
->ksnc_ipaddr
!= route
->ksnr_ipaddr
)
405 ksocknal_associate_route_conn_locked(route
, conn
);
406 /* keep going (typed routes) */
411 ksocknal_del_route_locked (ksock_route_t
*route
)
413 ksock_peer_t
*peer
= route
->ksnr_peer
;
414 ksock_interface_t
*iface
;
416 struct list_head
*ctmp
;
417 struct list_head
*cnxt
;
419 LASSERT (!route
->ksnr_deleted
);
421 /* Close associated conns */
422 list_for_each_safe (ctmp
, cnxt
, &peer
->ksnp_conns
) {
423 conn
= list_entry(ctmp
, ksock_conn_t
, ksnc_list
);
425 if (conn
->ksnc_route
!= route
)
428 ksocknal_close_conn_locked (conn
, 0);
431 if (route
->ksnr_myipaddr
!= 0) {
432 iface
= ksocknal_ip2iface(route
->ksnr_peer
->ksnp_ni
,
433 route
->ksnr_myipaddr
);
435 iface
->ksni_nroutes
--;
438 route
->ksnr_deleted
= 1;
439 list_del (&route
->ksnr_list
);
440 ksocknal_route_decref(route
); /* drop peer's ref */
442 if (list_empty (&peer
->ksnp_routes
) &&
443 list_empty (&peer
->ksnp_conns
)) {
444 /* I've just removed the last route to a peer with no active
446 ksocknal_unlink_peer_locked (peer
);
451 ksocknal_add_peer (lnet_ni_t
*ni
, lnet_process_id_t id
, __u32 ipaddr
, int port
)
453 struct list_head
*tmp
;
456 ksock_route_t
*route
;
457 ksock_route_t
*route2
;
460 if (id
.nid
== LNET_NID_ANY
||
461 id
.pid
== LNET_PID_ANY
)
464 /* Have a brand new peer ready... */
465 rc
= ksocknal_create_peer(&peer
, ni
, id
);
469 route
= ksocknal_create_route (ipaddr
, port
);
471 ksocknal_peer_decref(peer
);
475 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
477 /* always called with a ref on ni, so shutdown can't have started */
478 LASSERT (((ksock_net_t
*) ni
->ni_data
)->ksnn_shutdown
== 0);
480 peer2
= ksocknal_find_peer_locked (ni
, id
);
482 ksocknal_peer_decref(peer
);
485 /* peer table takes my ref on peer */
486 list_add_tail (&peer
->ksnp_list
,
487 ksocknal_nid2peerlist (id
.nid
));
491 list_for_each (tmp
, &peer
->ksnp_routes
) {
492 route2
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
494 if (route2
->ksnr_ipaddr
== ipaddr
)
499 if (route2
== NULL
) {
500 ksocknal_add_route_locked(peer
, route
);
501 route
->ksnr_share_count
++;
503 ksocknal_route_decref(route
);
504 route2
->ksnr_share_count
++;
507 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
513 ksocknal_del_peer_locked (ksock_peer_t
*peer
, __u32 ip
)
516 ksock_route_t
*route
;
517 struct list_head
*tmp
;
518 struct list_head
*nxt
;
521 LASSERT (!peer
->ksnp_closing
);
523 /* Extra ref prevents peer disappearing until I'm done with it */
524 ksocknal_peer_addref(peer
);
526 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_routes
) {
527 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
530 if (!(ip
== 0 || route
->ksnr_ipaddr
== ip
))
533 route
->ksnr_share_count
= 0;
534 /* This deletes associated conns too */
535 ksocknal_del_route_locked (route
);
539 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_routes
) {
540 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
541 nshared
+= route
->ksnr_share_count
;
545 /* remove everything else if there are no explicit entries
548 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_routes
) {
549 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
551 /* we should only be removing auto-entries */
552 LASSERT(route
->ksnr_share_count
== 0);
553 ksocknal_del_route_locked (route
);
556 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_conns
) {
557 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
559 ksocknal_close_conn_locked(conn
, 0);
563 ksocknal_peer_decref(peer
);
564 /* NB peer unlinks itself when last conn/route is removed */
568 ksocknal_del_peer (lnet_ni_t
*ni
, lnet_process_id_t id
, __u32 ip
)
571 struct list_head
*ptmp
;
572 struct list_head
*pnxt
;
579 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
581 if (id
.nid
!= LNET_NID_ANY
)
582 lo
= hi
= (int)(ksocknal_nid2peerlist(id
.nid
) - ksocknal_data
.ksnd_peers
);
585 hi
= ksocknal_data
.ksnd_peer_hash_size
- 1;
588 for (i
= lo
; i
<= hi
; i
++) {
589 list_for_each_safe (ptmp
, pnxt
,
590 &ksocknal_data
.ksnd_peers
[i
]) {
591 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
593 if (peer
->ksnp_ni
!= ni
)
596 if (!((id
.nid
== LNET_NID_ANY
|| peer
->ksnp_id
.nid
== id
.nid
) &&
597 (id
.pid
== LNET_PID_ANY
|| peer
->ksnp_id
.pid
== id
.pid
)))
600 ksocknal_peer_addref(peer
); /* a ref for me... */
602 ksocknal_del_peer_locked (peer
, ip
);
604 if (peer
->ksnp_closing
&&
605 !list_empty(&peer
->ksnp_tx_queue
)) {
606 LASSERT (list_empty(&peer
->ksnp_conns
));
607 LASSERT (list_empty(&peer
->ksnp_routes
));
609 list_splice_init(&peer
->ksnp_tx_queue
,
613 ksocknal_peer_decref(peer
); /* ...till here */
615 rc
= 0; /* matched! */
619 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
621 ksocknal_txlist_done(ni
, &zombies
, 1);
627 ksocknal_get_conn_by_idx (lnet_ni_t
*ni
, int index
)
630 struct list_head
*ptmp
;
632 struct list_head
*ctmp
;
635 read_lock(&ksocknal_data
.ksnd_global_lock
);
637 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
638 list_for_each (ptmp
, &ksocknal_data
.ksnd_peers
[i
]) {
639 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
641 LASSERT (!peer
->ksnp_closing
);
643 if (peer
->ksnp_ni
!= ni
)
646 list_for_each (ctmp
, &peer
->ksnp_conns
) {
650 conn
= list_entry (ctmp
, ksock_conn_t
,
652 ksocknal_conn_addref(conn
);
653 read_unlock(&ksocknal_data
. \
660 read_unlock(&ksocknal_data
.ksnd_global_lock
);
665 ksocknal_choose_scheduler_locked(unsigned int cpt
)
667 struct ksock_sched_info
*info
= ksocknal_data
.ksnd_sched_info
[cpt
];
668 ksock_sched_t
*sched
;
671 LASSERT(info
->ksi_nthreads
> 0);
673 sched
= &info
->ksi_scheds
[0];
675 * NB: it's safe so far, but info->ksi_nthreads could be changed
676 * at runtime when we have dynamic LNet configuration, then we
677 * need to take care of this.
679 for (i
= 1; i
< info
->ksi_nthreads
; i
++) {
680 if (sched
->kss_nconns
> info
->ksi_scheds
[i
].kss_nconns
)
681 sched
= &info
->ksi_scheds
[i
];
688 ksocknal_local_ipvec (lnet_ni_t
*ni
, __u32
*ipaddrs
)
690 ksock_net_t
*net
= ni
->ni_data
;
694 read_lock(&ksocknal_data
.ksnd_global_lock
);
696 nip
= net
->ksnn_ninterfaces
;
697 LASSERT (nip
<= LNET_MAX_INTERFACES
);
699 /* Only offer interfaces for additional connections if I have
702 read_unlock(&ksocknal_data
.ksnd_global_lock
);
706 for (i
= 0; i
< nip
; i
++) {
707 ipaddrs
[i
] = net
->ksnn_interfaces
[i
].ksni_ipaddr
;
708 LASSERT (ipaddrs
[i
] != 0);
711 read_unlock(&ksocknal_data
.ksnd_global_lock
);
716 ksocknal_match_peerip (ksock_interface_t
*iface
, __u32
*ips
, int nips
)
718 int best_netmatch
= 0;
725 for (i
= 0; i
< nips
; i
++) {
729 this_xor
= (ips
[i
] ^ iface
->ksni_ipaddr
);
730 this_netmatch
= ((this_xor
& iface
->ksni_netmask
) == 0) ? 1 : 0;
733 best_netmatch
< this_netmatch
||
734 (best_netmatch
== this_netmatch
&&
735 best_xor
> this_xor
)))
739 best_netmatch
= this_netmatch
;
748 ksocknal_select_ips(ksock_peer_t
*peer
, __u32
*peerips
, int n_peerips
)
750 rwlock_t
*global_lock
= &ksocknal_data
.ksnd_global_lock
;
751 ksock_net_t
*net
= peer
->ksnp_ni
->ni_data
;
752 ksock_interface_t
*iface
;
753 ksock_interface_t
*best_iface
;
764 /* CAVEAT EMPTOR: We do all our interface matching with an
765 * exclusive hold of global lock with bottom halves disabled. We're only
766 * expecting to be dealing with small numbers of interfaces, so the
767 * O(n**3)-ness shouldn't matter */
769 /* Also note that I'm not going to return more than n_peerips
770 * interfaces, even if I have more myself */
772 write_lock_bh(global_lock
);
774 LASSERT (n_peerips
<= LNET_MAX_INTERFACES
);
775 LASSERT (net
->ksnn_ninterfaces
<= LNET_MAX_INTERFACES
);
777 /* Only match interfaces for additional connections
778 * if I have > 1 interface */
779 n_ips
= (net
->ksnn_ninterfaces
< 2) ? 0 :
780 MIN(n_peerips
, net
->ksnn_ninterfaces
);
782 for (i
= 0; peer
->ksnp_n_passive_ips
< n_ips
; i
++) {
783 /* ^ yes really... */
785 /* If we have any new interfaces, first tick off all the
786 * peer IPs that match old interfaces, then choose new
787 * interfaces to match the remaining peer IPs.
788 * We don't forget interfaces we've stopped using; we might
789 * start using them again... */
791 if (i
< peer
->ksnp_n_passive_ips
) {
793 ip
= peer
->ksnp_passive_ips
[i
];
794 best_iface
= ksocknal_ip2iface(peer
->ksnp_ni
, ip
);
796 /* peer passive ips are kept up to date */
797 LASSERT(best_iface
!= NULL
);
799 /* choose a new interface */
800 LASSERT (i
== peer
->ksnp_n_passive_ips
);
806 for (j
= 0; j
< net
->ksnn_ninterfaces
; j
++) {
807 iface
= &net
->ksnn_interfaces
[j
];
808 ip
= iface
->ksni_ipaddr
;
810 for (k
= 0; k
< peer
->ksnp_n_passive_ips
; k
++)
811 if (peer
->ksnp_passive_ips
[k
] == ip
)
814 if (k
< peer
->ksnp_n_passive_ips
) /* using it already */
817 k
= ksocknal_match_peerip(iface
, peerips
, n_peerips
);
818 xor = (ip
^ peerips
[k
]);
819 this_netmatch
= ((xor & iface
->ksni_netmask
) == 0) ? 1 : 0;
821 if (!(best_iface
== NULL
||
822 best_netmatch
< this_netmatch
||
823 (best_netmatch
== this_netmatch
&&
824 best_npeers
> iface
->ksni_npeers
)))
828 best_netmatch
= this_netmatch
;
829 best_npeers
= iface
->ksni_npeers
;
832 best_iface
->ksni_npeers
++;
833 ip
= best_iface
->ksni_ipaddr
;
834 peer
->ksnp_passive_ips
[i
] = ip
;
835 peer
->ksnp_n_passive_ips
= i
+1;
838 LASSERT (best_iface
!= NULL
);
840 /* mark the best matching peer IP used */
841 j
= ksocknal_match_peerip(best_iface
, peerips
, n_peerips
);
845 /* Overwrite input peer IP addresses */
846 memcpy(peerips
, peer
->ksnp_passive_ips
, n_ips
* sizeof(*peerips
));
848 write_unlock_bh(global_lock
);
854 ksocknal_create_routes(ksock_peer_t
*peer
, int port
,
855 __u32
*peer_ipaddrs
, int npeer_ipaddrs
)
857 ksock_route_t
*newroute
= NULL
;
858 rwlock_t
*global_lock
= &ksocknal_data
.ksnd_global_lock
;
859 lnet_ni_t
*ni
= peer
->ksnp_ni
;
860 ksock_net_t
*net
= ni
->ni_data
;
861 struct list_head
*rtmp
;
862 ksock_route_t
*route
;
863 ksock_interface_t
*iface
;
864 ksock_interface_t
*best_iface
;
871 /* CAVEAT EMPTOR: We do all our interface matching with an
872 * exclusive hold of global lock at IRQ priority. We're only
873 * expecting to be dealing with small numbers of interfaces, so the
874 * O(n**3)-ness here shouldn't matter */
876 write_lock_bh(global_lock
);
878 if (net
->ksnn_ninterfaces
< 2) {
879 /* Only create additional connections
880 * if I have > 1 interface */
881 write_unlock_bh(global_lock
);
885 LASSERT (npeer_ipaddrs
<= LNET_MAX_INTERFACES
);
887 for (i
= 0; i
< npeer_ipaddrs
; i
++) {
888 if (newroute
!= NULL
) {
889 newroute
->ksnr_ipaddr
= peer_ipaddrs
[i
];
891 write_unlock_bh(global_lock
);
893 newroute
= ksocknal_create_route(peer_ipaddrs
[i
], port
);
894 if (newroute
== NULL
)
897 write_lock_bh(global_lock
);
900 if (peer
->ksnp_closing
) {
901 /* peer got closed under me */
905 /* Already got a route? */
907 list_for_each(rtmp
, &peer
->ksnp_routes
) {
908 route
= list_entry(rtmp
, ksock_route_t
, ksnr_list
);
910 if (route
->ksnr_ipaddr
== newroute
->ksnr_ipaddr
)
922 LASSERT (net
->ksnn_ninterfaces
<= LNET_MAX_INTERFACES
);
924 /* Select interface to connect from */
925 for (j
= 0; j
< net
->ksnn_ninterfaces
; j
++) {
926 iface
= &net
->ksnn_interfaces
[j
];
928 /* Using this interface already? */
929 list_for_each(rtmp
, &peer
->ksnp_routes
) {
930 route
= list_entry(rtmp
, ksock_route_t
,
933 if (route
->ksnr_myipaddr
== iface
->ksni_ipaddr
)
941 this_netmatch
= (((iface
->ksni_ipaddr
^
942 newroute
->ksnr_ipaddr
) &
943 iface
->ksni_netmask
) == 0) ? 1 : 0;
945 if (!(best_iface
== NULL
||
946 best_netmatch
< this_netmatch
||
947 (best_netmatch
== this_netmatch
&&
948 best_nroutes
> iface
->ksni_nroutes
)))
952 best_netmatch
= this_netmatch
;
953 best_nroutes
= iface
->ksni_nroutes
;
956 if (best_iface
== NULL
)
959 newroute
->ksnr_myipaddr
= best_iface
->ksni_ipaddr
;
960 best_iface
->ksni_nroutes
++;
962 ksocknal_add_route_locked(peer
, newroute
);
966 write_unlock_bh(global_lock
);
967 if (newroute
!= NULL
)
968 ksocknal_route_decref(newroute
);
972 ksocknal_accept (lnet_ni_t
*ni
, socket_t
*sock
)
979 rc
= libcfs_sock_getaddr(sock
, 1, &peer_ip
, &peer_port
);
980 LASSERT (rc
== 0); /* we succeeded before */
982 LIBCFS_ALLOC(cr
, sizeof(*cr
));
984 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from "
985 "%pI4h: memory exhausted\n",
992 cr
->ksncr_sock
= sock
;
994 spin_lock_bh(&ksocknal_data
.ksnd_connd_lock
);
996 list_add_tail(&cr
->ksncr_list
, &ksocknal_data
.ksnd_connd_connreqs
);
997 wake_up(&ksocknal_data
.ksnd_connd_waitq
);
999 spin_unlock_bh(&ksocknal_data
.ksnd_connd_lock
);
1004 ksocknal_connecting (ksock_peer_t
*peer
, __u32 ipaddr
)
1006 ksock_route_t
*route
;
1008 list_for_each_entry (route
, &peer
->ksnp_routes
, ksnr_list
) {
1010 if (route
->ksnr_ipaddr
== ipaddr
)
1011 return route
->ksnr_connecting
;
1017 ksocknal_create_conn (lnet_ni_t
*ni
, ksock_route_t
*route
,
1018 socket_t
*sock
, int type
)
1020 rwlock_t
*global_lock
= &ksocknal_data
.ksnd_global_lock
;
1021 LIST_HEAD (zombies
);
1022 lnet_process_id_t peerid
;
1023 struct list_head
*tmp
;
1026 ksock_conn_t
*conn2
;
1027 ksock_peer_t
*peer
= NULL
;
1028 ksock_peer_t
*peer2
;
1029 ksock_sched_t
*sched
;
1030 ksock_hello_msg_t
*hello
;
1038 active
= (route
!= NULL
);
1040 LASSERT (active
== (type
!= SOCKLND_CONN_NONE
));
1042 LIBCFS_ALLOC(conn
, sizeof(*conn
));
1048 memset (conn
, 0, sizeof (*conn
));
1050 conn
->ksnc_peer
= NULL
;
1051 conn
->ksnc_route
= NULL
;
1052 conn
->ksnc_sock
= sock
;
1053 /* 2 ref, 1 for conn, another extra ref prevents socket
1054 * being closed before establishment of connection */
1055 atomic_set (&conn
->ksnc_sock_refcount
, 2);
1056 conn
->ksnc_type
= type
;
1057 ksocknal_lib_save_callback(sock
, conn
);
1058 atomic_set (&conn
->ksnc_conn_refcount
, 1); /* 1 ref for me */
1060 conn
->ksnc_rx_ready
= 0;
1061 conn
->ksnc_rx_scheduled
= 0;
1063 INIT_LIST_HEAD (&conn
->ksnc_tx_queue
);
1064 conn
->ksnc_tx_ready
= 0;
1065 conn
->ksnc_tx_scheduled
= 0;
1066 conn
->ksnc_tx_carrier
= NULL
;
1067 atomic_set (&conn
->ksnc_tx_nob
, 0);
1069 LIBCFS_ALLOC(hello
, offsetof(ksock_hello_msg_t
,
1070 kshm_ips
[LNET_MAX_INTERFACES
]));
1071 if (hello
== NULL
) {
1076 /* stash conn's local and remote addrs */
1077 rc
= ksocknal_lib_get_conn_addrs (conn
);
1081 /* Find out/confirm peer's NID and connection type and get the
1082 * vector of interfaces she's willing to let me connect to.
1083 * Passive connections use the listener timeout since the peer sends
1087 peer
= route
->ksnr_peer
;
1088 LASSERT(ni
== peer
->ksnp_ni
);
1090 /* Active connection sends HELLO eagerly */
1091 hello
->kshm_nips
= ksocknal_local_ipvec(ni
, hello
->kshm_ips
);
1092 peerid
= peer
->ksnp_id
;
1094 write_lock_bh(global_lock
);
1095 conn
->ksnc_proto
= peer
->ksnp_proto
;
1096 write_unlock_bh(global_lock
);
1098 if (conn
->ksnc_proto
== NULL
) {
1099 conn
->ksnc_proto
= &ksocknal_protocol_v3x
;
1100 #if SOCKNAL_VERSION_DEBUG
1101 if (*ksocknal_tunables
.ksnd_protocol
== 2)
1102 conn
->ksnc_proto
= &ksocknal_protocol_v2x
;
1103 else if (*ksocknal_tunables
.ksnd_protocol
== 1)
1104 conn
->ksnc_proto
= &ksocknal_protocol_v1x
;
1108 rc
= ksocknal_send_hello (ni
, conn
, peerid
.nid
, hello
);
1112 peerid
.nid
= LNET_NID_ANY
;
1113 peerid
.pid
= LNET_PID_ANY
;
1115 /* Passive, get protocol from peer */
1116 conn
->ksnc_proto
= NULL
;
1119 rc
= ksocknal_recv_hello (ni
, conn
, hello
, &peerid
, &incarnation
);
1123 LASSERT (rc
== 0 || active
);
1124 LASSERT (conn
->ksnc_proto
!= NULL
);
1125 LASSERT (peerid
.nid
!= LNET_NID_ANY
);
1127 cpt
= lnet_cpt_of_nid(peerid
.nid
);
1130 ksocknal_peer_addref(peer
);
1131 write_lock_bh(global_lock
);
1133 rc
= ksocknal_create_peer(&peer
, ni
, peerid
);
1137 write_lock_bh(global_lock
);
1139 /* called with a ref on ni, so shutdown can't have started */
1140 LASSERT (((ksock_net_t
*) ni
->ni_data
)->ksnn_shutdown
== 0);
1142 peer2
= ksocknal_find_peer_locked(ni
, peerid
);
1143 if (peer2
== NULL
) {
1144 /* NB this puts an "empty" peer in the peer
1145 * table (which takes my ref) */
1146 list_add_tail(&peer
->ksnp_list
,
1147 ksocknal_nid2peerlist(peerid
.nid
));
1149 ksocknal_peer_decref(peer
);
1154 ksocknal_peer_addref(peer
);
1155 peer
->ksnp_accepting
++;
1157 /* Am I already connecting to this guy? Resolve in
1158 * favour of higher NID... */
1159 if (peerid
.nid
< ni
->ni_nid
&&
1160 ksocknal_connecting(peer
, conn
->ksnc_ipaddr
)) {
1162 warn
= "connection race resolution";
1167 if (peer
->ksnp_closing
||
1168 (active
&& route
->ksnr_deleted
)) {
1169 /* peer/route got closed under me */
1171 warn
= "peer/route removed";
1175 if (peer
->ksnp_proto
== NULL
) {
1176 /* Never connected before.
1177 * NB recv_hello may have returned EPROTO to signal my peer
1178 * wants a different protocol than the one I asked for.
1180 LASSERT (list_empty(&peer
->ksnp_conns
));
1182 peer
->ksnp_proto
= conn
->ksnc_proto
;
1183 peer
->ksnp_incarnation
= incarnation
;
1186 if (peer
->ksnp_proto
!= conn
->ksnc_proto
||
1187 peer
->ksnp_incarnation
!= incarnation
) {
1188 /* Peer rebooted or I've got the wrong protocol version */
1189 ksocknal_close_peer_conns_locked(peer
, 0, 0);
1191 peer
->ksnp_proto
= NULL
;
1193 warn
= peer
->ksnp_incarnation
!= incarnation
?
1195 "wrong proto version";
1205 warn
= "lost conn race";
1208 warn
= "retry with different protocol version";
1212 /* Refuse to duplicate an existing connection, unless this is a
1213 * loopback connection */
1214 if (conn
->ksnc_ipaddr
!= conn
->ksnc_myipaddr
) {
1215 list_for_each(tmp
, &peer
->ksnp_conns
) {
1216 conn2
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
1218 if (conn2
->ksnc_ipaddr
!= conn
->ksnc_ipaddr
||
1219 conn2
->ksnc_myipaddr
!= conn
->ksnc_myipaddr
||
1220 conn2
->ksnc_type
!= conn
->ksnc_type
)
1223 /* Reply on a passive connection attempt so the peer
1224 * realises we're connected. */
1234 /* If the connection created by this route didn't bind to the IP
1235 * address the route connected to, the connection/route matching
1236 * code below probably isn't going to work. */
1238 route
->ksnr_ipaddr
!= conn
->ksnc_ipaddr
) {
1239 CERROR("Route %s %pI4h connected to %pI4h\n",
1240 libcfs_id2str(peer
->ksnp_id
),
1241 &route
->ksnr_ipaddr
,
1242 &conn
->ksnc_ipaddr
);
1245 /* Search for a route corresponding to the new connection and
1246 * create an association. This allows incoming connections created
1247 * by routes in my peer to match my own route entries so I don't
1248 * continually create duplicate routes. */
1249 list_for_each (tmp
, &peer
->ksnp_routes
) {
1250 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
1252 if (route
->ksnr_ipaddr
!= conn
->ksnc_ipaddr
)
1255 ksocknal_associate_route_conn_locked(route
, conn
);
1259 conn
->ksnc_peer
= peer
; /* conn takes my ref on peer */
1260 peer
->ksnp_last_alive
= cfs_time_current();
1261 peer
->ksnp_send_keepalive
= 0;
1262 peer
->ksnp_error
= 0;
1264 sched
= ksocknal_choose_scheduler_locked(cpt
);
1265 sched
->kss_nconns
++;
1266 conn
->ksnc_scheduler
= sched
;
1268 conn
->ksnc_tx_last_post
= cfs_time_current();
1269 /* Set the deadline for the outgoing HELLO to drain */
1270 conn
->ksnc_tx_bufnob
= cfs_sock_wmem_queued(sock
);
1271 conn
->ksnc_tx_deadline
= cfs_time_shift(*ksocknal_tunables
.ksnd_timeout
);
1272 mb(); /* order with adding to peer's conn list */
1274 list_add (&conn
->ksnc_list
, &peer
->ksnp_conns
);
1275 ksocknal_conn_addref(conn
);
1277 ksocknal_new_packet(conn
, 0);
1279 conn
->ksnc_zc_capable
= ksocknal_lib_zc_capable(conn
);
1281 /* Take packets blocking for this connection. */
1282 list_for_each_entry_safe(tx
, txtmp
, &peer
->ksnp_tx_queue
, tx_list
) {
1283 if (conn
->ksnc_proto
->pro_match_tx(conn
, tx
, tx
->tx_nonblk
) == SOCKNAL_MATCH_NO
)
1286 list_del (&tx
->tx_list
);
1287 ksocknal_queue_tx_locked (tx
, conn
);
1290 write_unlock_bh(global_lock
);
1292 /* We've now got a new connection. Any errors from here on are just
1293 * like "normal" comms errors and we close the connection normally.
1294 * NB (a) we still have to send the reply HELLO for passive
1296 * (b) normal I/O on the conn is blocked until I setup and call the
1300 CDEBUG(D_NET
, "New conn %s p %d.x %pI4h -> %pI4h/%d"
1301 " incarnation:"LPD64
" sched[%d:%d]\n",
1302 libcfs_id2str(peerid
), conn
->ksnc_proto
->pro_version
,
1303 &conn
->ksnc_myipaddr
, &conn
->ksnc_ipaddr
,
1304 conn
->ksnc_port
, incarnation
, cpt
,
1305 (int)(sched
- &sched
->kss_info
->ksi_scheds
[0]));
1308 /* additional routes after interface exchange? */
1309 ksocknal_create_routes(peer
, conn
->ksnc_port
,
1310 hello
->kshm_ips
, hello
->kshm_nips
);
1312 hello
->kshm_nips
= ksocknal_select_ips(peer
, hello
->kshm_ips
,
1314 rc
= ksocknal_send_hello(ni
, conn
, peerid
.nid
, hello
);
1317 LIBCFS_FREE(hello
, offsetof(ksock_hello_msg_t
,
1318 kshm_ips
[LNET_MAX_INTERFACES
]));
1320 /* setup the socket AFTER I've received hello (it disables
1321 * SO_LINGER). I might call back to the acceptor who may want
1322 * to send a protocol version response and then close the
1323 * socket; this ensures the socket only tears down after the
1324 * response has been sent. */
1326 rc
= ksocknal_lib_setup_sock(sock
);
1328 write_lock_bh(global_lock
);
1330 /* NB my callbacks block while I hold ksnd_global_lock */
1331 ksocknal_lib_set_callback(sock
, conn
);
1334 peer
->ksnp_accepting
--;
1336 write_unlock_bh(global_lock
);
1339 write_lock_bh(global_lock
);
1340 if (!conn
->ksnc_closing
) {
1341 /* could be closed by another thread */
1342 ksocknal_close_conn_locked(conn
, rc
);
1344 write_unlock_bh(global_lock
);
1345 } else if (ksocknal_connsock_addref(conn
) == 0) {
1346 /* Allow I/O to proceed. */
1347 ksocknal_read_callback(conn
);
1348 ksocknal_write_callback(conn
);
1349 ksocknal_connsock_decref(conn
);
1352 ksocknal_connsock_decref(conn
);
1353 ksocknal_conn_decref(conn
);
1357 if (!peer
->ksnp_closing
&&
1358 list_empty (&peer
->ksnp_conns
) &&
1359 list_empty (&peer
->ksnp_routes
)) {
1360 list_add(&zombies
, &peer
->ksnp_tx_queue
);
1361 list_del_init(&peer
->ksnp_tx_queue
);
1362 ksocknal_unlink_peer_locked(peer
);
1365 write_unlock_bh(global_lock
);
1369 CERROR("Not creating conn %s type %d: %s\n",
1370 libcfs_id2str(peerid
), conn
->ksnc_type
, warn
);
1372 CDEBUG(D_NET
, "Not creating conn %s type %d: %s\n",
1373 libcfs_id2str(peerid
), conn
->ksnc_type
, warn
);
1378 /* Request retry by replying with CONN_NONE
1379 * ksnc_proto has been set already */
1380 conn
->ksnc_type
= SOCKLND_CONN_NONE
;
1381 hello
->kshm_nips
= 0;
1382 ksocknal_send_hello(ni
, conn
, peerid
.nid
, hello
);
1385 write_lock_bh(global_lock
);
1386 peer
->ksnp_accepting
--;
1387 write_unlock_bh(global_lock
);
1390 ksocknal_txlist_done(ni
, &zombies
, 1);
1391 ksocknal_peer_decref(peer
);
1395 LIBCFS_FREE(hello
, offsetof(ksock_hello_msg_t
,
1396 kshm_ips
[LNET_MAX_INTERFACES
]));
1398 LIBCFS_FREE (conn
, sizeof(*conn
));
1401 libcfs_sock_release(sock
);
1406 ksocknal_close_conn_locked (ksock_conn_t
*conn
, int error
)
1408 /* This just does the immmediate housekeeping, and queues the
1409 * connection for the reaper to terminate.
1410 * Caller holds ksnd_global_lock exclusively in irq context */
1411 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1412 ksock_route_t
*route
;
1413 ksock_conn_t
*conn2
;
1414 struct list_head
*tmp
;
1416 LASSERT (peer
->ksnp_error
== 0);
1417 LASSERT (!conn
->ksnc_closing
);
1418 conn
->ksnc_closing
= 1;
1420 /* ksnd_deathrow_conns takes over peer's ref */
1421 list_del (&conn
->ksnc_list
);
1423 route
= conn
->ksnc_route
;
1424 if (route
!= NULL
) {
1425 /* dissociate conn from route... */
1426 LASSERT (!route
->ksnr_deleted
);
1427 LASSERT ((route
->ksnr_connected
& (1 << conn
->ksnc_type
)) != 0);
1430 list_for_each(tmp
, &peer
->ksnp_conns
) {
1431 conn2
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
1433 if (conn2
->ksnc_route
== route
&&
1434 conn2
->ksnc_type
== conn
->ksnc_type
)
1440 route
->ksnr_connected
&= ~(1 << conn
->ksnc_type
);
1442 conn
->ksnc_route
= NULL
;
1444 #if 0 /* irrelevant with only eager routes */
1445 /* make route least favourite */
1446 list_del (&route
->ksnr_list
);
1447 list_add_tail (&route
->ksnr_list
, &peer
->ksnp_routes
);
1449 ksocknal_route_decref(route
); /* drop conn's ref on route */
1452 if (list_empty (&peer
->ksnp_conns
)) {
1453 /* No more connections to this peer */
1455 if (!list_empty(&peer
->ksnp_tx_queue
)) {
1458 LASSERT (conn
->ksnc_proto
== &ksocknal_protocol_v3x
);
1460 /* throw them to the last connection...,
1461 * these TXs will be send to /dev/null by scheduler */
1462 list_for_each_entry(tx
, &peer
->ksnp_tx_queue
,
1464 ksocknal_tx_prep(conn
, tx
);
1466 spin_lock_bh(&conn
->ksnc_scheduler
->kss_lock
);
1467 list_splice_init(&peer
->ksnp_tx_queue
,
1468 &conn
->ksnc_tx_queue
);
1469 spin_unlock_bh(&conn
->ksnc_scheduler
->kss_lock
);
1472 peer
->ksnp_proto
= NULL
; /* renegotiate protocol version */
1473 peer
->ksnp_error
= error
; /* stash last conn close reason */
1475 if (list_empty (&peer
->ksnp_routes
)) {
1476 /* I've just closed last conn belonging to a
1477 * peer with no routes to it */
1478 ksocknal_unlink_peer_locked (peer
);
1482 spin_lock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1484 list_add_tail(&conn
->ksnc_list
,
1485 &ksocknal_data
.ksnd_deathrow_conns
);
1486 wake_up(&ksocknal_data
.ksnd_reaper_waitq
);
1488 spin_unlock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1492 ksocknal_peer_failed (ksock_peer_t
*peer
)
1495 cfs_time_t last_alive
= 0;
1497 /* There has been a connection failure or comms error; but I'll only
1498 * tell LNET I think the peer is dead if it's to another kernel and
1499 * there are no connections or connection attempts in existence. */
1501 read_lock(&ksocknal_data
.ksnd_global_lock
);
1503 if ((peer
->ksnp_id
.pid
& LNET_PID_USERFLAG
) == 0 &&
1504 list_empty(&peer
->ksnp_conns
) &&
1505 peer
->ksnp_accepting
== 0 &&
1506 ksocknal_find_connecting_route_locked(peer
) == NULL
) {
1508 last_alive
= peer
->ksnp_last_alive
;
1511 read_unlock(&ksocknal_data
.ksnd_global_lock
);
1514 lnet_notify (peer
->ksnp_ni
, peer
->ksnp_id
.nid
, 0,
1519 ksocknal_finalize_zcreq(ksock_conn_t
*conn
)
1521 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1526 /* NB safe to finalize TXs because closing of socket will
1527 * abort all buffered data */
1528 LASSERT (conn
->ksnc_sock
== NULL
);
1530 spin_lock(&peer
->ksnp_lock
);
1532 list_for_each_entry_safe(tx
, tmp
, &peer
->ksnp_zc_req_list
, tx_zc_list
) {
1533 if (tx
->tx_conn
!= conn
)
1536 LASSERT (tx
->tx_msg
.ksm_zc_cookies
[0] != 0);
1538 tx
->tx_msg
.ksm_zc_cookies
[0] = 0;
1539 tx
->tx_zc_aborted
= 1; /* mark it as not-acked */
1540 list_del(&tx
->tx_zc_list
);
1541 list_add(&tx
->tx_zc_list
, &zlist
);
1544 spin_unlock(&peer
->ksnp_lock
);
1546 while (!list_empty(&zlist
)) {
1547 tx
= list_entry(zlist
.next
, ksock_tx_t
, tx_zc_list
);
1549 list_del(&tx
->tx_zc_list
);
1550 ksocknal_tx_decref(tx
);
1555 ksocknal_terminate_conn (ksock_conn_t
*conn
)
1557 /* This gets called by the reaper (guaranteed thread context) to
1558 * disengage the socket from its callbacks and close it.
1559 * ksnc_refcount will eventually hit zero, and then the reaper will
1561 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1562 ksock_sched_t
*sched
= conn
->ksnc_scheduler
;
1565 LASSERT(conn
->ksnc_closing
);
1567 /* wake up the scheduler to "send" all remaining packets to /dev/null */
1568 spin_lock_bh(&sched
->kss_lock
);
1570 /* a closing conn is always ready to tx */
1571 conn
->ksnc_tx_ready
= 1;
1573 if (!conn
->ksnc_tx_scheduled
&&
1574 !list_empty(&conn
->ksnc_tx_queue
)){
1575 list_add_tail (&conn
->ksnc_tx_list
,
1576 &sched
->kss_tx_conns
);
1577 conn
->ksnc_tx_scheduled
= 1;
1578 /* extra ref for scheduler */
1579 ksocknal_conn_addref(conn
);
1581 wake_up (&sched
->kss_waitq
);
1584 spin_unlock_bh(&sched
->kss_lock
);
1586 /* serialise with callbacks */
1587 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1589 ksocknal_lib_reset_callback(conn
->ksnc_sock
, conn
);
1591 /* OK, so this conn may not be completely disengaged from its
1592 * scheduler yet, but it _has_ committed to terminate... */
1593 conn
->ksnc_scheduler
->kss_nconns
--;
1595 if (peer
->ksnp_error
!= 0) {
1596 /* peer's last conn closed in error */
1597 LASSERT (list_empty (&peer
->ksnp_conns
));
1599 peer
->ksnp_error
= 0; /* avoid multiple notifications */
1602 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
1605 ksocknal_peer_failed(peer
);
1607 /* The socket is closed on the final put; either here, or in
1608 * ksocknal_{send,recv}msg(). Since we set up the linger2 option
1609 * when the connection was established, this will close the socket
1610 * immediately, aborting anything buffered in it. Any hung
1611 * zero-copy transmits will therefore complete in finite time. */
1612 ksocknal_connsock_decref(conn
);
1616 ksocknal_queue_zombie_conn (ksock_conn_t
*conn
)
1618 /* Queue the conn for the reaper to destroy */
1620 LASSERT(atomic_read(&conn
->ksnc_conn_refcount
) == 0);
1621 spin_lock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1623 list_add_tail(&conn
->ksnc_list
, &ksocknal_data
.ksnd_zombie_conns
);
1624 wake_up(&ksocknal_data
.ksnd_reaper_waitq
);
1626 spin_unlock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1630 ksocknal_destroy_conn (ksock_conn_t
*conn
)
1632 cfs_time_t last_rcv
;
1634 /* Final coup-de-grace of the reaper */
1635 CDEBUG (D_NET
, "connection %p\n", conn
);
1637 LASSERT (atomic_read (&conn
->ksnc_conn_refcount
) == 0);
1638 LASSERT (atomic_read (&conn
->ksnc_sock_refcount
) == 0);
1639 LASSERT (conn
->ksnc_sock
== NULL
);
1640 LASSERT (conn
->ksnc_route
== NULL
);
1641 LASSERT (!conn
->ksnc_tx_scheduled
);
1642 LASSERT (!conn
->ksnc_rx_scheduled
);
1643 LASSERT (list_empty(&conn
->ksnc_tx_queue
));
1645 /* complete current receive if any */
1646 switch (conn
->ksnc_rx_state
) {
1647 case SOCKNAL_RX_LNET_PAYLOAD
:
1648 last_rcv
= conn
->ksnc_rx_deadline
-
1649 cfs_time_seconds(*ksocknal_tunables
.ksnd_timeout
);
1650 CERROR("Completing partial receive from %s[%d]"
1651 ", ip %pI4h:%d, with error, wanted: %d, left: %d, "
1652 "last alive is %ld secs ago\n",
1653 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
), conn
->ksnc_type
,
1654 &conn
->ksnc_ipaddr
, conn
->ksnc_port
,
1655 conn
->ksnc_rx_nob_wanted
, conn
->ksnc_rx_nob_left
,
1656 cfs_duration_sec(cfs_time_sub(cfs_time_current(),
1658 lnet_finalize (conn
->ksnc_peer
->ksnp_ni
,
1659 conn
->ksnc_cookie
, -EIO
);
1661 case SOCKNAL_RX_LNET_HEADER
:
1662 if (conn
->ksnc_rx_started
)
1663 CERROR("Incomplete receive of lnet header from %s"
1664 ", ip %pI4h:%d, with error, protocol: %d.x.\n",
1665 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
),
1666 &conn
->ksnc_ipaddr
, conn
->ksnc_port
,
1667 conn
->ksnc_proto
->pro_version
);
1669 case SOCKNAL_RX_KSM_HEADER
:
1670 if (conn
->ksnc_rx_started
)
1671 CERROR("Incomplete receive of ksock message from %s"
1672 ", ip %pI4h:%d, with error, protocol: %d.x.\n",
1673 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
),
1674 &conn
->ksnc_ipaddr
, conn
->ksnc_port
,
1675 conn
->ksnc_proto
->pro_version
);
1677 case SOCKNAL_RX_SLOP
:
1678 if (conn
->ksnc_rx_started
)
1679 CERROR("Incomplete receive of slops from %s"
1680 ", ip %pI4h:%d, with error\n",
1681 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
),
1682 &conn
->ksnc_ipaddr
, conn
->ksnc_port
);
1689 ksocknal_peer_decref(conn
->ksnc_peer
);
1691 LIBCFS_FREE (conn
, sizeof (*conn
));
1695 ksocknal_close_peer_conns_locked (ksock_peer_t
*peer
, __u32 ipaddr
, int why
)
1698 struct list_head
*ctmp
;
1699 struct list_head
*cnxt
;
1702 list_for_each_safe (ctmp
, cnxt
, &peer
->ksnp_conns
) {
1703 conn
= list_entry (ctmp
, ksock_conn_t
, ksnc_list
);
1706 conn
->ksnc_ipaddr
== ipaddr
) {
1708 ksocknal_close_conn_locked (conn
, why
);
1716 ksocknal_close_conn_and_siblings (ksock_conn_t
*conn
, int why
)
1718 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1719 __u32 ipaddr
= conn
->ksnc_ipaddr
;
1722 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1724 count
= ksocknal_close_peer_conns_locked (peer
, ipaddr
, why
);
1726 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
1732 ksocknal_close_matching_conns (lnet_process_id_t id
, __u32 ipaddr
)
1735 struct list_head
*ptmp
;
1736 struct list_head
*pnxt
;
1742 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1744 if (id
.nid
!= LNET_NID_ANY
)
1745 lo
= hi
= (int)(ksocknal_nid2peerlist(id
.nid
) - ksocknal_data
.ksnd_peers
);
1748 hi
= ksocknal_data
.ksnd_peer_hash_size
- 1;
1751 for (i
= lo
; i
<= hi
; i
++) {
1752 list_for_each_safe (ptmp
, pnxt
,
1753 &ksocknal_data
.ksnd_peers
[i
]) {
1755 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
1757 if (!((id
.nid
== LNET_NID_ANY
|| id
.nid
== peer
->ksnp_id
.nid
) &&
1758 (id
.pid
== LNET_PID_ANY
|| id
.pid
== peer
->ksnp_id
.pid
)))
1761 count
+= ksocknal_close_peer_conns_locked (peer
, ipaddr
, 0);
1765 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
1767 /* wildcards always succeed */
1768 if (id
.nid
== LNET_NID_ANY
|| id
.pid
== LNET_PID_ANY
|| ipaddr
== 0)
1778 ksocknal_notify (lnet_ni_t
*ni
, lnet_nid_t gw_nid
, int alive
)
1780 /* The router is telling me she's been notified of a change in
1781 * gateway state.... */
1782 lnet_process_id_t id
= {0};
1785 id
.pid
= LNET_PID_ANY
;
1787 CDEBUG (D_NET
, "gw %s %s\n", libcfs_nid2str(gw_nid
),
1788 alive
? "up" : "down");
1791 /* If the gateway crashed, close all open connections... */
1792 ksocknal_close_matching_conns (id
, 0);
1796 /* ...otherwise do nothing. We can only establish new connections
1797 * if we have autroutes, and these connect on demand. */
1801 ksocknal_query (lnet_ni_t
*ni
, lnet_nid_t nid
, cfs_time_t
*when
)
1804 cfs_time_t last_alive
= 0;
1805 cfs_time_t now
= cfs_time_current();
1806 ksock_peer_t
*peer
= NULL
;
1807 rwlock_t
*glock
= &ksocknal_data
.ksnd_global_lock
;
1808 lnet_process_id_t id
= {.nid
= nid
, .pid
= LUSTRE_SRV_LNET_PID
};
1812 peer
= ksocknal_find_peer_locked(ni
, id
);
1814 struct list_head
*tmp
;
1818 list_for_each (tmp
, &peer
->ksnp_conns
) {
1819 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
1820 bufnob
= cfs_sock_wmem_queued(conn
->ksnc_sock
);
1822 if (bufnob
< conn
->ksnc_tx_bufnob
) {
1823 /* something got ACKed */
1824 conn
->ksnc_tx_deadline
=
1825 cfs_time_shift(*ksocknal_tunables
.ksnd_timeout
);
1826 peer
->ksnp_last_alive
= now
;
1827 conn
->ksnc_tx_bufnob
= bufnob
;
1831 last_alive
= peer
->ksnp_last_alive
;
1832 if (ksocknal_find_connectable_route_locked(peer
) == NULL
)
1838 if (last_alive
!= 0)
1841 CDEBUG(D_NET
, "Peer %s %p, alive %ld secs ago, connect %d\n",
1842 libcfs_nid2str(nid
), peer
,
1843 last_alive
? cfs_duration_sec(now
- last_alive
) : -1,
1849 ksocknal_add_peer(ni
, id
, LNET_NIDADDR(nid
), lnet_acceptor_port());
1851 write_lock_bh(glock
);
1853 peer
= ksocknal_find_peer_locked(ni
, id
);
1855 ksocknal_launch_all_connections_locked(peer
);
1857 write_unlock_bh(glock
);
1862 ksocknal_push_peer (ksock_peer_t
*peer
)
1866 struct list_head
*tmp
;
1869 for (index
= 0; ; index
++) {
1870 read_lock(&ksocknal_data
.ksnd_global_lock
);
1875 list_for_each (tmp
, &peer
->ksnp_conns
) {
1877 conn
= list_entry (tmp
, ksock_conn_t
,
1879 ksocknal_conn_addref(conn
);
1884 read_unlock(&ksocknal_data
.ksnd_global_lock
);
1889 ksocknal_lib_push_conn (conn
);
1890 ksocknal_conn_decref(conn
);
1895 ksocknal_push (lnet_ni_t
*ni
, lnet_process_id_t id
)
1898 struct list_head
*tmp
;
1904 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
1905 for (j
= 0; ; j
++) {
1906 read_lock(&ksocknal_data
.ksnd_global_lock
);
1911 list_for_each (tmp
, &ksocknal_data
.ksnd_peers
[i
]) {
1912 peer
= list_entry(tmp
, ksock_peer_t
,
1915 if (!((id
.nid
== LNET_NID_ANY
||
1916 id
.nid
== peer
->ksnp_id
.nid
) &&
1917 (id
.pid
== LNET_PID_ANY
||
1918 id
.pid
== peer
->ksnp_id
.pid
))) {
1924 ksocknal_peer_addref(peer
);
1929 read_unlock(&ksocknal_data
.ksnd_global_lock
);
1933 ksocknal_push_peer (peer
);
1934 ksocknal_peer_decref(peer
);
1944 ksocknal_add_interface(lnet_ni_t
*ni
, __u32 ipaddress
, __u32 netmask
)
1946 ksock_net_t
*net
= ni
->ni_data
;
1947 ksock_interface_t
*iface
;
1951 struct list_head
*ptmp
;
1953 struct list_head
*rtmp
;
1954 ksock_route_t
*route
;
1956 if (ipaddress
== 0 ||
1960 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1962 iface
= ksocknal_ip2iface(ni
, ipaddress
);
1963 if (iface
!= NULL
) {
1964 /* silently ignore dups */
1966 } else if (net
->ksnn_ninterfaces
== LNET_MAX_INTERFACES
) {
1969 iface
= &net
->ksnn_interfaces
[net
->ksnn_ninterfaces
++];
1971 iface
->ksni_ipaddr
= ipaddress
;
1972 iface
->ksni_netmask
= netmask
;
1973 iface
->ksni_nroutes
= 0;
1974 iface
->ksni_npeers
= 0;
1976 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
1977 list_for_each(ptmp
, &ksocknal_data
.ksnd_peers
[i
]) {
1978 peer
= list_entry(ptmp
, ksock_peer_t
,
1981 for (j
= 0; j
< peer
->ksnp_n_passive_ips
; j
++)
1982 if (peer
->ksnp_passive_ips
[j
] == ipaddress
)
1983 iface
->ksni_npeers
++;
1985 list_for_each(rtmp
, &peer
->ksnp_routes
) {
1986 route
= list_entry(rtmp
,
1990 if (route
->ksnr_myipaddr
== ipaddress
)
1991 iface
->ksni_nroutes
++;
1997 /* NB only new connections will pay attention to the new interface! */
2000 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
2006 ksocknal_peer_del_interface_locked(ksock_peer_t
*peer
, __u32 ipaddr
)
2008 struct list_head
*tmp
;
2009 struct list_head
*nxt
;
2010 ksock_route_t
*route
;
2015 for (i
= 0; i
< peer
->ksnp_n_passive_ips
; i
++)
2016 if (peer
->ksnp_passive_ips
[i
] == ipaddr
) {
2017 for (j
= i
+1; j
< peer
->ksnp_n_passive_ips
; j
++)
2018 peer
->ksnp_passive_ips
[j
-1] =
2019 peer
->ksnp_passive_ips
[j
];
2020 peer
->ksnp_n_passive_ips
--;
2024 list_for_each_safe(tmp
, nxt
, &peer
->ksnp_routes
) {
2025 route
= list_entry (tmp
, ksock_route_t
, ksnr_list
);
2027 if (route
->ksnr_myipaddr
!= ipaddr
)
2030 if (route
->ksnr_share_count
!= 0) {
2031 /* Manually created; keep, but unbind */
2032 route
->ksnr_myipaddr
= 0;
2034 ksocknal_del_route_locked(route
);
2038 list_for_each_safe(tmp
, nxt
, &peer
->ksnp_conns
) {
2039 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
2041 if (conn
->ksnc_myipaddr
== ipaddr
)
2042 ksocknal_close_conn_locked (conn
, 0);
2047 ksocknal_del_interface(lnet_ni_t
*ni
, __u32 ipaddress
)
2049 ksock_net_t
*net
= ni
->ni_data
;
2051 struct list_head
*tmp
;
2052 struct list_head
*nxt
;
2058 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
2060 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
2061 this_ip
= net
->ksnn_interfaces
[i
].ksni_ipaddr
;
2063 if (!(ipaddress
== 0 ||
2064 ipaddress
== this_ip
))
2069 for (j
= i
+1; j
< net
->ksnn_ninterfaces
; j
++)
2070 net
->ksnn_interfaces
[j
-1] =
2071 net
->ksnn_interfaces
[j
];
2073 net
->ksnn_ninterfaces
--;
2075 for (j
= 0; j
< ksocknal_data
.ksnd_peer_hash_size
; j
++) {
2076 list_for_each_safe(tmp
, nxt
,
2077 &ksocknal_data
.ksnd_peers
[j
]) {
2078 peer
= list_entry(tmp
, ksock_peer_t
,
2081 if (peer
->ksnp_ni
!= ni
)
2084 ksocknal_peer_del_interface_locked(peer
, this_ip
);
2089 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
2095 ksocknal_ctl(lnet_ni_t
*ni
, unsigned int cmd
, void *arg
)
2097 lnet_process_id_t id
= {0};
2098 struct libcfs_ioctl_data
*data
= arg
;
2102 case IOC_LIBCFS_GET_INTERFACE
: {
2103 ksock_net_t
*net
= ni
->ni_data
;
2104 ksock_interface_t
*iface
;
2106 read_lock(&ksocknal_data
.ksnd_global_lock
);
2108 if (data
->ioc_count
>= (__u32
)net
->ksnn_ninterfaces
) {
2112 iface
= &net
->ksnn_interfaces
[data
->ioc_count
];
2114 data
->ioc_u32
[0] = iface
->ksni_ipaddr
;
2115 data
->ioc_u32
[1] = iface
->ksni_netmask
;
2116 data
->ioc_u32
[2] = iface
->ksni_npeers
;
2117 data
->ioc_u32
[3] = iface
->ksni_nroutes
;
2120 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2124 case IOC_LIBCFS_ADD_INTERFACE
:
2125 return ksocknal_add_interface(ni
,
2126 data
->ioc_u32
[0], /* IP address */
2127 data
->ioc_u32
[1]); /* net mask */
2129 case IOC_LIBCFS_DEL_INTERFACE
:
2130 return ksocknal_del_interface(ni
,
2131 data
->ioc_u32
[0]); /* IP address */
2133 case IOC_LIBCFS_GET_PEER
: {
2138 int share_count
= 0;
2140 rc
= ksocknal_get_peer_info(ni
, data
->ioc_count
,
2141 &id
, &myip
, &ip
, &port
,
2142 &conn_count
, &share_count
);
2146 data
->ioc_nid
= id
.nid
;
2147 data
->ioc_count
= share_count
;
2148 data
->ioc_u32
[0] = ip
;
2149 data
->ioc_u32
[1] = port
;
2150 data
->ioc_u32
[2] = myip
;
2151 data
->ioc_u32
[3] = conn_count
;
2152 data
->ioc_u32
[4] = id
.pid
;
2156 case IOC_LIBCFS_ADD_PEER
:
2157 id
.nid
= data
->ioc_nid
;
2158 id
.pid
= LUSTRE_SRV_LNET_PID
;
2159 return ksocknal_add_peer (ni
, id
,
2160 data
->ioc_u32
[0], /* IP */
2161 data
->ioc_u32
[1]); /* port */
2163 case IOC_LIBCFS_DEL_PEER
:
2164 id
.nid
= data
->ioc_nid
;
2165 id
.pid
= LNET_PID_ANY
;
2166 return ksocknal_del_peer (ni
, id
,
2167 data
->ioc_u32
[0]); /* IP */
2169 case IOC_LIBCFS_GET_CONN
: {
2173 ksock_conn_t
*conn
= ksocknal_get_conn_by_idx (ni
, data
->ioc_count
);
2178 ksocknal_lib_get_conn_tunables(conn
, &txmem
, &rxmem
, &nagle
);
2180 data
->ioc_count
= txmem
;
2181 data
->ioc_nid
= conn
->ksnc_peer
->ksnp_id
.nid
;
2182 data
->ioc_flags
= nagle
;
2183 data
->ioc_u32
[0] = conn
->ksnc_ipaddr
;
2184 data
->ioc_u32
[1] = conn
->ksnc_port
;
2185 data
->ioc_u32
[2] = conn
->ksnc_myipaddr
;
2186 data
->ioc_u32
[3] = conn
->ksnc_type
;
2187 data
->ioc_u32
[4] = conn
->ksnc_scheduler
->kss_info
->ksi_cpt
;
2188 data
->ioc_u32
[5] = rxmem
;
2189 data
->ioc_u32
[6] = conn
->ksnc_peer
->ksnp_id
.pid
;
2190 ksocknal_conn_decref(conn
);
2194 case IOC_LIBCFS_CLOSE_CONNECTION
:
2195 id
.nid
= data
->ioc_nid
;
2196 id
.pid
= LNET_PID_ANY
;
2197 return ksocknal_close_matching_conns (id
,
2200 case IOC_LIBCFS_REGISTER_MYNID
:
2201 /* Ignore if this is a noop */
2202 if (data
->ioc_nid
== ni
->ni_nid
)
2205 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
2206 libcfs_nid2str(data
->ioc_nid
),
2207 libcfs_nid2str(ni
->ni_nid
));
2210 case IOC_LIBCFS_PUSH_CONNECTION
:
2211 id
.nid
= data
->ioc_nid
;
2212 id
.pid
= LNET_PID_ANY
;
2213 return ksocknal_push(ni
, id
);
2222 ksocknal_free_buffers (void)
2224 LASSERT (atomic_read(&ksocknal_data
.ksnd_nactive_txs
) == 0);
2226 if (ksocknal_data
.ksnd_sched_info
!= NULL
) {
2227 struct ksock_sched_info
*info
;
2230 cfs_percpt_for_each(info
, i
, ksocknal_data
.ksnd_sched_info
) {
2231 if (info
->ksi_scheds
!= NULL
) {
2232 LIBCFS_FREE(info
->ksi_scheds
,
2233 info
->ksi_nthreads_max
*
2234 sizeof(info
->ksi_scheds
[0]));
2237 cfs_percpt_free(ksocknal_data
.ksnd_sched_info
);
2240 LIBCFS_FREE (ksocknal_data
.ksnd_peers
,
2241 sizeof (struct list_head
) *
2242 ksocknal_data
.ksnd_peer_hash_size
);
2244 spin_lock(&ksocknal_data
.ksnd_tx_lock
);
2246 if (!list_empty(&ksocknal_data
.ksnd_idle_noop_txs
)) {
2247 struct list_head zlist
;
2250 list_add(&zlist
, &ksocknal_data
.ksnd_idle_noop_txs
);
2251 list_del_init(&ksocknal_data
.ksnd_idle_noop_txs
);
2252 spin_unlock(&ksocknal_data
.ksnd_tx_lock
);
2254 while (!list_empty(&zlist
)) {
2255 tx
= list_entry(zlist
.next
, ksock_tx_t
, tx_list
);
2256 list_del(&tx
->tx_list
);
2257 LIBCFS_FREE(tx
, tx
->tx_desc_size
);
2260 spin_unlock(&ksocknal_data
.ksnd_tx_lock
);
2265 ksocknal_base_shutdown(void)
2267 struct ksock_sched_info
*info
;
2268 ksock_sched_t
*sched
;
2272 CDEBUG(D_MALLOC
, "before NAL cleanup: kmem %d\n",
2273 atomic_read (&libcfs_kmemory
));
2274 LASSERT (ksocknal_data
.ksnd_nnets
== 0);
2276 switch (ksocknal_data
.ksnd_init
) {
2280 case SOCKNAL_INIT_ALL
:
2281 case SOCKNAL_INIT_DATA
:
2282 LASSERT (ksocknal_data
.ksnd_peers
!= NULL
);
2283 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
2284 LASSERT (list_empty (&ksocknal_data
.ksnd_peers
[i
]));
2287 LASSERT(list_empty(&ksocknal_data
.ksnd_nets
));
2288 LASSERT (list_empty (&ksocknal_data
.ksnd_enomem_conns
));
2289 LASSERT (list_empty (&ksocknal_data
.ksnd_zombie_conns
));
2290 LASSERT (list_empty (&ksocknal_data
.ksnd_connd_connreqs
));
2291 LASSERT (list_empty (&ksocknal_data
.ksnd_connd_routes
));
2293 if (ksocknal_data
.ksnd_sched_info
!= NULL
) {
2294 cfs_percpt_for_each(info
, i
,
2295 ksocknal_data
.ksnd_sched_info
) {
2296 if (info
->ksi_scheds
== NULL
)
2299 for (j
= 0; j
< info
->ksi_nthreads_max
; j
++) {
2301 sched
= &info
->ksi_scheds
[j
];
2302 LASSERT(list_empty(&sched
->\
2304 LASSERT(list_empty(&sched
->\
2306 LASSERT(list_empty(&sched
-> \
2307 kss_zombie_noop_txs
));
2308 LASSERT(sched
->kss_nconns
== 0);
2313 /* flag threads to terminate; wake and wait for them to die */
2314 ksocknal_data
.ksnd_shuttingdown
= 1;
2315 wake_up_all(&ksocknal_data
.ksnd_connd_waitq
);
2316 wake_up_all(&ksocknal_data
.ksnd_reaper_waitq
);
2318 if (ksocknal_data
.ksnd_sched_info
!= NULL
) {
2319 cfs_percpt_for_each(info
, i
,
2320 ksocknal_data
.ksnd_sched_info
) {
2321 if (info
->ksi_scheds
== NULL
)
2324 for (j
= 0; j
< info
->ksi_nthreads_max
; j
++) {
2325 sched
= &info
->ksi_scheds
[j
];
2326 wake_up_all(&sched
->kss_waitq
);
2332 read_lock(&ksocknal_data
.ksnd_global_lock
);
2333 while (ksocknal_data
.ksnd_nthreads
!= 0) {
2335 CDEBUG(((i
& (-i
)) == i
) ? D_WARNING
: D_NET
, /* power of 2? */
2336 "waiting for %d threads to terminate\n",
2337 ksocknal_data
.ksnd_nthreads
);
2338 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2339 cfs_pause(cfs_time_seconds(1));
2340 read_lock(&ksocknal_data
.ksnd_global_lock
);
2342 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2344 ksocknal_free_buffers();
2346 ksocknal_data
.ksnd_init
= SOCKNAL_INIT_NOTHING
;
2350 CDEBUG(D_MALLOC
, "after NAL cleanup: kmem %d\n",
2351 atomic_read (&libcfs_kmemory
));
2353 module_put(THIS_MODULE
);
2357 ksocknal_new_incarnation (void)
2361 /* The incarnation number is the time this module loaded and it
2362 * identifies this particular instance of the socknal. Hopefully
2363 * we won't be able to reboot more frequently than 1MHz for the
2364 * foreseeable future :) */
2366 do_gettimeofday(&tv
);
2368 return (((__u64
)tv
.tv_sec
) * 1000000) + tv
.tv_usec
;
2372 ksocknal_base_startup(void)
2374 struct ksock_sched_info
*info
;
2378 LASSERT (ksocknal_data
.ksnd_init
== SOCKNAL_INIT_NOTHING
);
2379 LASSERT (ksocknal_data
.ksnd_nnets
== 0);
2381 memset (&ksocknal_data
, 0, sizeof (ksocknal_data
)); /* zero pointers */
2383 ksocknal_data
.ksnd_peer_hash_size
= SOCKNAL_PEER_HASH_SIZE
;
2384 LIBCFS_ALLOC (ksocknal_data
.ksnd_peers
,
2385 sizeof (struct list_head
) *
2386 ksocknal_data
.ksnd_peer_hash_size
);
2387 if (ksocknal_data
.ksnd_peers
== NULL
)
2390 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++)
2391 INIT_LIST_HEAD(&ksocknal_data
.ksnd_peers
[i
]);
2393 rwlock_init(&ksocknal_data
.ksnd_global_lock
);
2394 INIT_LIST_HEAD(&ksocknal_data
.ksnd_nets
);
2396 spin_lock_init(&ksocknal_data
.ksnd_reaper_lock
);
2397 INIT_LIST_HEAD (&ksocknal_data
.ksnd_enomem_conns
);
2398 INIT_LIST_HEAD (&ksocknal_data
.ksnd_zombie_conns
);
2399 INIT_LIST_HEAD (&ksocknal_data
.ksnd_deathrow_conns
);
2400 init_waitqueue_head(&ksocknal_data
.ksnd_reaper_waitq
);
2402 spin_lock_init(&ksocknal_data
.ksnd_connd_lock
);
2403 INIT_LIST_HEAD (&ksocknal_data
.ksnd_connd_connreqs
);
2404 INIT_LIST_HEAD (&ksocknal_data
.ksnd_connd_routes
);
2405 init_waitqueue_head(&ksocknal_data
.ksnd_connd_waitq
);
2407 spin_lock_init(&ksocknal_data
.ksnd_tx_lock
);
2408 INIT_LIST_HEAD (&ksocknal_data
.ksnd_idle_noop_txs
);
2410 /* NB memset above zeros whole of ksocknal_data */
2412 /* flag lists/ptrs/locks initialised */
2413 ksocknal_data
.ksnd_init
= SOCKNAL_INIT_DATA
;
2414 try_module_get(THIS_MODULE
);
2416 ksocknal_data
.ksnd_sched_info
= cfs_percpt_alloc(lnet_cpt_table(),
2418 if (ksocknal_data
.ksnd_sched_info
== NULL
)
2421 cfs_percpt_for_each(info
, i
, ksocknal_data
.ksnd_sched_info
) {
2422 ksock_sched_t
*sched
;
2425 nthrs
= cfs_cpt_weight(lnet_cpt_table(), i
);
2426 if (*ksocknal_tunables
.ksnd_nscheds
> 0) {
2427 nthrs
= min(nthrs
, *ksocknal_tunables
.ksnd_nscheds
);
2429 /* max to half of CPUs, assume another half should be
2430 * reserved for upper layer modules */
2431 nthrs
= min(max(SOCKNAL_NSCHEDS
, nthrs
>> 1), nthrs
);
2434 info
->ksi_nthreads_max
= nthrs
;
2437 LIBCFS_CPT_ALLOC(info
->ksi_scheds
, lnet_cpt_table(), i
,
2438 info
->ksi_nthreads_max
* sizeof(*sched
));
2439 if (info
->ksi_scheds
== NULL
)
2442 for (; nthrs
> 0; nthrs
--) {
2443 sched
= &info
->ksi_scheds
[nthrs
- 1];
2445 sched
->kss_info
= info
;
2446 spin_lock_init(&sched
->kss_lock
);
2447 INIT_LIST_HEAD(&sched
->kss_rx_conns
);
2448 INIT_LIST_HEAD(&sched
->kss_tx_conns
);
2449 INIT_LIST_HEAD(&sched
->kss_zombie_noop_txs
);
2450 init_waitqueue_head(&sched
->kss_waitq
);
2454 ksocknal_data
.ksnd_connd_starting
= 0;
2455 ksocknal_data
.ksnd_connd_failed_stamp
= 0;
2456 ksocknal_data
.ksnd_connd_starting_stamp
= cfs_time_current_sec();
2457 /* must have at least 2 connds to remain responsive to accepts while
2459 if (*ksocknal_tunables
.ksnd_nconnds
< SOCKNAL_CONND_RESV
+ 1)
2460 *ksocknal_tunables
.ksnd_nconnds
= SOCKNAL_CONND_RESV
+ 1;
2462 if (*ksocknal_tunables
.ksnd_nconnds_max
<
2463 *ksocknal_tunables
.ksnd_nconnds
) {
2464 ksocknal_tunables
.ksnd_nconnds_max
=
2465 ksocknal_tunables
.ksnd_nconnds
;
2468 for (i
= 0; i
< *ksocknal_tunables
.ksnd_nconnds
; i
++) {
2470 spin_lock_bh(&ksocknal_data
.ksnd_connd_lock
);
2471 ksocknal_data
.ksnd_connd_starting
++;
2472 spin_unlock_bh(&ksocknal_data
.ksnd_connd_lock
);
2475 snprintf(name
, sizeof(name
), "socknal_cd%02d", i
);
2476 rc
= ksocknal_thread_start(ksocknal_connd
,
2477 (void *)((ulong_ptr_t
)i
), name
);
2479 spin_lock_bh(&ksocknal_data
.ksnd_connd_lock
);
2480 ksocknal_data
.ksnd_connd_starting
--;
2481 spin_unlock_bh(&ksocknal_data
.ksnd_connd_lock
);
2482 CERROR("Can't spawn socknal connd: %d\n", rc
);
2487 rc
= ksocknal_thread_start(ksocknal_reaper
, NULL
, "socknal_reaper");
2489 CERROR ("Can't spawn socknal reaper: %d\n", rc
);
2493 /* flag everything initialised */
2494 ksocknal_data
.ksnd_init
= SOCKNAL_INIT_ALL
;
2499 ksocknal_base_shutdown();
2504 ksocknal_debug_peerhash (lnet_ni_t
*ni
)
2506 ksock_peer_t
*peer
= NULL
;
2507 struct list_head
*tmp
;
2510 read_lock(&ksocknal_data
.ksnd_global_lock
);
2512 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
2513 list_for_each (tmp
, &ksocknal_data
.ksnd_peers
[i
]) {
2514 peer
= list_entry (tmp
, ksock_peer_t
, ksnp_list
);
2516 if (peer
->ksnp_ni
== ni
) break;
2523 ksock_route_t
*route
;
2526 CWARN ("Active peer on shutdown: %s, ref %d, scnt %d, "
2527 "closing %d, accepting %d, err %d, zcookie "LPU64
", "
2528 "txq %d, zc_req %d\n", libcfs_id2str(peer
->ksnp_id
),
2529 atomic_read(&peer
->ksnp_refcount
),
2530 peer
->ksnp_sharecount
, peer
->ksnp_closing
,
2531 peer
->ksnp_accepting
, peer
->ksnp_error
,
2532 peer
->ksnp_zc_next_cookie
,
2533 !list_empty(&peer
->ksnp_tx_queue
),
2534 !list_empty(&peer
->ksnp_zc_req_list
));
2536 list_for_each (tmp
, &peer
->ksnp_routes
) {
2537 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
2538 CWARN ("Route: ref %d, schd %d, conn %d, cnted %d, "
2539 "del %d\n", atomic_read(&route
->ksnr_refcount
),
2540 route
->ksnr_scheduled
, route
->ksnr_connecting
,
2541 route
->ksnr_connected
, route
->ksnr_deleted
);
2544 list_for_each (tmp
, &peer
->ksnp_conns
) {
2545 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
2546 CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
2547 atomic_read(&conn
->ksnc_conn_refcount
),
2548 atomic_read(&conn
->ksnc_sock_refcount
),
2549 conn
->ksnc_type
, conn
->ksnc_closing
);
2553 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2558 ksocknal_shutdown (lnet_ni_t
*ni
)
2560 ksock_net_t
*net
= ni
->ni_data
;
2562 lnet_process_id_t anyid
= {0};
2564 anyid
.nid
= LNET_NID_ANY
;
2565 anyid
.pid
= LNET_PID_ANY
;
2567 LASSERT(ksocknal_data
.ksnd_init
== SOCKNAL_INIT_ALL
);
2568 LASSERT(ksocknal_data
.ksnd_nnets
> 0);
2570 spin_lock_bh(&net
->ksnn_lock
);
2571 net
->ksnn_shutdown
= 1; /* prevent new peers */
2572 spin_unlock_bh(&net
->ksnn_lock
);
2574 /* Delete all peers */
2575 ksocknal_del_peer(ni
, anyid
, 0);
2577 /* Wait for all peer state to clean up */
2579 spin_lock_bh(&net
->ksnn_lock
);
2580 while (net
->ksnn_npeers
!= 0) {
2581 spin_unlock_bh(&net
->ksnn_lock
);
2584 CDEBUG(((i
& (-i
)) == i
) ? D_WARNING
: D_NET
, /* power of 2? */
2585 "waiting for %d peers to disconnect\n",
2587 cfs_pause(cfs_time_seconds(1));
2589 ksocknal_debug_peerhash(ni
);
2591 spin_lock_bh(&net
->ksnn_lock
);
2593 spin_unlock_bh(&net
->ksnn_lock
);
2595 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
2596 LASSERT (net
->ksnn_interfaces
[i
].ksni_npeers
== 0);
2597 LASSERT (net
->ksnn_interfaces
[i
].ksni_nroutes
== 0);
2600 list_del(&net
->ksnn_list
);
2601 LIBCFS_FREE(net
, sizeof(*net
));
2603 ksocknal_data
.ksnd_nnets
--;
2604 if (ksocknal_data
.ksnd_nnets
== 0)
2605 ksocknal_base_shutdown();
2609 ksocknal_enumerate_interfaces(ksock_net_t
*net
)
2617 n
= libcfs_ipif_enumerate(&names
);
2619 CERROR("Can't enumerate interfaces: %d\n", n
);
2623 for (i
= j
= 0; i
< n
; i
++) {
2628 if (!strcmp(names
[i
], "lo")) /* skip the loopback IF */
2631 rc
= libcfs_ipif_query(names
[i
], &up
, &ip
, &mask
);
2633 CWARN("Can't get interface %s info: %d\n",
2639 CWARN("Ignoring interface %s (down)\n",
2644 if (j
== LNET_MAX_INTERFACES
) {
2645 CWARN("Ignoring interface %s (too many interfaces)\n",
2650 net
->ksnn_interfaces
[j
].ksni_ipaddr
= ip
;
2651 net
->ksnn_interfaces
[j
].ksni_netmask
= mask
;
2652 strncpy(&net
->ksnn_interfaces
[j
].ksni_name
[0],
2653 names
[i
], IFNAMSIZ
);
2657 libcfs_ipif_free_enumeration(names
, n
);
2660 CERROR("Can't find any usable interfaces\n");
2666 ksocknal_search_new_ipif(ksock_net_t
*net
)
2671 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
2672 char *ifnam
= &net
->ksnn_interfaces
[i
].ksni_name
[0];
2673 char *colon
= strchr(ifnam
, ':');
2678 if (colon
!= NULL
) /* ignore alias device */
2681 list_for_each_entry(tmp
, &ksocknal_data
.ksnd_nets
,
2683 for (j
= 0; !found
&& j
< tmp
->ksnn_ninterfaces
; j
++) {
2684 char *ifnam2
= &tmp
->ksnn_interfaces
[j
].\
2686 char *colon2
= strchr(ifnam2
, ':');
2691 found
= strcmp(ifnam
, ifnam2
) == 0;
/*
 * ksocknal_start_schedulers() - start scheduler threads for one
 * ksock_sched_info and account for them in info->ksi_nthreads.
 *
 * The number of threads to start (nthrs) is chosen from:
 *  - info->ksi_nthreads_max when the ksnd_nscheds tunable is positive;
 *  - cfs_cpt_weight() for the CPT table, bounded by SOCKNAL_NSCHEDS,
 *    SOCKNAL_NSCHEDS_HIGH and info->ksi_nthreads_max;
 *  - at most 2, limited by the room left below info->ksi_nthreads_max.
 * Each thread is started through ksocknal_thread_start() with
 * ksocknal_scheduler as its entry function.
 *
 * NOTE(review): the `else` lines separating the three cases above, the
 * remaining arguments of the thread-start call, the check on `rc` and the
 * return statement are not present in this extract -- confirm against the
 * complete file.
 */
2708 ksocknal_start_schedulers(struct ksock_sched_info
*info
)
/* no thread has been started for this info yet */
2714 if (info
->ksi_nthreads
== 0) {
2715 if (*ksocknal_tunables
.ksnd_nscheds
> 0) {
2716 nthrs
= info
->ksi_nthreads_max
;
2718 nthrs
= cfs_cpt_weight(lnet_cpt_table(),
/* the larger of SOCKNAL_NSCHEDS and half of nthrs, but never above nthrs */
2720 nthrs
= min(max(SOCKNAL_NSCHEDS
, nthrs
>> 1), nthrs
);
/* cap at SOCKNAL_NSCHEDS_HIGH */
2721 nthrs
= min(SOCKNAL_NSCHEDS_HIGH
, nthrs
);
/* never exceed the per-info maximum */
2723 nthrs
= min(nthrs
, info
->ksi_nthreads_max
);
2725 LASSERT(info
->ksi_nthreads
<= info
->ksi_nthreads_max
);
2726 /* add at most two more threads when there is a new interface */
2727 nthrs
= min(2, info
->ksi_nthreads_max
- info
->ksi_nthreads
);
2730 for (i
= 0; i
< nthrs
; i
++) {
2733 ksock_sched_t
*sched
;
/* thread id built from the CPT and the index of the new thread */
2734 id
= KSOCK_THREAD_ID(info
->ksi_cpt
, info
->ksi_nthreads
+ i
);
/* scheduler slot selected by KSOCK_THREAD_SID(id) */
2735 sched
= &info
->ksi_scheds
[KSOCK_THREAD_SID(id
)];
/* thread name: "socknal_sd" + CPT + '_' + index of sched within ksi_scheds */
2736 snprintf(name
, sizeof(name
), "socknal_sd%02d_%02d",
2737 info
->ksi_cpt
, (int)(sched
- &info
->ksi_scheds
[0]));
2739 rc
= ksocknal_thread_start(ksocknal_scheduler
,
2744 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
2745 info
->ksi_cpt
, info
->ksi_nthreads
+ i
, rc
);
/* ksi_nthreads grows by the value the loop counter reached */
2749 info
->ksi_nthreads
+= i
;
/*
 * ksocknal_net_start_threads() - call ksocknal_start_schedulers() for the
 * CPTs of a net.
 *
 * @net:   net whose interfaces are passed to ksocknal_search_new_ipif()
 * @cpts:  array of CPT numbers, or NULL to use 0 .. @ncpts - 1
 * @ncpts: number of CPTs to visit; asserted to be in
 *         1 .. cfs_cpt_number(lnet_cpt_table())
 *
 * NOTE(review): the statement guarded by the `!newif && ksi_nthreads > 0`
 * test, the handling of `rc` and the return statement are not present in
 * this extract -- confirm against the complete file.
 */
2754 ksocknal_net_start_threads(ksock_net_t
*net
, __u32
*cpts
, int ncpts
)
/* result of the interface-name search for this net */
2756 int newif
= ksocknal_search_new_ipif(net
);
2760 LASSERT(ncpts
> 0 && ncpts
<= cfs_cpt_number(lnet_cpt_table()));
2762 for (i
= 0; i
< ncpts
; i
++) {
2763 struct ksock_sched_info
*info
;
/* without an explicit CPT list the loop index itself is the CPT */
2764 int cpt
= (cpts
== NULL
) ? i
: cpts
[i
];
2766 LASSERT(cpt
< cfs_cpt_number(lnet_cpt_table()));
/* per-CPT scheduler info */
2767 info
= ksocknal_data
.ksnd_sched_info
[cpt
];
/* case: no new interface and this CPT already has threads */
2769 if (!newif
&& info
->ksi_nthreads
> 0)
2772 rc
= ksocknal_start_schedulers(info
);
/*
 * ksocknal_startup() - set up a ksock_net_t for the LNet NI @ni; installed
 * as the_ksocklnd.lnd_startup by ksocknal_module_init().
 *
 * Visible steps: run ksocknal_base_startup() when ksocknal_data.ksnd_init
 * is SOCKNAL_INIT_NOTHING, allocate and initialise the net, copy the
 * timeout/credit tunables into @ni, fill net->ksnn_interfaces[] either via
 * ksocknal_enumerate_interfaces() or by querying each ni->ni_interfaces[]
 * entry, start the scheduler threads, derive ni->ni_nid from interface 0,
 * and link the net onto ksocknal_data.ksnd_nets.
 *
 * NOTE(review): the error checks, goto/labels, `else` lines and return
 * statements are not present in this extract -- confirm the control flow
 * and return values against the complete file.
 */
2780 ksocknal_startup (lnet_ni_t
*ni
)
2786 LASSERT (ni
->ni_lnd
== &the_ksocklnd
);
/* module-wide state has not been initialised yet */
2788 if (ksocknal_data
.ksnd_init
== SOCKNAL_INIT_NOTHING
) {
2789 rc
= ksocknal_base_startup();
2794 LIBCFS_ALLOC(net
, sizeof(*net
));
2798 spin_lock_init(&net
->ksnn_lock
);
2799 net
->ksnn_incarnation
= ksocknal_new_incarnation();
/* copy the socklnd tunables into the NI's timeout and credit fields */
2801 ni
->ni_peertimeout
= *ksocknal_tunables
.ksnd_peertimeout
;
2802 ni
->ni_maxtxcredits
= *ksocknal_tunables
.ksnd_credits
;
2803 ni
->ni_peertxcredits
= *ksocknal_tunables
.ksnd_peertxcredits
;
2804 ni
->ni_peerrtrcredits
= *ksocknal_tunables
.ksnd_peerrtrcredits
;
/* ni_interfaces[] has no first entry: enumerate the interfaces instead */
2806 if (ni
->ni_interfaces
[0] == NULL
) {
2807 rc
= ksocknal_enumerate_interfaces(net
);
/* the interface count is set to 1 on this path */
2811 net
->ksnn_ninterfaces
= 1;
/* presumably the else-branch: query each listed interface, up to LNET_MAX_INTERFACES */
2813 for (i
= 0; i
< LNET_MAX_INTERFACES
; i
++) {
/* a NULL entry marks the end of the list */
2816 if (ni
->ni_interfaces
[i
] == NULL
)
/* fetch up/down state, IP address and netmask of interface i */
2819 rc
= libcfs_ipif_query(
2820 ni
->ni_interfaces
[i
], &up
,
2821 &net
->ksnn_interfaces
[i
].ksni_ipaddr
,
2822 &net
->ksnn_interfaces
[i
].ksni_netmask
);
2825 CERROR("Can't get interface %s info: %d\n",
2826 ni
->ni_interfaces
[i
], rc
);
2831 CERROR("Interface %s is down\n",
2832 ni
->ni_interfaces
[i
]);
/*
 * NOTE(review): strncpy() writes no terminating NUL when the source is
 * IFNAMSIZ characters or longer -- confirm that ksni_name is large enough
 * or is terminated elsewhere.
 */
2836 strncpy(&net
->ksnn_interfaces
[i
].ksni_name
[0],
2837 ni
->ni_interfaces
[i
], IFNAMSIZ
);
/* interface count = value the loop counter reached */
2839 net
->ksnn_ninterfaces
= i
;
2842 /* start the threads before the net is added to ksocknal_data.ksnd_nets */
2843 rc
= ksocknal_net_start_threads(net
, ni
->ni_cpts
, ni
->ni_ncpts
);
/* keep the network part of the NID, take the address part from interface 0 */
2847 ni
->ni_nid
= LNET_MKNID(LNET_NIDNET(ni
->ni_nid
),
2848 net
->ksnn_interfaces
[0].ksni_ipaddr
);
2849 list_add(&net
->ksnn_list
, &ksocknal_data
.ksnd_nets
);
2851 ksocknal_data
.ksnd_nnets
++;
/* presumably the failure path (labels not visible here): release the net */
2856 LIBCFS_FREE(net
, sizeof(*net
));
/* shut the shared state down when no net is counted */
2858 if (ksocknal_data
.ksnd_nnets
== 0)
2859 ksocknal_base_shutdown();
/*
 * ksocknal_module_fini() - module exit hook (passed to module_exit());
 * calls lnet_unregister_lnd() on the_ksocklnd.
 */
2866 ksocknal_module_fini (void)
2868 lnet_unregister_lnd(&the_ksocklnd
);
/*
 * ksocknal_module_init() - module init hook (passed to module_init()).
 *
 * Checks two compile-time assumptions about the SOCKLND_CONN_* constants,
 * fills in the type and callback fields of the_ksocklnd, runs
 * ksocknal_tunables_init() and hands the_ksocklnd to lnet_register_lnd().
 *
 * NOTE(review): the check on `rc` and the return statements are not
 * present in this extract -- confirm against the complete file.
 */
2872 ksocknal_module_init (void)
2876 /* check that the ksnr_connected/connecting fields are large enough */
2877 CLASSERT (SOCKLND_CONN_NTYPES
<= 4);
2878 CLASSERT (SOCKLND_CONN_ACK
== SOCKLND_CONN_BULK_IN
);
2880 /* initialize the_ksocklnd */
2881 the_ksocklnd
.lnd_type
= SOCKLND
;
2882 the_ksocklnd
.lnd_startup
= ksocknal_startup
;
2883 the_ksocklnd
.lnd_shutdown
= ksocknal_shutdown
;
2884 the_ksocklnd
.lnd_ctl
= ksocknal_ctl
;
2885 the_ksocklnd
.lnd_send
= ksocknal_send
;
2886 the_ksocklnd
.lnd_recv
= ksocknal_recv
;
2887 the_ksocklnd
.lnd_notify
= ksocknal_notify
;
2888 the_ksocklnd
.lnd_query
= ksocknal_query
;
2889 the_ksocklnd
.lnd_accept
= ksocknal_accept
;
/* set up the tunables before the LND is registered */
2891 rc
= ksocknal_tunables_init();
2895 lnet_register_lnd(&the_ksocklnd
);
/* Module metadata: author, description, license and version strings. */
2900 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
2901 MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
2902 MODULE_LICENSE("GPL");
2903 MODULE_VERSION("3.0.0");
/* Register the init and exit hooks of this module. */
2905 module_init(ksocknal_module_init
);
2906 module_exit(ksocknal_module_fini
);