4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/socklnd/socklnd.c
38 * Author: Zach Brown <zab@zabbo.net>
39 * Author: Peter J. Braam <braam@clusterfs.com>
40 * Author: Phil Schwan <phil@clusterfs.com>
41 * Author: Eric Barton <eric@bartonsoftware.com>
46 static lnd_t the_ksocklnd
;
47 ksock_nal_data_t ksocknal_data
;
49 static ksock_interface_t
*
50 ksocknal_ip2iface(lnet_ni_t
*ni
, __u32 ip
)
52 ksock_net_t
*net
= ni
->ni_data
;
54 ksock_interface_t
*iface
;
56 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
57 LASSERT(i
< LNET_MAX_INTERFACES
);
58 iface
= &net
->ksnn_interfaces
[i
];
60 if (iface
->ksni_ipaddr
== ip
)
67 static ksock_route_t
*
68 ksocknal_create_route (__u32 ipaddr
, int port
)
72 LIBCFS_ALLOC (route
, sizeof (*route
));
76 atomic_set (&route
->ksnr_refcount
, 1);
77 route
->ksnr_peer
= NULL
;
78 route
->ksnr_retry_interval
= 0; /* OK to connect at any time */
79 route
->ksnr_ipaddr
= ipaddr
;
80 route
->ksnr_port
= port
;
81 route
->ksnr_scheduled
= 0;
82 route
->ksnr_connecting
= 0;
83 route
->ksnr_connected
= 0;
84 route
->ksnr_deleted
= 0;
85 route
->ksnr_conn_count
= 0;
86 route
->ksnr_share_count
= 0;
92 ksocknal_destroy_route (ksock_route_t
*route
)
94 LASSERT (atomic_read(&route
->ksnr_refcount
) == 0);
96 if (route
->ksnr_peer
!= NULL
)
97 ksocknal_peer_decref(route
->ksnr_peer
);
99 LIBCFS_FREE (route
, sizeof (*route
));
103 ksocknal_create_peer (ksock_peer_t
**peerp
, lnet_ni_t
*ni
, lnet_process_id_t id
)
105 ksock_net_t
*net
= ni
->ni_data
;
108 LASSERT (id
.nid
!= LNET_NID_ANY
);
109 LASSERT (id
.pid
!= LNET_PID_ANY
);
110 LASSERT (!in_interrupt());
112 LIBCFS_ALLOC (peer
, sizeof (*peer
));
118 atomic_set (&peer
->ksnp_refcount
, 1); /* 1 ref for caller */
119 peer
->ksnp_closing
= 0;
120 peer
->ksnp_accepting
= 0;
121 peer
->ksnp_proto
= NULL
;
122 peer
->ksnp_last_alive
= 0;
123 peer
->ksnp_zc_next_cookie
= SOCKNAL_KEEPALIVE_PING
+ 1;
125 INIT_LIST_HEAD (&peer
->ksnp_conns
);
126 INIT_LIST_HEAD (&peer
->ksnp_routes
);
127 INIT_LIST_HEAD (&peer
->ksnp_tx_queue
);
128 INIT_LIST_HEAD (&peer
->ksnp_zc_req_list
);
129 spin_lock_init(&peer
->ksnp_lock
);
131 spin_lock_bh(&net
->ksnn_lock
);
133 if (net
->ksnn_shutdown
) {
134 spin_unlock_bh(&net
->ksnn_lock
);
136 LIBCFS_FREE(peer
, sizeof(*peer
));
137 CERROR("Can't create peer: network shutdown\n");
143 spin_unlock_bh(&net
->ksnn_lock
);
150 ksocknal_destroy_peer (ksock_peer_t
*peer
)
152 ksock_net_t
*net
= peer
->ksnp_ni
->ni_data
;
154 CDEBUG (D_NET
, "peer %s %p deleted\n",
155 libcfs_id2str(peer
->ksnp_id
), peer
);
157 LASSERT (atomic_read (&peer
->ksnp_refcount
) == 0);
158 LASSERT (peer
->ksnp_accepting
== 0);
159 LASSERT (list_empty (&peer
->ksnp_conns
));
160 LASSERT (list_empty (&peer
->ksnp_routes
));
161 LASSERT (list_empty (&peer
->ksnp_tx_queue
));
162 LASSERT (list_empty (&peer
->ksnp_zc_req_list
));
164 LIBCFS_FREE (peer
, sizeof (*peer
));
166 /* NB a peer's connections and routes keep a reference on their peer
167 * until they are destroyed, so we can be assured that _all_ state to
168 * do with this peer has been cleaned up when its refcount drops to
170 spin_lock_bh(&net
->ksnn_lock
);
172 spin_unlock_bh(&net
->ksnn_lock
);
176 ksocknal_find_peer_locked (lnet_ni_t
*ni
, lnet_process_id_t id
)
178 struct list_head
*peer_list
= ksocknal_nid2peerlist(id
.nid
);
179 struct list_head
*tmp
;
182 list_for_each (tmp
, peer_list
) {
184 peer
= list_entry (tmp
, ksock_peer_t
, ksnp_list
);
186 LASSERT (!peer
->ksnp_closing
);
188 if (peer
->ksnp_ni
!= ni
)
191 if (peer
->ksnp_id
.nid
!= id
.nid
||
192 peer
->ksnp_id
.pid
!= id
.pid
)
195 CDEBUG(D_NET
, "got peer [%p] -> %s (%d)\n",
196 peer
, libcfs_id2str(id
),
197 atomic_read(&peer
->ksnp_refcount
));
204 ksocknal_find_peer (lnet_ni_t
*ni
, lnet_process_id_t id
)
208 read_lock(&ksocknal_data
.ksnd_global_lock
);
209 peer
= ksocknal_find_peer_locked(ni
, id
);
210 if (peer
!= NULL
) /* +1 ref for caller? */
211 ksocknal_peer_addref(peer
);
212 read_unlock(&ksocknal_data
.ksnd_global_lock
);
218 ksocknal_unlink_peer_locked (ksock_peer_t
*peer
)
222 ksock_interface_t
*iface
;
224 for (i
= 0; i
< peer
->ksnp_n_passive_ips
; i
++) {
225 LASSERT (i
< LNET_MAX_INTERFACES
);
226 ip
= peer
->ksnp_passive_ips
[i
];
228 iface
= ksocknal_ip2iface(peer
->ksnp_ni
, ip
);
229 /* All IPs in peer->ksnp_passive_ips[] come from the
230 * interface list, therefore the call must succeed. */
231 LASSERT (iface
!= NULL
);
233 CDEBUG(D_NET
, "peer=%p iface=%p ksni_nroutes=%d\n",
234 peer
, iface
, iface
->ksni_nroutes
);
235 iface
->ksni_npeers
--;
238 LASSERT (list_empty(&peer
->ksnp_conns
));
239 LASSERT (list_empty(&peer
->ksnp_routes
));
240 LASSERT (!peer
->ksnp_closing
);
241 peer
->ksnp_closing
= 1;
242 list_del (&peer
->ksnp_list
);
243 /* lose peerlist's ref */
244 ksocknal_peer_decref(peer
);
248 ksocknal_get_peer_info (lnet_ni_t
*ni
, int index
,
249 lnet_process_id_t
*id
, __u32
*myip
, __u32
*peer_ip
,
250 int *port
, int *conn_count
, int *share_count
)
253 struct list_head
*ptmp
;
254 ksock_route_t
*route
;
255 struct list_head
*rtmp
;
260 read_lock(&ksocknal_data
.ksnd_global_lock
);
262 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
264 list_for_each (ptmp
, &ksocknal_data
.ksnd_peers
[i
]) {
265 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
267 if (peer
->ksnp_ni
!= ni
)
270 if (peer
->ksnp_n_passive_ips
== 0 &&
271 list_empty(&peer
->ksnp_routes
)) {
285 for (j
= 0; j
< peer
->ksnp_n_passive_ips
; j
++) {
290 *myip
= peer
->ksnp_passive_ips
[j
];
299 list_for_each (rtmp
, &peer
->ksnp_routes
) {
303 route
= list_entry(rtmp
, ksock_route_t
,
307 *myip
= route
->ksnr_myipaddr
;
308 *peer_ip
= route
->ksnr_ipaddr
;
309 *port
= route
->ksnr_port
;
310 *conn_count
= route
->ksnr_conn_count
;
311 *share_count
= route
->ksnr_share_count
;
318 read_unlock(&ksocknal_data
.ksnd_global_lock
);
323 ksocknal_associate_route_conn_locked(ksock_route_t
*route
, ksock_conn_t
*conn
)
325 ksock_peer_t
*peer
= route
->ksnr_peer
;
326 int type
= conn
->ksnc_type
;
327 ksock_interface_t
*iface
;
329 conn
->ksnc_route
= route
;
330 ksocknal_route_addref(route
);
332 if (route
->ksnr_myipaddr
!= conn
->ksnc_myipaddr
) {
333 if (route
->ksnr_myipaddr
== 0) {
334 /* route wasn't bound locally yet (the initial route) */
335 CDEBUG(D_NET
, "Binding %s %pI4h to %pI4h\n",
336 libcfs_id2str(peer
->ksnp_id
),
338 &conn
->ksnc_myipaddr
);
340 CDEBUG(D_NET
, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
341 libcfs_id2str(peer
->ksnp_id
),
343 &route
->ksnr_myipaddr
,
344 &conn
->ksnc_myipaddr
);
346 iface
= ksocknal_ip2iface(route
->ksnr_peer
->ksnp_ni
,
347 route
->ksnr_myipaddr
);
349 iface
->ksni_nroutes
--;
351 route
->ksnr_myipaddr
= conn
->ksnc_myipaddr
;
352 iface
= ksocknal_ip2iface(route
->ksnr_peer
->ksnp_ni
,
353 route
->ksnr_myipaddr
);
355 iface
->ksni_nroutes
++;
358 route
->ksnr_connected
|= (1<<type
);
359 route
->ksnr_conn_count
++;
361 /* Successful connection => further attempts can
362 * proceed immediately */
363 route
->ksnr_retry_interval
= 0;
367 ksocknal_add_route_locked (ksock_peer_t
*peer
, ksock_route_t
*route
)
369 struct list_head
*tmp
;
371 ksock_route_t
*route2
;
373 LASSERT (!peer
->ksnp_closing
);
374 LASSERT (route
->ksnr_peer
== NULL
);
375 LASSERT (!route
->ksnr_scheduled
);
376 LASSERT (!route
->ksnr_connecting
);
377 LASSERT (route
->ksnr_connected
== 0);
379 /* LASSERT(unique) */
380 list_for_each(tmp
, &peer
->ksnp_routes
) {
381 route2
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
383 if (route2
->ksnr_ipaddr
== route
->ksnr_ipaddr
) {
384 CERROR("Duplicate route %s %pI4h\n",
385 libcfs_id2str(peer
->ksnp_id
),
386 &route
->ksnr_ipaddr
);
391 route
->ksnr_peer
= peer
;
392 ksocknal_peer_addref(peer
);
393 /* peer's routelist takes over my ref on 'route' */
394 list_add_tail(&route
->ksnr_list
, &peer
->ksnp_routes
);
396 list_for_each(tmp
, &peer
->ksnp_conns
) {
397 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
399 if (conn
->ksnc_ipaddr
!= route
->ksnr_ipaddr
)
402 ksocknal_associate_route_conn_locked(route
, conn
);
403 /* keep going (typed routes) */
408 ksocknal_del_route_locked (ksock_route_t
*route
)
410 ksock_peer_t
*peer
= route
->ksnr_peer
;
411 ksock_interface_t
*iface
;
413 struct list_head
*ctmp
;
414 struct list_head
*cnxt
;
416 LASSERT (!route
->ksnr_deleted
);
418 /* Close associated conns */
419 list_for_each_safe (ctmp
, cnxt
, &peer
->ksnp_conns
) {
420 conn
= list_entry(ctmp
, ksock_conn_t
, ksnc_list
);
422 if (conn
->ksnc_route
!= route
)
425 ksocknal_close_conn_locked (conn
, 0);
428 if (route
->ksnr_myipaddr
!= 0) {
429 iface
= ksocknal_ip2iface(route
->ksnr_peer
->ksnp_ni
,
430 route
->ksnr_myipaddr
);
432 iface
->ksni_nroutes
--;
435 route
->ksnr_deleted
= 1;
436 list_del (&route
->ksnr_list
);
437 ksocknal_route_decref(route
); /* drop peer's ref */
439 if (list_empty (&peer
->ksnp_routes
) &&
440 list_empty (&peer
->ksnp_conns
)) {
441 /* I've just removed the last route to a peer with no active
443 ksocknal_unlink_peer_locked (peer
);
448 ksocknal_add_peer (lnet_ni_t
*ni
, lnet_process_id_t id
, __u32 ipaddr
, int port
)
450 struct list_head
*tmp
;
453 ksock_route_t
*route
;
454 ksock_route_t
*route2
;
457 if (id
.nid
== LNET_NID_ANY
||
458 id
.pid
== LNET_PID_ANY
)
461 /* Have a brand new peer ready... */
462 rc
= ksocknal_create_peer(&peer
, ni
, id
);
466 route
= ksocknal_create_route (ipaddr
, port
);
468 ksocknal_peer_decref(peer
);
472 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
474 /* always called with a ref on ni, so shutdown can't have started */
475 LASSERT (((ksock_net_t
*) ni
->ni_data
)->ksnn_shutdown
== 0);
477 peer2
= ksocknal_find_peer_locked (ni
, id
);
479 ksocknal_peer_decref(peer
);
482 /* peer table takes my ref on peer */
483 list_add_tail (&peer
->ksnp_list
,
484 ksocknal_nid2peerlist (id
.nid
));
488 list_for_each (tmp
, &peer
->ksnp_routes
) {
489 route2
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
491 if (route2
->ksnr_ipaddr
== ipaddr
)
496 if (route2
== NULL
) {
497 ksocknal_add_route_locked(peer
, route
);
498 route
->ksnr_share_count
++;
500 ksocknal_route_decref(route
);
501 route2
->ksnr_share_count
++;
504 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
510 ksocknal_del_peer_locked (ksock_peer_t
*peer
, __u32 ip
)
513 ksock_route_t
*route
;
514 struct list_head
*tmp
;
515 struct list_head
*nxt
;
518 LASSERT (!peer
->ksnp_closing
);
520 /* Extra ref prevents peer disappearing until I'm done with it */
521 ksocknal_peer_addref(peer
);
523 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_routes
) {
524 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
527 if (!(ip
== 0 || route
->ksnr_ipaddr
== ip
))
530 route
->ksnr_share_count
= 0;
531 /* This deletes associated conns too */
532 ksocknal_del_route_locked (route
);
536 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_routes
) {
537 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
538 nshared
+= route
->ksnr_share_count
;
542 /* remove everything else if there are no explicit entries
545 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_routes
) {
546 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
548 /* we should only be removing auto-entries */
549 LASSERT(route
->ksnr_share_count
== 0);
550 ksocknal_del_route_locked (route
);
553 list_for_each_safe (tmp
, nxt
, &peer
->ksnp_conns
) {
554 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
556 ksocknal_close_conn_locked(conn
, 0);
560 ksocknal_peer_decref(peer
);
561 /* NB peer unlinks itself when last conn/route is removed */
565 ksocknal_del_peer (lnet_ni_t
*ni
, lnet_process_id_t id
, __u32 ip
)
568 struct list_head
*ptmp
;
569 struct list_head
*pnxt
;
576 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
578 if (id
.nid
!= LNET_NID_ANY
)
579 lo
= hi
= (int)(ksocknal_nid2peerlist(id
.nid
) - ksocknal_data
.ksnd_peers
);
582 hi
= ksocknal_data
.ksnd_peer_hash_size
- 1;
585 for (i
= lo
; i
<= hi
; i
++) {
586 list_for_each_safe (ptmp
, pnxt
,
587 &ksocknal_data
.ksnd_peers
[i
]) {
588 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
590 if (peer
->ksnp_ni
!= ni
)
593 if (!((id
.nid
== LNET_NID_ANY
|| peer
->ksnp_id
.nid
== id
.nid
) &&
594 (id
.pid
== LNET_PID_ANY
|| peer
->ksnp_id
.pid
== id
.pid
)))
597 ksocknal_peer_addref(peer
); /* a ref for me... */
599 ksocknal_del_peer_locked (peer
, ip
);
601 if (peer
->ksnp_closing
&&
602 !list_empty(&peer
->ksnp_tx_queue
)) {
603 LASSERT (list_empty(&peer
->ksnp_conns
));
604 LASSERT (list_empty(&peer
->ksnp_routes
));
606 list_splice_init(&peer
->ksnp_tx_queue
,
610 ksocknal_peer_decref(peer
); /* ...till here */
612 rc
= 0; /* matched! */
616 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
618 ksocknal_txlist_done(ni
, &zombies
, 1);
623 static ksock_conn_t
*
624 ksocknal_get_conn_by_idx (lnet_ni_t
*ni
, int index
)
627 struct list_head
*ptmp
;
629 struct list_head
*ctmp
;
632 read_lock(&ksocknal_data
.ksnd_global_lock
);
634 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
635 list_for_each (ptmp
, &ksocknal_data
.ksnd_peers
[i
]) {
636 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
638 LASSERT (!peer
->ksnp_closing
);
640 if (peer
->ksnp_ni
!= ni
)
643 list_for_each (ctmp
, &peer
->ksnp_conns
) {
647 conn
= list_entry (ctmp
, ksock_conn_t
,
649 ksocknal_conn_addref(conn
);
650 read_unlock(&ksocknal_data
.ksnd_global_lock
);
656 read_unlock(&ksocknal_data
.ksnd_global_lock
);
660 static ksock_sched_t
*
661 ksocknal_choose_scheduler_locked(unsigned int cpt
)
663 struct ksock_sched_info
*info
= ksocknal_data
.ksnd_sched_info
[cpt
];
664 ksock_sched_t
*sched
;
667 LASSERT(info
->ksi_nthreads
> 0);
669 sched
= &info
->ksi_scheds
[0];
671 * NB: it's safe so far, but info->ksi_nthreads could be changed
672 * at runtime when we have dynamic LNet configuration, then we
673 * need to take care of this.
675 for (i
= 1; i
< info
->ksi_nthreads
; i
++) {
676 if (sched
->kss_nconns
> info
->ksi_scheds
[i
].kss_nconns
)
677 sched
= &info
->ksi_scheds
[i
];
684 ksocknal_local_ipvec (lnet_ni_t
*ni
, __u32
*ipaddrs
)
686 ksock_net_t
*net
= ni
->ni_data
;
690 read_lock(&ksocknal_data
.ksnd_global_lock
);
692 nip
= net
->ksnn_ninterfaces
;
693 LASSERT (nip
<= LNET_MAX_INTERFACES
);
695 /* Only offer interfaces for additional connections if I have
698 read_unlock(&ksocknal_data
.ksnd_global_lock
);
702 for (i
= 0; i
< nip
; i
++) {
703 ipaddrs
[i
] = net
->ksnn_interfaces
[i
].ksni_ipaddr
;
704 LASSERT (ipaddrs
[i
] != 0);
707 read_unlock(&ksocknal_data
.ksnd_global_lock
);
712 ksocknal_match_peerip (ksock_interface_t
*iface
, __u32
*ips
, int nips
)
714 int best_netmatch
= 0;
721 for (i
= 0; i
< nips
; i
++) {
725 this_xor
= (ips
[i
] ^ iface
->ksni_ipaddr
);
726 this_netmatch
= ((this_xor
& iface
->ksni_netmask
) == 0) ? 1 : 0;
729 best_netmatch
< this_netmatch
||
730 (best_netmatch
== this_netmatch
&&
731 best_xor
> this_xor
)))
735 best_netmatch
= this_netmatch
;
744 ksocknal_select_ips(ksock_peer_t
*peer
, __u32
*peerips
, int n_peerips
)
746 rwlock_t
*global_lock
= &ksocknal_data
.ksnd_global_lock
;
747 ksock_net_t
*net
= peer
->ksnp_ni
->ni_data
;
748 ksock_interface_t
*iface
;
749 ksock_interface_t
*best_iface
;
760 /* CAVEAT EMPTOR: We do all our interface matching with an
761 * exclusive hold of global lock at IRQ priority. We're only
762 * expecting to be dealing with small numbers of interfaces, so the
763 * O(n**3)-ness shouldn't matter */
765 /* Also note that I'm not going to return more than n_peerips
766 * interfaces, even if I have more myself */
768 write_lock_bh(global_lock
);
770 LASSERT (n_peerips
<= LNET_MAX_INTERFACES
);
771 LASSERT (net
->ksnn_ninterfaces
<= LNET_MAX_INTERFACES
);
773 /* Only match interfaces for additional connections
774 * if I have > 1 interface */
775 n_ips
= (net
->ksnn_ninterfaces
< 2) ? 0 :
776 min(n_peerips
, net
->ksnn_ninterfaces
);
778 for (i
= 0; peer
->ksnp_n_passive_ips
< n_ips
; i
++) {
779 /* ^ yes really... */
781 /* If we have any new interfaces, first tick off all the
782 * peer IPs that match old interfaces, then choose new
783 * interfaces to match the remaining peer IPS.
784 * We don't forget interfaces we've stopped using; we might
785 * start using them again... */
787 if (i
< peer
->ksnp_n_passive_ips
) {
789 ip
= peer
->ksnp_passive_ips
[i
];
790 best_iface
= ksocknal_ip2iface(peer
->ksnp_ni
, ip
);
793 /* choose a new interface */
794 LASSERT (i
== peer
->ksnp_n_passive_ips
);
800 for (j
= 0; j
< net
->ksnn_ninterfaces
; j
++) {
801 iface
= &net
->ksnn_interfaces
[j
];
802 ip
= iface
->ksni_ipaddr
;
804 for (k
= 0; k
< peer
->ksnp_n_passive_ips
; k
++)
805 if (peer
->ksnp_passive_ips
[k
] == ip
)
808 if (k
< peer
->ksnp_n_passive_ips
) /* using it already */
811 k
= ksocknal_match_peerip(iface
, peerips
, n_peerips
);
812 xor = (ip
^ peerips
[k
]);
813 this_netmatch
= ((xor & iface
->ksni_netmask
) == 0) ? 1 : 0;
815 if (!(best_iface
== NULL
||
816 best_netmatch
< this_netmatch
||
817 (best_netmatch
== this_netmatch
&&
818 best_npeers
> iface
->ksni_npeers
)))
822 best_netmatch
= this_netmatch
;
823 best_npeers
= iface
->ksni_npeers
;
826 best_iface
->ksni_npeers
++;
827 ip
= best_iface
->ksni_ipaddr
;
828 peer
->ksnp_passive_ips
[i
] = ip
;
829 peer
->ksnp_n_passive_ips
= i
+1;
832 /* mark the best matching peer IP used */
833 j
= ksocknal_match_peerip(best_iface
, peerips
, n_peerips
);
837 /* Overwrite input peer IP addresses */
838 memcpy(peerips
, peer
->ksnp_passive_ips
, n_ips
* sizeof(*peerips
));
840 write_unlock_bh(global_lock
);
846 ksocknal_create_routes(ksock_peer_t
*peer
, int port
,
847 __u32
*peer_ipaddrs
, int npeer_ipaddrs
)
849 ksock_route_t
*newroute
= NULL
;
850 rwlock_t
*global_lock
= &ksocknal_data
.ksnd_global_lock
;
851 lnet_ni_t
*ni
= peer
->ksnp_ni
;
852 ksock_net_t
*net
= ni
->ni_data
;
853 struct list_head
*rtmp
;
854 ksock_route_t
*route
;
855 ksock_interface_t
*iface
;
856 ksock_interface_t
*best_iface
;
863 /* CAVEAT EMPTOR: We do all our interface matching with an
864 * exclusive hold of global lock at IRQ priority. We're only
865 * expecting to be dealing with small numbers of interfaces, so the
866 * O(n**3)-ness here shouldn't matter */
868 write_lock_bh(global_lock
);
870 if (net
->ksnn_ninterfaces
< 2) {
871 /* Only create additional connections
872 * if I have > 1 interface */
873 write_unlock_bh(global_lock
);
877 LASSERT (npeer_ipaddrs
<= LNET_MAX_INTERFACES
);
879 for (i
= 0; i
< npeer_ipaddrs
; i
++) {
880 if (newroute
!= NULL
) {
881 newroute
->ksnr_ipaddr
= peer_ipaddrs
[i
];
883 write_unlock_bh(global_lock
);
885 newroute
= ksocknal_create_route(peer_ipaddrs
[i
], port
);
886 if (newroute
== NULL
)
889 write_lock_bh(global_lock
);
892 if (peer
->ksnp_closing
) {
893 /* peer got closed under me */
897 /* Already got a route? */
899 list_for_each(rtmp
, &peer
->ksnp_routes
) {
900 route
= list_entry(rtmp
, ksock_route_t
, ksnr_list
);
902 if (route
->ksnr_ipaddr
== newroute
->ksnr_ipaddr
)
914 LASSERT (net
->ksnn_ninterfaces
<= LNET_MAX_INTERFACES
);
916 /* Select interface to connect from */
917 for (j
= 0; j
< net
->ksnn_ninterfaces
; j
++) {
918 iface
= &net
->ksnn_interfaces
[j
];
920 /* Using this interface already? */
921 list_for_each(rtmp
, &peer
->ksnp_routes
) {
922 route
= list_entry(rtmp
, ksock_route_t
,
925 if (route
->ksnr_myipaddr
== iface
->ksni_ipaddr
)
933 this_netmatch
= (((iface
->ksni_ipaddr
^
934 newroute
->ksnr_ipaddr
) &
935 iface
->ksni_netmask
) == 0) ? 1 : 0;
937 if (!(best_iface
== NULL
||
938 best_netmatch
< this_netmatch
||
939 (best_netmatch
== this_netmatch
&&
940 best_nroutes
> iface
->ksni_nroutes
)))
944 best_netmatch
= this_netmatch
;
945 best_nroutes
= iface
->ksni_nroutes
;
948 if (best_iface
== NULL
)
951 newroute
->ksnr_myipaddr
= best_iface
->ksni_ipaddr
;
952 best_iface
->ksni_nroutes
++;
954 ksocknal_add_route_locked(peer
, newroute
);
958 write_unlock_bh(global_lock
);
959 if (newroute
!= NULL
)
960 ksocknal_route_decref(newroute
);
964 ksocknal_accept (lnet_ni_t
*ni
, struct socket
*sock
)
971 rc
= libcfs_sock_getaddr(sock
, 1, &peer_ip
, &peer_port
);
972 LASSERT (rc
== 0); /* we succeeded before */
974 LIBCFS_ALLOC(cr
, sizeof(*cr
));
976 LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
983 cr
->ksncr_sock
= sock
;
985 spin_lock_bh(&ksocknal_data
.ksnd_connd_lock
);
987 list_add_tail(&cr
->ksncr_list
, &ksocknal_data
.ksnd_connd_connreqs
);
988 wake_up(&ksocknal_data
.ksnd_connd_waitq
);
990 spin_unlock_bh(&ksocknal_data
.ksnd_connd_lock
);
995 ksocknal_connecting (ksock_peer_t
*peer
, __u32 ipaddr
)
997 ksock_route_t
*route
;
999 list_for_each_entry (route
, &peer
->ksnp_routes
, ksnr_list
) {
1001 if (route
->ksnr_ipaddr
== ipaddr
)
1002 return route
->ksnr_connecting
;
1008 ksocknal_create_conn (lnet_ni_t
*ni
, ksock_route_t
*route
,
1009 struct socket
*sock
, int type
)
1011 rwlock_t
*global_lock
= &ksocknal_data
.ksnd_global_lock
;
1012 LIST_HEAD (zombies
);
1013 lnet_process_id_t peerid
;
1014 struct list_head
*tmp
;
1017 ksock_conn_t
*conn2
;
1018 ksock_peer_t
*peer
= NULL
;
1019 ksock_peer_t
*peer2
;
1020 ksock_sched_t
*sched
;
1021 ksock_hello_msg_t
*hello
;
1029 active
= (route
!= NULL
);
1031 LASSERT (active
== (type
!= SOCKLND_CONN_NONE
));
1033 LIBCFS_ALLOC(conn
, sizeof(*conn
));
1039 conn
->ksnc_peer
= NULL
;
1040 conn
->ksnc_route
= NULL
;
1041 conn
->ksnc_sock
= sock
;
1042 /* 2 ref, 1 for conn, another extra ref prevents socket
1043 * being closed before establishment of connection */
1044 atomic_set (&conn
->ksnc_sock_refcount
, 2);
1045 conn
->ksnc_type
= type
;
1046 ksocknal_lib_save_callback(sock
, conn
);
1047 atomic_set (&conn
->ksnc_conn_refcount
, 1); /* 1 ref for me */
1049 conn
->ksnc_rx_ready
= 0;
1050 conn
->ksnc_rx_scheduled
= 0;
1052 INIT_LIST_HEAD (&conn
->ksnc_tx_queue
);
1053 conn
->ksnc_tx_ready
= 0;
1054 conn
->ksnc_tx_scheduled
= 0;
1055 conn
->ksnc_tx_carrier
= NULL
;
1056 atomic_set (&conn
->ksnc_tx_nob
, 0);
1058 LIBCFS_ALLOC(hello
, offsetof(ksock_hello_msg_t
,
1059 kshm_ips
[LNET_MAX_INTERFACES
]));
1060 if (hello
== NULL
) {
1065 /* stash conn's local and remote addrs */
1066 rc
= ksocknal_lib_get_conn_addrs (conn
);
1070 /* Find out/confirm peer's NID and connection type and get the
1071 * vector of interfaces she's willing to let me connect to.
1072 * Passive connections use the listener timeout since the peer sends
1076 peer
= route
->ksnr_peer
;
1077 LASSERT(ni
== peer
->ksnp_ni
);
1079 /* Active connection sends HELLO eagerly */
1080 hello
->kshm_nips
= ksocknal_local_ipvec(ni
, hello
->kshm_ips
);
1081 peerid
= peer
->ksnp_id
;
1083 write_lock_bh(global_lock
);
1084 conn
->ksnc_proto
= peer
->ksnp_proto
;
1085 write_unlock_bh(global_lock
);
1087 if (conn
->ksnc_proto
== NULL
) {
1088 conn
->ksnc_proto
= &ksocknal_protocol_v3x
;
1089 #if SOCKNAL_VERSION_DEBUG
1090 if (*ksocknal_tunables
.ksnd_protocol
== 2)
1091 conn
->ksnc_proto
= &ksocknal_protocol_v2x
;
1092 else if (*ksocknal_tunables
.ksnd_protocol
== 1)
1093 conn
->ksnc_proto
= &ksocknal_protocol_v1x
;
1097 rc
= ksocknal_send_hello (ni
, conn
, peerid
.nid
, hello
);
1101 peerid
.nid
= LNET_NID_ANY
;
1102 peerid
.pid
= LNET_PID_ANY
;
1104 /* Passive, get protocol from peer */
1105 conn
->ksnc_proto
= NULL
;
1108 rc
= ksocknal_recv_hello (ni
, conn
, hello
, &peerid
, &incarnation
);
1112 LASSERT (rc
== 0 || active
);
1113 LASSERT (conn
->ksnc_proto
!= NULL
);
1114 LASSERT (peerid
.nid
!= LNET_NID_ANY
);
1116 cpt
= lnet_cpt_of_nid(peerid
.nid
);
1119 ksocknal_peer_addref(peer
);
1120 write_lock_bh(global_lock
);
1122 rc
= ksocknal_create_peer(&peer
, ni
, peerid
);
1126 write_lock_bh(global_lock
);
1128 /* called with a ref on ni, so shutdown can't have started */
1129 LASSERT (((ksock_net_t
*) ni
->ni_data
)->ksnn_shutdown
== 0);
1131 peer2
= ksocknal_find_peer_locked(ni
, peerid
);
1132 if (peer2
== NULL
) {
1133 /* NB this puts an "empty" peer in the peer
1134 * table (which takes my ref) */
1135 list_add_tail(&peer
->ksnp_list
,
1136 ksocknal_nid2peerlist(peerid
.nid
));
1138 ksocknal_peer_decref(peer
);
1143 ksocknal_peer_addref(peer
);
1144 peer
->ksnp_accepting
++;
1146 /* Am I already connecting to this guy? Resolve in
1147 * favour of higher NID... */
1148 if (peerid
.nid
< ni
->ni_nid
&&
1149 ksocknal_connecting(peer
, conn
->ksnc_ipaddr
)) {
1151 warn
= "connection race resolution";
1156 if (peer
->ksnp_closing
||
1157 (active
&& route
->ksnr_deleted
)) {
1158 /* peer/route got closed under me */
1160 warn
= "peer/route removed";
1164 if (peer
->ksnp_proto
== NULL
) {
1165 /* Never connected before.
1166 * NB recv_hello may have returned EPROTO to signal my peer
1167 * wants a different protocol than the one I asked for.
1169 LASSERT (list_empty(&peer
->ksnp_conns
));
1171 peer
->ksnp_proto
= conn
->ksnc_proto
;
1172 peer
->ksnp_incarnation
= incarnation
;
1175 if (peer
->ksnp_proto
!= conn
->ksnc_proto
||
1176 peer
->ksnp_incarnation
!= incarnation
) {
1177 /* Peer rebooted or I've got the wrong protocol version */
1178 ksocknal_close_peer_conns_locked(peer
, 0, 0);
1180 peer
->ksnp_proto
= NULL
;
1182 warn
= peer
->ksnp_incarnation
!= incarnation
?
1184 "wrong proto version";
1194 warn
= "lost conn race";
1197 warn
= "retry with different protocol version";
1201 /* Refuse to duplicate an existing connection, unless this is a
1202 * loopback connection */
1203 if (conn
->ksnc_ipaddr
!= conn
->ksnc_myipaddr
) {
1204 list_for_each(tmp
, &peer
->ksnp_conns
) {
1205 conn2
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
1207 if (conn2
->ksnc_ipaddr
!= conn
->ksnc_ipaddr
||
1208 conn2
->ksnc_myipaddr
!= conn
->ksnc_myipaddr
||
1209 conn2
->ksnc_type
!= conn
->ksnc_type
)
1212 /* Reply on a passive connection attempt so the peer
1213 * realises we're connected. */
1223 /* If the connection created by this route didn't bind to the IP
1224 * address the route connected to, the connection/route matching
1225 * code below probably isn't going to work. */
1227 route
->ksnr_ipaddr
!= conn
->ksnc_ipaddr
) {
1228 CERROR("Route %s %pI4h connected to %pI4h\n",
1229 libcfs_id2str(peer
->ksnp_id
),
1230 &route
->ksnr_ipaddr
,
1231 &conn
->ksnc_ipaddr
);
1234 /* Search for a route corresponding to the new connection and
1235 * create an association. This allows incoming connections created
1236 * by routes in my peer to match my own route entries so I don't
1237 * continually create duplicate routes. */
1238 list_for_each (tmp
, &peer
->ksnp_routes
) {
1239 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
1241 if (route
->ksnr_ipaddr
!= conn
->ksnc_ipaddr
)
1244 ksocknal_associate_route_conn_locked(route
, conn
);
1248 conn
->ksnc_peer
= peer
; /* conn takes my ref on peer */
1249 peer
->ksnp_last_alive
= cfs_time_current();
1250 peer
->ksnp_send_keepalive
= 0;
1251 peer
->ksnp_error
= 0;
1253 sched
= ksocknal_choose_scheduler_locked(cpt
);
1254 sched
->kss_nconns
++;
1255 conn
->ksnc_scheduler
= sched
;
1257 conn
->ksnc_tx_last_post
= cfs_time_current();
1258 /* Set the deadline for the outgoing HELLO to drain */
1259 conn
->ksnc_tx_bufnob
= sock
->sk
->sk_wmem_queued
;
1260 conn
->ksnc_tx_deadline
= cfs_time_shift(*ksocknal_tunables
.ksnd_timeout
);
1261 mb(); /* order with adding to peer's conn list */
1263 list_add (&conn
->ksnc_list
, &peer
->ksnp_conns
);
1264 ksocknal_conn_addref(conn
);
1266 ksocknal_new_packet(conn
, 0);
1268 conn
->ksnc_zc_capable
= ksocknal_lib_zc_capable(conn
);
1270 /* Take packets blocking for this connection. */
1271 list_for_each_entry_safe(tx
, txtmp
, &peer
->ksnp_tx_queue
, tx_list
) {
1272 if (conn
->ksnc_proto
->pro_match_tx(conn
, tx
, tx
->tx_nonblk
) == SOCKNAL_MATCH_NO
)
1275 list_del (&tx
->tx_list
);
1276 ksocknal_queue_tx_locked (tx
, conn
);
1279 write_unlock_bh(global_lock
);
1281 /* We've now got a new connection. Any errors from here on are just
1282 * like "normal" comms errors and we close the connection normally.
1283 * NB (a) we still have to send the reply HELLO for passive
1285 * (b) normal I/O on the conn is blocked until I setup and call the
1289 CDEBUG(D_NET
, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
1290 libcfs_id2str(peerid
), conn
->ksnc_proto
->pro_version
,
1291 &conn
->ksnc_myipaddr
, &conn
->ksnc_ipaddr
,
1292 conn
->ksnc_port
, incarnation
, cpt
,
1293 (int)(sched
- &sched
->kss_info
->ksi_scheds
[0]));
1296 /* additional routes after interface exchange? */
1297 ksocknal_create_routes(peer
, conn
->ksnc_port
,
1298 hello
->kshm_ips
, hello
->kshm_nips
);
1300 hello
->kshm_nips
= ksocknal_select_ips(peer
, hello
->kshm_ips
,
1302 rc
= ksocknal_send_hello(ni
, conn
, peerid
.nid
, hello
);
1305 LIBCFS_FREE(hello
, offsetof(ksock_hello_msg_t
,
1306 kshm_ips
[LNET_MAX_INTERFACES
]));
1308 /* setup the socket AFTER I've received hello (it disables
1309 * SO_LINGER). I might call back to the acceptor who may want
1310 * to send a protocol version response and then close the
1311 * socket; this ensures the socket only tears down after the
1312 * response has been sent. */
1314 rc
= ksocknal_lib_setup_sock(sock
);
1316 write_lock_bh(global_lock
);
1318 /* NB my callbacks block while I hold ksnd_global_lock */
1319 ksocknal_lib_set_callback(sock
, conn
);
1322 peer
->ksnp_accepting
--;
1324 write_unlock_bh(global_lock
);
1327 write_lock_bh(global_lock
);
1328 if (!conn
->ksnc_closing
) {
1329 /* could be closed by another thread */
1330 ksocknal_close_conn_locked(conn
, rc
);
1332 write_unlock_bh(global_lock
);
1333 } else if (ksocknal_connsock_addref(conn
) == 0) {
1334 /* Allow I/O to proceed. */
1335 ksocknal_read_callback(conn
);
1336 ksocknal_write_callback(conn
);
1337 ksocknal_connsock_decref(conn
);
1340 ksocknal_connsock_decref(conn
);
1341 ksocknal_conn_decref(conn
);
1345 if (!peer
->ksnp_closing
&&
1346 list_empty (&peer
->ksnp_conns
) &&
1347 list_empty (&peer
->ksnp_routes
)) {
1348 list_add(&zombies
, &peer
->ksnp_tx_queue
);
1349 list_del_init(&peer
->ksnp_tx_queue
);
1350 ksocknal_unlink_peer_locked(peer
);
1353 write_unlock_bh(global_lock
);
1357 CERROR("Not creating conn %s type %d: %s\n",
1358 libcfs_id2str(peerid
), conn
->ksnc_type
, warn
);
1360 CDEBUG(D_NET
, "Not creating conn %s type %d: %s\n",
1361 libcfs_id2str(peerid
), conn
->ksnc_type
, warn
);
1366 /* Request retry by replying with CONN_NONE
1367 * ksnc_proto has been set already */
1368 conn
->ksnc_type
= SOCKLND_CONN_NONE
;
1369 hello
->kshm_nips
= 0;
1370 ksocknal_send_hello(ni
, conn
, peerid
.nid
, hello
);
1373 write_lock_bh(global_lock
);
1374 peer
->ksnp_accepting
--;
1375 write_unlock_bh(global_lock
);
1378 ksocknal_txlist_done(ni
, &zombies
, 1);
1379 ksocknal_peer_decref(peer
);
1383 LIBCFS_FREE(hello
, offsetof(ksock_hello_msg_t
,
1384 kshm_ips
[LNET_MAX_INTERFACES
]));
1386 LIBCFS_FREE (conn
, sizeof(*conn
));
1389 libcfs_sock_release(sock
);
1394 ksocknal_close_conn_locked (ksock_conn_t
*conn
, int error
)
1396 /* This just does the immmediate housekeeping, and queues the
1397 * connection for the reaper to terminate.
1398 * Caller holds ksnd_global_lock exclusively in irq context */
1399 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1400 ksock_route_t
*route
;
1401 ksock_conn_t
*conn2
;
1402 struct list_head
*tmp
;
1404 LASSERT (peer
->ksnp_error
== 0);
1405 LASSERT (!conn
->ksnc_closing
);
1406 conn
->ksnc_closing
= 1;
1408 /* ksnd_deathrow_conns takes over peer's ref */
1409 list_del (&conn
->ksnc_list
);
1411 route
= conn
->ksnc_route
;
1412 if (route
!= NULL
) {
1413 /* dissociate conn from route... */
1414 LASSERT (!route
->ksnr_deleted
);
1415 LASSERT ((route
->ksnr_connected
& (1 << conn
->ksnc_type
)) != 0);
1418 list_for_each(tmp
, &peer
->ksnp_conns
) {
1419 conn2
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
1421 if (conn2
->ksnc_route
== route
&&
1422 conn2
->ksnc_type
== conn
->ksnc_type
)
1428 route
->ksnr_connected
&= ~(1 << conn
->ksnc_type
);
1430 conn
->ksnc_route
= NULL
;
1432 #if 0 /* irrelevant with only eager routes */
1433 /* make route least favourite */
1434 list_del (&route
->ksnr_list
);
1435 list_add_tail (&route
->ksnr_list
, &peer
->ksnp_routes
);
1437 ksocknal_route_decref(route
); /* drop conn's ref on route */
1440 if (list_empty (&peer
->ksnp_conns
)) {
1441 /* No more connections to this peer */
1443 if (!list_empty(&peer
->ksnp_tx_queue
)) {
1446 LASSERT (conn
->ksnc_proto
== &ksocknal_protocol_v3x
);
1448 /* throw them to the last connection...,
1449 * these TXs will be send to /dev/null by scheduler */
1450 list_for_each_entry(tx
, &peer
->ksnp_tx_queue
,
1452 ksocknal_tx_prep(conn
, tx
);
1454 spin_lock_bh(&conn
->ksnc_scheduler
->kss_lock
);
1455 list_splice_init(&peer
->ksnp_tx_queue
,
1456 &conn
->ksnc_tx_queue
);
1457 spin_unlock_bh(&conn
->ksnc_scheduler
->kss_lock
);
1460 peer
->ksnp_proto
= NULL
; /* renegotiate protocol version */
1461 peer
->ksnp_error
= error
; /* stash last conn close reason */
1463 if (list_empty (&peer
->ksnp_routes
)) {
1464 /* I've just closed last conn belonging to a
1465 * peer with no routes to it */
1466 ksocknal_unlink_peer_locked (peer
);
1470 spin_lock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1472 list_add_tail(&conn
->ksnc_list
,
1473 &ksocknal_data
.ksnd_deathrow_conns
);
1474 wake_up(&ksocknal_data
.ksnd_reaper_waitq
);
1476 spin_unlock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1480 ksocknal_peer_failed (ksock_peer_t
*peer
)
1483 unsigned long last_alive
= 0;
1485 /* There has been a connection failure or comms error; but I'll only
1486 * tell LNET I think the peer is dead if it's to another kernel and
1487 * there are no connections or connection attempts in existence. */
1489 read_lock(&ksocknal_data
.ksnd_global_lock
);
1491 if ((peer
->ksnp_id
.pid
& LNET_PID_USERFLAG
) == 0 &&
1492 list_empty(&peer
->ksnp_conns
) &&
1493 peer
->ksnp_accepting
== 0 &&
1494 ksocknal_find_connecting_route_locked(peer
) == NULL
) {
1496 last_alive
= peer
->ksnp_last_alive
;
1499 read_unlock(&ksocknal_data
.ksnd_global_lock
);
1502 lnet_notify (peer
->ksnp_ni
, peer
->ksnp_id
.nid
, 0,
1507 ksocknal_finalize_zcreq(ksock_conn_t
*conn
)
1509 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1514 /* NB safe to finalize TXs because closing of socket will
1515 * abort all buffered data */
1516 LASSERT (conn
->ksnc_sock
== NULL
);
1518 spin_lock(&peer
->ksnp_lock
);
1520 list_for_each_entry_safe(tx
, tmp
, &peer
->ksnp_zc_req_list
, tx_zc_list
) {
1521 if (tx
->tx_conn
!= conn
)
1524 LASSERT (tx
->tx_msg
.ksm_zc_cookies
[0] != 0);
1526 tx
->tx_msg
.ksm_zc_cookies
[0] = 0;
1527 tx
->tx_zc_aborted
= 1; /* mark it as not-acked */
1528 list_del(&tx
->tx_zc_list
);
1529 list_add(&tx
->tx_zc_list
, &zlist
);
1532 spin_unlock(&peer
->ksnp_lock
);
1534 while (!list_empty(&zlist
)) {
1535 tx
= list_entry(zlist
.next
, ksock_tx_t
, tx_zc_list
);
1537 list_del(&tx
->tx_zc_list
);
1538 ksocknal_tx_decref(tx
);
1543 ksocknal_terminate_conn (ksock_conn_t
*conn
)
1545 /* This gets called by the reaper (guaranteed thread context) to
1546 * disengage the socket from its callbacks and close it.
1547 * ksnc_refcount will eventually hit zero, and then the reaper will
1549 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1550 ksock_sched_t
*sched
= conn
->ksnc_scheduler
;
1553 LASSERT(conn
->ksnc_closing
);
1555 /* wake up the scheduler to "send" all remaining packets to /dev/null */
1556 spin_lock_bh(&sched
->kss_lock
);
1558 /* a closing conn is always ready to tx */
1559 conn
->ksnc_tx_ready
= 1;
1561 if (!conn
->ksnc_tx_scheduled
&&
1562 !list_empty(&conn
->ksnc_tx_queue
)) {
1563 list_add_tail (&conn
->ksnc_tx_list
,
1564 &sched
->kss_tx_conns
);
1565 conn
->ksnc_tx_scheduled
= 1;
1566 /* extra ref for scheduler */
1567 ksocknal_conn_addref(conn
);
1569 wake_up (&sched
->kss_waitq
);
1572 spin_unlock_bh(&sched
->kss_lock
);
1574 /* serialise with callbacks */
1575 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1577 ksocknal_lib_reset_callback(conn
->ksnc_sock
, conn
);
1579 /* OK, so this conn may not be completely disengaged from its
1580 * scheduler yet, but it _has_ committed to terminate... */
1581 conn
->ksnc_scheduler
->kss_nconns
--;
1583 if (peer
->ksnp_error
!= 0) {
1584 /* peer's last conn closed in error */
1585 LASSERT (list_empty (&peer
->ksnp_conns
));
1587 peer
->ksnp_error
= 0; /* avoid multiple notifications */
1590 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
1593 ksocknal_peer_failed(peer
);
1595 /* The socket is closed on the final put; either here, or in
1596 * ksocknal_{send,recv}msg(). Since we set up the linger2 option
1597 * when the connection was established, this will close the socket
1598 * immediately, aborting anything buffered in it. Any hung
1599 * zero-copy transmits will therefore complete in finite time. */
1600 ksocknal_connsock_decref(conn
);
1604 ksocknal_queue_zombie_conn (ksock_conn_t
*conn
)
1606 /* Queue the conn for the reaper to destroy */
1608 LASSERT(atomic_read(&conn
->ksnc_conn_refcount
) == 0);
1609 spin_lock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1611 list_add_tail(&conn
->ksnc_list
, &ksocknal_data
.ksnd_zombie_conns
);
1612 wake_up(&ksocknal_data
.ksnd_reaper_waitq
);
1614 spin_unlock_bh(&ksocknal_data
.ksnd_reaper_lock
);
1618 ksocknal_destroy_conn (ksock_conn_t
*conn
)
1620 unsigned long last_rcv
;
1622 /* Final coup-de-grace of the reaper */
1623 CDEBUG (D_NET
, "connection %p\n", conn
);
1625 LASSERT (atomic_read (&conn
->ksnc_conn_refcount
) == 0);
1626 LASSERT (atomic_read (&conn
->ksnc_sock_refcount
) == 0);
1627 LASSERT (conn
->ksnc_sock
== NULL
);
1628 LASSERT (conn
->ksnc_route
== NULL
);
1629 LASSERT (!conn
->ksnc_tx_scheduled
);
1630 LASSERT (!conn
->ksnc_rx_scheduled
);
1631 LASSERT (list_empty(&conn
->ksnc_tx_queue
));
1633 /* complete current receive if any */
1634 switch (conn
->ksnc_rx_state
) {
1635 case SOCKNAL_RX_LNET_PAYLOAD
:
1636 last_rcv
= conn
->ksnc_rx_deadline
-
1637 cfs_time_seconds(*ksocknal_tunables
.ksnd_timeout
);
1638 CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n",
1639 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
), conn
->ksnc_type
,
1640 &conn
->ksnc_ipaddr
, conn
->ksnc_port
,
1641 conn
->ksnc_rx_nob_wanted
, conn
->ksnc_rx_nob_left
,
1642 cfs_duration_sec(cfs_time_sub(cfs_time_current(),
1644 lnet_finalize (conn
->ksnc_peer
->ksnp_ni
,
1645 conn
->ksnc_cookie
, -EIO
);
1647 case SOCKNAL_RX_LNET_HEADER
:
1648 if (conn
->ksnc_rx_started
)
1649 CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
1650 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
),
1651 &conn
->ksnc_ipaddr
, conn
->ksnc_port
,
1652 conn
->ksnc_proto
->pro_version
);
1654 case SOCKNAL_RX_KSM_HEADER
:
1655 if (conn
->ksnc_rx_started
)
1656 CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
1657 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
),
1658 &conn
->ksnc_ipaddr
, conn
->ksnc_port
,
1659 conn
->ksnc_proto
->pro_version
);
1661 case SOCKNAL_RX_SLOP
:
1662 if (conn
->ksnc_rx_started
)
1663 CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
1664 libcfs_id2str(conn
->ksnc_peer
->ksnp_id
),
1665 &conn
->ksnc_ipaddr
, conn
->ksnc_port
);
1672 ksocknal_peer_decref(conn
->ksnc_peer
);
1674 LIBCFS_FREE (conn
, sizeof (*conn
));
1678 ksocknal_close_peer_conns_locked (ksock_peer_t
*peer
, __u32 ipaddr
, int why
)
1681 struct list_head
*ctmp
;
1682 struct list_head
*cnxt
;
1685 list_for_each_safe (ctmp
, cnxt
, &peer
->ksnp_conns
) {
1686 conn
= list_entry (ctmp
, ksock_conn_t
, ksnc_list
);
1689 conn
->ksnc_ipaddr
== ipaddr
) {
1691 ksocknal_close_conn_locked (conn
, why
);
1699 ksocknal_close_conn_and_siblings (ksock_conn_t
*conn
, int why
)
1701 ksock_peer_t
*peer
= conn
->ksnc_peer
;
1702 __u32 ipaddr
= conn
->ksnc_ipaddr
;
1705 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1707 count
= ksocknal_close_peer_conns_locked (peer
, ipaddr
, why
);
1709 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
1715 ksocknal_close_matching_conns (lnet_process_id_t id
, __u32 ipaddr
)
1718 struct list_head
*ptmp
;
1719 struct list_head
*pnxt
;
1725 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1727 if (id
.nid
!= LNET_NID_ANY
)
1728 lo
= hi
= (int)(ksocknal_nid2peerlist(id
.nid
) - ksocknal_data
.ksnd_peers
);
1731 hi
= ksocknal_data
.ksnd_peer_hash_size
- 1;
1734 for (i
= lo
; i
<= hi
; i
++) {
1735 list_for_each_safe (ptmp
, pnxt
,
1736 &ksocknal_data
.ksnd_peers
[i
]) {
1738 peer
= list_entry (ptmp
, ksock_peer_t
, ksnp_list
);
1740 if (!((id
.nid
== LNET_NID_ANY
|| id
.nid
== peer
->ksnp_id
.nid
) &&
1741 (id
.pid
== LNET_PID_ANY
|| id
.pid
== peer
->ksnp_id
.pid
)))
1744 count
+= ksocknal_close_peer_conns_locked (peer
, ipaddr
, 0);
1748 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
1750 /* wildcards always succeed */
1751 if (id
.nid
== LNET_NID_ANY
|| id
.pid
== LNET_PID_ANY
|| ipaddr
== 0)
1761 ksocknal_notify (lnet_ni_t
*ni
, lnet_nid_t gw_nid
, int alive
)
1763 /* The router is telling me she's been notified of a change in
1764 * gateway state.... */
1765 lnet_process_id_t id
= {0};
1768 id
.pid
= LNET_PID_ANY
;
1770 CDEBUG (D_NET
, "gw %s %s\n", libcfs_nid2str(gw_nid
),
1771 alive
? "up" : "down");
1774 /* If the gateway crashed, close all open connections... */
1775 ksocknal_close_matching_conns (id
, 0);
1779 /* ...otherwise do nothing. We can only establish new connections
1780 * if we have autroutes, and these connect on demand. */
1784 ksocknal_query (lnet_ni_t
*ni
, lnet_nid_t nid
, unsigned long *when
)
1787 unsigned long last_alive
= 0;
1788 unsigned long now
= cfs_time_current();
1789 ksock_peer_t
*peer
= NULL
;
1790 rwlock_t
*glock
= &ksocknal_data
.ksnd_global_lock
;
1791 lnet_process_id_t id
= {.nid
= nid
, .pid
= LUSTRE_SRV_LNET_PID
};
1795 peer
= ksocknal_find_peer_locked(ni
, id
);
1797 struct list_head
*tmp
;
1801 list_for_each (tmp
, &peer
->ksnp_conns
) {
1802 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
1803 bufnob
= conn
->ksnc_sock
->sk
->sk_wmem_queued
;
1805 if (bufnob
< conn
->ksnc_tx_bufnob
) {
1806 /* something got ACKed */
1807 conn
->ksnc_tx_deadline
=
1808 cfs_time_shift(*ksocknal_tunables
.ksnd_timeout
);
1809 peer
->ksnp_last_alive
= now
;
1810 conn
->ksnc_tx_bufnob
= bufnob
;
1814 last_alive
= peer
->ksnp_last_alive
;
1815 if (ksocknal_find_connectable_route_locked(peer
) == NULL
)
1821 if (last_alive
!= 0)
1824 CDEBUG(D_NET
, "Peer %s %p, alive %ld secs ago, connect %d\n",
1825 libcfs_nid2str(nid
), peer
,
1826 last_alive
? cfs_duration_sec(now
- last_alive
) : -1,
1832 ksocknal_add_peer(ni
, id
, LNET_NIDADDR(nid
), lnet_acceptor_port());
1834 write_lock_bh(glock
);
1836 peer
= ksocknal_find_peer_locked(ni
, id
);
1838 ksocknal_launch_all_connections_locked(peer
);
1840 write_unlock_bh(glock
);
1845 ksocknal_push_peer (ksock_peer_t
*peer
)
1849 struct list_head
*tmp
;
1852 for (index
= 0; ; index
++) {
1853 read_lock(&ksocknal_data
.ksnd_global_lock
);
1858 list_for_each (tmp
, &peer
->ksnp_conns
) {
1860 conn
= list_entry (tmp
, ksock_conn_t
,
1862 ksocknal_conn_addref(conn
);
1867 read_unlock(&ksocknal_data
.ksnd_global_lock
);
1872 ksocknal_lib_push_conn (conn
);
1873 ksocknal_conn_decref(conn
);
1878 ksocknal_push (lnet_ni_t
*ni
, lnet_process_id_t id
)
1881 struct list_head
*tmp
;
1887 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
1888 for (j
= 0; ; j
++) {
1889 read_lock(&ksocknal_data
.ksnd_global_lock
);
1894 list_for_each (tmp
, &ksocknal_data
.ksnd_peers
[i
]) {
1895 peer
= list_entry(tmp
, ksock_peer_t
,
1898 if (!((id
.nid
== LNET_NID_ANY
||
1899 id
.nid
== peer
->ksnp_id
.nid
) &&
1900 (id
.pid
== LNET_PID_ANY
||
1901 id
.pid
== peer
->ksnp_id
.pid
))) {
1907 ksocknal_peer_addref(peer
);
1912 read_unlock(&ksocknal_data
.ksnd_global_lock
);
1916 ksocknal_push_peer (peer
);
1917 ksocknal_peer_decref(peer
);
1927 ksocknal_add_interface(lnet_ni_t
*ni
, __u32 ipaddress
, __u32 netmask
)
1929 ksock_net_t
*net
= ni
->ni_data
;
1930 ksock_interface_t
*iface
;
1934 struct list_head
*ptmp
;
1936 struct list_head
*rtmp
;
1937 ksock_route_t
*route
;
1939 if (ipaddress
== 0 ||
1943 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
1945 iface
= ksocknal_ip2iface(ni
, ipaddress
);
1946 if (iface
!= NULL
) {
1947 /* silently ignore dups */
1949 } else if (net
->ksnn_ninterfaces
== LNET_MAX_INTERFACES
) {
1952 iface
= &net
->ksnn_interfaces
[net
->ksnn_ninterfaces
++];
1954 iface
->ksni_ipaddr
= ipaddress
;
1955 iface
->ksni_netmask
= netmask
;
1956 iface
->ksni_nroutes
= 0;
1957 iface
->ksni_npeers
= 0;
1959 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
1960 list_for_each(ptmp
, &ksocknal_data
.ksnd_peers
[i
]) {
1961 peer
= list_entry(ptmp
, ksock_peer_t
,
1964 for (j
= 0; j
< peer
->ksnp_n_passive_ips
; j
++)
1965 if (peer
->ksnp_passive_ips
[j
] == ipaddress
)
1966 iface
->ksni_npeers
++;
1968 list_for_each(rtmp
, &peer
->ksnp_routes
) {
1969 route
= list_entry(rtmp
,
1973 if (route
->ksnr_myipaddr
== ipaddress
)
1974 iface
->ksni_nroutes
++;
1980 /* NB only new connections will pay attention to the new interface! */
1983 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
1989 ksocknal_peer_del_interface_locked(ksock_peer_t
*peer
, __u32 ipaddr
)
1991 struct list_head
*tmp
;
1992 struct list_head
*nxt
;
1993 ksock_route_t
*route
;
1998 for (i
= 0; i
< peer
->ksnp_n_passive_ips
; i
++)
1999 if (peer
->ksnp_passive_ips
[i
] == ipaddr
) {
2000 for (j
= i
+1; j
< peer
->ksnp_n_passive_ips
; j
++)
2001 peer
->ksnp_passive_ips
[j
-1] =
2002 peer
->ksnp_passive_ips
[j
];
2003 peer
->ksnp_n_passive_ips
--;
2007 list_for_each_safe(tmp
, nxt
, &peer
->ksnp_routes
) {
2008 route
= list_entry (tmp
, ksock_route_t
, ksnr_list
);
2010 if (route
->ksnr_myipaddr
!= ipaddr
)
2013 if (route
->ksnr_share_count
!= 0) {
2014 /* Manually created; keep, but unbind */
2015 route
->ksnr_myipaddr
= 0;
2017 ksocknal_del_route_locked(route
);
2021 list_for_each_safe(tmp
, nxt
, &peer
->ksnp_conns
) {
2022 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
2024 if (conn
->ksnc_myipaddr
== ipaddr
)
2025 ksocknal_close_conn_locked (conn
, 0);
2030 ksocknal_del_interface(lnet_ni_t
*ni
, __u32 ipaddress
)
2032 ksock_net_t
*net
= ni
->ni_data
;
2034 struct list_head
*tmp
;
2035 struct list_head
*nxt
;
2041 write_lock_bh(&ksocknal_data
.ksnd_global_lock
);
2043 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
2044 this_ip
= net
->ksnn_interfaces
[i
].ksni_ipaddr
;
2046 if (!(ipaddress
== 0 ||
2047 ipaddress
== this_ip
))
2052 for (j
= i
+1; j
< net
->ksnn_ninterfaces
; j
++)
2053 net
->ksnn_interfaces
[j
-1] =
2054 net
->ksnn_interfaces
[j
];
2056 net
->ksnn_ninterfaces
--;
2058 for (j
= 0; j
< ksocknal_data
.ksnd_peer_hash_size
; j
++) {
2059 list_for_each_safe(tmp
, nxt
,
2060 &ksocknal_data
.ksnd_peers
[j
]) {
2061 peer
= list_entry(tmp
, ksock_peer_t
,
2064 if (peer
->ksnp_ni
!= ni
)
2067 ksocknal_peer_del_interface_locked(peer
, this_ip
);
2072 write_unlock_bh(&ksocknal_data
.ksnd_global_lock
);
2078 ksocknal_ctl(lnet_ni_t
*ni
, unsigned int cmd
, void *arg
)
2080 lnet_process_id_t id
= {0};
2081 struct libcfs_ioctl_data
*data
= arg
;
2085 case IOC_LIBCFS_GET_INTERFACE
: {
2086 ksock_net_t
*net
= ni
->ni_data
;
2087 ksock_interface_t
*iface
;
2089 read_lock(&ksocknal_data
.ksnd_global_lock
);
2091 if (data
->ioc_count
>= (__u32
)net
->ksnn_ninterfaces
) {
2095 iface
= &net
->ksnn_interfaces
[data
->ioc_count
];
2097 data
->ioc_u32
[0] = iface
->ksni_ipaddr
;
2098 data
->ioc_u32
[1] = iface
->ksni_netmask
;
2099 data
->ioc_u32
[2] = iface
->ksni_npeers
;
2100 data
->ioc_u32
[3] = iface
->ksni_nroutes
;
2103 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2107 case IOC_LIBCFS_ADD_INTERFACE
:
2108 return ksocknal_add_interface(ni
,
2109 data
->ioc_u32
[0], /* IP address */
2110 data
->ioc_u32
[1]); /* net mask */
2112 case IOC_LIBCFS_DEL_INTERFACE
:
2113 return ksocknal_del_interface(ni
,
2114 data
->ioc_u32
[0]); /* IP address */
2116 case IOC_LIBCFS_GET_PEER
: {
2121 int share_count
= 0;
2123 rc
= ksocknal_get_peer_info(ni
, data
->ioc_count
,
2124 &id
, &myip
, &ip
, &port
,
2125 &conn_count
, &share_count
);
2129 data
->ioc_nid
= id
.nid
;
2130 data
->ioc_count
= share_count
;
2131 data
->ioc_u32
[0] = ip
;
2132 data
->ioc_u32
[1] = port
;
2133 data
->ioc_u32
[2] = myip
;
2134 data
->ioc_u32
[3] = conn_count
;
2135 data
->ioc_u32
[4] = id
.pid
;
2139 case IOC_LIBCFS_ADD_PEER
:
2140 id
.nid
= data
->ioc_nid
;
2141 id
.pid
= LUSTRE_SRV_LNET_PID
;
2142 return ksocknal_add_peer (ni
, id
,
2143 data
->ioc_u32
[0], /* IP */
2144 data
->ioc_u32
[1]); /* port */
2146 case IOC_LIBCFS_DEL_PEER
:
2147 id
.nid
= data
->ioc_nid
;
2148 id
.pid
= LNET_PID_ANY
;
2149 return ksocknal_del_peer (ni
, id
,
2150 data
->ioc_u32
[0]); /* IP */
2152 case IOC_LIBCFS_GET_CONN
: {
2156 ksock_conn_t
*conn
= ksocknal_get_conn_by_idx (ni
, data
->ioc_count
);
2161 ksocknal_lib_get_conn_tunables(conn
, &txmem
, &rxmem
, &nagle
);
2163 data
->ioc_count
= txmem
;
2164 data
->ioc_nid
= conn
->ksnc_peer
->ksnp_id
.nid
;
2165 data
->ioc_flags
= nagle
;
2166 data
->ioc_u32
[0] = conn
->ksnc_ipaddr
;
2167 data
->ioc_u32
[1] = conn
->ksnc_port
;
2168 data
->ioc_u32
[2] = conn
->ksnc_myipaddr
;
2169 data
->ioc_u32
[3] = conn
->ksnc_type
;
2170 data
->ioc_u32
[4] = conn
->ksnc_scheduler
->kss_info
->ksi_cpt
;
2171 data
->ioc_u32
[5] = rxmem
;
2172 data
->ioc_u32
[6] = conn
->ksnc_peer
->ksnp_id
.pid
;
2173 ksocknal_conn_decref(conn
);
2177 case IOC_LIBCFS_CLOSE_CONNECTION
:
2178 id
.nid
= data
->ioc_nid
;
2179 id
.pid
= LNET_PID_ANY
;
2180 return ksocknal_close_matching_conns (id
,
2183 case IOC_LIBCFS_REGISTER_MYNID
:
2184 /* Ignore if this is a noop */
2185 if (data
->ioc_nid
== ni
->ni_nid
)
2188 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
2189 libcfs_nid2str(data
->ioc_nid
),
2190 libcfs_nid2str(ni
->ni_nid
));
2193 case IOC_LIBCFS_PUSH_CONNECTION
:
2194 id
.nid
= data
->ioc_nid
;
2195 id
.pid
= LNET_PID_ANY
;
2196 return ksocknal_push(ni
, id
);
2205 ksocknal_free_buffers (void)
2207 LASSERT (atomic_read(&ksocknal_data
.ksnd_nactive_txs
) == 0);
2209 if (ksocknal_data
.ksnd_sched_info
!= NULL
) {
2210 struct ksock_sched_info
*info
;
2213 cfs_percpt_for_each(info
, i
, ksocknal_data
.ksnd_sched_info
) {
2214 if (info
->ksi_scheds
!= NULL
) {
2215 LIBCFS_FREE(info
->ksi_scheds
,
2216 info
->ksi_nthreads_max
*
2217 sizeof(info
->ksi_scheds
[0]));
2220 cfs_percpt_free(ksocknal_data
.ksnd_sched_info
);
2223 LIBCFS_FREE (ksocknal_data
.ksnd_peers
,
2224 sizeof (struct list_head
) *
2225 ksocknal_data
.ksnd_peer_hash_size
);
2227 spin_lock(&ksocknal_data
.ksnd_tx_lock
);
2229 if (!list_empty(&ksocknal_data
.ksnd_idle_noop_txs
)) {
2230 struct list_head zlist
;
2233 list_add(&zlist
, &ksocknal_data
.ksnd_idle_noop_txs
);
2234 list_del_init(&ksocknal_data
.ksnd_idle_noop_txs
);
2235 spin_unlock(&ksocknal_data
.ksnd_tx_lock
);
2237 while (!list_empty(&zlist
)) {
2238 tx
= list_entry(zlist
.next
, ksock_tx_t
, tx_list
);
2239 list_del(&tx
->tx_list
);
2240 LIBCFS_FREE(tx
, tx
->tx_desc_size
);
2243 spin_unlock(&ksocknal_data
.ksnd_tx_lock
);
2248 ksocknal_base_shutdown(void)
2250 struct ksock_sched_info
*info
;
2251 ksock_sched_t
*sched
;
2255 CDEBUG(D_MALLOC
, "before NAL cleanup: kmem %d\n",
2256 atomic_read (&libcfs_kmemory
));
2257 LASSERT (ksocknal_data
.ksnd_nnets
== 0);
2259 switch (ksocknal_data
.ksnd_init
) {
2263 case SOCKNAL_INIT_ALL
:
2264 case SOCKNAL_INIT_DATA
:
2265 LASSERT (ksocknal_data
.ksnd_peers
!= NULL
);
2266 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
2267 LASSERT (list_empty (&ksocknal_data
.ksnd_peers
[i
]));
2270 LASSERT(list_empty(&ksocknal_data
.ksnd_nets
));
2271 LASSERT (list_empty (&ksocknal_data
.ksnd_enomem_conns
));
2272 LASSERT (list_empty (&ksocknal_data
.ksnd_zombie_conns
));
2273 LASSERT (list_empty (&ksocknal_data
.ksnd_connd_connreqs
));
2274 LASSERT (list_empty (&ksocknal_data
.ksnd_connd_routes
));
2276 if (ksocknal_data
.ksnd_sched_info
!= NULL
) {
2277 cfs_percpt_for_each(info
, i
,
2278 ksocknal_data
.ksnd_sched_info
) {
2279 if (info
->ksi_scheds
== NULL
)
2282 for (j
= 0; j
< info
->ksi_nthreads_max
; j
++) {
2284 sched
= &info
->ksi_scheds
[j
];
2286 &sched
->kss_tx_conns
));
2288 &sched
->kss_rx_conns
));
2290 &sched
->kss_zombie_noop_txs
));
2291 LASSERT(sched
->kss_nconns
== 0);
2296 /* flag threads to terminate; wake and wait for them to die */
2297 ksocknal_data
.ksnd_shuttingdown
= 1;
2298 wake_up_all(&ksocknal_data
.ksnd_connd_waitq
);
2299 wake_up_all(&ksocknal_data
.ksnd_reaper_waitq
);
2301 if (ksocknal_data
.ksnd_sched_info
!= NULL
) {
2302 cfs_percpt_for_each(info
, i
,
2303 ksocknal_data
.ksnd_sched_info
) {
2304 if (info
->ksi_scheds
== NULL
)
2307 for (j
= 0; j
< info
->ksi_nthreads_max
; j
++) {
2308 sched
= &info
->ksi_scheds
[j
];
2309 wake_up_all(&sched
->kss_waitq
);
2315 read_lock(&ksocknal_data
.ksnd_global_lock
);
2316 while (ksocknal_data
.ksnd_nthreads
!= 0) {
2318 CDEBUG(((i
& (-i
)) == i
) ? D_WARNING
: D_NET
, /* power of 2? */
2319 "waiting for %d threads to terminate\n",
2320 ksocknal_data
.ksnd_nthreads
);
2321 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2322 set_current_state(TASK_UNINTERRUPTIBLE
);
2323 schedule_timeout(cfs_time_seconds(1));
2324 read_lock(&ksocknal_data
.ksnd_global_lock
);
2326 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2328 ksocknal_free_buffers();
2330 ksocknal_data
.ksnd_init
= SOCKNAL_INIT_NOTHING
;
2334 CDEBUG(D_MALLOC
, "after NAL cleanup: kmem %d\n",
2335 atomic_read (&libcfs_kmemory
));
2337 module_put(THIS_MODULE
);
2341 ksocknal_new_incarnation (void)
2344 /* The incarnation number is the time this module loaded and it
2345 * identifies this particular instance of the socknal.
2347 return ktime_get_ns();
2351 ksocknal_base_startup(void)
2353 struct ksock_sched_info
*info
;
2357 LASSERT (ksocknal_data
.ksnd_init
== SOCKNAL_INIT_NOTHING
);
2358 LASSERT (ksocknal_data
.ksnd_nnets
== 0);
2360 memset (&ksocknal_data
, 0, sizeof (ksocknal_data
)); /* zero pointers */
2362 ksocknal_data
.ksnd_peer_hash_size
= SOCKNAL_PEER_HASH_SIZE
;
2363 LIBCFS_ALLOC (ksocknal_data
.ksnd_peers
,
2364 sizeof (struct list_head
) *
2365 ksocknal_data
.ksnd_peer_hash_size
);
2366 if (ksocknal_data
.ksnd_peers
== NULL
)
2369 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++)
2370 INIT_LIST_HEAD(&ksocknal_data
.ksnd_peers
[i
]);
2372 rwlock_init(&ksocknal_data
.ksnd_global_lock
);
2373 INIT_LIST_HEAD(&ksocknal_data
.ksnd_nets
);
2375 spin_lock_init(&ksocknal_data
.ksnd_reaper_lock
);
2376 INIT_LIST_HEAD (&ksocknal_data
.ksnd_enomem_conns
);
2377 INIT_LIST_HEAD (&ksocknal_data
.ksnd_zombie_conns
);
2378 INIT_LIST_HEAD (&ksocknal_data
.ksnd_deathrow_conns
);
2379 init_waitqueue_head(&ksocknal_data
.ksnd_reaper_waitq
);
2381 spin_lock_init(&ksocknal_data
.ksnd_connd_lock
);
2382 INIT_LIST_HEAD (&ksocknal_data
.ksnd_connd_connreqs
);
2383 INIT_LIST_HEAD (&ksocknal_data
.ksnd_connd_routes
);
2384 init_waitqueue_head(&ksocknal_data
.ksnd_connd_waitq
);
2386 spin_lock_init(&ksocknal_data
.ksnd_tx_lock
);
2387 INIT_LIST_HEAD (&ksocknal_data
.ksnd_idle_noop_txs
);
2389 /* NB memset above zeros whole of ksocknal_data */
2391 /* flag lists/ptrs/locks initialised */
2392 ksocknal_data
.ksnd_init
= SOCKNAL_INIT_DATA
;
2393 try_module_get(THIS_MODULE
);
2395 ksocknal_data
.ksnd_sched_info
= cfs_percpt_alloc(lnet_cpt_table(),
2397 if (ksocknal_data
.ksnd_sched_info
== NULL
)
2400 cfs_percpt_for_each(info
, i
, ksocknal_data
.ksnd_sched_info
) {
2401 ksock_sched_t
*sched
;
2404 nthrs
= cfs_cpt_weight(lnet_cpt_table(), i
);
2405 if (*ksocknal_tunables
.ksnd_nscheds
> 0) {
2406 nthrs
= min(nthrs
, *ksocknal_tunables
.ksnd_nscheds
);
2408 /* max to half of CPUs, assume another half should be
2409 * reserved for upper layer modules */
2410 nthrs
= min(max(SOCKNAL_NSCHEDS
, nthrs
>> 1), nthrs
);
2413 info
->ksi_nthreads_max
= nthrs
;
2416 LIBCFS_CPT_ALLOC(info
->ksi_scheds
, lnet_cpt_table(), i
,
2417 info
->ksi_nthreads_max
* sizeof(*sched
));
2418 if (info
->ksi_scheds
== NULL
)
2421 for (; nthrs
> 0; nthrs
--) {
2422 sched
= &info
->ksi_scheds
[nthrs
- 1];
2424 sched
->kss_info
= info
;
2425 spin_lock_init(&sched
->kss_lock
);
2426 INIT_LIST_HEAD(&sched
->kss_rx_conns
);
2427 INIT_LIST_HEAD(&sched
->kss_tx_conns
);
2428 INIT_LIST_HEAD(&sched
->kss_zombie_noop_txs
);
2429 init_waitqueue_head(&sched
->kss_waitq
);
2433 ksocknal_data
.ksnd_connd_starting
= 0;
2434 ksocknal_data
.ksnd_connd_failed_stamp
= 0;
2435 ksocknal_data
.ksnd_connd_starting_stamp
= get_seconds();
2436 /* must have at least 2 connds to remain responsive to accepts while
2438 if (*ksocknal_tunables
.ksnd_nconnds
< SOCKNAL_CONND_RESV
+ 1)
2439 *ksocknal_tunables
.ksnd_nconnds
= SOCKNAL_CONND_RESV
+ 1;
2441 if (*ksocknal_tunables
.ksnd_nconnds_max
<
2442 *ksocknal_tunables
.ksnd_nconnds
) {
2443 ksocknal_tunables
.ksnd_nconnds_max
=
2444 ksocknal_tunables
.ksnd_nconnds
;
2447 for (i
= 0; i
< *ksocknal_tunables
.ksnd_nconnds
; i
++) {
2449 spin_lock_bh(&ksocknal_data
.ksnd_connd_lock
);
2450 ksocknal_data
.ksnd_connd_starting
++;
2451 spin_unlock_bh(&ksocknal_data
.ksnd_connd_lock
);
2454 snprintf(name
, sizeof(name
), "socknal_cd%02d", i
);
2455 rc
= ksocknal_thread_start(ksocknal_connd
,
2456 (void *)((ulong_ptr_t
)i
), name
);
2458 spin_lock_bh(&ksocknal_data
.ksnd_connd_lock
);
2459 ksocknal_data
.ksnd_connd_starting
--;
2460 spin_unlock_bh(&ksocknal_data
.ksnd_connd_lock
);
2461 CERROR("Can't spawn socknal connd: %d\n", rc
);
2466 rc
= ksocknal_thread_start(ksocknal_reaper
, NULL
, "socknal_reaper");
2468 CERROR ("Can't spawn socknal reaper: %d\n", rc
);
2472 /* flag everything initialised */
2473 ksocknal_data
.ksnd_init
= SOCKNAL_INIT_ALL
;
2478 ksocknal_base_shutdown();
2483 ksocknal_debug_peerhash (lnet_ni_t
*ni
)
2485 ksock_peer_t
*peer
= NULL
;
2486 struct list_head
*tmp
;
2489 read_lock(&ksocknal_data
.ksnd_global_lock
);
2491 for (i
= 0; i
< ksocknal_data
.ksnd_peer_hash_size
; i
++) {
2492 list_for_each (tmp
, &ksocknal_data
.ksnd_peers
[i
]) {
2493 peer
= list_entry (tmp
, ksock_peer_t
, ksnp_list
);
2495 if (peer
->ksnp_ni
== ni
)
2503 ksock_route_t
*route
;
2506 CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
2507 libcfs_id2str(peer
->ksnp_id
),
2508 atomic_read(&peer
->ksnp_refcount
),
2509 peer
->ksnp_sharecount
, peer
->ksnp_closing
,
2510 peer
->ksnp_accepting
, peer
->ksnp_error
,
2511 peer
->ksnp_zc_next_cookie
,
2512 !list_empty(&peer
->ksnp_tx_queue
),
2513 !list_empty(&peer
->ksnp_zc_req_list
));
2515 list_for_each (tmp
, &peer
->ksnp_routes
) {
2516 route
= list_entry(tmp
, ksock_route_t
, ksnr_list
);
2517 CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
2518 atomic_read(&route
->ksnr_refcount
),
2519 route
->ksnr_scheduled
, route
->ksnr_connecting
,
2520 route
->ksnr_connected
, route
->ksnr_deleted
);
2523 list_for_each (tmp
, &peer
->ksnp_conns
) {
2524 conn
= list_entry(tmp
, ksock_conn_t
, ksnc_list
);
2525 CWARN ("Conn: ref %d, sref %d, t %d, c %d\n",
2526 atomic_read(&conn
->ksnc_conn_refcount
),
2527 atomic_read(&conn
->ksnc_sock_refcount
),
2528 conn
->ksnc_type
, conn
->ksnc_closing
);
2532 read_unlock(&ksocknal_data
.ksnd_global_lock
);
2537 ksocknal_shutdown (lnet_ni_t
*ni
)
2539 ksock_net_t
*net
= ni
->ni_data
;
2541 lnet_process_id_t anyid
= {0};
2543 anyid
.nid
= LNET_NID_ANY
;
2544 anyid
.pid
= LNET_PID_ANY
;
2546 LASSERT(ksocknal_data
.ksnd_init
== SOCKNAL_INIT_ALL
);
2547 LASSERT(ksocknal_data
.ksnd_nnets
> 0);
2549 spin_lock_bh(&net
->ksnn_lock
);
2550 net
->ksnn_shutdown
= 1; /* prevent new peers */
2551 spin_unlock_bh(&net
->ksnn_lock
);
2553 /* Delete all peers */
2554 ksocknal_del_peer(ni
, anyid
, 0);
2556 /* Wait for all peer state to clean up */
2558 spin_lock_bh(&net
->ksnn_lock
);
2559 while (net
->ksnn_npeers
!= 0) {
2560 spin_unlock_bh(&net
->ksnn_lock
);
2563 CDEBUG(((i
& (-i
)) == i
) ? D_WARNING
: D_NET
, /* power of 2? */
2564 "waiting for %d peers to disconnect\n",
2566 set_current_state(TASK_UNINTERRUPTIBLE
);
2567 schedule_timeout(cfs_time_seconds(1));
2569 ksocknal_debug_peerhash(ni
);
2571 spin_lock_bh(&net
->ksnn_lock
);
2573 spin_unlock_bh(&net
->ksnn_lock
);
2575 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
2576 LASSERT (net
->ksnn_interfaces
[i
].ksni_npeers
== 0);
2577 LASSERT (net
->ksnn_interfaces
[i
].ksni_nroutes
== 0);
2580 list_del(&net
->ksnn_list
);
2581 LIBCFS_FREE(net
, sizeof(*net
));
2583 ksocknal_data
.ksnd_nnets
--;
2584 if (ksocknal_data
.ksnd_nnets
== 0)
2585 ksocknal_base_shutdown();
2589 ksocknal_enumerate_interfaces(ksock_net_t
*net
)
2597 n
= libcfs_ipif_enumerate(&names
);
2599 CERROR("Can't enumerate interfaces: %d\n", n
);
2603 for (i
= j
= 0; i
< n
; i
++) {
2608 if (!strcmp(names
[i
], "lo")) /* skip the loopback IF */
2611 rc
= libcfs_ipif_query(names
[i
], &up
, &ip
, &mask
);
2613 CWARN("Can't get interface %s info: %d\n",
2619 CWARN("Ignoring interface %s (down)\n",
2624 if (j
== LNET_MAX_INTERFACES
) {
2625 CWARN("Ignoring interface %s (too many interfaces)\n",
2630 net
->ksnn_interfaces
[j
].ksni_ipaddr
= ip
;
2631 net
->ksnn_interfaces
[j
].ksni_netmask
= mask
;
2632 strncpy(&net
->ksnn_interfaces
[j
].ksni_name
[0],
2633 names
[i
], IFNAMSIZ
);
2637 libcfs_ipif_free_enumeration(names
, n
);
2640 CERROR("Can't find any usable interfaces\n");
2646 ksocknal_search_new_ipif(ksock_net_t
*net
)
2651 for (i
= 0; i
< net
->ksnn_ninterfaces
; i
++) {
2652 char *ifnam
= &net
->ksnn_interfaces
[i
].ksni_name
[0];
2653 char *colon
= strchr(ifnam
, ':');
2658 if (colon
!= NULL
) /* ignore alias device */
2661 list_for_each_entry(tmp
, &ksocknal_data
.ksnd_nets
,
2663 for (j
= 0; !found
&& j
< tmp
->ksnn_ninterfaces
; j
++) {
2665 &tmp
->ksnn_interfaces
[j
].ksni_name
[0];
2666 char *colon2
= strchr(ifnam2
, ':');
2671 found
= strcmp(ifnam
, ifnam2
) == 0;
2688 ksocknal_start_schedulers(struct ksock_sched_info
*info
)
2694 if (info
->ksi_nthreads
== 0) {
2695 if (*ksocknal_tunables
.ksnd_nscheds
> 0) {
2696 nthrs
= info
->ksi_nthreads_max
;
2698 nthrs
= cfs_cpt_weight(lnet_cpt_table(),
2700 nthrs
= min(max(SOCKNAL_NSCHEDS
, nthrs
>> 1), nthrs
);
2701 nthrs
= min(SOCKNAL_NSCHEDS_HIGH
, nthrs
);
2703 nthrs
= min(nthrs
, info
->ksi_nthreads_max
);
2705 LASSERT(info
->ksi_nthreads
<= info
->ksi_nthreads_max
);
2706 /* increase two threads if there is new interface */
2707 nthrs
= min(2, info
->ksi_nthreads_max
- info
->ksi_nthreads
);
2710 for (i
= 0; i
< nthrs
; i
++) {
2713 ksock_sched_t
*sched
;
2714 id
= KSOCK_THREAD_ID(info
->ksi_cpt
, info
->ksi_nthreads
+ i
);
2715 sched
= &info
->ksi_scheds
[KSOCK_THREAD_SID(id
)];
2716 snprintf(name
, sizeof(name
), "socknal_sd%02d_%02d",
2717 info
->ksi_cpt
, (int)(sched
- &info
->ksi_scheds
[0]));
2719 rc
= ksocknal_thread_start(ksocknal_scheduler
,
2724 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
2725 info
->ksi_cpt
, info
->ksi_nthreads
+ i
, rc
);
2729 info
->ksi_nthreads
+= i
;
/*
 * Ensure scheduler threads are running for every CPT this net uses.
 *
 * If the net brings no new interface and a CPT already has schedulers,
 * that CPT is skipped; otherwise ksocknal_start_schedulers() tops it up.
 *
 * \param net    the net being started (its interfaces decide "newif")
 * \param cpts   CPT list for this net, or NULL meaning CPTs 0..ncpts-1
 * \param ncpts  number of entries in cpts; must be in (0, cfs_cpt_number()]
 * \return       0 on success or the first scheduler-start error
 */
int
ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts)
{
	int	newif = ksocknal_search_new_ipif(net);
	int	rc;
	int	i;

	LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));

	for (i = 0; i < ncpts; i++) {
		struct ksock_sched_info	*info;
		int cpt = (cpts == NULL) ? i : cpts[i];

		LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
		info = ksocknal_data.ksnd_sched_info[cpt];

		/* already has threads and nothing new to serve */
		if (!newif && info->ksi_nthreads > 0)
			continue;

		rc = ksocknal_start_schedulers(info);
		if (rc != 0)
			return rc;
	}
	return 0;
}
/*
 * LND startup entry point (the_ksocklnd.lnd_startup): bring up a socklnd
 * network interface for LNet.
 *
 * Performs one-time base startup on first use, allocates the per-net state,
 * copies tunables into the NI credit/timeout fields, resolves the interface
 * list (either auto-enumerated or the ones named in ni->ni_interfaces),
 * starts scheduler threads, then derives the NID from the first interface's
 * IP address and links the net onto ksocknal_data.ksnd_nets.
 *
 * \param ni  the LNet network interface to start; ni->ni_lnd must be
 *            &the_ksocklnd
 * \return    0 on success; on failure the net is freed and, if this was the
 *            only net, the base state is torn down again
 *            (NOTE(review): exact error value on the failure path is not
 *            visible in this view — upstream returns -ENETDOWN; confirm)
 */
int
ksocknal_startup (lnet_ni_t *ni)
{
	ksock_net_t	*net;
	int		rc;
	int		i;

	LASSERT (ni->ni_lnd == &the_ksocklnd);

	/* lazy one-time global initialization */
	if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
		rc = ksocknal_base_startup();
		if (rc != 0)
			return rc;
	}

	LIBCFS_ALLOC(net, sizeof(*net));
	if (net == NULL)
		goto fail_0;

	spin_lock_init(&net->ksnn_lock);
	net->ksnn_incarnation = ksocknal_new_incarnation();
	ni->ni_data = net;
	/* publish tunables through the NI so LNet sees this net's limits */
	ni->ni_peertimeout    = *ksocknal_tunables.ksnd_peertimeout;
	ni->ni_maxtxcredits   = *ksocknal_tunables.ksnd_credits;
	ni->ni_peertxcredits  = *ksocknal_tunables.ksnd_peertxcredits;
	ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;

	if (ni->ni_interfaces[0] == NULL) {
		/* no interfaces configured: auto-select one */
		rc = ksocknal_enumerate_interfaces(net);
		if (rc <= 0)
			goto fail_1;

		net->ksnn_ninterfaces = 1;
	} else {
		for (i = 0; i < LNET_MAX_INTERFACES; i++) {
			int up;

			if (ni->ni_interfaces[i] == NULL)
				break;

			rc = libcfs_ipif_query(
				ni->ni_interfaces[i], &up,
				&net->ksnn_interfaces[i].ksni_ipaddr,
				&net->ksnn_interfaces[i].ksni_netmask);

			if (rc != 0) {
				CERROR("Can't get interface %s info: %d\n",
				       ni->ni_interfaces[i], rc);
				goto fail_1;
			}

			if (!up) {
				CERROR("Interface %s is down\n",
				       ni->ni_interfaces[i]);
				goto fail_1;
			}

			/* NOTE(review): strncpy with IFNAMSIZ does not
			 * guarantee NUL termination if the name is exactly
			 * IFNAMSIZ long — consider strlcpy/strscpy */
			strncpy(&net->ksnn_interfaces[i].ksni_name[0],
				ni->ni_interfaces[i], IFNAMSIZ);
		}
		net->ksnn_ninterfaces = i;
	}

	/* call it before add it to ksocknal_data.ksnd_nets */
	rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
	if (rc != 0)
		goto fail_1;

	/* NID = (net number, first interface's IP address) */
	ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
				net->ksnn_interfaces[0].ksni_ipaddr);
	list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);

	ksocknal_data.ksnd_nnets++;
	return 0;

 fail_1:
	LIBCFS_FREE(net, sizeof(*net));
 fail_0:
	/* undo the lazy base startup if we were the only user */
	if (ksocknal_data.ksnd_nnets == 0)
		ksocknal_base_shutdown();

	return -ENETDOWN;
}
/*
 * Module unload hook: unregister the socklnd LND from LNet.  LNet shuts
 * down any remaining nets of this type as part of unregistration.
 */
static void __exit
ksocknal_module_fini (void)
{
	lnet_unregister_lnd(&the_ksocklnd);
}
/*
 * Module load hook: validate compile-time assumptions, fill in the LND
 * operations table for the_ksocklnd, initialize tunables, and register
 * the LND with LNet.
 *
 * \return 0 on success, or the error from ksocknal_tunables_init()
 */
static int __init
ksocknal_module_init (void)
{
	int rc;

	/* check ksnr_connected/connecting field large enough */
	CLASSERT (SOCKLND_CONN_NTYPES <= 4);
	CLASSERT (SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN);

	/* initialize the_ksocklnd */
	the_ksocklnd.lnd_type     = SOCKLND;
	the_ksocklnd.lnd_startup  = ksocknal_startup;
	the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
	the_ksocklnd.lnd_ctl      = ksocknal_ctl;
	the_ksocklnd.lnd_send     = ksocknal_send;
	the_ksocklnd.lnd_recv     = ksocknal_recv;
	the_ksocklnd.lnd_notify   = ksocknal_notify;
	the_ksocklnd.lnd_query    = ksocknal_query;
	the_ksocklnd.lnd_accept   = ksocknal_accept;

	rc = ksocknal_tunables_init();
	if (rc != 0)
		return rc;

	lnet_register_lnd(&the_ksocklnd);

	return 0;
}
/* Kernel module metadata and load/unload entry points. */
MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
MODULE_LICENSE("GPL");
MODULE_VERSION("3.0.0");

module_init(ksocknal_module_init);
module_exit(ksocknal_module_fini);