2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
4 * Copyright (c) 2011, 2012, Intel Corporation.
6 * This file is part of Portals
7 * http://sourceforge.net/projects/sandiaportals/
9 * Portals is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
13 * Portals is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Portals; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LNET
25 #include "../../include/linux/lnet/lib-lnet.h"
27 #if defined(LNET_ROUTER)
29 #define LNET_NRB_TINY_MIN 512 /* min value for each CPT */
30 #define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4)
31 #define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */
32 #define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4)
33 #define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */
34 #define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4)
36 static char *forwarding
= "";
37 module_param(forwarding
, charp
, 0444);
38 MODULE_PARM_DESC(forwarding
, "Explicitly enable/disable forwarding between networks");
40 static int tiny_router_buffers
;
41 module_param(tiny_router_buffers
, int, 0444);
42 MODULE_PARM_DESC(tiny_router_buffers
, "# of 0 payload messages to buffer in the router");
43 static int small_router_buffers
;
44 module_param(small_router_buffers
, int, 0444);
45 MODULE_PARM_DESC(small_router_buffers
, "# of small (1 page) messages to buffer in the router");
46 static int large_router_buffers
;
47 module_param(large_router_buffers
, int, 0444);
48 MODULE_PARM_DESC(large_router_buffers
, "# of large messages to buffer in the router");
49 static int peer_buffer_credits
;
50 module_param(peer_buffer_credits
, int, 0444);
51 MODULE_PARM_DESC(peer_buffer_credits
, "# router buffer credits per peer");
53 static int auto_down
= 1;
54 module_param(auto_down
, int, 0444);
55 MODULE_PARM_DESC(auto_down
, "Automatically mark peers down on comms error");
58 lnet_peer_buffer_credits(lnet_ni_t
*ni
)
60 /* NI option overrides LNet default */
61 if (ni
->ni_peerrtrcredits
> 0)
62 return ni
->ni_peerrtrcredits
;
63 if (peer_buffer_credits
> 0)
64 return peer_buffer_credits
;
66 /* As an approximation, allow this peer the same number of router
67 * buffers as it is allowed outstanding sends */
68 return ni
->ni_peertxcredits
;
72 static int lnet_router_checker(void *);
76 lnet_peer_buffer_credits(lnet_ni_t
*ni
)
83 static int check_routers_before_use
;
84 module_param(check_routers_before_use
, int, 0444);
85 MODULE_PARM_DESC(check_routers_before_use
, "Assume routers are down and ping them before use");
87 int avoid_asym_router_failure
= 1;
88 module_param(avoid_asym_router_failure
, int, 0644);
89 MODULE_PARM_DESC(avoid_asym_router_failure
, "Avoid asymmetrical router failures (0 to disable)");
91 static int dead_router_check_interval
= 60;
92 module_param(dead_router_check_interval
, int, 0644);
93 MODULE_PARM_DESC(dead_router_check_interval
, "Seconds between dead router health checks (<= 0 to disable)");
95 static int live_router_check_interval
= 60;
96 module_param(live_router_check_interval
, int, 0644);
97 MODULE_PARM_DESC(live_router_check_interval
, "Seconds between live router health checks (<= 0 to disable)");
99 static int router_ping_timeout
= 50;
100 module_param(router_ping_timeout
, int, 0644);
101 MODULE_PARM_DESC(router_ping_timeout
, "Seconds to wait for the reply to a router health query");
104 lnet_peers_start_down(void)
106 return check_routers_before_use
;
110 lnet_notify_locked(lnet_peer_t
*lp
, int notifylnd
, int alive
, unsigned long when
)
112 if (time_before(when
, lp
->lp_timestamp
)) { /* out of date information */
113 CDEBUG(D_NET
, "Out of date\n");
117 lp
->lp_timestamp
= when
; /* update timestamp */
118 lp
->lp_ping_deadline
= 0; /* disable ping timeout */
120 if (lp
->lp_alive_count
!= 0 && /* got old news */
121 (!lp
->lp_alive
) == (!alive
)) { /* new date for old news */
122 CDEBUG(D_NET
, "Old news\n");
126 /* Flag that notification is outstanding */
128 lp
->lp_alive_count
++;
129 lp
->lp_alive
= !(!alive
); /* 1 bit! */
131 lp
->lp_notifylnd
|= notifylnd
;
133 lp
->lp_ping_feats
= LNET_PING_FEAT_INVAL
; /* reset */
135 CDEBUG(D_NET
, "set %s %d\n", libcfs_nid2str(lp
->lp_nid
), alive
);
139 lnet_ni_notify_locked(lnet_ni_t
*ni
, lnet_peer_t
*lp
)
144 /* Notify only in 1 thread at any time to ensure ordered notification.
145 * NB individual events can be missed; the only guarantee is that you
146 * always get the most recent news */
148 if (lp
->lp_notifying
|| ni
== NULL
)
151 lp
->lp_notifying
= 1;
153 while (lp
->lp_notify
) {
154 alive
= lp
->lp_alive
;
155 notifylnd
= lp
->lp_notifylnd
;
157 lp
->lp_notifylnd
= 0;
160 if (notifylnd
&& ni
->ni_lnd
->lnd_notify
!= NULL
) {
161 lnet_net_unlock(lp
->lp_cpt
);
163 /* A new notification could happen now; I'll handle it
164 * when control returns to me */
166 (ni
->ni_lnd
->lnd_notify
)(ni
, lp
->lp_nid
, alive
);
168 lnet_net_lock(lp
->lp_cpt
);
172 lp
->lp_notifying
= 0;
177 lnet_rtr_addref_locked(lnet_peer_t
*lp
)
179 LASSERT(lp
->lp_refcount
> 0);
180 LASSERT(lp
->lp_rtr_refcount
>= 0);
182 /* lnet_net_lock must be exclusively locked */
183 lp
->lp_rtr_refcount
++;
184 if (lp
->lp_rtr_refcount
== 1) {
185 struct list_head
*pos
;
187 /* a simple insertion sort */
188 list_for_each_prev(pos
, &the_lnet
.ln_routers
) {
189 lnet_peer_t
*rtr
= list_entry(pos
, lnet_peer_t
,
192 if (rtr
->lp_nid
< lp
->lp_nid
)
196 list_add(&lp
->lp_rtr_list
, pos
);
197 /* addref for the_lnet.ln_routers */
198 lnet_peer_addref_locked(lp
);
199 the_lnet
.ln_routers_version
++;
204 lnet_rtr_decref_locked(lnet_peer_t
*lp
)
206 LASSERT(lp
->lp_refcount
> 0);
207 LASSERT(lp
->lp_rtr_refcount
> 0);
209 /* lnet_net_lock must be exclusively locked */
210 lp
->lp_rtr_refcount
--;
211 if (lp
->lp_rtr_refcount
== 0) {
212 LASSERT(list_empty(&lp
->lp_routes
));
214 if (lp
->lp_rcd
!= NULL
) {
215 list_add(&lp
->lp_rcd
->rcd_list
,
216 &the_lnet
.ln_rcd_deathrow
);
220 list_del(&lp
->lp_rtr_list
);
221 /* decref for the_lnet.ln_routers */
222 lnet_peer_decref_locked(lp
);
223 the_lnet
.ln_routers_version
++;
228 lnet_find_net_locked (__u32 net
)
230 lnet_remotenet_t
*rnet
;
231 struct list_head
*tmp
;
232 struct list_head
*rn_list
;
234 LASSERT(!the_lnet
.ln_shutdown
);
236 rn_list
= lnet_net2rnethash(net
);
237 list_for_each(tmp
, rn_list
) {
238 rnet
= list_entry(tmp
, lnet_remotenet_t
, lrn_list
);
240 if (rnet
->lrn_net
== net
)
246 static void lnet_shuffle_seed(void)
249 int lnd_type
, seed
[2];
252 struct list_head
*tmp
;
257 cfs_get_random_bytes(seed
, sizeof(seed
));
259 /* Nodes with small feet have little entropy
260 * the NID for this node gives the most entropy in the low bits */
261 list_for_each(tmp
, &the_lnet
.ln_nis
) {
262 ni
= list_entry(tmp
, lnet_ni_t
, ni_list
);
263 lnd_type
= LNET_NETTYP(LNET_NIDNET(ni
->ni_nid
));
265 if (lnd_type
!= LOLND
)
266 seed
[0] ^= (LNET_NIDADDR(ni
->ni_nid
) | lnd_type
);
269 do_gettimeofday(&tv
);
270 cfs_srand(tv
.tv_sec
^ seed
[0], tv
.tv_usec
^ seed
[1]);
275 /* NB expects LNET_LOCK held */
277 lnet_add_route_to_rnet (lnet_remotenet_t
*rnet
, lnet_route_t
*route
)
279 unsigned int len
= 0;
280 unsigned int offset
= 0;
285 list_for_each (e
, &rnet
->lrn_routes
) {
289 /* len+1 positions to add a new entry, also prevents division by 0 */
290 offset
= cfs_rand() % (len
+ 1);
291 list_for_each (e
, &rnet
->lrn_routes
) {
296 list_add(&route
->lr_list
, e
);
297 list_add(&route
->lr_gwlist
, &route
->lr_gateway
->lp_routes
);
299 the_lnet
.ln_remote_nets_version
++;
300 lnet_rtr_addref_locked(route
->lr_gateway
);
304 lnet_add_route(__u32 net
, unsigned int hops
, lnet_nid_t gateway
,
305 unsigned int priority
)
308 lnet_remotenet_t
*rnet
;
309 lnet_remotenet_t
*rnet2
;
315 CDEBUG(D_NET
, "Add route: net %s hops %u priority %u gw %s\n",
316 libcfs_net2str(net
), hops
, priority
, libcfs_nid2str(gateway
));
318 if (gateway
== LNET_NID_ANY
||
319 LNET_NETTYP(LNET_NIDNET(gateway
)) == LOLND
||
320 net
== LNET_NIDNET(LNET_NID_ANY
) ||
321 LNET_NETTYP(net
) == LOLND
||
322 LNET_NIDNET(gateway
) == net
||
323 hops
< 1 || hops
> 255)
326 if (lnet_islocalnet(net
)) /* it's a local network */
327 return 0; /* ignore the route entry */
329 /* Assume net, route, all new */
330 LIBCFS_ALLOC(route
, sizeof(*route
));
331 LIBCFS_ALLOC(rnet
, sizeof(*rnet
));
332 if (route
== NULL
|| rnet
== NULL
) {
333 CERROR("Out of memory creating route %s %d %s\n",
334 libcfs_net2str(net
), hops
, libcfs_nid2str(gateway
));
336 LIBCFS_FREE(route
, sizeof(*route
));
338 LIBCFS_FREE(rnet
, sizeof(*rnet
));
342 INIT_LIST_HEAD(&rnet
->lrn_routes
);
344 route
->lr_hops
= hops
;
346 route
->lr_priority
= priority
;
348 lnet_net_lock(LNET_LOCK_EX
);
350 rc
= lnet_nid2peer_locked(&route
->lr_gateway
, gateway
, LNET_LOCK_EX
);
352 lnet_net_unlock(LNET_LOCK_EX
);
354 LIBCFS_FREE(route
, sizeof(*route
));
355 LIBCFS_FREE(rnet
, sizeof(*rnet
));
357 if (rc
== -EHOSTUNREACH
) { /* gateway is not on a local net */
358 return 0; /* ignore the route entry */
360 CERROR("Error %d creating route %s %d %s\n", rc
,
361 libcfs_net2str(net
), hops
,
362 libcfs_nid2str(gateway
));
367 LASSERT (!the_lnet
.ln_shutdown
);
369 rnet2
= lnet_find_net_locked(net
);
372 list_add_tail(&rnet
->lrn_list
, lnet_net2rnethash(net
));
376 /* Search for a duplicate route (it's a NOOP if it is) */
378 list_for_each (e
, &rnet2
->lrn_routes
) {
379 lnet_route_t
*route2
= list_entry(e
, lnet_route_t
, lr_list
);
381 if (route2
->lr_gateway
== route
->lr_gateway
) {
386 /* our lookups must be true */
387 LASSERT (route2
->lr_gateway
->lp_nid
!= gateway
);
391 lnet_peer_addref_locked(route
->lr_gateway
); /* +1 for notify */
392 lnet_add_route_to_rnet(rnet2
, route
);
394 ni
= route
->lr_gateway
->lp_ni
;
395 lnet_net_unlock(LNET_LOCK_EX
);
397 /* XXX Assume alive */
398 if (ni
->ni_lnd
->lnd_notify
!= NULL
)
399 (ni
->ni_lnd
->lnd_notify
)(ni
, gateway
, 1);
401 lnet_net_lock(LNET_LOCK_EX
);
404 /* -1 for notify or !add_route */
405 lnet_peer_decref_locked(route
->lr_gateway
);
406 lnet_net_unlock(LNET_LOCK_EX
);
409 LIBCFS_FREE(route
, sizeof(*route
));
412 LIBCFS_FREE(rnet
, sizeof(*rnet
));
418 lnet_check_routes(void)
420 lnet_remotenet_t
*rnet
;
422 lnet_route_t
*route2
;
423 struct list_head
*e1
;
424 struct list_head
*e2
;
426 struct list_head
*rn_list
;
429 cpt
= lnet_net_lock_current();
431 for (i
= 0; i
< LNET_REMOTE_NETS_HASH_SIZE
; i
++) {
432 rn_list
= &the_lnet
.ln_remote_nets_hash
[i
];
433 list_for_each(e1
, rn_list
) {
434 rnet
= list_entry(e1
, lnet_remotenet_t
, lrn_list
);
437 list_for_each(e2
, &rnet
->lrn_routes
) {
442 route
= list_entry(e2
, lnet_route_t
,
445 if (route2
== NULL
) {
450 if (route
->lr_gateway
->lp_ni
==
451 route2
->lr_gateway
->lp_ni
)
454 nid1
= route
->lr_gateway
->lp_nid
;
455 nid2
= route2
->lr_gateway
->lp_nid
;
458 lnet_net_unlock(cpt
);
460 CERROR("Routes to %s via %s and %s not supported\n",
462 libcfs_nid2str(nid1
),
463 libcfs_nid2str(nid2
));
469 lnet_net_unlock(cpt
);
474 lnet_del_route(__u32 net
, lnet_nid_t gw_nid
)
476 struct lnet_peer
*gateway
;
477 lnet_remotenet_t
*rnet
;
479 struct list_head
*e1
;
480 struct list_head
*e2
;
482 struct list_head
*rn_list
;
485 CDEBUG(D_NET
, "Del route: net %s : gw %s\n",
486 libcfs_net2str(net
), libcfs_nid2str(gw_nid
));
488 /* NB Caller may specify either all routes via the given gateway
489 * or a specific route entry actual NIDs) */
491 lnet_net_lock(LNET_LOCK_EX
);
492 if (net
== LNET_NIDNET(LNET_NID_ANY
))
493 rn_list
= &the_lnet
.ln_remote_nets_hash
[0];
495 rn_list
= lnet_net2rnethash(net
);
498 list_for_each(e1
, rn_list
) {
499 rnet
= list_entry(e1
, lnet_remotenet_t
, lrn_list
);
501 if (!(net
== LNET_NIDNET(LNET_NID_ANY
) ||
502 net
== rnet
->lrn_net
))
505 list_for_each(e2
, &rnet
->lrn_routes
) {
506 route
= list_entry(e2
, lnet_route_t
, lr_list
);
508 gateway
= route
->lr_gateway
;
509 if (!(gw_nid
== LNET_NID_ANY
||
510 gw_nid
== gateway
->lp_nid
))
513 list_del(&route
->lr_list
);
514 list_del(&route
->lr_gwlist
);
515 the_lnet
.ln_remote_nets_version
++;
517 if (list_empty(&rnet
->lrn_routes
))
518 list_del(&rnet
->lrn_list
);
522 lnet_rtr_decref_locked(gateway
);
523 lnet_peer_decref_locked(gateway
);
525 lnet_net_unlock(LNET_LOCK_EX
);
527 LIBCFS_FREE(route
, sizeof(*route
));
530 LIBCFS_FREE(rnet
, sizeof(*rnet
));
533 lnet_net_lock(LNET_LOCK_EX
);
538 if (net
== LNET_NIDNET(LNET_NID_ANY
) &&
539 ++idx
< LNET_REMOTE_NETS_HASH_SIZE
) {
540 rn_list
= &the_lnet
.ln_remote_nets_hash
[idx
];
543 lnet_net_unlock(LNET_LOCK_EX
);
549 lnet_destroy_routes (void)
551 lnet_del_route(LNET_NIDNET(LNET_NID_ANY
), LNET_NID_ANY
);
555 lnet_get_route(int idx
, __u32
*net
, __u32
*hops
,
556 lnet_nid_t
*gateway
, __u32
*alive
, __u32
*priority
)
558 struct list_head
*e1
;
559 struct list_head
*e2
;
560 lnet_remotenet_t
*rnet
;
564 struct list_head
*rn_list
;
566 cpt
= lnet_net_lock_current();
568 for (i
= 0; i
< LNET_REMOTE_NETS_HASH_SIZE
; i
++) {
569 rn_list
= &the_lnet
.ln_remote_nets_hash
[i
];
570 list_for_each(e1
, rn_list
) {
571 rnet
= list_entry(e1
, lnet_remotenet_t
, lrn_list
);
573 list_for_each(e2
, &rnet
->lrn_routes
) {
574 route
= list_entry(e2
, lnet_route_t
,
578 *net
= rnet
->lrn_net
;
579 *hops
= route
->lr_hops
;
580 *priority
= route
->lr_priority
;
581 *gateway
= route
->lr_gateway
->lp_nid
;
582 *alive
= route
->lr_gateway
->lp_alive
;
583 lnet_net_unlock(cpt
);
590 lnet_net_unlock(cpt
);
595 lnet_swap_pinginfo(lnet_ping_info_t
*info
)
598 lnet_ni_status_t
*stat
;
600 __swab32s(&info
->pi_magic
);
601 __swab32s(&info
->pi_features
);
602 __swab32s(&info
->pi_pid
);
603 __swab32s(&info
->pi_nnis
);
604 for (i
= 0; i
< info
->pi_nnis
&& i
< LNET_MAX_RTR_NIS
; i
++) {
605 stat
= &info
->pi_ni
[i
];
606 __swab64s(&stat
->ns_nid
);
607 __swab32s(&stat
->ns_status
);
613 * parse router-checker pinginfo, record number of down NIs for remote
614 * networks on that router.
617 lnet_parse_rc_info(lnet_rc_data_t
*rcd
)
619 lnet_ping_info_t
*info
= rcd
->rcd_pinginfo
;
620 struct lnet_peer
*gw
= rcd
->rcd_gateway
;
626 if (info
->pi_magic
== __swab32(LNET_PROTO_PING_MAGIC
))
627 lnet_swap_pinginfo(info
);
629 /* NB always racing with network! */
630 if (info
->pi_magic
!= LNET_PROTO_PING_MAGIC
) {
631 CDEBUG(D_NET
, "%s: Unexpected magic %08x\n",
632 libcfs_nid2str(gw
->lp_nid
), info
->pi_magic
);
633 gw
->lp_ping_feats
= LNET_PING_FEAT_INVAL
;
637 gw
->lp_ping_feats
= info
->pi_features
;
638 if ((gw
->lp_ping_feats
& LNET_PING_FEAT_MASK
) == 0) {
639 CDEBUG(D_NET
, "%s: Unexpected features 0x%x\n",
640 libcfs_nid2str(gw
->lp_nid
), gw
->lp_ping_feats
);
641 return; /* nothing I can understand */
644 if ((gw
->lp_ping_feats
& LNET_PING_FEAT_NI_STATUS
) == 0)
645 return; /* can't carry NI status info */
647 list_for_each_entry(rtr
, &gw
->lp_routes
, lr_gwlist
) {
648 int ptl_status
= LNET_NI_STATUS_INVALID
;
653 for (i
= 0; i
< info
->pi_nnis
&& i
< LNET_MAX_RTR_NIS
; i
++) {
654 lnet_ni_status_t
*stat
= &info
->pi_ni
[i
];
655 lnet_nid_t nid
= stat
->ns_nid
;
657 if (nid
== LNET_NID_ANY
) {
658 CDEBUG(D_NET
, "%s: unexpected LNET_NID_ANY\n",
659 libcfs_nid2str(gw
->lp_nid
));
660 gw
->lp_ping_feats
= LNET_PING_FEAT_INVAL
;
664 if (LNET_NETTYP(LNET_NIDNET(nid
)) == LOLND
)
667 if (stat
->ns_status
== LNET_NI_STATUS_DOWN
) {
668 if (LNET_NETTYP(LNET_NIDNET(nid
)) != PTLLND
)
670 else if (ptl_status
!= LNET_NI_STATUS_UP
)
671 ptl_status
= LNET_NI_STATUS_DOWN
;
675 if (stat
->ns_status
== LNET_NI_STATUS_UP
) {
676 if (LNET_NIDNET(nid
) == rtr
->lr_net
) {
680 /* ptl NIs are considered down only when
681 * they're all down */
682 if (LNET_NETTYP(LNET_NIDNET(nid
)) == PTLLND
)
683 ptl_status
= LNET_NI_STATUS_UP
;
687 CDEBUG(D_NET
, "%s: Unexpected status 0x%x\n",
688 libcfs_nid2str(gw
->lp_nid
), stat
->ns_status
);
689 gw
->lp_ping_feats
= LNET_PING_FEAT_INVAL
;
693 if (up
) { /* ignore downed NIs if NI for dest network is up */
697 rtr
->lr_downis
= down
+ (ptl_status
== LNET_NI_STATUS_DOWN
);
702 lnet_router_checker_event(lnet_event_t
*event
)
704 lnet_rc_data_t
*rcd
= event
->md
.user_ptr
;
705 struct lnet_peer
*lp
;
707 LASSERT(rcd
!= NULL
);
709 if (event
->unlinked
) {
710 LNetInvalidateHandle(&rcd
->rcd_mdh
);
714 LASSERT(event
->type
== LNET_EVENT_SEND
||
715 event
->type
== LNET_EVENT_REPLY
);
717 lp
= rcd
->rcd_gateway
;
720 /* NB: it's called with holding lnet_res_lock, we have a few
721 * places need to hold both locks at the same time, please take
722 * care of lock ordering */
723 lnet_net_lock(lp
->lp_cpt
);
724 if (!lnet_isrouter(lp
) || lp
->lp_rcd
!= rcd
) {
725 /* ignore if no longer a router or rcd is replaced */
729 if (event
->type
== LNET_EVENT_SEND
) {
730 lp
->lp_ping_notsent
= 0;
731 if (event
->status
== 0)
735 /* LNET_EVENT_REPLY */
736 /* A successful REPLY means the router is up. If _any_ comms
737 * to the router fail I assume it's down (this will happen if
738 * we ping alive routers to try to detect router death before
739 * apps get burned). */
741 lnet_notify_locked(lp
, 1, (event
->status
== 0), cfs_time_current());
742 /* The router checker will wake up very shortly and do the
743 * actual notification.
744 * XXX If 'lp' stops being a router before then, it will still
745 * have the notification pending!!! */
747 if (avoid_asym_router_failure
&& event
->status
== 0)
748 lnet_parse_rc_info(rcd
);
751 lnet_net_unlock(lp
->lp_cpt
);
755 lnet_wait_known_routerstate(void)
758 struct list_head
*entry
;
761 LASSERT (the_lnet
.ln_rc_state
== LNET_RC_STATE_RUNNING
);
764 int cpt
= lnet_net_lock_current();
767 list_for_each (entry
, &the_lnet
.ln_routers
) {
768 rtr
= list_entry(entry
, lnet_peer_t
, lp_rtr_list
);
770 if (rtr
->lp_alive_count
== 0) {
776 lnet_net_unlock(cpt
);
781 set_current_state(TASK_UNINTERRUPTIBLE
);
782 schedule_timeout(cfs_time_seconds(1));
787 lnet_router_ni_update_locked(lnet_peer_t
*gw
, __u32 net
)
791 if ((gw
->lp_ping_feats
& LNET_PING_FEAT_NI_STATUS
) != 0) {
792 list_for_each_entry(rte
, &gw
->lp_routes
, lr_gwlist
) {
793 if (rte
->lr_net
== net
) {
802 lnet_update_ni_status_locked(void)
808 LASSERT(the_lnet
.ln_routing
);
810 timeout
= router_ping_timeout
+
811 max(live_router_check_interval
, dead_router_check_interval
);
814 list_for_each_entry(ni
, &the_lnet
.ln_nis
, ni_list
) {
815 if (ni
->ni_lnd
->lnd_type
== LOLND
)
818 if (now
< ni
->ni_last_alive
+ timeout
)
822 /* re-check with lock */
823 if (now
< ni
->ni_last_alive
+ timeout
) {
828 LASSERT(ni
->ni_status
!= NULL
);
830 if (ni
->ni_status
->ns_status
!= LNET_NI_STATUS_DOWN
) {
831 CDEBUG(D_NET
, "NI(%s:%d) status changed to down\n",
832 libcfs_nid2str(ni
->ni_nid
), timeout
);
833 /* NB: so far, this is the only place to set
834 * NI status to "down" */
835 ni
->ni_status
->ns_status
= LNET_NI_STATUS_DOWN
;
842 lnet_destroy_rc_data(lnet_rc_data_t
*rcd
)
844 LASSERT(list_empty(&rcd
->rcd_list
));
845 /* detached from network */
846 LASSERT(LNetHandleIsInvalid(rcd
->rcd_mdh
));
848 if (rcd
->rcd_gateway
!= NULL
) {
849 int cpt
= rcd
->rcd_gateway
->lp_cpt
;
852 lnet_peer_decref_locked(rcd
->rcd_gateway
);
853 lnet_net_unlock(cpt
);
856 if (rcd
->rcd_pinginfo
!= NULL
)
857 LIBCFS_FREE(rcd
->rcd_pinginfo
, LNET_PINGINFO_SIZE
);
859 LIBCFS_FREE(rcd
, sizeof(*rcd
));
862 static lnet_rc_data_t
*
863 lnet_create_rc_data_locked(lnet_peer_t
*gateway
)
865 lnet_rc_data_t
*rcd
= NULL
;
866 lnet_ping_info_t
*pi
;
870 lnet_net_unlock(gateway
->lp_cpt
);
872 LIBCFS_ALLOC(rcd
, sizeof(*rcd
));
876 LNetInvalidateHandle(&rcd
->rcd_mdh
);
877 INIT_LIST_HEAD(&rcd
->rcd_list
);
879 LIBCFS_ALLOC(pi
, LNET_PINGINFO_SIZE
);
883 for (i
= 0; i
< LNET_MAX_RTR_NIS
; i
++) {
884 pi
->pi_ni
[i
].ns_nid
= LNET_NID_ANY
;
885 pi
->pi_ni
[i
].ns_status
= LNET_NI_STATUS_INVALID
;
887 rcd
->rcd_pinginfo
= pi
;
889 LASSERT (!LNetHandleIsInvalid(the_lnet
.ln_rc_eqh
));
890 rc
= LNetMDBind((lnet_md_t
){.start
= pi
,
892 .length
= LNET_PINGINFO_SIZE
,
893 .threshold
= LNET_MD_THRESH_INF
,
894 .options
= LNET_MD_TRUNCATE
,
895 .eq_handle
= the_lnet
.ln_rc_eqh
},
899 CERROR("Can't bind MD: %d\n", rc
);
904 lnet_net_lock(gateway
->lp_cpt
);
905 /* router table changed or someone has created rcd for this gateway */
906 if (!lnet_isrouter(gateway
) || gateway
->lp_rcd
!= NULL
) {
907 lnet_net_unlock(gateway
->lp_cpt
);
911 lnet_peer_addref_locked(gateway
);
912 rcd
->rcd_gateway
= gateway
;
913 gateway
->lp_rcd
= rcd
;
914 gateway
->lp_ping_notsent
= 0;
920 if (!LNetHandleIsInvalid(rcd
->rcd_mdh
)) {
921 rc
= LNetMDUnlink(rcd
->rcd_mdh
);
924 lnet_destroy_rc_data(rcd
);
927 lnet_net_lock(gateway
->lp_cpt
);
928 return gateway
->lp_rcd
;
932 lnet_router_check_interval (lnet_peer_t
*rtr
)
936 secs
= rtr
->lp_alive
? live_router_check_interval
:
937 dead_router_check_interval
;
945 lnet_ping_router_locked (lnet_peer_t
*rtr
)
947 lnet_rc_data_t
*rcd
= NULL
;
948 unsigned long now
= cfs_time_current();
951 lnet_peer_addref_locked(rtr
);
953 if (rtr
->lp_ping_deadline
!= 0 && /* ping timed out? */
954 cfs_time_after(now
, rtr
->lp_ping_deadline
))
955 lnet_notify_locked(rtr
, 1, 0, now
);
957 /* Run any outstanding notifications */
958 lnet_ni_notify_locked(rtr
->lp_ni
, rtr
);
960 if (!lnet_isrouter(rtr
) ||
961 the_lnet
.ln_rc_state
!= LNET_RC_STATE_RUNNING
) {
962 /* router table changed or router checker is shutting down */
963 lnet_peer_decref_locked(rtr
);
967 rcd
= rtr
->lp_rcd
!= NULL
?
968 rtr
->lp_rcd
: lnet_create_rc_data_locked(rtr
);
973 secs
= lnet_router_check_interval(rtr
);
976 "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n",
977 libcfs_nid2str(rtr
->lp_nid
), secs
,
978 rtr
->lp_ping_deadline
, rtr
->lp_ping_notsent
,
979 rtr
->lp_alive
, rtr
->lp_alive_count
, rtr
->lp_ping_timestamp
);
981 if (secs
!= 0 && !rtr
->lp_ping_notsent
&&
982 cfs_time_after(now
, cfs_time_add(rtr
->lp_ping_timestamp
,
983 cfs_time_seconds(secs
)))) {
985 lnet_process_id_t id
;
986 lnet_handle_md_t mdh
;
988 id
.nid
= rtr
->lp_nid
;
989 id
.pid
= LUSTRE_SRV_LNET_PID
;
990 CDEBUG(D_NET
, "Check: %s\n", libcfs_id2str(id
));
992 rtr
->lp_ping_notsent
= 1;
993 rtr
->lp_ping_timestamp
= now
;
997 if (rtr
->lp_ping_deadline
== 0) {
998 rtr
->lp_ping_deadline
=
999 cfs_time_shift(router_ping_timeout
);
1002 lnet_net_unlock(rtr
->lp_cpt
);
1004 rc
= LNetGet(LNET_NID_ANY
, mdh
, id
, LNET_RESERVED_PORTAL
,
1005 LNET_PROTO_PING_MATCHBITS
, 0);
1007 lnet_net_lock(rtr
->lp_cpt
);
1009 rtr
->lp_ping_notsent
= 0; /* no event pending */
1012 lnet_peer_decref_locked(rtr
);
1017 lnet_router_checker_start(void)
1022 LASSERT (the_lnet
.ln_rc_state
== LNET_RC_STATE_SHUTDOWN
);
1024 if (check_routers_before_use
&&
1025 dead_router_check_interval
<= 0) {
1026 LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
1030 if (!the_lnet
.ln_routing
&&
1031 live_router_check_interval
<= 0 &&
1032 dead_router_check_interval
<= 0)
1035 sema_init(&the_lnet
.ln_rc_signal
, 0);
1036 /* EQ size doesn't matter; the callback is guaranteed to get every
1039 rc
= LNetEQAlloc(eqsz
, lnet_router_checker_event
,
1040 &the_lnet
.ln_rc_eqh
);
1042 CERROR("Can't allocate EQ(%d): %d\n", eqsz
, rc
);
1046 the_lnet
.ln_rc_state
= LNET_RC_STATE_RUNNING
;
1047 rc
= PTR_ERR(kthread_run(lnet_router_checker
,
1048 NULL
, "router_checker"));
1049 if (IS_ERR_VALUE(rc
)) {
1050 CERROR("Can't start router checker thread: %d\n", rc
);
1051 /* block until event callback signals exit */
1052 down(&the_lnet
.ln_rc_signal
);
1053 rc
= LNetEQFree(the_lnet
.ln_rc_eqh
);
1055 the_lnet
.ln_rc_state
= LNET_RC_STATE_SHUTDOWN
;
1059 if (check_routers_before_use
) {
1060 /* Note that a helpful side-effect of pinging all known routers
1061 * at startup is that it makes them drop stale connections they
1062 * may have to a previous instance of me. */
1063 lnet_wait_known_routerstate();
1070 lnet_router_checker_stop (void)
1074 if (the_lnet
.ln_rc_state
== LNET_RC_STATE_SHUTDOWN
)
1077 LASSERT (the_lnet
.ln_rc_state
== LNET_RC_STATE_RUNNING
);
1078 the_lnet
.ln_rc_state
= LNET_RC_STATE_STOPPING
;
1080 /* block until event callback signals exit */
1081 down(&the_lnet
.ln_rc_signal
);
1082 LASSERT(the_lnet
.ln_rc_state
== LNET_RC_STATE_SHUTDOWN
);
1084 rc
= LNetEQFree(the_lnet
.ln_rc_eqh
);
1090 lnet_prune_rc_data(int wait_unlink
)
1092 lnet_rc_data_t
*rcd
;
1093 lnet_rc_data_t
*tmp
;
1095 struct list_head head
;
1098 if (likely(the_lnet
.ln_rc_state
== LNET_RC_STATE_RUNNING
&&
1099 list_empty(&the_lnet
.ln_rcd_deathrow
) &&
1100 list_empty(&the_lnet
.ln_rcd_zombie
)))
1103 INIT_LIST_HEAD(&head
);
1105 lnet_net_lock(LNET_LOCK_EX
);
1107 if (the_lnet
.ln_rc_state
!= LNET_RC_STATE_RUNNING
) {
1108 /* router checker is stopping, prune all */
1109 list_for_each_entry(lp
, &the_lnet
.ln_routers
,
1111 if (lp
->lp_rcd
== NULL
)
1114 LASSERT(list_empty(&lp
->lp_rcd
->rcd_list
));
1115 list_add(&lp
->lp_rcd
->rcd_list
,
1116 &the_lnet
.ln_rcd_deathrow
);
1121 /* unlink all RCDs on deathrow list */
1122 list_splice_init(&the_lnet
.ln_rcd_deathrow
, &head
);
1124 if (!list_empty(&head
)) {
1125 lnet_net_unlock(LNET_LOCK_EX
);
1127 list_for_each_entry(rcd
, &head
, rcd_list
)
1128 LNetMDUnlink(rcd
->rcd_mdh
);
1130 lnet_net_lock(LNET_LOCK_EX
);
1133 list_splice_init(&head
, &the_lnet
.ln_rcd_zombie
);
1135 /* release all zombie RCDs */
1136 while (!list_empty(&the_lnet
.ln_rcd_zombie
)) {
1137 list_for_each_entry_safe(rcd
, tmp
, &the_lnet
.ln_rcd_zombie
,
1139 if (LNetHandleIsInvalid(rcd
->rcd_mdh
))
1140 list_move(&rcd
->rcd_list
, &head
);
1143 wait_unlink
= wait_unlink
&&
1144 !list_empty(&the_lnet
.ln_rcd_zombie
);
1146 lnet_net_unlock(LNET_LOCK_EX
);
1148 while (!list_empty(&head
)) {
1149 rcd
= list_entry(head
.next
,
1150 lnet_rc_data_t
, rcd_list
);
1151 list_del_init(&rcd
->rcd_list
);
1152 lnet_destroy_rc_data(rcd
);
1159 CDEBUG(((i
& (-i
)) == i
) ? D_WARNING
: D_NET
,
1160 "Waiting for rc buffers to unlink\n");
1161 set_current_state(TASK_UNINTERRUPTIBLE
);
1162 schedule_timeout(cfs_time_seconds(1) / 4);
1164 lnet_net_lock(LNET_LOCK_EX
);
1167 lnet_net_unlock(LNET_LOCK_EX
);
1171 #if defined(LNET_ROUTER)
1174 lnet_router_checker(void *arg
)
1177 struct list_head
*entry
;
1179 cfs_block_allsigs();
1181 LASSERT (the_lnet
.ln_rc_state
== LNET_RC_STATE_RUNNING
);
1183 while (the_lnet
.ln_rc_state
== LNET_RC_STATE_RUNNING
) {
1188 cpt
= lnet_net_lock_current();
1190 version
= the_lnet
.ln_routers_version
;
1192 list_for_each(entry
, &the_lnet
.ln_routers
) {
1193 rtr
= list_entry(entry
, lnet_peer_t
, lp_rtr_list
);
1195 cpt2
= lnet_cpt_of_nid_locked(rtr
->lp_nid
);
1197 lnet_net_unlock(cpt
);
1200 /* the routers list has changed */
1201 if (version
!= the_lnet
.ln_routers_version
)
1205 lnet_ping_router_locked(rtr
);
1207 /* NB dropped lock */
1208 if (version
!= the_lnet
.ln_routers_version
) {
1209 /* the routers list has changed */
1214 if (the_lnet
.ln_routing
)
1215 lnet_update_ni_status_locked();
1217 lnet_net_unlock(cpt
);
1219 lnet_prune_rc_data(0); /* don't wait for UNLINK */
1221 /* Call schedule_timeout() here always adds 1 to load average
1222 * because kernel counts # active tasks as nr_running
1223 * + nr_uninterruptible. */
1224 set_current_state(TASK_INTERRUPTIBLE
);
1225 schedule_timeout(cfs_time_seconds(1));
1228 LASSERT(the_lnet
.ln_rc_state
== LNET_RC_STATE_STOPPING
);
1230 lnet_prune_rc_data(1); /* wait for UNLINK */
1232 the_lnet
.ln_rc_state
= LNET_RC_STATE_SHUTDOWN
;
1233 up(&the_lnet
.ln_rc_signal
);
1234 /* The unlink event callback will signal final completion */
1239 lnet_destroy_rtrbuf(lnet_rtrbuf_t
*rb
, int npages
)
1241 int sz
= offsetof(lnet_rtrbuf_t
, rb_kiov
[npages
]);
1243 while (--npages
>= 0)
1244 __free_page(rb
->rb_kiov
[npages
].kiov_page
);
1246 LIBCFS_FREE(rb
, sz
);
1249 static lnet_rtrbuf_t
*
1250 lnet_new_rtrbuf(lnet_rtrbufpool_t
*rbp
, int cpt
)
1252 int npages
= rbp
->rbp_npages
;
1253 int sz
= offsetof(lnet_rtrbuf_t
, rb_kiov
[npages
]);
1258 LIBCFS_CPT_ALLOC(rb
, lnet_cpt_table(), cpt
, sz
);
1264 for (i
= 0; i
< npages
; i
++) {
1265 page
= alloc_pages_node(
1266 cfs_cpt_spread_node(lnet_cpt_table(), cpt
),
1267 __GFP_ZERO
| GFP_IOFS
, 0);
1270 __free_page(rb
->rb_kiov
[i
].kiov_page
);
1272 LIBCFS_FREE(rb
, sz
);
1276 rb
->rb_kiov
[i
].kiov_len
= PAGE_CACHE_SIZE
;
1277 rb
->rb_kiov
[i
].kiov_offset
= 0;
1278 rb
->rb_kiov
[i
].kiov_page
= page
;
1285 lnet_rtrpool_free_bufs(lnet_rtrbufpool_t
*rbp
)
1287 int npages
= rbp
->rbp_npages
;
1291 if (rbp
->rbp_nbuffers
== 0) /* not initialized or already freed */
1294 LASSERT (list_empty(&rbp
->rbp_msgs
));
1295 LASSERT (rbp
->rbp_credits
== rbp
->rbp_nbuffers
);
1297 while (!list_empty(&rbp
->rbp_bufs
)) {
1298 LASSERT (rbp
->rbp_credits
> 0);
1300 rb
= list_entry(rbp
->rbp_bufs
.next
,
1301 lnet_rtrbuf_t
, rb_list
);
1302 list_del(&rb
->rb_list
);
1303 lnet_destroy_rtrbuf(rb
, npages
);
1307 LASSERT (rbp
->rbp_nbuffers
== nbuffers
);
1308 LASSERT (rbp
->rbp_credits
== nbuffers
);
1310 rbp
->rbp_nbuffers
= rbp
->rbp_credits
= 0;
1314 lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t
*rbp
, int nbufs
, int cpt
)
1319 if (rbp
->rbp_nbuffers
!= 0) {
1320 LASSERT (rbp
->rbp_nbuffers
== nbufs
);
1324 for (i
= 0; i
< nbufs
; i
++) {
1325 rb
= lnet_new_rtrbuf(rbp
, cpt
);
1328 CERROR("Failed to allocate %d router bufs of %d pages\n",
1329 nbufs
, rbp
->rbp_npages
);
1333 rbp
->rbp_nbuffers
++;
1335 rbp
->rbp_mincredits
++;
1336 list_add(&rb
->rb_list
, &rbp
->rbp_bufs
);
1338 /* No allocation "under fire" */
1339 /* Otherwise we'd need code to schedule blocked msgs etc */
1340 LASSERT (!the_lnet
.ln_routing
);
1343 LASSERT (rbp
->rbp_credits
== nbufs
);
1348 lnet_rtrpool_init(lnet_rtrbufpool_t
*rbp
, int npages
)
1350 INIT_LIST_HEAD(&rbp
->rbp_msgs
);
1351 INIT_LIST_HEAD(&rbp
->rbp_bufs
);
1353 rbp
->rbp_npages
= npages
;
1354 rbp
->rbp_credits
= 0;
1355 rbp
->rbp_mincredits
= 0;
1359 lnet_rtrpools_free(void)
1361 lnet_rtrbufpool_t
*rtrp
;
1364 if (the_lnet
.ln_rtrpools
== NULL
) /* uninitialized or freed */
1367 cfs_percpt_for_each(rtrp
, i
, the_lnet
.ln_rtrpools
) {
1368 lnet_rtrpool_free_bufs(&rtrp
[0]);
1369 lnet_rtrpool_free_bufs(&rtrp
[1]);
1370 lnet_rtrpool_free_bufs(&rtrp
[2]);
1373 cfs_percpt_free(the_lnet
.ln_rtrpools
);
1374 the_lnet
.ln_rtrpools
= NULL
;
1378 lnet_nrb_tiny_calculate(int npages
)
1380 int nrbs
= LNET_NRB_TINY
;
1382 if (tiny_router_buffers
< 0) {
1383 LCONSOLE_ERROR_MSG(0x10c,
1384 "tiny_router_buffers=%d invalid when routing enabled\n",
1385 tiny_router_buffers
);
1389 if (tiny_router_buffers
> 0)
1390 nrbs
= tiny_router_buffers
;
1392 nrbs
/= LNET_CPT_NUMBER
;
1393 return max(nrbs
, LNET_NRB_TINY_MIN
);
1397 lnet_nrb_small_calculate(int npages
)
1399 int nrbs
= LNET_NRB_SMALL
;
1401 if (small_router_buffers
< 0) {
1402 LCONSOLE_ERROR_MSG(0x10c,
1403 "small_router_buffers=%d invalid when routing enabled\n",
1404 small_router_buffers
);
1408 if (small_router_buffers
> 0)
1409 nrbs
= small_router_buffers
;
1411 nrbs
/= LNET_CPT_NUMBER
;
1412 return max(nrbs
, LNET_NRB_SMALL_MIN
);
1416 lnet_nrb_large_calculate(int npages
)
1418 int nrbs
= LNET_NRB_LARGE
;
1420 if (large_router_buffers
< 0) {
1421 LCONSOLE_ERROR_MSG(0x10c,
1422 "large_router_buffers=%d invalid when routing enabled\n",
1423 large_router_buffers
);
1427 if (large_router_buffers
> 0)
1428 nrbs
= large_router_buffers
;
1430 nrbs
/= LNET_CPT_NUMBER
;
1431 return max(nrbs
, LNET_NRB_LARGE_MIN
);
1435 lnet_rtrpools_alloc(int im_a_router
)
1437 lnet_rtrbufpool_t
*rtrp
;
1438 int large_pages
= (LNET_MTU
+ PAGE_CACHE_SIZE
- 1) >> PAGE_CACHE_SHIFT
;
1439 int small_pages
= 1;
1446 if (!strcmp(forwarding
, "")) {
1447 /* not set either way */
1450 } else if (!strcmp(forwarding
, "disabled")) {
1451 /* explicitly disabled */
1453 } else if (!strcmp(forwarding
, "enabled")) {
1454 /* explicitly enabled */
1456 LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
1460 nrb_tiny
= lnet_nrb_tiny_calculate(0);
1464 nrb_small
= lnet_nrb_small_calculate(small_pages
);
1468 nrb_large
= lnet_nrb_large_calculate(large_pages
);
1472 the_lnet
.ln_rtrpools
= cfs_percpt_alloc(lnet_cpt_table(),
1474 sizeof(lnet_rtrbufpool_t
));
1475 if (the_lnet
.ln_rtrpools
== NULL
) {
1476 LCONSOLE_ERROR_MSG(0x10c,
1477 "Failed to initialize router buffe pool\n");
1481 cfs_percpt_for_each(rtrp
, i
, the_lnet
.ln_rtrpools
) {
1482 lnet_rtrpool_init(&rtrp
[0], 0);
1483 rc
= lnet_rtrpool_alloc_bufs(&rtrp
[0], nrb_tiny
, i
);
1487 lnet_rtrpool_init(&rtrp
[1], small_pages
);
1488 rc
= lnet_rtrpool_alloc_bufs(&rtrp
[1], nrb_small
, i
);
1492 lnet_rtrpool_init(&rtrp
[2], large_pages
);
1493 rc
= lnet_rtrpool_alloc_bufs(&rtrp
[2], nrb_large
, i
);
1498 lnet_net_lock(LNET_LOCK_EX
);
1499 the_lnet
.ln_routing
= 1;
1500 lnet_net_unlock(LNET_LOCK_EX
);
1505 lnet_rtrpools_free();
1510 lnet_notify(lnet_ni_t
*ni
, lnet_nid_t nid
, int alive
, unsigned long when
)
1512 struct lnet_peer
*lp
= NULL
;
1513 unsigned long now
= cfs_time_current();
1514 int cpt
= lnet_cpt_of_nid(nid
);
1516 LASSERT (!in_interrupt ());
1518 CDEBUG (D_NET
, "%s notifying %s: %s\n",
1519 (ni
== NULL
) ? "userspace" : libcfs_nid2str(ni
->ni_nid
),
1520 libcfs_nid2str(nid
),
1521 alive
? "up" : "down");
1524 LNET_NIDNET(ni
->ni_nid
) != LNET_NIDNET(nid
)) {
1525 CWARN ("Ignoring notification of %s %s by %s (different net)\n",
1526 libcfs_nid2str(nid
), alive
? "birth" : "death",
1527 libcfs_nid2str(ni
->ni_nid
));
1531 /* can't do predictions... */
1532 if (cfs_time_after(when
, now
)) {
1533 CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n",
1534 (ni
== NULL
) ? "userspace" : libcfs_nid2str(ni
->ni_nid
),
1535 libcfs_nid2str(nid
), alive
? "up" : "down",
1536 cfs_duration_sec(cfs_time_sub(when
, now
)));
1540 if (ni
!= NULL
&& !alive
&& /* LND telling me she's down */
1541 !auto_down
) { /* auto-down disabled */
1542 CDEBUG(D_NET
, "Auto-down disabled\n");
1548 if (the_lnet
.ln_shutdown
) {
1549 lnet_net_unlock(cpt
);
1553 lp
= lnet_find_peer_locked(the_lnet
.ln_peer_tables
[cpt
], nid
);
1556 lnet_net_unlock(cpt
);
1557 CDEBUG(D_NET
, "%s not found\n", libcfs_nid2str(nid
));
1561 /* We can't fully trust LND on reporting exact peer last_alive
1562 * if he notifies us about dead peer. For example ksocklnd can
1563 * call us with when == _time_when_the_node_was_booted_ if
1564 * no connections were successfully established */
1565 if (ni
!= NULL
&& !alive
&& when
< lp
->lp_last_alive
)
1566 when
= lp
->lp_last_alive
;
1568 lnet_notify_locked(lp
, ni
== NULL
, alive
, when
);
1570 lnet_ni_notify_locked(ni
, lp
);
1572 lnet_peer_decref_locked(lp
);
1574 lnet_net_unlock(cpt
);
1577 EXPORT_SYMBOL(lnet_notify
);
/* Kernel build: tunables arrive via module parameters, so there is
 * nothing to read here.  (Body reconstructed as empty — the extraction
 * dropped it; confirm against upstream.) */
void
lnet_get_tunables(void)
{
}
1588 lnet_notify (lnet_ni_t
*ni
, lnet_nid_t nid
, int alive
, unsigned long when
)
1594 lnet_router_checker (void)
1599 time_t now
= get_seconds();
1600 int interval
= now
- last
;
1605 /* It's no use to call me again within a sec - all intervals and
1606 * timeouts are measured in seconds */
1607 if (last
!= 0 && interval
< 2)
1611 interval
> max(live_router_check_interval
,
1612 dead_router_check_interval
))
1613 CNETERR("Checker(%d/%d) not called for %d seconds\n",
1614 live_router_check_interval
, dead_router_check_interval
,
1617 LASSERT(LNET_CPT_NUMBER
== 1);
1620 LASSERT(!running
); /* recursion check */
1626 if (the_lnet
.ln_rc_state
== LNET_RC_STATE_STOPPING
)
1627 lnet_prune_rc_data(0); /* unlink all rcd and nowait */
1629 /* consume all pending events */
1634 /* NB ln_rc_eqh must be the 1st in 'eventqs' otherwise the
1635 * recursion breaker in LNetEQPoll would fail */
1636 rc
= LNetEQPoll(&the_lnet
.ln_rc_eqh
, 1, 0, &ev
, &i
);
1637 if (rc
== 0) /* no event pending */
1640 /* NB a lost SENT prevents me from pinging a router again */
1641 if (rc
== -EOVERFLOW
) {
1642 CERROR("Dropped an event!!!\n");
1648 lnet_router_checker_event(&ev
);
1651 if (the_lnet
.ln_rc_state
== LNET_RC_STATE_STOPPING
) {
1652 lnet_prune_rc_data(1); /* release rcd */
1653 the_lnet
.ln_rc_state
= LNET_RC_STATE_SHUTDOWN
;
1658 LASSERT (the_lnet
.ln_rc_state
== LNET_RC_STATE_RUNNING
);
1662 version
= the_lnet
.ln_routers_version
;
1663 list_for_each_entry (rtr
, &the_lnet
.ln_routers
, lp_rtr_list
) {
1664 lnet_ping_router_locked(rtr
);
1665 LASSERT (version
== the_lnet
.ln_routers_version
);
1670 running
= 0; /* lock only needed for the recursion check */
1674 /* NB lnet_peers_start_down depends on me,
1675 * so must be called before any peer creation */
1677 lnet_get_tunables (void)
1681 s
= getenv("LNET_ROUTER_PING_TIMEOUT");
1683 router_ping_timeout
= atoi(s
);
1685 s
= getenv("LNET_LIVE_ROUTER_CHECK_INTERVAL");
1687 live_router_check_interval
= atoi(s
);
1689 s
= getenv("LNET_DEAD_ROUTER_CHECK_INTERVAL");
1691 dead_router_check_interval
= atoi(s
);
1693 /* This replaces old lnd_notify mechanism */
1694 check_routers_before_use
= 1;
1695 if (dead_router_check_interval
<= 0)
1696 dead_router_check_interval
= 30;
/* Router-less build: no buffer pools exist, so freeing is a no-op.
 * (Body reconstructed as empty — the extraction dropped it; confirm
 * against upstream.) */
void
lnet_rtrpools_free(void)
{
}
1705 lnet_rtrpools_alloc(int im_a_arouter
)