]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - drivers/staging/lustre/lnet/lnet/router.c
pinctrl: sirf: move sgpio lock into state container
[mirror_ubuntu-zesty-kernel.git] / drivers / staging / lustre / lnet / lnet / router.c
1 /*
2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
3 *
4 * Copyright (c) 2011, 2012, Intel Corporation.
5 *
6 * This file is part of Portals
7 * http://sourceforge.net/projects/sandiaportals/
8 *
9 * Portals is free software; you can redistribute it and/or
10 * modify it under the terms of version 2 of the GNU General Public
11 * License as published by the Free Software Foundation.
12 *
13 * Portals is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with Portals; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 *
22 */
23
24 #define DEBUG_SUBSYSTEM S_LNET
25 #include "../../include/linux/lnet/lib-lnet.h"
26
27 #if defined(LNET_ROUTER)
28
29 #define LNET_NRB_TINY_MIN 512 /* min value for each CPT */
30 #define LNET_NRB_TINY (LNET_NRB_TINY_MIN * 4)
31 #define LNET_NRB_SMALL_MIN 4096 /* min value for each CPT */
32 #define LNET_NRB_SMALL (LNET_NRB_SMALL_MIN * 4)
33 #define LNET_NRB_LARGE_MIN 256 /* min value for each CPT */
34 #define LNET_NRB_LARGE (LNET_NRB_LARGE_MIN * 4)
35
36 static char *forwarding = "";
37 module_param(forwarding, charp, 0444);
38 MODULE_PARM_DESC(forwarding, "Explicitly enable/disable forwarding between networks");
39
40 static int tiny_router_buffers;
41 module_param(tiny_router_buffers, int, 0444);
42 MODULE_PARM_DESC(tiny_router_buffers, "# of 0 payload messages to buffer in the router");
43 static int small_router_buffers;
44 module_param(small_router_buffers, int, 0444);
45 MODULE_PARM_DESC(small_router_buffers, "# of small (1 page) messages to buffer in the router");
46 static int large_router_buffers;
47 module_param(large_router_buffers, int, 0444);
48 MODULE_PARM_DESC(large_router_buffers, "# of large messages to buffer in the router");
49 static int peer_buffer_credits;
50 module_param(peer_buffer_credits, int, 0444);
51 MODULE_PARM_DESC(peer_buffer_credits, "# router buffer credits per peer");
52
53 static int auto_down = 1;
54 module_param(auto_down, int, 0444);
55 MODULE_PARM_DESC(auto_down, "Automatically mark peers down on comms error");
56
57 int
58 lnet_peer_buffer_credits(lnet_ni_t *ni)
59 {
60 /* NI option overrides LNet default */
61 if (ni->ni_peerrtrcredits > 0)
62 return ni->ni_peerrtrcredits;
63 if (peer_buffer_credits > 0)
64 return peer_buffer_credits;
65
66 /* As an approximation, allow this peer the same number of router
67 * buffers as it is allowed outstanding sends */
68 return ni->ni_peertxcredits;
69 }
70
71 /* forward ref's */
72 static int lnet_router_checker(void *);
73 #else
74
/* Non-router build (#else of LNET_ROUTER): no router buffers exist,
 * so peers get zero router buffer credits. */
int
lnet_peer_buffer_credits(lnet_ni_t *ni)
{
	return 0;
}
81 #endif
82
83 static int check_routers_before_use;
84 module_param(check_routers_before_use, int, 0444);
85 MODULE_PARM_DESC(check_routers_before_use, "Assume routers are down and ping them before use");
86
87 int avoid_asym_router_failure = 1;
88 module_param(avoid_asym_router_failure, int, 0644);
89 MODULE_PARM_DESC(avoid_asym_router_failure, "Avoid asymmetrical router failures (0 to disable)");
90
91 static int dead_router_check_interval = 60;
92 module_param(dead_router_check_interval, int, 0644);
93 MODULE_PARM_DESC(dead_router_check_interval, "Seconds between dead router health checks (<= 0 to disable)");
94
95 static int live_router_check_interval = 60;
96 module_param(live_router_check_interval, int, 0644);
97 MODULE_PARM_DESC(live_router_check_interval, "Seconds between live router health checks (<= 0 to disable)");
98
99 static int router_ping_timeout = 50;
100 module_param(router_ping_timeout, int, 0644);
101 MODULE_PARM_DESC(router_ping_timeout, "Seconds to wait for the reply to a router health query");
102
int
lnet_peers_start_down(void)
{
	/* Non-zero if peers should be presumed dead until a successful
	 * ping proves otherwise; driven directly by the
	 * check_routers_before_use module parameter. */
	return check_routers_before_use;
}
108
/* Record a liveness observation for peer @lp made at time @when (jiffies).
 * Caller holds the net lock.  @notifylnd asks that the LND also be told;
 * the actual callout is deferred to lnet_ni_notify_locked(). */
void
lnet_notify_locked(lnet_peer_t *lp, int notifylnd, int alive, unsigned long when)
{
	if (time_before(when, lp->lp_timestamp)) { /* out of date information */
		CDEBUG(D_NET, "Out of date\n");
		return;
	}

	lp->lp_timestamp = when; /* update timestamp */
	lp->lp_ping_deadline = 0; /* disable ping timeout */

	if (lp->lp_alive_count != 0 &&	  /* got old news */
	    (!lp->lp_alive) == (!alive)) {      /* new date for old news */
		CDEBUG(D_NET, "Old news\n");
		return;
	}

	/* Flag that notification is outstanding */

	lp->lp_alive_count++;
	lp->lp_alive = !(!alive); /* 1 bit! */
	lp->lp_notify = 1;
	lp->lp_notifylnd |= notifylnd;
	if (lp->lp_alive)
		lp->lp_ping_feats = LNET_PING_FEAT_INVAL; /* reset */

	CDEBUG(D_NET, "set %s %d\n", libcfs_nid2str(lp->lp_nid), alive);
}
137
/* Deliver any pending liveness notification for @lp to the LND of @ni.
 * Called and returns with the net lock held, but drops it around the LND
 * callout.  lp_notifying serializes so only one thread notifies at a time. */
static void
lnet_ni_notify_locked(lnet_ni_t *ni, lnet_peer_t *lp)
{
	int alive;
	int notifylnd;

	/* Notify only in 1 thread at any time to ensure ordered notification.
	 * NB individual events can be missed; the only guarantee is that you
	 * always get the most recent news */

	if (lp->lp_notifying || ni == NULL)
		return;

	lp->lp_notifying = 1;

	while (lp->lp_notify) {
		alive = lp->lp_alive;
		notifylnd = lp->lp_notifylnd;

		/* clear the flags before dropping the lock; a notification
		 * arriving while unlocked re-sets them and is picked up by
		 * the next loop iteration */
		lp->lp_notifylnd = 0;
		lp->lp_notify    = 0;

		if (notifylnd && ni->ni_lnd->lnd_notify != NULL) {
			lnet_net_unlock(lp->lp_cpt);

			/* A new notification could happen now; I'll handle it
			 * when control returns to me */

			(ni->ni_lnd->lnd_notify)(ni, lp->lp_nid, alive);

			lnet_net_lock(lp->lp_cpt);
		}
	}

	lp->lp_notifying = 0;
}
174
175
/* Take a router reference on @lp.  On the 0 -> 1 transition, insert it
 * into the global router list (kept sorted by NID) and bump the routers
 * version so scanning threads notice the change. */
static void
lnet_rtr_addref_locked(lnet_peer_t *lp)
{
	LASSERT(lp->lp_refcount > 0);
	LASSERT(lp->lp_rtr_refcount >= 0);

	/* lnet_net_lock must be exclusively locked */
	lp->lp_rtr_refcount++;
	if (lp->lp_rtr_refcount == 1) {
		struct list_head *pos;

		/* a simple insertion sort */
		list_for_each_prev(pos, &the_lnet.ln_routers) {
			lnet_peer_t *rtr = list_entry(pos, lnet_peer_t,
						      lp_rtr_list);

			if (rtr->lp_nid < lp->lp_nid)
				break;
		}

		list_add(&lp->lp_rtr_list, pos);
		/* addref for the_lnet.ln_routers */
		lnet_peer_addref_locked(lp);
		the_lnet.ln_routers_version++;
	}
}
202
/* Drop a router reference on @lp.  On the 1 -> 0 transition, retire its
 * router-checker data to the deathrow list, unlink it from the global
 * router list and release the list's peer reference. */
static void
lnet_rtr_decref_locked(lnet_peer_t *lp)
{
	LASSERT(lp->lp_refcount > 0);
	LASSERT(lp->lp_rtr_refcount > 0);

	/* lnet_net_lock must be exclusively locked */
	lp->lp_rtr_refcount--;
	if (lp->lp_rtr_refcount == 0) {
		/* no routes may still reference this gateway */
		LASSERT(list_empty(&lp->lp_routes));

		if (lp->lp_rcd != NULL) {
			/* defer RCD teardown to lnet_prune_rc_data() */
			list_add(&lp->lp_rcd->rcd_list,
				 &the_lnet.ln_rcd_deathrow);
			lp->lp_rcd = NULL;
		}

		list_del(&lp->lp_rtr_list);
		/* decref for the_lnet.ln_routers */
		lnet_peer_decref_locked(lp);
		the_lnet.ln_routers_version++;
	}
}
226
227 lnet_remotenet_t *
228 lnet_find_net_locked (__u32 net)
229 {
230 lnet_remotenet_t *rnet;
231 struct list_head *tmp;
232 struct list_head *rn_list;
233
234 LASSERT(!the_lnet.ln_shutdown);
235
236 rn_list = lnet_net2rnethash(net);
237 list_for_each(tmp, rn_list) {
238 rnet = list_entry(tmp, lnet_remotenet_t, lrn_list);
239
240 if (rnet->lrn_net == net)
241 return rnet;
242 }
243 return NULL;
244 }
245
246 static void lnet_shuffle_seed(void)
247 {
248 static int seeded;
249 int lnd_type, seed[2];
250 struct timeval tv;
251 lnet_ni_t *ni;
252 struct list_head *tmp;
253
254 if (seeded)
255 return;
256
257 cfs_get_random_bytes(seed, sizeof(seed));
258
259 /* Nodes with small feet have little entropy
260 * the NID for this node gives the most entropy in the low bits */
261 list_for_each(tmp, &the_lnet.ln_nis) {
262 ni = list_entry(tmp, lnet_ni_t, ni_list);
263 lnd_type = LNET_NETTYP(LNET_NIDNET(ni->ni_nid));
264
265 if (lnd_type != LOLND)
266 seed[0] ^= (LNET_NIDADDR(ni->ni_nid) | lnd_type);
267 }
268
269 do_gettimeofday(&tv);
270 cfs_srand(tv.tv_sec ^ seed[0], tv.tv_usec ^ seed[1]);
271 seeded = 1;
272 return;
273 }
274
/* NB expects LNET_LOCK held */
/* Link @route into @rnet's route list at a random position (so traffic
 * spreads across equivalent routes) and take the gateway's router ref. */
static void
lnet_add_route_to_rnet (lnet_remotenet_t *rnet, lnet_route_t *route)
{
	unsigned int len = 0;
	unsigned int offset = 0;
	struct list_head *e;

	lnet_shuffle_seed();

	/* count existing routes to bound the random offset */
	list_for_each (e, &rnet->lrn_routes) {
		len++;
	}

	/* len+1 positions to add a new entry, also prevents division by 0 */
	offset = cfs_rand() % (len + 1);
	list_for_each (e, &rnet->lrn_routes) {
		if (offset == 0)
			break;
		offset--;
	}
	list_add(&route->lr_list, e);
	/* also index the route by gateway */
	list_add(&route->lr_gwlist, &route->lr_gateway->lp_routes);

	the_lnet.ln_remote_nets_version++;
	lnet_rtr_addref_locked(route->lr_gateway);
}
302
/* Add a route to @net through @gateway with the given @hops and @priority.
 * Returns 0 on success or when the entry is deliberately ignored (local
 * net, unreachable gateway, duplicate); negative errno otherwise. */
int
lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway,
	       unsigned int priority)
{
	struct list_head *e;
	lnet_remotenet_t *rnet;
	lnet_remotenet_t *rnet2;
	lnet_route_t *route;
	lnet_ni_t *ni;
	int add_route;
	int rc;

	CDEBUG(D_NET, "Add route: net %s hops %u priority %u gw %s\n",
	       libcfs_net2str(net), hops, priority, libcfs_nid2str(gateway));

	/* reject wildcard/loopback gateway or net, a gateway sitting on
	 * the destination net itself, or an out-of-range hop count */
	if (gateway == LNET_NID_ANY ||
	    LNET_NETTYP(LNET_NIDNET(gateway)) == LOLND ||
	    net == LNET_NIDNET(LNET_NID_ANY) ||
	    LNET_NETTYP(net) == LOLND ||
	    LNET_NIDNET(gateway) == net ||
	    hops < 1 || hops > 255)
		return -EINVAL;

	if (lnet_islocalnet(net))	/* it's a local network */
		return 0;		/* ignore the route entry */

	/* Assume net, route, all new */
	LIBCFS_ALLOC(route, sizeof(*route));
	LIBCFS_ALLOC(rnet, sizeof(*rnet));
	if (route == NULL || rnet == NULL) {
		CERROR("Out of memory creating route %s %d %s\n",
		       libcfs_net2str(net), hops, libcfs_nid2str(gateway));
		if (route != NULL)
			LIBCFS_FREE(route, sizeof(*route));
		if (rnet != NULL)
			LIBCFS_FREE(rnet, sizeof(*rnet));
		return -ENOMEM;
	}

	INIT_LIST_HEAD(&rnet->lrn_routes);
	rnet->lrn_net = net;
	route->lr_hops = hops;
	route->lr_net = net;
	route->lr_priority = priority;

	lnet_net_lock(LNET_LOCK_EX);

	/* resolve the gateway NID to a peer (takes a peer ref on success) */
	rc = lnet_nid2peer_locked(&route->lr_gateway, gateway, LNET_LOCK_EX);
	if (rc != 0) {
		lnet_net_unlock(LNET_LOCK_EX);

		LIBCFS_FREE(route, sizeof(*route));
		LIBCFS_FREE(rnet, sizeof(*rnet));

		if (rc == -EHOSTUNREACH) { /* gateway is not on a local net */
			return 0;	/* ignore the route entry */
		} else {
			CERROR("Error %d creating route %s %d %s\n", rc,
			       libcfs_net2str(net), hops,
			       libcfs_nid2str(gateway));
		}
		return rc;
	}

	LASSERT (!the_lnet.ln_shutdown);

	rnet2 = lnet_find_net_locked(net);
	if (rnet2 == NULL) {
		/* new network */
		list_add_tail(&rnet->lrn_list, lnet_net2rnethash(net));
		rnet2 = rnet;
	}

	/* Search for a duplicate route (it's a NOOP if it is) */
	add_route = 1;
	list_for_each (e, &rnet2->lrn_routes) {
		lnet_route_t *route2 = list_entry(e, lnet_route_t, lr_list);

		if (route2->lr_gateway == route->lr_gateway) {
			add_route = 0;
			break;
		}

		/* our lookups must be true */
		LASSERT (route2->lr_gateway->lp_nid != gateway);
	}

	if (add_route) {
		lnet_peer_addref_locked(route->lr_gateway); /* +1 for notify */
		lnet_add_route_to_rnet(rnet2, route);

		ni = route->lr_gateway->lp_ni;
		/* drop the lock for the LND callout below */
		lnet_net_unlock(LNET_LOCK_EX);

		/* XXX Assume alive */
		if (ni->ni_lnd->lnd_notify != NULL)
			(ni->ni_lnd->lnd_notify)(ni, gateway, 1);

		lnet_net_lock(LNET_LOCK_EX);
	}

	/* -1 for notify or !add_route */
	lnet_peer_decref_locked(route->lr_gateway);
	lnet_net_unlock(LNET_LOCK_EX);

	/* free whatever was not consumed by the table */
	if (!add_route)
		LIBCFS_FREE(route, sizeof(*route));

	if (rnet != rnet2)
		LIBCFS_FREE(rnet, sizeof(*rnet));

	return 0;
}
416
/* Verify no remote net is reachable via gateways on different local NIs
 * (an unsupported configuration).  Returns 0 if the table is consistent,
 * -EINVAL on the first offending pair. */
int
lnet_check_routes(void)
{
	lnet_remotenet_t *rnet;
	lnet_route_t *route;
	lnet_route_t *route2;
	struct list_head *e1;
	struct list_head *e2;
	int cpt;
	struct list_head *rn_list;
	int i;

	cpt = lnet_net_lock_current();

	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
		rn_list = &the_lnet.ln_remote_nets_hash[i];
		list_for_each(e1, rn_list) {
			rnet = list_entry(e1, lnet_remotenet_t, lrn_list);

			/* compare every route against the first one seen */
			route2 = NULL;
			list_for_each(e2, &rnet->lrn_routes) {
				lnet_nid_t nid1;
				lnet_nid_t nid2;
				int net;

				route = list_entry(e2, lnet_route_t,
						   lr_list);

				if (route2 == NULL) {
					route2 = route;
					continue;
				}

				if (route->lr_gateway->lp_ni ==
				    route2->lr_gateway->lp_ni)
					continue;

				/* copy what we need before unlocking */
				nid1 = route->lr_gateway->lp_nid;
				nid2 = route2->lr_gateway->lp_nid;
				net = rnet->lrn_net;

				lnet_net_unlock(cpt);

				CERROR("Routes to %s via %s and %s not supported\n",
				       libcfs_net2str(net),
				       libcfs_nid2str(nid1),
				       libcfs_nid2str(nid2));
				return -EINVAL;
			}
		}
	}

	lnet_net_unlock(cpt);
	return 0;
}
472
/* Delete route(s): @net and/or @gw_nid may be LNET_NID_ANY wildcards.
 * Returns 0 if at least one route was removed, -ENOENT otherwise.
 * The lock is cycled around each free, so the scan restarts ("again")
 * after every deletion. */
int
lnet_del_route(__u32 net, lnet_nid_t gw_nid)
{
	struct lnet_peer *gateway;
	lnet_remotenet_t *rnet;
	lnet_route_t *route;
	struct list_head *e1;
	struct list_head *e2;
	int rc = -ENOENT;
	struct list_head *rn_list;
	int idx = 0;

	CDEBUG(D_NET, "Del route: net %s : gw %s\n",
	       libcfs_net2str(net), libcfs_nid2str(gw_nid));

	/* NB Caller may specify either all routes via the given gateway
	 * or a specific route entry actual NIDs) */

	lnet_net_lock(LNET_LOCK_EX);
	if (net == LNET_NIDNET(LNET_NID_ANY))
		rn_list = &the_lnet.ln_remote_nets_hash[0];
	else
		rn_list = lnet_net2rnethash(net);

 again:
	list_for_each(e1, rn_list) {
		rnet = list_entry(e1, lnet_remotenet_t, lrn_list);

		if (!(net == LNET_NIDNET(LNET_NID_ANY) ||
		      net == rnet->lrn_net))
			continue;

		list_for_each(e2, &rnet->lrn_routes) {
			route = list_entry(e2, lnet_route_t, lr_list);

			gateway = route->lr_gateway;
			if (!(gw_nid == LNET_NID_ANY ||
			      gw_nid == gateway->lp_nid))
				continue;

			list_del(&route->lr_list);
			list_del(&route->lr_gwlist);
			the_lnet.ln_remote_nets_version++;

			/* free the rnet too if this was its last route;
			 * rnet == NULL below means "keep it" */
			if (list_empty(&rnet->lrn_routes))
				list_del(&rnet->lrn_list);
			else
				rnet = NULL;

			lnet_rtr_decref_locked(gateway);
			lnet_peer_decref_locked(gateway);

			lnet_net_unlock(LNET_LOCK_EX);

			LIBCFS_FREE(route, sizeof(*route));

			if (rnet != NULL)
				LIBCFS_FREE(rnet, sizeof(*rnet));

			rc = 0;
			/* lists may have changed while unlocked: rescan */
			lnet_net_lock(LNET_LOCK_EX);
			goto again;
		}
	}

	/* wildcard net: walk every hash chain in turn */
	if (net == LNET_NIDNET(LNET_NID_ANY) &&
	    ++idx < LNET_REMOTE_NETS_HASH_SIZE) {
		rn_list = &the_lnet.ln_remote_nets_hash[idx];
		goto again;
	}
	lnet_net_unlock(LNET_LOCK_EX);

	return rc;
}
547
void
lnet_destroy_routes (void)
{
	/* wildcard net + wildcard gateway deletes every route */
	lnet_del_route(LNET_NIDNET(LNET_NID_ANY), LNET_NID_ANY);
}
553
554 int
555 lnet_get_route(int idx, __u32 *net, __u32 *hops,
556 lnet_nid_t *gateway, __u32 *alive, __u32 *priority)
557 {
558 struct list_head *e1;
559 struct list_head *e2;
560 lnet_remotenet_t *rnet;
561 lnet_route_t *route;
562 int cpt;
563 int i;
564 struct list_head *rn_list;
565
566 cpt = lnet_net_lock_current();
567
568 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
569 rn_list = &the_lnet.ln_remote_nets_hash[i];
570 list_for_each(e1, rn_list) {
571 rnet = list_entry(e1, lnet_remotenet_t, lrn_list);
572
573 list_for_each(e2, &rnet->lrn_routes) {
574 route = list_entry(e2, lnet_route_t,
575 lr_list);
576
577 if (idx-- == 0) {
578 *net = rnet->lrn_net;
579 *hops = route->lr_hops;
580 *priority = route->lr_priority;
581 *gateway = route->lr_gateway->lp_nid;
582 *alive = route->lr_gateway->lp_alive;
583 lnet_net_unlock(cpt);
584 return 0;
585 }
586 }
587 }
588 }
589
590 lnet_net_unlock(cpt);
591 return -ENOENT;
592 }
593
594 void
595 lnet_swap_pinginfo(lnet_ping_info_t *info)
596 {
597 int i;
598 lnet_ni_status_t *stat;
599
600 __swab32s(&info->pi_magic);
601 __swab32s(&info->pi_features);
602 __swab32s(&info->pi_pid);
603 __swab32s(&info->pi_nnis);
604 for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
605 stat = &info->pi_ni[i];
606 __swab64s(&stat->ns_nid);
607 __swab32s(&stat->ns_status);
608 }
609 return;
610 }
611
/**
 * parse router-checker pinginfo, record number of down NIs for remote
 * networks on that router.
 */
static void
lnet_parse_rc_info(lnet_rc_data_t *rcd)
{
	lnet_ping_info_t *info = rcd->rcd_pinginfo;
	struct lnet_peer *gw = rcd->rcd_gateway;
	lnet_route_t *rtr;

	if (!gw->lp_alive)
		return;

	/* fix up endianness if the router has the opposite byte order */
	if (info->pi_magic == __swab32(LNET_PROTO_PING_MAGIC))
		lnet_swap_pinginfo(info);

	/* NB always racing with network! */
	if (info->pi_magic != LNET_PROTO_PING_MAGIC) {
		CDEBUG(D_NET, "%s: Unexpected magic %08x\n",
		       libcfs_nid2str(gw->lp_nid), info->pi_magic);
		gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
		return;
	}

	gw->lp_ping_feats = info->pi_features;
	if ((gw->lp_ping_feats & LNET_PING_FEAT_MASK) == 0) {
		CDEBUG(D_NET, "%s: Unexpected features 0x%x\n",
		       libcfs_nid2str(gw->lp_nid), gw->lp_ping_feats);
		return; /* nothing I can understand */
	}

	if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) == 0)
		return; /* can't carry NI status info */

	/* for each route through this gateway, count the router NIs
	 * reported down and record it in lr_downis */
	list_for_each_entry(rtr, &gw->lp_routes, lr_gwlist) {
		int ptl_status = LNET_NI_STATUS_INVALID;
		int down = 0;
		int up = 0;
		int i;

		for (i = 0; i < info->pi_nnis && i < LNET_MAX_RTR_NIS; i++) {
			lnet_ni_status_t *stat = &info->pi_ni[i];
			lnet_nid_t nid = stat->ns_nid;

			if (nid == LNET_NID_ANY) {
				CDEBUG(D_NET, "%s: unexpected LNET_NID_ANY\n",
				       libcfs_nid2str(gw->lp_nid));
				gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
				return;
			}

			if (LNET_NETTYP(LNET_NIDNET(nid)) == LOLND)
				continue;

			if (stat->ns_status == LNET_NI_STATUS_DOWN) {
				/* ptl NIs are aggregated separately (see
				 * below); any other down NI counts */
				if (LNET_NETTYP(LNET_NIDNET(nid)) != PTLLND)
					down++;
				else if (ptl_status != LNET_NI_STATUS_UP)
					ptl_status = LNET_NI_STATUS_DOWN;
				continue;
			}

			if (stat->ns_status == LNET_NI_STATUS_UP) {
				if (LNET_NIDNET(nid) == rtr->lr_net) {
					up = 1;
					break;
				}
				/* ptl NIs are considered down only when
				 * they're all down */
				if (LNET_NETTYP(LNET_NIDNET(nid)) == PTLLND)
					ptl_status = LNET_NI_STATUS_UP;
				continue;
			}

			CDEBUG(D_NET, "%s: Unexpected status 0x%x\n",
			       libcfs_nid2str(gw->lp_nid), stat->ns_status);
			gw->lp_ping_feats = LNET_PING_FEAT_INVAL;
			return;
		}

		if (up) { /* ignore downed NIs if NI for dest network is up */
			rtr->lr_downis = 0;
			continue;
		}
		rtr->lr_downis = down + (ptl_status == LNET_NI_STATUS_DOWN);
	}
}
700
/* EQ callback for router-checker pings.  Runs with lnet_res_lock held
 * (see comment below), so lock ordering against the net lock matters. */
static void
lnet_router_checker_event(lnet_event_t *event)
{
	lnet_rc_data_t *rcd = event->md.user_ptr;
	struct lnet_peer *lp;

	LASSERT(rcd != NULL);

	if (event->unlinked) {
		/* MD is gone; mark the handle so teardown can proceed */
		LNetInvalidateHandle(&rcd->rcd_mdh);
		return;
	}

	LASSERT(event->type == LNET_EVENT_SEND ||
		event->type == LNET_EVENT_REPLY);

	lp = rcd->rcd_gateway;
	LASSERT(lp != NULL);

	/* NB: it's called with holding lnet_res_lock, we have a few
	 * places need to hold both locks at the same time, please take
	 * care of lock ordering */
	lnet_net_lock(lp->lp_cpt);
	if (!lnet_isrouter(lp) || lp->lp_rcd != rcd) {
		/* ignore if no longer a router or rcd is replaced */
		goto out;
	}

	if (event->type == LNET_EVENT_SEND) {
		lp->lp_ping_notsent = 0;
		/* a successful SEND says nothing about liveness yet;
		 * wait for the REPLY */
		if (event->status == 0)
			goto out;
	}

	/* LNET_EVENT_REPLY */
	/* A successful REPLY means the router is up. If _any_ comms
	 * to the router fail I assume it's down (this will happen if
	 * we ping alive routers to try to detect router death before
	 * apps get burned). */

	lnet_notify_locked(lp, 1, (event->status == 0), cfs_time_current());
	/* The router checker will wake up very shortly and do the
	 * actual notification.
	 * XXX If 'lp' stops being a router before then, it will still
	 * have the notification pending!!! */

	if (avoid_asym_router_failure && event->status == 0)
		lnet_parse_rc_info(rcd);

 out:
	lnet_net_unlock(lp->lp_cpt);
}
753
754 static void
755 lnet_wait_known_routerstate(void)
756 {
757 lnet_peer_t *rtr;
758 struct list_head *entry;
759 int all_known;
760
761 LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
762
763 for (;;) {
764 int cpt = lnet_net_lock_current();
765
766 all_known = 1;
767 list_for_each (entry, &the_lnet.ln_routers) {
768 rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);
769
770 if (rtr->lp_alive_count == 0) {
771 all_known = 0;
772 break;
773 }
774 }
775
776 lnet_net_unlock(cpt);
777
778 if (all_known)
779 return;
780
781 set_current_state(TASK_UNINTERRUPTIBLE);
782 schedule_timeout(cfs_time_seconds(1));
783 }
784 }
785
786 void
787 lnet_router_ni_update_locked(lnet_peer_t *gw, __u32 net)
788 {
789 lnet_route_t *rte;
790
791 if ((gw->lp_ping_feats & LNET_PING_FEAT_NI_STATUS) != 0) {
792 list_for_each_entry(rte, &gw->lp_routes, lr_gwlist) {
793 if (rte->lr_net == net) {
794 rte->lr_downis = 0;
795 break;
796 }
797 }
798 }
799 }
800
/* Mark local NIs "down" in the exported status block when they have not
 * reported alive within the router-checker window.  Only called when this
 * node is routing. */
static void
lnet_update_ni_status_locked(void)
{
	lnet_ni_t *ni;
	long now;
	int timeout;

	LASSERT(the_lnet.ln_routing);

	/* an NI is stale after one full ping round-trip plus the longer
	 * of the two check intervals */
	timeout = router_ping_timeout +
		  max(live_router_check_interval, dead_router_check_interval);

	now = get_seconds();
	list_for_each_entry(ni, &the_lnet.ln_nis, ni_list) {
		if (ni->ni_lnd->lnd_type == LOLND)
			continue;

		/* cheap unlocked check first */
		if (now < ni->ni_last_alive + timeout)
			continue;

		lnet_ni_lock(ni);
		/* re-check with lock */
		if (now < ni->ni_last_alive + timeout) {
			lnet_ni_unlock(ni);
			continue;
		}

		LASSERT(ni->ni_status != NULL);

		if (ni->ni_status->ns_status != LNET_NI_STATUS_DOWN) {
			CDEBUG(D_NET, "NI(%s:%d) status changed to down\n",
			       libcfs_nid2str(ni->ni_nid), timeout);
			/* NB: so far, this is the only place to set
			 * NI status to "down" */
			ni->ni_status->ns_status = LNET_NI_STATUS_DOWN;
		}
		lnet_ni_unlock(ni);
	}
}
840
/* Free a router-checker data block.  The RCD must already be off every
 * list and its MD fully unlinked (the unlink event invalidates rcd_mdh). */
static void
lnet_destroy_rc_data(lnet_rc_data_t *rcd)
{
	LASSERT(list_empty(&rcd->rcd_list));
	/* detached from network */
	LASSERT(LNetHandleIsInvalid(rcd->rcd_mdh));

	if (rcd->rcd_gateway != NULL) {
		/* drop the gateway ref taken in lnet_create_rc_data_locked */
		int cpt = rcd->rcd_gateway->lp_cpt;

		lnet_net_lock(cpt);
		lnet_peer_decref_locked(rcd->rcd_gateway);
		lnet_net_unlock(cpt);
	}

	if (rcd->rcd_pinginfo != NULL)
		LIBCFS_FREE(rcd->rcd_pinginfo, LNET_PINGINFO_SIZE);

	LIBCFS_FREE(rcd, sizeof(*rcd));
}
861
/* Allocate and attach router-checker data for @gateway.  Called and
 * returns with the gateway's net lock held, but drops it across the
 * allocations and MD bind, so the gateway's state is re-checked after
 * relocking.  Returns the attached RCD (possibly one racing thread
 * installed) or NULL on failure. */
static lnet_rc_data_t *
lnet_create_rc_data_locked(lnet_peer_t *gateway)
{
	lnet_rc_data_t *rcd = NULL;
	lnet_ping_info_t *pi;
	int rc;
	int i;

	lnet_net_unlock(gateway->lp_cpt);

	LIBCFS_ALLOC(rcd, sizeof(*rcd));
	if (rcd == NULL)
		goto out;

	LNetInvalidateHandle(&rcd->rcd_mdh);
	INIT_LIST_HEAD(&rcd->rcd_list);

	LIBCFS_ALLOC(pi, LNET_PINGINFO_SIZE);
	if (pi == NULL)
		goto out;

	/* pre-fill NI slots as invalid so partial replies are detectable */
	for (i = 0; i < LNET_MAX_RTR_NIS; i++) {
		pi->pi_ni[i].ns_nid = LNET_NID_ANY;
		pi->pi_ni[i].ns_status = LNET_NI_STATUS_INVALID;
	}
	rcd->rcd_pinginfo = pi;

	LASSERT (!LNetHandleIsInvalid(the_lnet.ln_rc_eqh));
	rc = LNetMDBind((lnet_md_t){.start = pi,
				    .user_ptr = rcd,
				    .length = LNET_PINGINFO_SIZE,
				    .threshold = LNET_MD_THRESH_INF,
				    .options = LNET_MD_TRUNCATE,
				    .eq_handle = the_lnet.ln_rc_eqh},
			LNET_UNLINK,
			&rcd->rcd_mdh);
	if (rc < 0) {
		CERROR("Can't bind MD: %d\n", rc);
		goto out;
	}
	LASSERT(rc == 0);

	lnet_net_lock(gateway->lp_cpt);
	/* router table changed or someone has created rcd for this gateway */
	if (!lnet_isrouter(gateway) || gateway->lp_rcd != NULL) {
		lnet_net_unlock(gateway->lp_cpt);
		goto out;
	}

	/* ref dropped in lnet_destroy_rc_data() */
	lnet_peer_addref_locked(gateway);
	rcd->rcd_gateway = gateway;
	gateway->lp_rcd = rcd;
	gateway->lp_ping_notsent = 0;

	return rcd;

 out:
	if (rcd != NULL) {
		if (!LNetHandleIsInvalid(rcd->rcd_mdh)) {
			rc = LNetMDUnlink(rcd->rcd_mdh);
			LASSERT(rc == 0);
		}
		lnet_destroy_rc_data(rcd);
	}

	lnet_net_lock(gateway->lp_cpt);
	return gateway->lp_rcd;
}
930
931 static int
932 lnet_router_check_interval (lnet_peer_t *rtr)
933 {
934 int secs;
935
936 secs = rtr->lp_alive ? live_router_check_interval :
937 dead_router_check_interval;
938 if (secs < 0)
939 secs = 0;
940
941 return secs;
942 }
943
944 static void
945 lnet_ping_router_locked (lnet_peer_t *rtr)
946 {
947 lnet_rc_data_t *rcd = NULL;
948 unsigned long now = cfs_time_current();
949 int secs;
950
951 lnet_peer_addref_locked(rtr);
952
953 if (rtr->lp_ping_deadline != 0 && /* ping timed out? */
954 cfs_time_after(now, rtr->lp_ping_deadline))
955 lnet_notify_locked(rtr, 1, 0, now);
956
957 /* Run any outstanding notifications */
958 lnet_ni_notify_locked(rtr->lp_ni, rtr);
959
960 if (!lnet_isrouter(rtr) ||
961 the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
962 /* router table changed or router checker is shutting down */
963 lnet_peer_decref_locked(rtr);
964 return;
965 }
966
967 rcd = rtr->lp_rcd != NULL ?
968 rtr->lp_rcd : lnet_create_rc_data_locked(rtr);
969
970 if (rcd == NULL)
971 return;
972
973 secs = lnet_router_check_interval(rtr);
974
975 CDEBUG(D_NET,
976 "rtr %s %d: deadline %lu ping_notsent %d alive %d alive_count %d lp_ping_timestamp %lu\n",
977 libcfs_nid2str(rtr->lp_nid), secs,
978 rtr->lp_ping_deadline, rtr->lp_ping_notsent,
979 rtr->lp_alive, rtr->lp_alive_count, rtr->lp_ping_timestamp);
980
981 if (secs != 0 && !rtr->lp_ping_notsent &&
982 cfs_time_after(now, cfs_time_add(rtr->lp_ping_timestamp,
983 cfs_time_seconds(secs)))) {
984 int rc;
985 lnet_process_id_t id;
986 lnet_handle_md_t mdh;
987
988 id.nid = rtr->lp_nid;
989 id.pid = LUSTRE_SRV_LNET_PID;
990 CDEBUG(D_NET, "Check: %s\n", libcfs_id2str(id));
991
992 rtr->lp_ping_notsent = 1;
993 rtr->lp_ping_timestamp = now;
994
995 mdh = rcd->rcd_mdh;
996
997 if (rtr->lp_ping_deadline == 0) {
998 rtr->lp_ping_deadline =
999 cfs_time_shift(router_ping_timeout);
1000 }
1001
1002 lnet_net_unlock(rtr->lp_cpt);
1003
1004 rc = LNetGet(LNET_NID_ANY, mdh, id, LNET_RESERVED_PORTAL,
1005 LNET_PROTO_PING_MATCHBITS, 0);
1006
1007 lnet_net_lock(rtr->lp_cpt);
1008 if (rc != 0)
1009 rtr->lp_ping_notsent = 0; /* no event pending */
1010 }
1011
1012 lnet_peer_decref_locked(rtr);
1013 return;
1014 }
1015
/* Start the router checker: allocate its EQ and spawn the checker thread.
 * Returns 0 when started (or not needed), -EINVAL/-ENOMEM on failure. */
int
lnet_router_checker_start(void)
{
	int rc;
	int eqsz;

	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);

	if (check_routers_before_use &&
	    dead_router_check_interval <= 0) {
		LCONSOLE_ERROR_MSG(0x10a, "'dead_router_check_interval' must be set if 'check_routers_before_use' is set\n");
		return -EINVAL;
	}

	/* nothing to check: not routing and both check intervals disabled */
	if (!the_lnet.ln_routing &&
	    live_router_check_interval <= 0 &&
	    dead_router_check_interval <= 0)
		return 0;

	sema_init(&the_lnet.ln_rc_signal, 0);
	/* EQ size doesn't matter; the callback is guaranteed to get every
	 * event */
	eqsz = 0;
	rc = LNetEQAlloc(eqsz, lnet_router_checker_event,
			 &the_lnet.ln_rc_eqh);
	if (rc != 0) {
		CERROR("Can't allocate EQ(%d): %d\n", eqsz, rc);
		return -ENOMEM;
	}

	the_lnet.ln_rc_state = LNET_RC_STATE_RUNNING;
	rc = PTR_ERR(kthread_run(lnet_router_checker,
				 NULL, "router_checker"));
	if (IS_ERR_VALUE(rc)) {
		CERROR("Can't start router checker thread: %d\n", rc);
		/* block until event callback signals exit */
		/* NOTE(review): ln_rc_signal is only up()'d by the checker
		 * thread itself (see lnet_router_checker); if kthread_run
		 * failed the thread never ran, so this down() looks like it
		 * can block forever — confirm against upstream history. */
		down(&the_lnet.ln_rc_signal);
		rc = LNetEQFree(the_lnet.ln_rc_eqh);
		LASSERT(rc == 0);
		the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
		return -ENOMEM;
	}

	if (check_routers_before_use) {
		/* Note that a helpful side-effect of pinging all known routers
		 * at startup is that it makes them drop stale connections they
		 * may have to a previous instance of me. */
		lnet_wait_known_routerstate();
	}

	return 0;
}
1068
/* Stop the router checker thread (no-op when it never started) and free
 * its EQ.  Blocks until the thread acknowledges via ln_rc_signal. */
void
lnet_router_checker_stop (void)
{
	int rc;

	if (the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN)
		return;

	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);
	/* the thread polls this state and exits its main loop */
	the_lnet.ln_rc_state = LNET_RC_STATE_STOPPING;

	/* block until event callback signals exit */
	down(&the_lnet.ln_rc_signal);
	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_SHUTDOWN);

	rc = LNetEQFree(the_lnet.ln_rc_eqh);
	LASSERT (rc == 0);
	return;
}
1088
/* Reap router-checker data blocks: unlink MDs for RCDs on the deathrow
 * list, then free zombies whose unlink event has completed (invalid mdh).
 * With @wait_unlink, keep polling until every zombie is reaped; otherwise
 * make one pass and leave stragglers for the next call. */
static void
lnet_prune_rc_data(int wait_unlink)
{
	lnet_rc_data_t *rcd;
	lnet_rc_data_t *tmp;
	lnet_peer_t *lp;
	struct list_head head;
	int i = 2;	/* warn-throttle counter, see power-of-two test below */

	/* fast path: running normally with nothing queued for teardown */
	if (likely(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING &&
		   list_empty(&the_lnet.ln_rcd_deathrow) &&
		   list_empty(&the_lnet.ln_rcd_zombie)))
		return;

	INIT_LIST_HEAD(&head);

	lnet_net_lock(LNET_LOCK_EX);

	if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) {
		/* router checker is stopping, prune all */
		list_for_each_entry(lp, &the_lnet.ln_routers,
				    lp_rtr_list) {
			if (lp->lp_rcd == NULL)
				continue;

			LASSERT(list_empty(&lp->lp_rcd->rcd_list));
			list_add(&lp->lp_rcd->rcd_list,
				 &the_lnet.ln_rcd_deathrow);
			lp->lp_rcd = NULL;
		}
	}

	/* unlink all RCDs on deathrow list */
	list_splice_init(&the_lnet.ln_rcd_deathrow, &head);

	if (!list_empty(&head)) {
		lnet_net_unlock(LNET_LOCK_EX);

		list_for_each_entry(rcd, &head, rcd_list)
			LNetMDUnlink(rcd->rcd_mdh);

		lnet_net_lock(LNET_LOCK_EX);
	}

	list_splice_init(&head, &the_lnet.ln_rcd_zombie);

	/* release all zombie RCDs */
	while (!list_empty(&the_lnet.ln_rcd_zombie)) {
		/* an invalid mdh means the unlink event has fired and the
		 * RCD is safe to destroy */
		list_for_each_entry_safe(rcd, tmp, &the_lnet.ln_rcd_zombie,
					 rcd_list) {
			if (LNetHandleIsInvalid(rcd->rcd_mdh))
				list_move(&rcd->rcd_list, &head);
		}

		wait_unlink = wait_unlink &&
			      !list_empty(&the_lnet.ln_rcd_zombie);

		lnet_net_unlock(LNET_LOCK_EX);

		while (!list_empty(&head)) {
			rcd = list_entry(head.next,
					 lnet_rc_data_t, rcd_list);
			list_del_init(&rcd->rcd_list);
			lnet_destroy_rc_data(rcd);
		}

		if (!wait_unlink)
			return;

		i++;
		/* warn only when i is a power of two, i.e. with
		 * exponentially decreasing frequency */
		CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET,
		       "Waiting for rc buffers to unlink\n");
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1) / 4);

		lnet_net_lock(LNET_LOCK_EX);
	}

	lnet_net_unlock(LNET_LOCK_EX);
}
1169
1170
1171 #if defined(LNET_ROUTER)
1172
/* Router checker thread main loop: once a second, ping every router and
 * refresh local NI status, until lnet_router_checker_stop() flips the
 * state to STOPPING.  Signals ln_rc_signal on exit. */
static int
lnet_router_checker(void *arg)
{
	lnet_peer_t *rtr;
	struct list_head *entry;

	cfs_block_allsigs();

	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);

	while (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING) {
		__u64 version;
		int cpt;
		int cpt2;

		cpt = lnet_net_lock_current();
 rescan:
		/* snapshot the list version; any add/remove bumps it and
		 * forces a rescan since our cursor may be stale */
		version = the_lnet.ln_routers_version;

		list_for_each(entry, &the_lnet.ln_routers) {
			rtr = list_entry(entry, lnet_peer_t, lp_rtr_list);

			/* switch to the lock partition owning this router */
			cpt2 = lnet_cpt_of_nid_locked(rtr->lp_nid);
			if (cpt != cpt2) {
				lnet_net_unlock(cpt);
				cpt = cpt2;
				lnet_net_lock(cpt);
				/* the routers list has changed */
				if (version != the_lnet.ln_routers_version)
					goto rescan;
			}

			lnet_ping_router_locked(rtr);

			/* NB dropped lock */
			if (version != the_lnet.ln_routers_version) {
				/* the routers list has changed */
				goto rescan;
			}
		}

		if (the_lnet.ln_routing)
			lnet_update_ni_status_locked();

		lnet_net_unlock(cpt);

		lnet_prune_rc_data(0); /* don't wait for UNLINK */

		/* Call schedule_timeout() here always adds 1 to load average
		 * because kernel counts # active tasks as nr_running
		 * + nr_uninterruptible. */
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(cfs_time_seconds(1));
	}

	LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING);

	lnet_prune_rc_data(1); /* wait for UNLINK */

	the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
	up(&the_lnet.ln_rc_signal);
	/* The unlink event callback will signal final completion */
	return 0;
}
1237
1238 static void
1239 lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
1240 {
1241 int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
1242
1243 while (--npages >= 0)
1244 __free_page(rb->rb_kiov[npages].kiov_page);
1245
1246 LIBCFS_FREE(rb, sz);
1247 }
1248
1249 static lnet_rtrbuf_t *
1250 lnet_new_rtrbuf(lnet_rtrbufpool_t *rbp, int cpt)
1251 {
1252 int npages = rbp->rbp_npages;
1253 int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
1254 struct page *page;
1255 lnet_rtrbuf_t *rb;
1256 int i;
1257
1258 LIBCFS_CPT_ALLOC(rb, lnet_cpt_table(), cpt, sz);
1259 if (rb == NULL)
1260 return NULL;
1261
1262 rb->rb_pool = rbp;
1263
1264 for (i = 0; i < npages; i++) {
1265 page = alloc_pages_node(
1266 cfs_cpt_spread_node(lnet_cpt_table(), cpt),
1267 __GFP_ZERO | GFP_IOFS, 0);
1268 if (page == NULL) {
1269 while (--i >= 0)
1270 __free_page(rb->rb_kiov[i].kiov_page);
1271
1272 LIBCFS_FREE(rb, sz);
1273 return NULL;
1274 }
1275
1276 rb->rb_kiov[i].kiov_len = PAGE_CACHE_SIZE;
1277 rb->rb_kiov[i].kiov_offset = 0;
1278 rb->rb_kiov[i].kiov_page = page;
1279 }
1280
1281 return rb;
1282 }
1283
1284 static void
1285 lnet_rtrpool_free_bufs(lnet_rtrbufpool_t *rbp)
1286 {
1287 int npages = rbp->rbp_npages;
1288 int nbuffers = 0;
1289 lnet_rtrbuf_t *rb;
1290
1291 if (rbp->rbp_nbuffers == 0) /* not initialized or already freed */
1292 return;
1293
1294 LASSERT (list_empty(&rbp->rbp_msgs));
1295 LASSERT (rbp->rbp_credits == rbp->rbp_nbuffers);
1296
1297 while (!list_empty(&rbp->rbp_bufs)) {
1298 LASSERT (rbp->rbp_credits > 0);
1299
1300 rb = list_entry(rbp->rbp_bufs.next,
1301 lnet_rtrbuf_t, rb_list);
1302 list_del(&rb->rb_list);
1303 lnet_destroy_rtrbuf(rb, npages);
1304 nbuffers++;
1305 }
1306
1307 LASSERT (rbp->rbp_nbuffers == nbuffers);
1308 LASSERT (rbp->rbp_credits == nbuffers);
1309
1310 rbp->rbp_nbuffers = rbp->rbp_credits = 0;
1311 }
1312
1313 static int
1314 lnet_rtrpool_alloc_bufs(lnet_rtrbufpool_t *rbp, int nbufs, int cpt)
1315 {
1316 lnet_rtrbuf_t *rb;
1317 int i;
1318
1319 if (rbp->rbp_nbuffers != 0) {
1320 LASSERT (rbp->rbp_nbuffers == nbufs);
1321 return 0;
1322 }
1323
1324 for (i = 0; i < nbufs; i++) {
1325 rb = lnet_new_rtrbuf(rbp, cpt);
1326
1327 if (rb == NULL) {
1328 CERROR("Failed to allocate %d router bufs of %d pages\n",
1329 nbufs, rbp->rbp_npages);
1330 return -ENOMEM;
1331 }
1332
1333 rbp->rbp_nbuffers++;
1334 rbp->rbp_credits++;
1335 rbp->rbp_mincredits++;
1336 list_add(&rb->rb_list, &rbp->rbp_bufs);
1337
1338 /* No allocation "under fire" */
1339 /* Otherwise we'd need code to schedule blocked msgs etc */
1340 LASSERT (!the_lnet.ln_routing);
1341 }
1342
1343 LASSERT (rbp->rbp_credits == nbufs);
1344 return 0;
1345 }
1346
1347 static void
1348 lnet_rtrpool_init(lnet_rtrbufpool_t *rbp, int npages)
1349 {
1350 INIT_LIST_HEAD(&rbp->rbp_msgs);
1351 INIT_LIST_HEAD(&rbp->rbp_bufs);
1352
1353 rbp->rbp_npages = npages;
1354 rbp->rbp_credits = 0;
1355 rbp->rbp_mincredits = 0;
1356 }
1357
1358 void
1359 lnet_rtrpools_free(void)
1360 {
1361 lnet_rtrbufpool_t *rtrp;
1362 int i;
1363
1364 if (the_lnet.ln_rtrpools == NULL) /* uninitialized or freed */
1365 return;
1366
1367 cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
1368 lnet_rtrpool_free_bufs(&rtrp[0]);
1369 lnet_rtrpool_free_bufs(&rtrp[1]);
1370 lnet_rtrpool_free_bufs(&rtrp[2]);
1371 }
1372
1373 cfs_percpt_free(the_lnet.ln_rtrpools);
1374 the_lnet.ln_rtrpools = NULL;
1375 }
1376
1377 static int
1378 lnet_nrb_tiny_calculate(int npages)
1379 {
1380 int nrbs = LNET_NRB_TINY;
1381
1382 if (tiny_router_buffers < 0) {
1383 LCONSOLE_ERROR_MSG(0x10c,
1384 "tiny_router_buffers=%d invalid when routing enabled\n",
1385 tiny_router_buffers);
1386 return -1;
1387 }
1388
1389 if (tiny_router_buffers > 0)
1390 nrbs = tiny_router_buffers;
1391
1392 nrbs /= LNET_CPT_NUMBER;
1393 return max(nrbs, LNET_NRB_TINY_MIN);
1394 }
1395
1396 static int
1397 lnet_nrb_small_calculate(int npages)
1398 {
1399 int nrbs = LNET_NRB_SMALL;
1400
1401 if (small_router_buffers < 0) {
1402 LCONSOLE_ERROR_MSG(0x10c,
1403 "small_router_buffers=%d invalid when routing enabled\n",
1404 small_router_buffers);
1405 return -1;
1406 }
1407
1408 if (small_router_buffers > 0)
1409 nrbs = small_router_buffers;
1410
1411 nrbs /= LNET_CPT_NUMBER;
1412 return max(nrbs, LNET_NRB_SMALL_MIN);
1413 }
1414
1415 static int
1416 lnet_nrb_large_calculate(int npages)
1417 {
1418 int nrbs = LNET_NRB_LARGE;
1419
1420 if (large_router_buffers < 0) {
1421 LCONSOLE_ERROR_MSG(0x10c,
1422 "large_router_buffers=%d invalid when routing enabled\n",
1423 large_router_buffers);
1424 return -1;
1425 }
1426
1427 if (large_router_buffers > 0)
1428 nrbs = large_router_buffers;
1429
1430 nrbs /= LNET_CPT_NUMBER;
1431 return max(nrbs, LNET_NRB_LARGE_MIN);
1432 }
1433
1434 int
1435 lnet_rtrpools_alloc(int im_a_router)
1436 {
1437 lnet_rtrbufpool_t *rtrp;
1438 int large_pages = (LNET_MTU + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1439 int small_pages = 1;
1440 int nrb_tiny;
1441 int nrb_small;
1442 int nrb_large;
1443 int rc;
1444 int i;
1445
1446 if (!strcmp(forwarding, "")) {
1447 /* not set either way */
1448 if (!im_a_router)
1449 return 0;
1450 } else if (!strcmp(forwarding, "disabled")) {
1451 /* explicitly disabled */
1452 return 0;
1453 } else if (!strcmp(forwarding, "enabled")) {
1454 /* explicitly enabled */
1455 } else {
1456 LCONSOLE_ERROR_MSG(0x10b, "'forwarding' not set to either 'enabled' or 'disabled'\n");
1457 return -EINVAL;
1458 }
1459
1460 nrb_tiny = lnet_nrb_tiny_calculate(0);
1461 if (nrb_tiny < 0)
1462 return -EINVAL;
1463
1464 nrb_small = lnet_nrb_small_calculate(small_pages);
1465 if (nrb_small < 0)
1466 return -EINVAL;
1467
1468 nrb_large = lnet_nrb_large_calculate(large_pages);
1469 if (nrb_large < 0)
1470 return -EINVAL;
1471
1472 the_lnet.ln_rtrpools = cfs_percpt_alloc(lnet_cpt_table(),
1473 LNET_NRBPOOLS *
1474 sizeof(lnet_rtrbufpool_t));
1475 if (the_lnet.ln_rtrpools == NULL) {
1476 LCONSOLE_ERROR_MSG(0x10c,
1477 "Failed to initialize router buffe pool\n");
1478 return -ENOMEM;
1479 }
1480
1481 cfs_percpt_for_each(rtrp, i, the_lnet.ln_rtrpools) {
1482 lnet_rtrpool_init(&rtrp[0], 0);
1483 rc = lnet_rtrpool_alloc_bufs(&rtrp[0], nrb_tiny, i);
1484 if (rc != 0)
1485 goto failed;
1486
1487 lnet_rtrpool_init(&rtrp[1], small_pages);
1488 rc = lnet_rtrpool_alloc_bufs(&rtrp[1], nrb_small, i);
1489 if (rc != 0)
1490 goto failed;
1491
1492 lnet_rtrpool_init(&rtrp[2], large_pages);
1493 rc = lnet_rtrpool_alloc_bufs(&rtrp[2], nrb_large, i);
1494 if (rc != 0)
1495 goto failed;
1496 }
1497
1498 lnet_net_lock(LNET_LOCK_EX);
1499 the_lnet.ln_routing = 1;
1500 lnet_net_unlock(LNET_LOCK_EX);
1501
1502 return 0;
1503
1504 failed:
1505 lnet_rtrpools_free();
1506 return rc;
1507 }
1508
/*
 * Process a liveness notification for peer @nid.
 *
 * @ni	  the local NI reporting the event, or NULL when the
 *	  notification comes from userspace.
 * @nid	  the peer whose liveness changed.
 * @alive non-zero when the peer is reported up, zero when down.
 * @when  the time the event was observed; events dated in the future
 *	  are rejected.
 *
 * Returns 0 on success — including "peer unknown" and "auto-down
 * disabled", which are silently ignored — or -EINVAL for bad input,
 * -ESHUTDOWN while LNet is shutting down.
 */
int
lnet_notify(lnet_ni_t *ni, lnet_nid_t nid, int alive, unsigned long when)
{
	struct lnet_peer *lp = NULL;
	unsigned long now = cfs_time_current();
	int cpt = lnet_cpt_of_nid(nid);

	LASSERT (!in_interrupt ());

	CDEBUG (D_NET, "%s notifying %s: %s\n",
		(ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
		libcfs_nid2str(nid),
		alive ? "up" : "down");

	/* an NI may only report on peers of its own network */
	if (ni != NULL &&
	    LNET_NIDNET(ni->ni_nid) != LNET_NIDNET(nid)) {
		CWARN ("Ignoring notification of %s %s by %s (different net)\n",
			libcfs_nid2str(nid), alive ? "birth" : "death",
			libcfs_nid2str(ni->ni_nid));
		return -EINVAL;
	}

	/* can't do predictions... */
	if (cfs_time_after(when, now)) {
		CWARN("Ignoring prediction from %s of %s %s %ld seconds in the future\n",
		      (ni == NULL) ? "userspace" : libcfs_nid2str(ni->ni_nid),
		      libcfs_nid2str(nid), alive ? "up" : "down",
		      cfs_duration_sec(cfs_time_sub(when, now)));
		return -EINVAL;
	}

	if (ni != NULL && !alive && /* LND telling me she's down */
	    !auto_down) { /* auto-down disabled */
		CDEBUG(D_NET, "Auto-down disabled\n");
		return 0;
	}

	lnet_net_lock(cpt);

	if (the_lnet.ln_shutdown) {
		lnet_net_unlock(cpt);
		return -ESHUTDOWN;
	}

	/* takes a reference on the peer; dropped below */
	lp = lnet_find_peer_locked(the_lnet.ln_peer_tables[cpt], nid);
	if (lp == NULL) {
		/* nid not found */
		lnet_net_unlock(cpt);
		CDEBUG(D_NET, "%s not found\n", libcfs_nid2str(nid));
		return 0;
	}

	/* We can't fully trust LND on reporting exact peer last_alive
	 * if he notifies us about dead peer. For example ksocklnd can
	 * call us with when == _time_when_the_node_was_booted_ if
	 * no connections were successfully established */
	if (ni != NULL && !alive && when < lp->lp_last_alive)
		when = lp->lp_last_alive;

	lnet_notify_locked(lp, ni == NULL, alive, when);

	lnet_ni_notify_locked(ni, lp);

	lnet_peer_decref_locked(lp);

	lnet_net_unlock(cpt);
	return 0;
}
1577 EXPORT_SYMBOL(lnet_notify);
1578
/*
 * Fetch router tunables.  In the kernel build all tunables arrive as
 * module parameters, so there is nothing to do; the function exists
 * for symmetry with the userspace build.  (Removed the redundant
 * `return;` at the end of a void function.)
 */
void
lnet_get_tunables (void)
{
}
1584
1585 #else
1586
/*
 * Userspace build without LNET_ROUTER: peer liveness notification is
 * not supported; always fails with -EOPNOTSUPP.
 */
int
lnet_notify (lnet_ni_t *ni, lnet_nid_t nid, int alive, unsigned long when)
{
	return -EOPNOTSUPP;
}
1592
/*
 * Userspace (single-CPT) router checker.  Unlike the kernel thread,
 * this is called repeatedly from the main event loop: it rate-limits
 * itself to at most once per second, drains pending ping events from
 * the router-checker EQ, and then pings every router.  Static state
 * (`last`, `running`) makes it a guarded, non-reentrant polling step.
 */
void
lnet_router_checker (void)
{
	static time_t last;
	static int running;

	time_t now = get_seconds();
	int interval = now - last;
	int rc;
	__u64 version;
	lnet_peer_t *rtr;

	/* It's no use to call me again within a sec - all intervals and
	 * timeouts are measured in seconds */
	if (last != 0 && interval < 2)
		return;

	if (last != 0 &&
	    interval > max(live_router_check_interval,
			   dead_router_check_interval))
		CNETERR("Checker(%d/%d) not called for %d seconds\n",
			live_router_check_interval, dead_router_check_interval,
			interval);

	/* userspace LNet runs with a single CPT */
	LASSERT(LNET_CPT_NUMBER == 1);

	lnet_net_lock(0);
	LASSERT(!running); /* recursion check */
	running = 1;
	lnet_net_unlock(0);

	last = now;

	if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING)
		lnet_prune_rc_data(0); /* unlink all rcd and nowait */

	/* consume all pending events */
	while (1) {
		int i;
		lnet_event_t ev;

		/* NB ln_rc_eqh must be the 1st in 'eventqs' otherwise the
		 * recursion breaker in LNetEQPoll would fail */
		rc = LNetEQPoll(&the_lnet.ln_rc_eqh, 1, 0, &ev, &i);
		if (rc == 0) /* no event pending */
			break;

		/* NB a lost SENT prevents me from pinging a router again */
		if (rc == -EOVERFLOW) {
			CERROR("Dropped an event!!!\n");
			abort();
		}

		LASSERT (rc == 1);

		lnet_router_checker_event(&ev);
	}

	if (the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING) {
		lnet_prune_rc_data(1); /* release rcd */
		the_lnet.ln_rc_state = LNET_RC_STATE_SHUTDOWN;
		running = 0;
		return;
	}

	LASSERT (the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING);

	lnet_net_lock(0);

	/* single-threaded here, so the list can't change under us */
	version = the_lnet.ln_routers_version;
	list_for_each_entry (rtr, &the_lnet.ln_routers, lp_rtr_list) {
		lnet_ping_router_locked(rtr);
		LASSERT (version == the_lnet.ln_routers_version);
	}

	lnet_net_unlock(0);

	running = 0; /* lock only needed for the recursion check */
	return;
}
1673
1674 /* NB lnet_peers_start_down depends on me,
1675 * so must be called before any peer creation */
1676 void
1677 lnet_get_tunables (void)
1678 {
1679 char *s;
1680
1681 s = getenv("LNET_ROUTER_PING_TIMEOUT");
1682 if (s != NULL)
1683 router_ping_timeout = atoi(s);
1684
1685 s = getenv("LNET_LIVE_ROUTER_CHECK_INTERVAL");
1686 if (s != NULL)
1687 live_router_check_interval = atoi(s);
1688
1689 s = getenv("LNET_DEAD_ROUTER_CHECK_INTERVAL");
1690 if (s != NULL)
1691 dead_router_check_interval = atoi(s);
1692
1693 /* This replaces old lnd_notify mechanism */
1694 check_routers_before_use = 1;
1695 if (dead_router_check_interval <= 0)
1696 dead_router_check_interval = 30;
1697 }
1698
/*
 * Userspace build without LNET_ROUTER: no router buffer pools exist,
 * so there is nothing to free.
 */
void
lnet_rtrpools_free(void)
{
}
1703
/*
 * Userspace build without LNET_ROUTER: routing is unsupported, so
 * there are no pools to allocate; succeed trivially.
 *
 * Renamed the misspelled parameter "im_a_arouter" to "im_a_router"
 * to match the kernel-build signature (no effect on callers).
 */
int
lnet_rtrpools_alloc(int im_a_router)
{
	return 0;
}
1709
1710 #endif