/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lnet/klnds/socklnd/socklnd.c
 *
 * Author: Zach Brown <zab@zabbo.net>
 * Author: Peter J. Braam <braam@clusterfs.com>
 * Author: Phil Schwan <phil@clusterfs.com>
 * Author: Eric Barton <eric@bartonsoftware.com>
 */

#include "socklnd.h"

static lnd_t the_ksocklnd;
ksock_nal_data_t ksocknal_data;

static ksock_interface_t *
ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
{
	ksock_net_t *net = ni->ni_data;
	int i;
	ksock_interface_t *iface;

	for (i = 0; i < net->ksnn_ninterfaces; i++) {
		LASSERT(i < LNET_MAX_INTERFACES);
		iface = &net->ksnn_interfaces[i];

		if (iface->ksni_ipaddr == ip)
			return iface;
	}

	return NULL;
}

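/*
 * A ksock_route_t describes one potential connection path to a peer
 * (an IP/port the connection daemon may dial). It is created with a
 * single reference for the caller; ksocknal_add_route_locked() later
 * hands that reference over to the peer's route list.
 */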
static ksock_route_t *
ksocknal_create_route(__u32 ipaddr, int port)
{
	ksock_route_t *route;

	LIBCFS_ALLOC(route, sizeof(*route));
	if (!route)
		return NULL;

	atomic_set(&route->ksnr_refcount, 1);
	route->ksnr_peer = NULL;
	route->ksnr_retry_interval = 0; /* OK to connect at any time */
	route->ksnr_ipaddr = ipaddr;
	route->ksnr_port = port;
	route->ksnr_scheduled = 0;
	route->ksnr_connecting = 0;
	route->ksnr_connected = 0;
	route->ksnr_deleted = 0;
	route->ksnr_conn_count = 0;
	route->ksnr_share_count = 0;

	return route;
}

void
ksocknal_destroy_route(ksock_route_t *route)
{
	LASSERT(!atomic_read(&route->ksnr_refcount));

	if (route->ksnr_peer)
		ksocknal_peer_decref(route->ksnr_peer);

	LIBCFS_FREE(route, sizeof(*route));
}

static int
ksocknal_create_peer(ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
{
	ksock_net_t *net = ni->ni_data;
	ksock_peer_t *peer;

	LASSERT(id.nid != LNET_NID_ANY);
	LASSERT(id.pid != LNET_PID_ANY);
	LASSERT(!in_interrupt());

	LIBCFS_ALLOC(peer, sizeof(*peer));
	if (!peer)
		return -ENOMEM;

	peer->ksnp_ni = ni;
	peer->ksnp_id = id;
	atomic_set(&peer->ksnp_refcount, 1); /* 1 ref for caller */
	peer->ksnp_closing = 0;
	peer->ksnp_accepting = 0;
	peer->ksnp_proto = NULL;
	peer->ksnp_last_alive = 0;
	peer->ksnp_zc_next_cookie = SOCKNAL_KEEPALIVE_PING + 1;

	INIT_LIST_HEAD(&peer->ksnp_conns);
	INIT_LIST_HEAD(&peer->ksnp_routes);
	INIT_LIST_HEAD(&peer->ksnp_tx_queue);
	INIT_LIST_HEAD(&peer->ksnp_zc_req_list);
	spin_lock_init(&peer->ksnp_lock);

	spin_lock_bh(&net->ksnn_lock);

	if (net->ksnn_shutdown) {
		spin_unlock_bh(&net->ksnn_lock);

		LIBCFS_FREE(peer, sizeof(*peer));
		CERROR("Can't create peer: network shutdown\n");
		return -ESHUTDOWN;
	}

	net->ksnn_npeers++;

	spin_unlock_bh(&net->ksnn_lock);

	*peerp = peer;
	return 0;
}

void
ksocknal_destroy_peer(ksock_peer_t *peer)
{
	ksock_net_t *net = peer->ksnp_ni->ni_data;

	CDEBUG(D_NET, "peer %s %p deleted\n",
	       libcfs_id2str(peer->ksnp_id), peer);

	LASSERT(!atomic_read(&peer->ksnp_refcount));
	LASSERT(!peer->ksnp_accepting);
	LASSERT(list_empty(&peer->ksnp_conns));
	LASSERT(list_empty(&peer->ksnp_routes));
	LASSERT(list_empty(&peer->ksnp_tx_queue));
	LASSERT(list_empty(&peer->ksnp_zc_req_list));

	LIBCFS_FREE(peer, sizeof(*peer));

	/*
	 * NB a peer's connections and routes keep a reference on their peer
	 * until they are destroyed, so we can be assured that _all_ state to
	 * do with this peer has been cleaned up when its refcount drops to
	 * zero.
	 */
	spin_lock_bh(&net->ksnn_lock);
	net->ksnn_npeers--;
	spin_unlock_bh(&net->ksnn_lock);
}

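/*
 * Look a peer up in the global NID-hashed peer table. The caller must
 * hold ksnd_global_lock; no reference is taken, so the peer is only
 * valid while that lock is held. ksocknal_find_peer() below wraps this
 * and takes a ref for the caller.
 */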
ksock_peer_t *
ksocknal_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id)
{
	struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
	struct list_head *tmp;
	ksock_peer_t *peer;

	list_for_each(tmp, peer_list) {
		peer = list_entry(tmp, ksock_peer_t, ksnp_list);

		LASSERT(!peer->ksnp_closing);

		if (peer->ksnp_ni != ni)
			continue;

		if (peer->ksnp_id.nid != id.nid ||
		    peer->ksnp_id.pid != id.pid)
			continue;

		CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
		       peer, libcfs_id2str(id),
		       atomic_read(&peer->ksnp_refcount));
		return peer;
	}
	return NULL;
}

ksock_peer_t *
ksocknal_find_peer(lnet_ni_t *ni, lnet_process_id_t id)
{
	ksock_peer_t *peer;

	read_lock(&ksocknal_data.ksnd_global_lock);
	peer = ksocknal_find_peer_locked(ni, id);
	if (peer) /* +1 ref for caller? */
		ksocknal_peer_addref(peer);
	read_unlock(&ksocknal_data.ksnd_global_lock);

	return peer;
}

static void
ksocknal_unlink_peer_locked(ksock_peer_t *peer)
{
	int i;
	__u32 ip;
	ksock_interface_t *iface;

	for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
		LASSERT(i < LNET_MAX_INTERFACES);
		ip = peer->ksnp_passive_ips[i];

		iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
		/*
		 * All IPs in peer->ksnp_passive_ips[] come from the
		 * interface list, therefore the call must succeed.
		 */
		LASSERT(iface);

		CDEBUG(D_NET, "peer=%p iface=%p ksni_nroutes=%d\n",
		       peer, iface, iface->ksni_nroutes);
		iface->ksni_npeers--;
	}

	LASSERT(list_empty(&peer->ksnp_conns));
	LASSERT(list_empty(&peer->ksnp_routes));
	LASSERT(!peer->ksnp_closing);
	peer->ksnp_closing = 1;
	list_del(&peer->ksnp_list);
	/* lose peerlist's ref */
	ksocknal_peer_decref(peer);
}

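/*
 * Back-end of the IOC_LIBCFS_GET_PEER ioctl: walk every peer on this
 * NI and return the index'th record, counting one entry per passive IP
 * and per route (or a single bare entry for a peer with neither).
 * Userland enumerates by calling with index 0, 1, 2, ... until -ENOENT.
 */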
static int
ksocknal_get_peer_info(lnet_ni_t *ni, int index,
		       lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip,
		       int *port, int *conn_count, int *share_count)
{
	ksock_peer_t *peer;
	struct list_head *ptmp;
	ksock_route_t *route;
	struct list_head *rtmp;
	int i;
	int j;
	int rc = -ENOENT;

	read_lock(&ksocknal_data.ksnd_global_lock);

	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
		list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
			peer = list_entry(ptmp, ksock_peer_t, ksnp_list);

			if (peer->ksnp_ni != ni)
				continue;

			if (!peer->ksnp_n_passive_ips &&
			    list_empty(&peer->ksnp_routes)) {
				if (index-- > 0)
					continue;

				*id = peer->ksnp_id;
				*myip = 0;
				*peer_ip = 0;
				*port = 0;
				*conn_count = 0;
				*share_count = 0;
				rc = 0;
				goto out;
			}

			for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
				if (index-- > 0)
					continue;

				*id = peer->ksnp_id;
				*myip = peer->ksnp_passive_ips[j];
				*peer_ip = 0;
				*port = 0;
				*conn_count = 0;
				*share_count = 0;
				rc = 0;
				goto out;
			}

			list_for_each(rtmp, &peer->ksnp_routes) {
				if (index-- > 0)
					continue;

				route = list_entry(rtmp, ksock_route_t,
						   ksnr_list);

				*id = peer->ksnp_id;
				*myip = route->ksnr_myipaddr;
				*peer_ip = route->ksnr_ipaddr;
				*port = route->ksnr_port;
				*conn_count = route->ksnr_conn_count;
				*share_count = route->ksnr_share_count;
				rc = 0;
				goto out;
			}
		}
	}
 out:
	read_unlock(&ksocknal_data.ksnd_global_lock);
	return rc;
}

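/*
 * Tie a freshly established conn to the route that (logically) created
 * it: record the connection type, bump the per-route conn count, keep
 * the interface route counters in step if the local IP changed, and
 * reset the connect-retry backoff.
 */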
static void
ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
{
	ksock_peer_t *peer = route->ksnr_peer;
	int type = conn->ksnc_type;
	ksock_interface_t *iface;

	conn->ksnc_route = route;
	ksocknal_route_addref(route);

	if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
		if (!route->ksnr_myipaddr) {
			/* route wasn't bound locally yet (the initial route) */
			CDEBUG(D_NET, "Binding %s %pI4h to %pI4h\n",
			       libcfs_id2str(peer->ksnp_id),
			       &route->ksnr_ipaddr,
			       &conn->ksnc_myipaddr);
		} else {
			CDEBUG(D_NET, "Rebinding %s %pI4h from %pI4h to %pI4h\n",
			       libcfs_id2str(peer->ksnp_id),
			       &route->ksnr_ipaddr,
			       &route->ksnr_myipaddr,
			       &conn->ksnc_myipaddr);

			iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
						  route->ksnr_myipaddr);
			if (iface)
				iface->ksni_nroutes--;
		}
		route->ksnr_myipaddr = conn->ksnc_myipaddr;
		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
					  route->ksnr_myipaddr);
		if (iface)
			iface->ksni_nroutes++;
	}

	route->ksnr_connected |= (1 << type);
	route->ksnr_conn_count++;

	/*
	 * Successful connection => further attempts can
	 * proceed immediately
	 */
	route->ksnr_retry_interval = 0;
}

static void
ksocknal_add_route_locked(ksock_peer_t *peer, ksock_route_t *route)
{
	struct list_head *tmp;
	ksock_conn_t *conn;
	ksock_route_t *route2;

	LASSERT(!peer->ksnp_closing);
	LASSERT(!route->ksnr_peer);
	LASSERT(!route->ksnr_scheduled);
	LASSERT(!route->ksnr_connecting);
	LASSERT(!route->ksnr_connected);

	/* LASSERT(unique) */
	list_for_each(tmp, &peer->ksnp_routes) {
		route2 = list_entry(tmp, ksock_route_t, ksnr_list);

		if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
			CERROR("Duplicate route %s %pI4h\n",
			       libcfs_id2str(peer->ksnp_id),
			       &route->ksnr_ipaddr);
			LBUG();
		}
	}

	route->ksnr_peer = peer;
	ksocknal_peer_addref(peer);
	/* peer's routelist takes over my ref on 'route' */
	list_add_tail(&route->ksnr_list, &peer->ksnp_routes);

	list_for_each(tmp, &peer->ksnp_conns) {
		conn = list_entry(tmp, ksock_conn_t, ksnc_list);

		if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
			continue;

		ksocknal_associate_route_conn_locked(route, conn);
		/* keep going (typed routes) */
	}
}

static void
ksocknal_del_route_locked(ksock_route_t *route)
{
	ksock_peer_t *peer = route->ksnr_peer;
	ksock_interface_t *iface;
	ksock_conn_t *conn;
	struct list_head *ctmp;
	struct list_head *cnxt;

	LASSERT(!route->ksnr_deleted);

	/* Close associated conns */
	list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
		conn = list_entry(ctmp, ksock_conn_t, ksnc_list);

		if (conn->ksnc_route != route)
			continue;

		ksocknal_close_conn_locked(conn, 0);
	}

	if (route->ksnr_myipaddr) {
		iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
					  route->ksnr_myipaddr);
		if (iface)
			iface->ksni_nroutes--;
	}

	route->ksnr_deleted = 1;
	list_del(&route->ksnr_list);
	ksocknal_route_decref(route); /* drop peer's ref */

	if (list_empty(&peer->ksnp_routes) &&
	    list_empty(&peer->ksnp_conns)) {
		/*
		 * I've just removed the last route to a peer with no active
		 * connections
		 */
		ksocknal_unlink_peer_locked(peer);
	}
}

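/*
 * Add (or share) an explicitly configured route to a peer. The peer
 * and route are allocated optimistically before taking the global
 * write lock; if another thread created the peer (or an identical
 * route) in the meantime, the spares are dropped and the existing
 * entries reused with their share counts bumped.
 */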
int
ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
{
	struct list_head *tmp;
	ksock_peer_t *peer;
	ksock_peer_t *peer2;
	ksock_route_t *route;
	ksock_route_t *route2;
	int rc;

	if (id.nid == LNET_NID_ANY ||
	    id.pid == LNET_PID_ANY)
		return -EINVAL;

	/* Have a brand new peer ready... */
	rc = ksocknal_create_peer(&peer, ni, id);
	if (rc)
		return rc;

	route = ksocknal_create_route(ipaddr, port);
	if (!route) {
		ksocknal_peer_decref(peer);
		return -ENOMEM;
	}

	write_lock_bh(&ksocknal_data.ksnd_global_lock);

	/* always called with a ref on ni, so shutdown can't have started */
	LASSERT(!((ksock_net_t *)ni->ni_data)->ksnn_shutdown);

	peer2 = ksocknal_find_peer_locked(ni, id);
	if (peer2) {
		ksocknal_peer_decref(peer);
		peer = peer2;
	} else {
		/* peer table takes my ref on peer */
		list_add_tail(&peer->ksnp_list,
			      ksocknal_nid2peerlist(id.nid));
	}

	route2 = NULL;
	list_for_each(tmp, &peer->ksnp_routes) {
		route2 = list_entry(tmp, ksock_route_t, ksnr_list);

		if (route2->ksnr_ipaddr == ipaddr)
			break;

		route2 = NULL;
	}
	if (!route2) {
		ksocknal_add_route_locked(peer, route);
		route->ksnr_share_count++;
	} else {
		ksocknal_route_decref(route);
		route2->ksnr_share_count++;
	}

	write_unlock_bh(&ksocknal_data.ksnd_global_lock);

	return 0;
}

static void
ksocknal_del_peer_locked(ksock_peer_t *peer, __u32 ip)
{
	ksock_conn_t *conn;
	ksock_route_t *route;
	struct list_head *tmp;
	struct list_head *nxt;
	int nshared;

	LASSERT(!peer->ksnp_closing);

	/* Extra ref prevents peer disappearing until I'm done with it */
	ksocknal_peer_addref(peer);

	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
		route = list_entry(tmp, ksock_route_t, ksnr_list);

		/* no match */
		if (!(!ip || route->ksnr_ipaddr == ip))
			continue;

		route->ksnr_share_count = 0;
		/* This deletes associated conns too */
		ksocknal_del_route_locked(route);
	}

	nshared = 0;
	list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
		route = list_entry(tmp, ksock_route_t, ksnr_list);
		nshared += route->ksnr_share_count;
	}

	if (!nshared) {
		/*
		 * remove everything else if there are no explicit entries
		 * left
		 */
		list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
			route = list_entry(tmp, ksock_route_t, ksnr_list);

			/* we should only be removing auto-entries */
			LASSERT(!route->ksnr_share_count);
			ksocknal_del_route_locked(route);
		}

		list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
			conn = list_entry(tmp, ksock_conn_t, ksnc_list);

			ksocknal_close_conn_locked(conn, 0);
		}
	}

	ksocknal_peer_decref(peer);
	/* NB peer unlinks itself when last conn/route is removed */
}

static int
ksocknal_del_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
{
	LIST_HEAD(zombies);
	struct list_head *ptmp;
	struct list_head *pnxt;
	ksock_peer_t *peer;
	int lo;
	int hi;
	int i;
	int rc = -ENOENT;

	write_lock_bh(&ksocknal_data.ksnd_global_lock);

	if (id.nid != LNET_NID_ANY) {
		lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
		hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
	} else {
		lo = 0;
		hi = ksocknal_data.ksnd_peer_hash_size - 1;
	}

	for (i = lo; i <= hi; i++) {
		list_for_each_safe(ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
			peer = list_entry(ptmp, ksock_peer_t, ksnp_list);

			if (peer->ksnp_ni != ni)
				continue;

			if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
			      (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
				continue;

			ksocknal_peer_addref(peer); /* a ref for me... */

			ksocknal_del_peer_locked(peer, ip);

			if (peer->ksnp_closing &&
			    !list_empty(&peer->ksnp_tx_queue)) {
				LASSERT(list_empty(&peer->ksnp_conns));
				LASSERT(list_empty(&peer->ksnp_routes));

				list_splice_init(&peer->ksnp_tx_queue,
						 &zombies);
			}

			ksocknal_peer_decref(peer); /* ...till here */

			rc = 0; /* matched! */
		}
	}

	write_unlock_bh(&ksocknal_data.ksnd_global_lock);

	ksocknal_txlist_done(ni, &zombies, 1);

	return rc;
}

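/*
 * Return the index'th connection on this NI with a reference held, or
 * NULL when the index runs off the end. The IOC_LIBCFS_GET_CONN ioctl
 * uses this for enumeration, just as ksocknal_get_peer_info() does for
 * peers.
 */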
static ksock_conn_t *
ksocknal_get_conn_by_idx(lnet_ni_t *ni, int index)
{
	ksock_peer_t *peer;
	struct list_head *ptmp;
	ksock_conn_t *conn;
	struct list_head *ctmp;
	int i;

	read_lock(&ksocknal_data.ksnd_global_lock);

	for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
		list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
			peer = list_entry(ptmp, ksock_peer_t, ksnp_list);

			LASSERT(!peer->ksnp_closing);

			if (peer->ksnp_ni != ni)
				continue;

			list_for_each(ctmp, &peer->ksnp_conns) {
				if (index-- > 0)
					continue;

				conn = list_entry(ctmp, ksock_conn_t,
						  ksnc_list);
				ksocknal_conn_addref(conn);
				read_unlock(&ksocknal_data.ksnd_global_lock);
				return conn;
			}
		}
	}

	read_unlock(&ksocknal_data.ksnd_global_lock);
	return NULL;
}

static ksock_sched_t *
ksocknal_choose_scheduler_locked(unsigned int cpt)
{
	struct ksock_sched_info *info = ksocknal_data.ksnd_sched_info[cpt];
	ksock_sched_t *sched;
	int i;

	LASSERT(info->ksi_nthreads > 0);

	sched = &info->ksi_scheds[0];
	/*
	 * NB: it's safe so far, but info->ksi_nthreads could be changed
	 * at runtime when we have dynamic LNet configuration, then we
	 * need to take care of this.
	 */
	for (i = 1; i < info->ksi_nthreads; i++) {
		if (sched->kss_nconns > info->ksi_scheds[i].kss_nconns)
			sched = &info->ksi_scheds[i];
	}

	return sched;
}

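/*
 * Copy this NI's interface addresses into ipaddrs[] for the HELLO
 * exchange. A single-homed node returns 0: extra connections are only
 * worth advertising when there is more than one interface to spread
 * them over.
 */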
static int
ksocknal_local_ipvec(lnet_ni_t *ni, __u32 *ipaddrs)
{
	ksock_net_t *net = ni->ni_data;
	int i;
	int nip;

	read_lock(&ksocknal_data.ksnd_global_lock);

	nip = net->ksnn_ninterfaces;
	LASSERT(nip <= LNET_MAX_INTERFACES);

	/*
	 * Only offer interfaces for additional connections if I have
	 * more than one.
	 */
	if (nip < 2) {
		read_unlock(&ksocknal_data.ksnd_global_lock);
		return 0;
	}

	for (i = 0; i < nip; i++) {
		ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
		LASSERT(ipaddrs[i]);
	}

	read_unlock(&ksocknal_data.ksnd_global_lock);
	return nip;
}

static int
ksocknal_match_peerip(ksock_interface_t *iface, __u32 *ips, int nips)
{
	int best_netmatch = 0;
	int best_xor = 0;
	int best = -1;
	int this_xor;
	int this_netmatch;
	int i;

	for (i = 0; i < nips; i++) {
		if (!ips[i])
			continue;

		this_xor = ips[i] ^ iface->ksni_ipaddr;
		this_netmatch = !(this_xor & iface->ksni_netmask) ? 1 : 0;

		if (!(best < 0 ||
		      best_netmatch < this_netmatch ||
		      (best_netmatch == this_netmatch &&
		       best_xor > this_xor)))
			continue;

		best = i;
		best_netmatch = this_netmatch;
		best_xor = this_xor;
	}

	LASSERT(best >= 0);
	return best;
}

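/*
 * Pick which local interfaces a passive connection should advertise
 * back to this peer, preferring same-subnet matches first and the
 * least-loaded interface (fewest peers) as the tie-breaker; the
 * peer's IP vector is overwritten with the result.
 */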
static int
ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
{
	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
	ksock_net_t *net = peer->ksnp_ni->ni_data;
	ksock_interface_t *iface;
	ksock_interface_t *best_iface;
	int n_ips;
	int i;
	int j;
	int k;
	__u32 ip;
	__u32 xor;
	int this_netmatch;
	int best_netmatch;
	int best_npeers;

	/*
	 * CAVEAT EMPTOR: We do all our interface matching with an
	 * exclusive hold of global lock at IRQ priority. We're only
	 * expecting to be dealing with small numbers of interfaces, so the
	 * O(n**3)-ness shouldn't matter
	 */
	/*
	 * Also note that I'm not going to return more than n_peerips
	 * interfaces, even if I have more myself
	 */
	write_lock_bh(global_lock);

	LASSERT(n_peerips <= LNET_MAX_INTERFACES);
	LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);

	/*
	 * Only match interfaces for additional connections
	 * if I have > 1 interface
	 */
	n_ips = (net->ksnn_ninterfaces < 2) ? 0 :
		min(n_peerips, net->ksnn_ninterfaces);

	for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
		/* ^ yes really... */

		/*
		 * If we have any new interfaces, first tick off all the
		 * peer IPs that match old interfaces, then choose new
		 * interfaces to match the remaining peer IPs.
		 * We don't forget interfaces we've stopped using; we might
		 * start using them again...
		 */
		if (i < peer->ksnp_n_passive_ips) {
			/* Old interface. */
			ip = peer->ksnp_passive_ips[i];
			best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);

		} else {
			/* choose a new interface */
			LASSERT(i == peer->ksnp_n_passive_ips);

			best_iface = NULL;
			best_netmatch = 0;
			best_npeers = 0;

			for (j = 0; j < net->ksnn_ninterfaces; j++) {
				iface = &net->ksnn_interfaces[j];
				ip = iface->ksni_ipaddr;

				for (k = 0; k < peer->ksnp_n_passive_ips; k++)
					if (peer->ksnp_passive_ips[k] == ip)
						break;

				if (k < peer->ksnp_n_passive_ips) /* using it already */
					continue;

				k = ksocknal_match_peerip(iface, peerips, n_peerips);
				xor = ip ^ peerips[k];
				this_netmatch = !(xor & iface->ksni_netmask) ? 1 : 0;

				if (!(!best_iface ||
				      best_netmatch < this_netmatch ||
				      (best_netmatch == this_netmatch &&
				       best_npeers > iface->ksni_npeers)))
					continue;

				best_iface = iface;
				best_netmatch = this_netmatch;
				best_npeers = iface->ksni_npeers;
			}

			best_iface->ksni_npeers++;
			ip = best_iface->ksni_ipaddr;
			peer->ksnp_passive_ips[i] = ip;
			peer->ksnp_n_passive_ips = i + 1;
		}

		/* mark the best matching peer IP used */
		j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
		peerips[j] = 0;
	}

	/* Overwrite input peer IP addresses */
	memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));

	write_unlock_bh(global_lock);

	return n_ips;
}

static void
ksocknal_create_routes(ksock_peer_t *peer, int port,
		       __u32 *peer_ipaddrs, int npeer_ipaddrs)
{
	ksock_route_t *newroute = NULL;
	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
	lnet_ni_t *ni = peer->ksnp_ni;
	ksock_net_t *net = ni->ni_data;
	struct list_head *rtmp;
	ksock_route_t *route;
	ksock_interface_t *iface;
	ksock_interface_t *best_iface;
	int best_netmatch;
	int this_netmatch;
	int best_nroutes;
	int i;
	int j;

	/*
	 * CAVEAT EMPTOR: We do all our interface matching with an
	 * exclusive hold of global lock at IRQ priority. We're only
	 * expecting to be dealing with small numbers of interfaces, so the
	 * O(n**3)-ness here shouldn't matter
	 */
	write_lock_bh(global_lock);

	if (net->ksnn_ninterfaces < 2) {
		/*
		 * Only create additional connections
		 * if I have > 1 interface
		 */
		write_unlock_bh(global_lock);
		return;
	}

	LASSERT(npeer_ipaddrs <= LNET_MAX_INTERFACES);

	for (i = 0; i < npeer_ipaddrs; i++) {
		if (newroute) {
			newroute->ksnr_ipaddr = peer_ipaddrs[i];
		} else {
			write_unlock_bh(global_lock);

			newroute = ksocknal_create_route(peer_ipaddrs[i], port);
			if (!newroute)
				return;

			write_lock_bh(global_lock);
		}

		if (peer->ksnp_closing) {
			/* peer got closed under me */
			break;
		}

		/* Already got a route? */
		route = NULL;
		list_for_each(rtmp, &peer->ksnp_routes) {
			route = list_entry(rtmp, ksock_route_t, ksnr_list);

			if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
				break;

			route = NULL;
		}
		if (route)
			continue;

		best_iface = NULL;
		best_nroutes = 0;
		best_netmatch = 0;

		LASSERT(net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);

		/* Select interface to connect from */
		for (j = 0; j < net->ksnn_ninterfaces; j++) {
			iface = &net->ksnn_interfaces[j];

			/* Using this interface already? */
			list_for_each(rtmp, &peer->ksnp_routes) {
				route = list_entry(rtmp, ksock_route_t,
						   ksnr_list);

				if (route->ksnr_myipaddr == iface->ksni_ipaddr)
					break;

				route = NULL;
			}
			if (route)
				continue;

			this_netmatch = (!((iface->ksni_ipaddr ^
					    newroute->ksnr_ipaddr) &
					   iface->ksni_netmask)) ? 1 : 0;

			if (!(!best_iface ||
			      best_netmatch < this_netmatch ||
			      (best_netmatch == this_netmatch &&
			       best_nroutes > iface->ksni_nroutes)))
				continue;

			best_iface = iface;
			best_netmatch = this_netmatch;
			best_nroutes = iface->ksni_nroutes;
		}

		if (!best_iface)
			continue;

		newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
		best_iface->ksni_nroutes++;

		ksocknal_add_route_locked(peer, newroute);
		newroute = NULL;
	}

	write_unlock_bh(global_lock);
	if (newroute)
		ksocknal_route_decref(newroute);
}

int
ksocknal_accept(lnet_ni_t *ni, struct socket *sock)
{
	ksock_connreq_t *cr;
	int rc;
	__u32 peer_ip;
	int peer_port;

	rc = lnet_sock_getaddr(sock, 1, &peer_ip, &peer_port);
	LASSERT(!rc); /* we succeeded before */

	LIBCFS_ALLOC(cr, sizeof(*cr));
	if (!cr) {
		LCONSOLE_ERROR_MSG(0x12f, "Dropping connection request from %pI4h: memory exhausted\n",
				   &peer_ip);
		return -ENOMEM;
	}

	lnet_ni_addref(ni);
	cr->ksncr_ni = ni;
	cr->ksncr_sock = sock;

	spin_lock_bh(&ksocknal_data.ksnd_connd_lock);

	list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
	wake_up(&ksocknal_data.ksnd_connd_waitq);

	spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
	return 0;
}

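/*
 * True iff a connect to 'ipaddr' is already in flight for this peer;
 * used below to resolve simultaneous-connect races in favour of the
 * higher NID.
 */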
static int
ksocknal_connecting(ksock_peer_t *peer, __u32 ipaddr)
{
	ksock_route_t *route;

	list_for_each_entry(route, &peer->ksnp_routes, ksnr_list) {
		if (route->ksnr_ipaddr == ipaddr)
			return route->ksnr_connecting;
	}
	return 0;
}

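/*
 * Bring up a new connection on 'sock', which is either one we dialled
 * (active: route != NULL, HELLO sent first) or one the acceptor handed
 * us (passive: peer identity learned from the incoming HELLO). On
 * success the conn is installed on the peer, handed to the emptiest
 * scheduler for its CPT, and any queued TXs that match it are moved
 * across. Positive rc values from the HELLO exchange (EALREADY,
 * EPROTO, ESTALE) denote recoverable races rather than hard failures.
 */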
int
ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route,
		     struct socket *sock, int type)
{
	rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
	LIST_HEAD(zombies);
	lnet_process_id_t peerid;
	struct list_head *tmp;
	__u64 incarnation;
	ksock_conn_t *conn;
	ksock_conn_t *conn2;
	ksock_peer_t *peer = NULL;
	ksock_peer_t *peer2;
	ksock_sched_t *sched;
	ksock_hello_msg_t *hello;
	int cpt;
	ksock_tx_t *tx;
	ksock_tx_t *txtmp;
	int rc;
	int active;
	char *warn = NULL;

	active = !!route;

	LASSERT(active == (type != SOCKLND_CONN_NONE));

	LIBCFS_ALLOC(conn, sizeof(*conn));
	if (!conn) {
		rc = -ENOMEM;
		goto failed_0;
	}

	conn->ksnc_peer = NULL;
	conn->ksnc_route = NULL;
	conn->ksnc_sock = sock;
	/*
	 * 2 refs: 1 for conn, and an extra ref prevents the socket
	 * being closed before establishment of the connection
	 */
	atomic_set(&conn->ksnc_sock_refcount, 2);
	conn->ksnc_type = type;
	ksocknal_lib_save_callback(sock, conn);
	atomic_set(&conn->ksnc_conn_refcount, 1); /* 1 ref for me */

	conn->ksnc_rx_ready = 0;
	conn->ksnc_rx_scheduled = 0;

	INIT_LIST_HEAD(&conn->ksnc_tx_queue);
	conn->ksnc_tx_ready = 0;
	conn->ksnc_tx_scheduled = 0;
	conn->ksnc_tx_carrier = NULL;
	atomic_set(&conn->ksnc_tx_nob, 0);

	LIBCFS_ALLOC(hello, offsetof(ksock_hello_msg_t,
				     kshm_ips[LNET_MAX_INTERFACES]));
	if (!hello) {
		rc = -ENOMEM;
		goto failed_1;
	}

	/* stash conn's local and remote addrs */
	rc = ksocknal_lib_get_conn_addrs(conn);
	if (rc)
		goto failed_1;

	/*
	 * Find out/confirm peer's NID and connection type and get the
	 * vector of interfaces she's willing to let me connect to.
	 * Passive connections use the listener timeout since the peer sends
	 * eagerly
	 */
	if (active) {
		peer = route->ksnr_peer;
		LASSERT(ni == peer->ksnp_ni);

		/* Active connection sends HELLO eagerly */
		hello->kshm_nips = ksocknal_local_ipvec(ni, hello->kshm_ips);
		peerid = peer->ksnp_id;

		write_lock_bh(global_lock);
		conn->ksnc_proto = peer->ksnp_proto;
		write_unlock_bh(global_lock);

		if (!conn->ksnc_proto) {
			conn->ksnc_proto = &ksocknal_protocol_v3x;
#if SOCKNAL_VERSION_DEBUG
			if (*ksocknal_tunables.ksnd_protocol == 2)
				conn->ksnc_proto = &ksocknal_protocol_v2x;
			else if (*ksocknal_tunables.ksnd_protocol == 1)
				conn->ksnc_proto = &ksocknal_protocol_v1x;
#endif
		}

		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
		if (rc)
			goto failed_1;
	} else {
		peerid.nid = LNET_NID_ANY;
		peerid.pid = LNET_PID_ANY;

		/* Passive, get protocol from peer */
		conn->ksnc_proto = NULL;
	}

	rc = ksocknal_recv_hello(ni, conn, hello, &peerid, &incarnation);
	if (rc < 0)
		goto failed_1;

	LASSERT(!rc || active);
	LASSERT(conn->ksnc_proto);
	LASSERT(peerid.nid != LNET_NID_ANY);

	cpt = lnet_cpt_of_nid(peerid.nid);

	if (active) {
		ksocknal_peer_addref(peer);
		write_lock_bh(global_lock);
	} else {
		rc = ksocknal_create_peer(&peer, ni, peerid);
		if (rc)
			goto failed_1;

		write_lock_bh(global_lock);

		/* called with a ref on ni, so shutdown can't have started */
		LASSERT(!((ksock_net_t *)ni->ni_data)->ksnn_shutdown);

		peer2 = ksocknal_find_peer_locked(ni, peerid);
		if (!peer2) {
			/*
			 * NB this puts an "empty" peer in the peer
			 * table (which takes my ref)
			 */
			list_add_tail(&peer->ksnp_list,
				      ksocknal_nid2peerlist(peerid.nid));
		} else {
			ksocknal_peer_decref(peer);
			peer = peer2;
		}

		/* +1 ref for me */
		ksocknal_peer_addref(peer);
		peer->ksnp_accepting++;

		/*
		 * Am I already connecting to this guy? Resolve in
		 * favour of higher NID...
		 */
		if (peerid.nid < ni->ni_nid &&
		    ksocknal_connecting(peer, conn->ksnc_ipaddr)) {
			rc = EALREADY;
			warn = "connection race resolution";
			goto failed_2;
		}
	}

	if (peer->ksnp_closing ||
	    (active && route->ksnr_deleted)) {
		/* peer/route got closed under me */
		rc = -ESTALE;
		warn = "peer/route removed";
		goto failed_2;
	}

	if (!peer->ksnp_proto) {
		/*
		 * Never connected before.
		 * NB recv_hello may have returned EPROTO to signal my peer
		 * wants a different protocol than the one I asked for.
		 */
		LASSERT(list_empty(&peer->ksnp_conns));

		peer->ksnp_proto = conn->ksnc_proto;
		peer->ksnp_incarnation = incarnation;
	}

	if (peer->ksnp_proto != conn->ksnc_proto ||
	    peer->ksnp_incarnation != incarnation) {
		/* Peer rebooted or I've got the wrong protocol version */
		ksocknal_close_peer_conns_locked(peer, 0, 0);

		peer->ksnp_proto = NULL;
		rc = ESTALE;
		warn = peer->ksnp_incarnation != incarnation ?
		       "peer rebooted" :
		       "wrong proto version";
		goto failed_2;
	}

	switch (rc) {
	default:
		LBUG();
	case 0:
		break;
	case EALREADY:
		warn = "lost conn race";
		goto failed_2;
	case EPROTO:
		warn = "retry with different protocol version";
		goto failed_2;
	}

	/*
	 * Refuse to duplicate an existing connection, unless this is a
	 * loopback connection
	 */
	if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
		list_for_each(tmp, &peer->ksnp_conns) {
			conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);

			if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
			    conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
			    conn2->ksnc_type != conn->ksnc_type)
				continue;

			/*
			 * Reply on a passive connection attempt so the peer
			 * realises we're connected.
			 */
			LASSERT(!rc);
			if (!active)
				rc = EALREADY;

			warn = "duplicate";
			goto failed_2;
		}
	}

	/*
	 * If the connection created by this route didn't bind to the IP
	 * address the route connected to, the connection/route matching
	 * code below probably isn't going to work.
	 */
	if (active &&
	    route->ksnr_ipaddr != conn->ksnc_ipaddr) {
		CERROR("Route %s %pI4h connected to %pI4h\n",
		       libcfs_id2str(peer->ksnp_id),
		       &route->ksnr_ipaddr,
		       &conn->ksnc_ipaddr);
	}

	/*
	 * Search for a route corresponding to the new connection and
	 * create an association. This allows incoming connections created
	 * by routes in my peer to match my own route entries so I don't
	 * continually create duplicate routes.
	 */
	list_for_each(tmp, &peer->ksnp_routes) {
		route = list_entry(tmp, ksock_route_t, ksnr_list);

		if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
			continue;

		ksocknal_associate_route_conn_locked(route, conn);
		break;
	}

	conn->ksnc_peer = peer; /* conn takes my ref on peer */
	peer->ksnp_last_alive = cfs_time_current();
	peer->ksnp_send_keepalive = 0;
	peer->ksnp_error = 0;

	sched = ksocknal_choose_scheduler_locked(cpt);
	sched->kss_nconns++;
	conn->ksnc_scheduler = sched;

	conn->ksnc_tx_last_post = cfs_time_current();
	/* Set the deadline for the outgoing HELLO to drain */
	conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
	conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
	mb(); /* order with adding to peer's conn list */

	list_add(&conn->ksnc_list, &peer->ksnp_conns);
	ksocknal_conn_addref(conn);

	ksocknal_new_packet(conn, 0);

	conn->ksnc_zc_capable = ksocknal_lib_zc_capable(conn);

	/* Take packets blocking for this connection. */
	list_for_each_entry_safe(tx, txtmp, &peer->ksnp_tx_queue, tx_list) {
		if (conn->ksnc_proto->pro_match_tx(conn, tx, tx->tx_nonblk) == SOCKNAL_MATCH_NO)
			continue;

		list_del(&tx->tx_list);
		ksocknal_queue_tx_locked(tx, conn);
	}

	write_unlock_bh(global_lock);

	/*
	 * We've now got a new connection. Any errors from here on are just
	 * like "normal" comms errors and we close the connection normally.
	 * NB (a) we still have to send the reply HELLO for passive
	 * connections,
	 * (b) normal I/O on the conn is blocked until I setup and call the
	 * socket callbacks.
	 */
	CDEBUG(D_NET, "New conn %s p %d.x %pI4h -> %pI4h/%d incarnation:%lld sched[%d:%d]\n",
	       libcfs_id2str(peerid), conn->ksnc_proto->pro_version,
	       &conn->ksnc_myipaddr, &conn->ksnc_ipaddr,
	       conn->ksnc_port, incarnation, cpt,
	       (int)(sched - &sched->kss_info->ksi_scheds[0]));

	if (active) {
		/* additional routes after interface exchange? */
		ksocknal_create_routes(peer, conn->ksnc_port,
				       hello->kshm_ips, hello->kshm_nips);
	} else {
		hello->kshm_nips = ksocknal_select_ips(peer, hello->kshm_ips,
						       hello->kshm_nips);
		rc = ksocknal_send_hello(ni, conn, peerid.nid, hello);
	}

	LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
				    kshm_ips[LNET_MAX_INTERFACES]));

	/*
	 * setup the socket AFTER I've received hello (it disables
	 * SO_LINGER). I might call back to the acceptor who may want
	 * to send a protocol version response and then close the
	 * socket; this ensures the socket only tears down after the
	 * response has been sent.
	 */
	if (!rc)
		rc = ksocknal_lib_setup_sock(sock);

	write_lock_bh(global_lock);

	/* NB my callbacks block while I hold ksnd_global_lock */
	ksocknal_lib_set_callback(sock, conn);

	if (!active)
		peer->ksnp_accepting--;

	write_unlock_bh(global_lock);

	if (rc) {
		write_lock_bh(global_lock);
		if (!conn->ksnc_closing) {
			/* could be closed by another thread */
			ksocknal_close_conn_locked(conn, rc);
		}
		write_unlock_bh(global_lock);
	} else if (!ksocknal_connsock_addref(conn)) {
		/* Allow I/O to proceed. */
		ksocknal_read_callback(conn);
		ksocknal_write_callback(conn);
		ksocknal_connsock_decref(conn);
	}

	ksocknal_connsock_decref(conn);
	ksocknal_conn_decref(conn);
	return rc;

 failed_2:
	if (!peer->ksnp_closing &&
	    list_empty(&peer->ksnp_conns) &&
	    list_empty(&peer->ksnp_routes)) {
		list_add(&zombies, &peer->ksnp_tx_queue);
		list_del_init(&peer->ksnp_tx_queue);
		ksocknal_unlink_peer_locked(peer);
	}

	write_unlock_bh(global_lock);

	if (warn) {
		if (rc < 0)
			CERROR("Not creating conn %s type %d: %s\n",
			       libcfs_id2str(peerid), conn->ksnc_type, warn);
		else
			CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
			       libcfs_id2str(peerid), conn->ksnc_type, warn);
	}

	if (!active) {
		if (rc > 0) {
			/*
			 * Request retry by replying with CONN_NONE
			 * ksnc_proto has been set already
			 */
			conn->ksnc_type = SOCKLND_CONN_NONE;
			hello->kshm_nips = 0;
			ksocknal_send_hello(ni, conn, peerid.nid, hello);
		}

		write_lock_bh(global_lock);
		peer->ksnp_accepting--;
		write_unlock_bh(global_lock);
	}

	ksocknal_txlist_done(ni, &zombies, 1);
	ksocknal_peer_decref(peer);

 failed_1:
	if (hello)
		LIBCFS_FREE(hello, offsetof(ksock_hello_msg_t,
					    kshm_ips[LNET_MAX_INTERFACES]));

	LIBCFS_FREE(conn, sizeof(*conn));

 failed_0:
	sock_release(sock);
	return rc;
}

void
ksocknal_close_conn_locked(ksock_conn_t *conn, int error)
{
	/*
	 * This just does the immediate housekeeping, and queues the
	 * connection for the reaper to terminate.
	 * Caller holds ksnd_global_lock exclusively in irq context
	 */
	ksock_peer_t *peer = conn->ksnc_peer;
	ksock_route_t *route;
	ksock_conn_t *conn2;
	struct list_head *tmp;

	LASSERT(!peer->ksnp_error);
	LASSERT(!conn->ksnc_closing);
	conn->ksnc_closing = 1;

	/* ksnd_deathrow_conns takes over peer's ref */
	list_del(&conn->ksnc_list);

	route = conn->ksnc_route;
	if (route) {
		/* dissociate conn from route... */
		LASSERT(!route->ksnr_deleted);
		LASSERT(route->ksnr_connected & (1 << conn->ksnc_type));

		conn2 = NULL;
		list_for_each(tmp, &peer->ksnp_conns) {
			conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);

			if (conn2->ksnc_route == route &&
			    conn2->ksnc_type == conn->ksnc_type)
				break;

			conn2 = NULL;
		}
		if (!conn2)
			route->ksnr_connected &= ~(1 << conn->ksnc_type);

		conn->ksnc_route = NULL;

#if 0		/* irrelevant with only eager routes */
		/* make route least favourite */
		list_del(&route->ksnr_list);
		list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
#endif
		ksocknal_route_decref(route); /* drop conn's ref on route */
	}

	if (list_empty(&peer->ksnp_conns)) {
		/* No more connections to this peer */

		if (!list_empty(&peer->ksnp_tx_queue)) {
			ksock_tx_t *tx;

			LASSERT(conn->ksnc_proto == &ksocknal_protocol_v3x);

			/*
			 * throw them to the last connection...,
			 * these TXs will be sent to /dev/null by scheduler
			 */
			list_for_each_entry(tx, &peer->ksnp_tx_queue,
					    tx_list)
				ksocknal_tx_prep(conn, tx);

			spin_lock_bh(&conn->ksnc_scheduler->kss_lock);
			list_splice_init(&peer->ksnp_tx_queue,
					 &conn->ksnc_tx_queue);
			spin_unlock_bh(&conn->ksnc_scheduler->kss_lock);
		}

		peer->ksnp_proto = NULL; /* renegotiate protocol version */
		peer->ksnp_error = error; /* stash last conn close reason */

		if (list_empty(&peer->ksnp_routes)) {
			/*
			 * I've just closed last conn belonging to a
			 * peer with no routes to it
			 */
			ksocknal_unlink_peer_locked(peer);
		}
	}

	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);

	list_add_tail(&conn->ksnc_list,
		      &ksocknal_data.ksnd_deathrow_conns);
	wake_up(&ksocknal_data.ksnd_reaper_waitq);

	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
}

void
ksocknal_peer_failed(ksock_peer_t *peer)
{
	int notify = 0;
	unsigned long last_alive = 0;

	/*
	 * There has been a connection failure or comms error; but I'll only
	 * tell LNET I think the peer is dead if it's to another kernel and
	 * there are no connections or connection attempts in existence.
	 */
	read_lock(&ksocknal_data.ksnd_global_lock);

	if (!(peer->ksnp_id.pid & LNET_PID_USERFLAG) &&
	    list_empty(&peer->ksnp_conns) &&
	    !peer->ksnp_accepting &&
	    !ksocknal_find_connecting_route_locked(peer)) {
		notify = 1;
		last_alive = peer->ksnp_last_alive;
	}

	read_unlock(&ksocknal_data.ksnd_global_lock);

	if (notify)
		lnet_notify(peer->ksnp_ni, peer->ksnp_id.nid, 0,
			    last_alive);
}

void
ksocknal_finalize_zcreq(ksock_conn_t *conn)
{
	ksock_peer_t *peer = conn->ksnc_peer;
	ksock_tx_t *tx;
	ksock_tx_t *tmp;
	LIST_HEAD(zlist);

	/*
	 * NB safe to finalize TXs because closing of socket will
	 * abort all buffered data
	 */
	LASSERT(!conn->ksnc_sock);

	spin_lock(&peer->ksnp_lock);

	list_for_each_entry_safe(tx, tmp, &peer->ksnp_zc_req_list, tx_zc_list) {
		if (tx->tx_conn != conn)
			continue;

		LASSERT(tx->tx_msg.ksm_zc_cookies[0]);

		tx->tx_msg.ksm_zc_cookies[0] = 0;
		tx->tx_zc_aborted = 1; /* mark it as not-acked */
		list_del(&tx->tx_zc_list);
		list_add(&tx->tx_zc_list, &zlist);
	}

	spin_unlock(&peer->ksnp_lock);

	while (!list_empty(&zlist)) {
		tx = list_entry(zlist.next, ksock_tx_t, tx_zc_list);

		list_del(&tx->tx_zc_list);
		ksocknal_tx_decref(tx);
	}
}

void
ksocknal_terminate_conn(ksock_conn_t *conn)
{
	/*
	 * This gets called by the reaper (guaranteed thread context) to
	 * disengage the socket from its callbacks and close it.
	 * ksnc_refcount will eventually hit zero, and then the reaper will
	 * destroy it.
	 */
	ksock_peer_t *peer = conn->ksnc_peer;
	ksock_sched_t *sched = conn->ksnc_scheduler;
	int failed = 0;

	LASSERT(conn->ksnc_closing);

	/* wake up the scheduler to "send" all remaining packets to /dev/null */
	spin_lock_bh(&sched->kss_lock);

	/* a closing conn is always ready to tx */
	conn->ksnc_tx_ready = 1;

	if (!conn->ksnc_tx_scheduled &&
	    !list_empty(&conn->ksnc_tx_queue)) {
		list_add_tail(&conn->ksnc_tx_list,
			      &sched->kss_tx_conns);
		conn->ksnc_tx_scheduled = 1;
		/* extra ref for scheduler */
		ksocknal_conn_addref(conn);

		wake_up(&sched->kss_waitq);
	}

	spin_unlock_bh(&sched->kss_lock);

	/* serialise with callbacks */
	write_lock_bh(&ksocknal_data.ksnd_global_lock);

	ksocknal_lib_reset_callback(conn->ksnc_sock, conn);

	/*
	 * OK, so this conn may not be completely disengaged from its
	 * scheduler yet, but it _has_ committed to terminate...
	 */
	conn->ksnc_scheduler->kss_nconns--;

	if (peer->ksnp_error) {
		/* peer's last conn closed in error */
		LASSERT(list_empty(&peer->ksnp_conns));
		failed = 1;
		peer->ksnp_error = 0; /* avoid multiple notifications */
	}

	write_unlock_bh(&ksocknal_data.ksnd_global_lock);

	if (failed)
		ksocknal_peer_failed(peer);

	/*
	 * The socket is closed on the final put; either here, or in
	 * ksocknal_{send,recv}msg(). Since we set up the linger2 option
	 * when the connection was established, this will close the socket
	 * immediately, aborting anything buffered in it. Any hung
	 * zero-copy transmits will therefore complete in finite time.
	 */
	ksocknal_connsock_decref(conn);
}

void
ksocknal_queue_zombie_conn(ksock_conn_t *conn)
{
	/* Queue the conn for the reaper to destroy */

	LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
	spin_lock_bh(&ksocknal_data.ksnd_reaper_lock);

	list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
	wake_up(&ksocknal_data.ksnd_reaper_waitq);

	spin_unlock_bh(&ksocknal_data.ksnd_reaper_lock);
}

void
ksocknal_destroy_conn(ksock_conn_t *conn)
{
	unsigned long last_rcv;

	/* Final coup-de-grace of the reaper */
	CDEBUG(D_NET, "connection %p\n", conn);

	LASSERT(!atomic_read(&conn->ksnc_conn_refcount));
	LASSERT(!atomic_read(&conn->ksnc_sock_refcount));
	LASSERT(!conn->ksnc_sock);
	LASSERT(!conn->ksnc_route);
	LASSERT(!conn->ksnc_tx_scheduled);
	LASSERT(!conn->ksnc_rx_scheduled);
	LASSERT(list_empty(&conn->ksnc_tx_queue));

	/* complete current receive if any */
	switch (conn->ksnc_rx_state) {
	case SOCKNAL_RX_LNET_PAYLOAD:
		last_rcv = conn->ksnc_rx_deadline -
			   cfs_time_seconds(*ksocknal_tunables.ksnd_timeout);
		CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n",
		       libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type,
		       &conn->ksnc_ipaddr, conn->ksnc_port,
		       conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left,
		       cfs_duration_sec(cfs_time_sub(cfs_time_current(),
						     last_rcv)));
		lnet_finalize(conn->ksnc_peer->ksnp_ni,
			      conn->ksnc_cookie, -EIO);
		break;
	case SOCKNAL_RX_LNET_HEADER:
		if (conn->ksnc_rx_started)
			CERROR("Incomplete receive of lnet header from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
			       &conn->ksnc_ipaddr, conn->ksnc_port,
			       conn->ksnc_proto->pro_version);
		break;
	case SOCKNAL_RX_KSM_HEADER:
		if (conn->ksnc_rx_started)
			CERROR("Incomplete receive of ksock message from %s, ip %pI4h:%d, with error, protocol: %d.x.\n",
			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
			       &conn->ksnc_ipaddr, conn->ksnc_port,
			       conn->ksnc_proto->pro_version);
		break;
	case SOCKNAL_RX_SLOP:
		if (conn->ksnc_rx_started)
			CERROR("Incomplete receive of slops from %s, ip %pI4h:%d, with error\n",
			       libcfs_id2str(conn->ksnc_peer->ksnp_id),
			       &conn->ksnc_ipaddr, conn->ksnc_port);
		break;
	default:
		LBUG();
		break;
	}

	ksocknal_peer_decref(conn->ksnc_peer);

	LIBCFS_FREE(conn, sizeof(*conn));
}

int
ksocknal_close_peer_conns_locked(ksock_peer_t *peer, __u32 ipaddr, int why)
{
	ksock_conn_t *conn;
	struct list_head *ctmp;
	struct list_head *cnxt;
	int count = 0;

	list_for_each_safe(ctmp, cnxt, &peer->ksnp_conns) {
		conn = list_entry(ctmp, ksock_conn_t, ksnc_list);

		if (!ipaddr || conn->ksnc_ipaddr == ipaddr) {
			count++;
			ksocknal_close_conn_locked(conn, why);
		}
	}

	return count;
}

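/*
 * Close every connection this peer has to conn's remote IP, whatever
 * its type; returns the number of conns queued for the reaper.
 */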
int
ksocknal_close_conn_and_siblings(ksock_conn_t *conn, int why)
{
	ksock_peer_t *peer = conn->ksnc_peer;
	__u32 ipaddr = conn->ksnc_ipaddr;
	int count;

	write_lock_bh(&ksocknal_data.ksnd_global_lock);

	count = ksocknal_close_peer_conns_locked(peer, ipaddr, why);

	write_unlock_bh(&ksocknal_data.ksnd_global_lock);

	return count;
}

int
ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr)
{
	ksock_peer_t *peer;
	struct list_head *ptmp;
	struct list_head *pnxt;
	int lo;
	int hi;
	int i;
	int count = 0;

	write_lock_bh(&ksocknal_data.ksnd_global_lock);

	if (id.nid != LNET_NID_ANY) {
		lo = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
		hi = (int)(ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers);
	} else {
		lo = 0;
		hi = ksocknal_data.ksnd_peer_hash_size - 1;
	}

	for (i = lo; i <= hi; i++) {
		list_for_each_safe(ptmp, pnxt,
				   &ksocknal_data.ksnd_peers[i]) {
			peer = list_entry(ptmp, ksock_peer_t, ksnp_list);

			if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
			      (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
				continue;

			count += ksocknal_close_peer_conns_locked(peer, ipaddr, 0);
		}
	}

	write_unlock_bh(&ksocknal_data.ksnd_global_lock);

	/* wildcards always succeed */
	if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || !ipaddr)
		return 0;

	if (!count)
		return -ENOENT;
	else
		return 0;
}

void
ksocknal_notify(lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
{
	/*
	 * The router is telling me she's been notified of a change in
	 * gateway state....
	 */
	lnet_process_id_t id = {0};

	id.nid = gw_nid;
	id.pid = LNET_PID_ANY;

	CDEBUG(D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
	       alive ? "up" : "down");

	if (!alive) {
		/* If the gateway crashed, close all open connections... */
		ksocknal_close_matching_conns(id, 0);
		return;
	}

	/*
	 * ...otherwise do nothing. We can only establish new connections
	 * if we have autoroutes, and these connect on demand.
	 */
}

void
ksocknal_query(lnet_ni_t *ni, lnet_nid_t nid, unsigned long *when)
{
	int connect = 1;
	unsigned long last_alive = 0;
	unsigned long now = cfs_time_current();
	ksock_peer_t *peer = NULL;
	rwlock_t *glock = &ksocknal_data.ksnd_global_lock;
	lnet_process_id_t id = {
		.nid = nid,
		.pid = LNET_PID_LUSTRE,
	};

	read_lock(glock);

	peer = ksocknal_find_peer_locked(ni, id);
	if (peer) {
		struct list_head *tmp;
		ksock_conn_t *conn;
		int bufnob;

		list_for_each(tmp, &peer->ksnp_conns) {
			conn = list_entry(tmp, ksock_conn_t, ksnc_list);
			bufnob = conn->ksnc_sock->sk->sk_wmem_queued;

			if (bufnob < conn->ksnc_tx_bufnob) {
				/* something got ACKed */
				conn->ksnc_tx_deadline =
					cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
				peer->ksnp_last_alive = now;
				conn->ksnc_tx_bufnob = bufnob;
			}
		}

		last_alive = peer->ksnp_last_alive;
		if (!ksocknal_find_connectable_route_locked(peer))
			connect = 0;
	}

	read_unlock(glock);

	if (last_alive)
		*when = last_alive;

	CDEBUG(D_NET, "Peer %s %p, alive %ld secs ago, connect %d\n",
	       libcfs_nid2str(nid), peer,
	       last_alive ? cfs_duration_sec(now - last_alive) : -1,
	       connect);

	if (!connect)
		return;

	ksocknal_add_peer(ni, id, LNET_NIDADDR(nid), lnet_acceptor_port());

	write_lock_bh(glock);

	peer = ksocknal_find_peer_locked(ni, id);
	if (peer)
		ksocknal_launch_all_connections_locked(peer);

	write_unlock_bh(glock);
}

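/*
 * Push (flush) each of a peer's sockets in turn via
 * ksocknal_lib_push_conn(). The global lock is dropped between
 * iterations, so the walk restarts from an index and takes a conn
 * ref before releasing the lock.
 */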
static void
ksocknal_push_peer(ksock_peer_t *peer)
{
	int index;
	int i;
	struct list_head *tmp;
	ksock_conn_t *conn;

	for (index = 0; ; index++) {
		read_lock(&ksocknal_data.ksnd_global_lock);

		i = 0;
		conn = NULL;

		list_for_each(tmp, &peer->ksnp_conns) {
			if (i++ == index) {
				conn = list_entry(tmp, ksock_conn_t,
						  ksnc_list);
				ksocknal_conn_addref(conn);
				break;
			}
		}

		read_unlock(&ksocknal_data.ksnd_global_lock);

		if (!conn)
			break;

		ksocknal_lib_push_conn(conn);
		ksocknal_conn_decref(conn);
	}
}

static int ksocknal_push(lnet_ni_t *ni, lnet_process_id_t id)
{
	struct list_head *start;
	struct list_head *end;
	struct list_head *tmp;
	int rc = -ENOENT;
	unsigned int hsize = ksocknal_data.ksnd_peer_hash_size;

	if (id.nid == LNET_NID_ANY) {
		start = &ksocknal_data.ksnd_peers[0];
		end = &ksocknal_data.ksnd_peers[hsize - 1];
	} else {
		start = ksocknal_nid2peerlist(id.nid);
		end = ksocknal_nid2peerlist(id.nid);
	}

	for (tmp = start; tmp <= end; tmp++) {
		int peer_off; /* searching offset in peer hash table */

		for (peer_off = 0; ; peer_off++) {
			ksock_peer_t *peer;
			int i = 0;

			read_lock(&ksocknal_data.ksnd_global_lock);
			list_for_each_entry(peer, tmp, ksnp_list) {
				if (!((id.nid == LNET_NID_ANY ||
				       id.nid == peer->ksnp_id.nid) &&
				      (id.pid == LNET_PID_ANY ||
				       id.pid == peer->ksnp_id.pid)))
					continue;

				if (i++ == peer_off) {
					ksocknal_peer_addref(peer);
					break;
				}
			}
			read_unlock(&ksocknal_data.ksnd_global_lock);

			if (!i) /* no match */
				break;

			rc = 0;
			ksocknal_push_peer(peer);
			ksocknal_peer_decref(peer);
		}
	}
	return rc;
}

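/*
 * Register a local interface (IP + netmask) with this NI and take an
 * initial census of the peers and routes already using that address.
 * Duplicate registrations are silently ignored; only connections
 * established afterwards will notice the new interface.
 */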
1982 static int
1983 ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
1984 {
1985 ksock_net_t *net = ni->ni_data;
1986 ksock_interface_t *iface;
1987 int rc;
1988 int i;
1989 int j;
1990 struct list_head *ptmp;
1991 ksock_peer_t *peer;
1992 struct list_head *rtmp;
1993 ksock_route_t *route;
1994
1995 if (!ipaddress || !netmask)
1996 return -EINVAL;
1997
1998 write_lock_bh(&ksocknal_data.ksnd_global_lock);
1999
2000 iface = ksocknal_ip2iface(ni, ipaddress);
2001 if (iface) {
2002 /* silently ignore dups */
2003 rc = 0;
2004 } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
2005 rc = -ENOSPC;
2006 } else {
2007 iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
2008
2009 iface->ksni_ipaddr = ipaddress;
2010 iface->ksni_netmask = netmask;
2011 iface->ksni_nroutes = 0;
2012 iface->ksni_npeers = 0;
2013
2014 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2015 list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
2016 peer = list_entry(ptmp, ksock_peer_t,
2017 ksnp_list);
2018
2019 for (j = 0; j < peer->ksnp_n_passive_ips; j++)
2020 if (peer->ksnp_passive_ips[j] == ipaddress)
2021 iface->ksni_npeers++;
2022
2023 list_for_each(rtmp, &peer->ksnp_routes) {
2024 route = list_entry(rtmp, ksock_route_t,
2025 ksnr_list);
2026
2027 if (route->ksnr_myipaddr == ipaddress)
2028 iface->ksni_nroutes++;
2029 }
2030 }
2031 }
2032
2033 rc = 0;
2034 /* NB only new connections will pay attention to the new interface! */
2035 }
2036
2037 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2038
2039 return rc;
2040 }
2041
2042 static void
2043 ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
2044 {
2045 struct list_head *tmp;
2046 struct list_head *nxt;
2047 ksock_route_t *route;
2048 ksock_conn_t *conn;
2049 int i;
2050 int j;
2051
2052 for (i = 0; i < peer->ksnp_n_passive_ips; i++)
2053 if (peer->ksnp_passive_ips[i] == ipaddr) {
2054 for (j = i + 1; j < peer->ksnp_n_passive_ips; j++)
2055 peer->ksnp_passive_ips[j - 1] =
2056 peer->ksnp_passive_ips[j];
2057 peer->ksnp_n_passive_ips--;
2058 break;
2059 }
2060
2061 list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
2062 route = list_entry(tmp, ksock_route_t, ksnr_list);
2063
2064 if (route->ksnr_myipaddr != ipaddr)
2065 continue;
2066
2067 if (route->ksnr_share_count) {
2068 /* Manually created; keep, but unbind */
2069 route->ksnr_myipaddr = 0;
2070 } else {
2071 ksocknal_del_route_locked(route);
2072 }
2073 }
2074
2075 list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
2076 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
2077
2078 if (conn->ksnc_myipaddr == ipaddr)
2079 ksocknal_close_conn_locked(conn, 0);
2080 }
2081 }
2082
2083 static int
2084 ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
2085 {
2086 ksock_net_t *net = ni->ni_data;
2087 int rc = -ENOENT;
2088 struct list_head *tmp;
2089 struct list_head *nxt;
2090 ksock_peer_t *peer;
2091 __u32 this_ip;
2092 int i;
2093 int j;
2094
2095 write_lock_bh(&ksocknal_data.ksnd_global_lock);
2096
2097 for (i = 0; i < net->ksnn_ninterfaces; i++) {
2098 this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
2099
2100 if (!(!ipaddress || ipaddress == this_ip))
2101 continue;
2102
2103 rc = 0;
2104
2105 for (j = i + 1; j < net->ksnn_ninterfaces; j++)
2106 net->ksnn_interfaces[j - 1] =
2107 net->ksnn_interfaces[j];
2108
2109 net->ksnn_ninterfaces--;
2110
2111 for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
2112 list_for_each_safe(tmp, nxt,
2113 &ksocknal_data.ksnd_peers[j]) {
2114 peer = list_entry(tmp, ksock_peer_t, ksnp_list);
2115
2116 if (peer->ksnp_ni != ni)
2117 continue;
2118
2119 ksocknal_peer_del_interface_locked(peer, this_ip);
2120 }
2121 }
2122 }
2123
2124 write_unlock_bh(&ksocknal_data.ksnd_global_lock);
2125
2126 return rc;
2127 }
2128
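/*
 * LND control hook: decode the libcfs ioctls that manage socklnd
 * interfaces, peers and connections.  As a rough sketch (the actual
 * userland plumbing, e.g. lctl, lives outside this file), a caller
 * adding a peer would fill a struct libcfs_ioctl_data along the
 * lines of
 *
 *	data.ioc_nid    = nid;      (peer NID)
 *	data.ioc_u32[0] = ipaddr;   (peer IP)
 *	data.ioc_u32[1] = port;     (peer port)
 *
 * and issue IOC_LIBCFS_ADD_PEER; the field usage matches the
 * decoding below.
 */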
2129 int
2130 ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
2131 {
2132 lnet_process_id_t id = {0};
2133 struct libcfs_ioctl_data *data = arg;
2134 int rc;
2135
2136 switch (cmd) {
2137 case IOC_LIBCFS_GET_INTERFACE: {
2138 ksock_net_t *net = ni->ni_data;
2139 ksock_interface_t *iface;
2140
2141 read_lock(&ksocknal_data.ksnd_global_lock);
2142
2143 if (data->ioc_count >= (__u32)net->ksnn_ninterfaces) {
2144 rc = -ENOENT;
2145 } else {
2146 rc = 0;
2147 iface = &net->ksnn_interfaces[data->ioc_count];
2148
2149 data->ioc_u32[0] = iface->ksni_ipaddr;
2150 data->ioc_u32[1] = iface->ksni_netmask;
2151 data->ioc_u32[2] = iface->ksni_npeers;
2152 data->ioc_u32[3] = iface->ksni_nroutes;
2153 }
2154
2155 read_unlock(&ksocknal_data.ksnd_global_lock);
2156 return rc;
2157 }
2158
2159 case IOC_LIBCFS_ADD_INTERFACE:
2160 return ksocknal_add_interface(ni,
2161 data->ioc_u32[0], /* IP address */
2162 data->ioc_u32[1]); /* net mask */
2163
2164 case IOC_LIBCFS_DEL_INTERFACE:
2165 return ksocknal_del_interface(ni,
2166 data->ioc_u32[0]); /* IP address */
2167
2168 case IOC_LIBCFS_GET_PEER: {
2169 __u32 myip = 0;
2170 __u32 ip = 0;
2171 int port = 0;
2172 int conn_count = 0;
2173 int share_count = 0;
2174
2175 rc = ksocknal_get_peer_info(ni, data->ioc_count,
2176 &id, &myip, &ip, &port,
2177 &conn_count, &share_count);
2178 if (rc)
2179 return rc;
2180
2181 data->ioc_nid = id.nid;
2182 data->ioc_count = share_count;
2183 data->ioc_u32[0] = ip;
2184 data->ioc_u32[1] = port;
2185 data->ioc_u32[2] = myip;
2186 data->ioc_u32[3] = conn_count;
2187 data->ioc_u32[4] = id.pid;
2188 return 0;
2189 }
2190
2191 case IOC_LIBCFS_ADD_PEER:
2192 id.nid = data->ioc_nid;
2193 id.pid = LNET_PID_LUSTRE;
2194 return ksocknal_add_peer(ni, id,
2195 data->ioc_u32[0], /* IP */
2196 data->ioc_u32[1]); /* port */
2197
2198 case IOC_LIBCFS_DEL_PEER:
2199 id.nid = data->ioc_nid;
2200 id.pid = LNET_PID_ANY;
2201 return ksocknal_del_peer(ni, id,
2202 data->ioc_u32[0]); /* IP */
2203
2204 case IOC_LIBCFS_GET_CONN: {
2205 int txmem;
2206 int rxmem;
2207 int nagle;
2208 ksock_conn_t *conn = ksocknal_get_conn_by_idx(ni, data->ioc_count);
2209
2210 if (!conn)
2211 return -ENOENT;
2212
2213 ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
2214
2215 data->ioc_count = txmem;
2216 data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
2217 data->ioc_flags = nagle;
2218 data->ioc_u32[0] = conn->ksnc_ipaddr;
2219 data->ioc_u32[1] = conn->ksnc_port;
2220 data->ioc_u32[2] = conn->ksnc_myipaddr;
2221 data->ioc_u32[3] = conn->ksnc_type;
2222 data->ioc_u32[4] = conn->ksnc_scheduler->kss_info->ksi_cpt;
2223 data->ioc_u32[5] = rxmem;
2224 data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
2225 ksocknal_conn_decref(conn);
2226 return 0;
2227 }
2228
2229 case IOC_LIBCFS_CLOSE_CONNECTION:
2230 id.nid = data->ioc_nid;
2231 id.pid = LNET_PID_ANY;
2232 return ksocknal_close_matching_conns(id,
2233 data->ioc_u32[0]);
2234
2235 case IOC_LIBCFS_REGISTER_MYNID:
2236 /* Ignore if this is a noop */
2237 if (data->ioc_nid == ni->ni_nid)
2238 return 0;
2239
2240 CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
2241 libcfs_nid2str(data->ioc_nid),
2242 libcfs_nid2str(ni->ni_nid));
2243 return -EINVAL;
2244
2245 case IOC_LIBCFS_PUSH_CONNECTION:
2246 id.nid = data->ioc_nid;
2247 id.pid = LNET_PID_ANY;
2248 return ksocknal_push(ni, id);
2249
2250 default:
2251 return -EINVAL;
2252 }
2253 /* not reached */
2254 }
2255
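/*
 * Free the global buffers: the per-CPT scheduler arrays, the peer hash
 * table, and any noop txs still cached on the idle list.
 */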
2256 static void
2257 ksocknal_free_buffers(void)
2258 {
2259 LASSERT(!atomic_read(&ksocknal_data.ksnd_nactive_txs));
2260
2261 if (ksocknal_data.ksnd_sched_info) {
2262 struct ksock_sched_info *info;
2263 int i;
2264
2265 cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
2266 if (info->ksi_scheds) {
2267 LIBCFS_FREE(info->ksi_scheds,
2268 info->ksi_nthreads_max *
2269 sizeof(info->ksi_scheds[0]));
2270 }
2271 }
2272 cfs_percpt_free(ksocknal_data.ksnd_sched_info);
2273 }
2274
2275 LIBCFS_FREE(ksocknal_data.ksnd_peers,
2276 sizeof(struct list_head) *
2277 ksocknal_data.ksnd_peer_hash_size);
2278
2279 spin_lock(&ksocknal_data.ksnd_tx_lock);
2280
2281 if (!list_empty(&ksocknal_data.ksnd_idle_noop_txs)) {
2282 struct list_head zlist;
2283 ksock_tx_t *tx;
2284
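/*
 * Transfer the whole idle list onto the local zlist head so it
 * can be drained after dropping ksnd_tx_lock.
 */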
2285 list_add(&zlist, &ksocknal_data.ksnd_idle_noop_txs);
2286 list_del_init(&ksocknal_data.ksnd_idle_noop_txs);
2287 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2288
2289 while (!list_empty(&zlist)) {
2290 tx = list_entry(zlist.next, ksock_tx_t, tx_list);
2291 list_del(&tx->tx_list);
2292 LIBCFS_FREE(tx, tx->tx_desc_size);
2293 }
2294 } else {
2295 spin_unlock(&ksocknal_data.ksnd_tx_lock);
2296 }
2297 }
2298
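/*
 * Undo ksocknal_base_startup(): check that all peers, conns and queues
 * have drained, flag shutdown, wake the connd, reaper and scheduler
 * threads, then wait for ksnd_nthreads to fall to zero before freeing
 * the buffers.
 */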
2299 static void
2300 ksocknal_base_shutdown(void)
2301 {
2302 struct ksock_sched_info *info;
2303 ksock_sched_t *sched;
2304 int i;
2305 int j;
2306
2307 LASSERT(!ksocknal_data.ksnd_nnets);
2308
2309 switch (ksocknal_data.ksnd_init) {
2310 default:
2311 LASSERT(0);
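/* fall through */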
2312
2313 case SOCKNAL_INIT_ALL:
2314 case SOCKNAL_INIT_DATA:
2315 LASSERT(ksocknal_data.ksnd_peers);
2316 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
2317 LASSERT(list_empty(&ksocknal_data.ksnd_peers[i]));
2318
2319 LASSERT(list_empty(&ksocknal_data.ksnd_nets));
2320 LASSERT(list_empty(&ksocknal_data.ksnd_enomem_conns));
2321 LASSERT(list_empty(&ksocknal_data.ksnd_zombie_conns));
2322 LASSERT(list_empty(&ksocknal_data.ksnd_connd_connreqs));
2323 LASSERT(list_empty(&ksocknal_data.ksnd_connd_routes));
2324
2325 if (ksocknal_data.ksnd_sched_info) {
2326 cfs_percpt_for_each(info, i,
2327 ksocknal_data.ksnd_sched_info) {
2328 if (!info->ksi_scheds)
2329 continue;
2330
2331 for (j = 0; j < info->ksi_nthreads_max; j++) {
2332 sched = &info->ksi_scheds[j];
2333 LASSERT(list_empty(
2334 &sched->kss_tx_conns));
2335 LASSERT(list_empty(
2336 &sched->kss_rx_conns));
2337 LASSERT(list_empty(
2338 &sched->kss_zombie_noop_txs));
2339 LASSERT(!sched->kss_nconns);
2340 }
2341 }
2342 }
2343
2344 /* flag threads to terminate; wake and wait for them to die */
2345 ksocknal_data.ksnd_shuttingdown = 1;
2346 wake_up_all(&ksocknal_data.ksnd_connd_waitq);
2347 wake_up_all(&ksocknal_data.ksnd_reaper_waitq);
2348
2349 if (ksocknal_data.ksnd_sched_info) {
2350 cfs_percpt_for_each(info, i,
2351 ksocknal_data.ksnd_sched_info) {
2352 if (!info->ksi_scheds)
2353 continue;
2354
2355 for (j = 0; j < info->ksi_nthreads_max; j++) {
2356 sched = &info->ksi_scheds[j];
2357 wake_up_all(&sched->kss_waitq);
2358 }
2359 }
2360 }
2361
2362 i = 4;
2363 read_lock(&ksocknal_data.ksnd_global_lock);
2364 while (ksocknal_data.ksnd_nthreads) {
2365 i++;
2366 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* warn only when i is a power of 2 */
2367 "waiting for %d threads to terminate\n",
2368 ksocknal_data.ksnd_nthreads);
2369 read_unlock(&ksocknal_data.ksnd_global_lock);
2370 set_current_state(TASK_UNINTERRUPTIBLE);
2371 schedule_timeout(cfs_time_seconds(1));
2372 read_lock(&ksocknal_data.ksnd_global_lock);
2373 }
2374 read_unlock(&ksocknal_data.ksnd_global_lock);
2375
2376 ksocknal_free_buffers();
2377
2378 ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
2379 break;
2380 }
2381
2382 module_put(THIS_MODULE);
2383 }
2384
2385 static __u64
2386 ksocknal_new_incarnation(void)
2387 {
2388 /* The incarnation number is the time at which this module was loaded;
2389 * it identifies this particular instance of the socknal.
2390 */
2391 return ktime_get_ns();
2392 }
2393
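/*
 * One-time initialisation of the module-global state: allocate the
 * peer hash table, size and allocate the per-CPT scheduler pools, and
 * spawn the connd and reaper threads.  Any failure unwinds through
 * ksocknal_base_shutdown().
 */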
2394 static int
2395 ksocknal_base_startup(void)
2396 {
2397 struct ksock_sched_info *info;
2398 int rc;
2399 int i;
2400
2401 LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
2402 LASSERT(!ksocknal_data.ksnd_nnets);
2403
2404 memset(&ksocknal_data, 0, sizeof(ksocknal_data)); /* zero pointers */
2405
2406 ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
2407 LIBCFS_ALLOC(ksocknal_data.ksnd_peers,
2408 sizeof(struct list_head) *
2409 ksocknal_data.ksnd_peer_hash_size);
2410 if (!ksocknal_data.ksnd_peers)
2411 return -ENOMEM;
2412
2413 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
2414 INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
2415
2416 rwlock_init(&ksocknal_data.ksnd_global_lock);
2417 INIT_LIST_HEAD(&ksocknal_data.ksnd_nets);
2418
2419 spin_lock_init(&ksocknal_data.ksnd_reaper_lock);
2420 INIT_LIST_HEAD(&ksocknal_data.ksnd_enomem_conns);
2421 INIT_LIST_HEAD(&ksocknal_data.ksnd_zombie_conns);
2422 INIT_LIST_HEAD(&ksocknal_data.ksnd_deathrow_conns);
2423 init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
2424
2425 spin_lock_init(&ksocknal_data.ksnd_connd_lock);
2426 INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_connreqs);
2427 INIT_LIST_HEAD(&ksocknal_data.ksnd_connd_routes);
2428 init_waitqueue_head(&ksocknal_data.ksnd_connd_waitq);
2429
2430 spin_lock_init(&ksocknal_data.ksnd_tx_lock);
2431 INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_noop_txs);
2432
2433 /* NB memset above zeros whole of ksocknal_data */
2434
2435 /* flag lists/ptrs/locks initialised */
2436 ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
2437 try_module_get(THIS_MODULE);
2438
2439 ksocknal_data.ksnd_sched_info = cfs_percpt_alloc(lnet_cpt_table(),
2440 sizeof(*info));
2441 if (!ksocknal_data.ksnd_sched_info)
2442 goto failed;
2443
2444 cfs_percpt_for_each(info, i, ksocknal_data.ksnd_sched_info) {
2445 ksock_sched_t *sched;
2446 int nthrs;
2447
2448 nthrs = cfs_cpt_weight(lnet_cpt_table(), i);
2449 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2450 nthrs = min(nthrs, *ksocknal_tunables.ksnd_nscheds);
2451 } else {
2452 /*
2453 * cap at half of the CPUs; assume the other half should be
2454 * reserved for upper layer modules
2455 */
2456 nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2457 }
2458
2459 info->ksi_nthreads_max = nthrs;
2460 info->ksi_cpt = i;
2461
2462 LIBCFS_CPT_ALLOC(info->ksi_scheds, lnet_cpt_table(), i,
2463 info->ksi_nthreads_max * sizeof(*sched));
2464 if (!info->ksi_scheds)
2465 goto failed;
2466
2467 for (; nthrs > 0; nthrs--) {
2468 sched = &info->ksi_scheds[nthrs - 1];
2469
2470 sched->kss_info = info;
2471 spin_lock_init(&sched->kss_lock);
2472 INIT_LIST_HEAD(&sched->kss_rx_conns);
2473 INIT_LIST_HEAD(&sched->kss_tx_conns);
2474 INIT_LIST_HEAD(&sched->kss_zombie_noop_txs);
2475 init_waitqueue_head(&sched->kss_waitq);
2476 }
2477 }
2478
2479 ksocknal_data.ksnd_connd_starting = 0;
2480 ksocknal_data.ksnd_connd_failed_stamp = 0;
2481 ksocknal_data.ksnd_connd_starting_stamp = ktime_get_real_seconds();
2482 /*
2483 * must have at least 2 connds to remain responsive to accepts while
2484 * connecting
2485 */
2486 if (*ksocknal_tunables.ksnd_nconnds < SOCKNAL_CONND_RESV + 1)
2487 *ksocknal_tunables.ksnd_nconnds = SOCKNAL_CONND_RESV + 1;
2488
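/*
 * NB this assigns the tunable pointer, not the value it points at:
 * from here on ksnd_nconnds_max aliases ksnd_nconnds, so the max can
 * never be seen as smaller than nconnds.
 */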
2489 if (*ksocknal_tunables.ksnd_nconnds_max <
2490 *ksocknal_tunables.ksnd_nconnds) {
2491 ksocknal_tunables.ksnd_nconnds_max =
2492 ksocknal_tunables.ksnd_nconnds;
2493 }
2494
2495 for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
2496 char name[16];
2497
2498 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2499 ksocknal_data.ksnd_connd_starting++;
2500 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2501
2502 snprintf(name, sizeof(name), "socknal_cd%02d", i);
2503 rc = ksocknal_thread_start(ksocknal_connd,
2504 (void *)((ulong_ptr_t)i), name);
2505 if (rc) {
2506 spin_lock_bh(&ksocknal_data.ksnd_connd_lock);
2507 ksocknal_data.ksnd_connd_starting--;
2508 spin_unlock_bh(&ksocknal_data.ksnd_connd_lock);
2509 CERROR("Can't spawn socknal connd: %d\n", rc);
2510 goto failed;
2511 }
2512 }
2513
2514 rc = ksocknal_thread_start(ksocknal_reaper, NULL, "socknal_reaper");
2515 if (rc) {
2516 CERROR("Can't spawn socknal reaper: %d\n", rc);
2517 goto failed;
2518 }
2519
2520 /* flag everything initialised */
2521 ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
2522
2523 return 0;
2524
2525 failed:
2526 ksocknal_base_shutdown();
2527 return -ENETDOWN;
2528 }
2529
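/*
 * Shutdown diagnostic: find a peer still attached to @ni and dump its
 * refcounts, routes and connections to help debug a stuck shutdown.
 */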
2530 static void
2531 ksocknal_debug_peerhash(lnet_ni_t *ni)
2532 {
2533 ksock_peer_t *peer = NULL;
2534 struct list_head *tmp;
2535 int i;
2536
2537 read_lock(&ksocknal_data.ksnd_global_lock);
2538
2539 for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
2540 list_for_each(tmp, &ksocknal_data.ksnd_peers[i]) {
2541 peer = list_entry(tmp, ksock_peer_t, ksnp_list);
2542
2543 if (peer->ksnp_ni == ni)
2544 break;
2545
2546 peer = NULL;
2547 }
2548 }
2549
2550 if (peer) {
2551 ksock_route_t *route;
2552 ksock_conn_t *conn;
2553
2554 CWARN("Active peer on shutdown: %s, ref %d, scnt %d, closing %d, accepting %d, err %d, zcookie %llu, txq %d, zc_req %d\n",
2555 libcfs_id2str(peer->ksnp_id),
2556 atomic_read(&peer->ksnp_refcount),
2557 peer->ksnp_sharecount, peer->ksnp_closing,
2558 peer->ksnp_accepting, peer->ksnp_error,
2559 peer->ksnp_zc_next_cookie,
2560 !list_empty(&peer->ksnp_tx_queue),
2561 !list_empty(&peer->ksnp_zc_req_list));
2562
2563 list_for_each(tmp, &peer->ksnp_routes) {
2564 route = list_entry(tmp, ksock_route_t, ksnr_list);
2565 CWARN("Route: ref %d, schd %d, conn %d, cnted %d, del %d\n",
2566 atomic_read(&route->ksnr_refcount),
2567 route->ksnr_scheduled, route->ksnr_connecting,
2568 route->ksnr_connected, route->ksnr_deleted);
2569 }
2570
2571 list_for_each(tmp, &peer->ksnp_conns) {
2572 conn = list_entry(tmp, ksock_conn_t, ksnc_list);
2573 CWARN("Conn: ref %d, sref %d, t %d, c %d\n",
2574 atomic_read(&conn->ksnc_conn_refcount),
2575 atomic_read(&conn->ksnc_sock_refcount),
2576 conn->ksnc_type, conn->ksnc_closing);
2577 }
2578 }
2579
2580 read_unlock(&ksocknal_data.ksnd_global_lock);
2582 }
2583
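/*
 * Per-net shutdown: block new peers, delete the existing ones, then
 * poll (dumping diagnostics) until ksnn_npeers drains before freeing
 * the net.  The last net to go also tears down the base state.
 */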
2584 void
2585 ksocknal_shutdown(lnet_ni_t *ni)
2586 {
2587 ksock_net_t *net = ni->ni_data;
2588 int i;
2589 lnet_process_id_t anyid = {0};
2590
2591 anyid.nid = LNET_NID_ANY;
2592 anyid.pid = LNET_PID_ANY;
2593
2594 LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
2595 LASSERT(ksocknal_data.ksnd_nnets > 0);
2596
2597 spin_lock_bh(&net->ksnn_lock);
2598 net->ksnn_shutdown = 1; /* prevent new peers */
2599 spin_unlock_bh(&net->ksnn_lock);
2600
2601 /* Delete all peers */
2602 ksocknal_del_peer(ni, anyid, 0);
2603
2604 /* Wait for all peer state to clean up */
2605 i = 2;
2606 spin_lock_bh(&net->ksnn_lock);
2607 while (net->ksnn_npeers) {
2608 spin_unlock_bh(&net->ksnn_lock);
2609
2610 i++;
2611 CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* warn only when i is a power of 2 */
2612 "waiting for %d peers to disconnect\n",
2613 net->ksnn_npeers);
2614 set_current_state(TASK_UNINTERRUPTIBLE);
2615 schedule_timeout(cfs_time_seconds(1));
2616
2617 ksocknal_debug_peerhash(ni);
2618
2619 spin_lock_bh(&net->ksnn_lock);
2620 }
2621 spin_unlock_bh(&net->ksnn_lock);
2622
2623 for (i = 0; i < net->ksnn_ninterfaces; i++) {
2624 LASSERT(!net->ksnn_interfaces[i].ksni_npeers);
2625 LASSERT(!net->ksnn_interfaces[i].ksni_nroutes);
2626 }
2627
2628 list_del(&net->ksnn_list);
2629 LIBCFS_FREE(net, sizeof(*net));
2630
2631 ksocknal_data.ksnd_nnets--;
2632 if (!ksocknal_data.ksnd_nnets)
2633 ksocknal_base_shutdown();
2634 }
2635
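/*
 * Autoconfigure: probe the system for usable interfaces (up, not
 * loopback) and record up to LNET_MAX_INTERFACES of them.  Returns the
 * number found, or <= 0 on failure.
 */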
2636 static int
2637 ksocknal_enumerate_interfaces(ksock_net_t *net)
2638 {
2639 char **names;
2640 int i;
2641 int j;
2642 int rc;
2643 int n;
2644
2645 n = lnet_ipif_enumerate(&names);
2646 if (n <= 0) {
2647 CERROR("Can't enumerate interfaces: %d\n", n);
2648 return n;
2649 }
2650
2651 for (i = j = 0; i < n; i++) {
2652 int up;
2653 __u32 ip;
2654 __u32 mask;
2655
2656 if (!strcmp(names[i], "lo")) /* skip the loopback IF */
2657 continue;
2658
2659 rc = lnet_ipif_query(names[i], &up, &ip, &mask);
2660 if (rc) {
2661 CWARN("Can't get interface %s info: %d\n",
2662 names[i], rc);
2663 continue;
2664 }
2665
2666 if (!up) {
2667 CWARN("Ignoring interface %s (down)\n",
2668 names[i]);
2669 continue;
2670 }
2671
2672 if (j == LNET_MAX_INTERFACES) {
2673 CWARN("Ignoring interface %s (too many interfaces)\n",
2674 names[i]);
2675 continue;
2676 }
2677
2678 net->ksnn_interfaces[j].ksni_ipaddr = ip;
2679 net->ksnn_interfaces[j].ksni_netmask = mask;
2680 strlcpy(net->ksnn_interfaces[j].ksni_name,
2681 names[i], sizeof(net->ksnn_interfaces[j].ksni_name));
2682 j++;
2683 }
2684
2685 lnet_ipif_free_enumeration(names, n);
2686
2687 if (!j)
2688 CERROR("Can't find any usable interfaces\n");
2689
2690 return j;
2691 }
2692
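/*
 * Count how many of @net's interfaces are not already in use by some
 * other net.  Alias suffixes ("eth0:1" -> "eth0") are stripped for the
 * comparison, so aliases of one physical device match each other.
 */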
2693 static int
2694 ksocknal_search_new_ipif(ksock_net_t *net)
2695 {
2696 int new_ipif = 0;
2697 int i;
2698
2699 for (i = 0; i < net->ksnn_ninterfaces; i++) {
2700 char *ifnam = &net->ksnn_interfaces[i].ksni_name[0];
2701 char *colon = strchr(ifnam, ':');
2702 int found = 0;
2703 ksock_net_t *tmp;
2704 int j;
2705
2706 if (colon) /* ignore alias device */
2707 *colon = 0;
2708
2709 list_for_each_entry(tmp, &ksocknal_data.ksnd_nets, ksnn_list) {
2710 for (j = 0; !found && j < tmp->ksnn_ninterfaces; j++) {
2711 char *ifnam2 =
2712 &tmp->ksnn_interfaces[j].ksni_name[0];
2713 char *colon2 = strchr(ifnam2, ':');
2714
2715 if (colon2)
2716 *colon2 = 0;
2717
2718 found = !strcmp(ifnam, ifnam2);
2719 if (colon2)
2720 *colon2 = ':';
2721 }
2722 if (found)
2723 break;
2724 }
2725
2726 new_ipif += !found;
2727 if (colon)
2728 *colon = ':';
2729 }
2730
2731 return new_ipif;
2732 }
2733
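/*
 * Bring one CPT's scheduler pool up to strength: size it on first use
 * (capped by ksi_nthreads_max), or start at most two extra threads
 * when a new interface appears later.
 */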
2734 static int
2735 ksocknal_start_schedulers(struct ksock_sched_info *info)
2736 {
2737 int nthrs;
2738 int rc = 0;
2739 int i;
2740
2741 if (!info->ksi_nthreads) {
2742 if (*ksocknal_tunables.ksnd_nscheds > 0) {
2743 nthrs = info->ksi_nthreads_max;
2744 } else {
2745 nthrs = cfs_cpt_weight(lnet_cpt_table(),
2746 info->ksi_cpt);
2747 nthrs = min(max(SOCKNAL_NSCHEDS, nthrs >> 1), nthrs);
2748 nthrs = min(SOCKNAL_NSCHEDS_HIGH, nthrs);
2749 }
2750 nthrs = min(nthrs, info->ksi_nthreads_max);
2751 } else {
2752 LASSERT(info->ksi_nthreads <= info->ksi_nthreads_max);
2753 /* start at most two more threads if a new interface has appeared */
2754 nthrs = min(2, info->ksi_nthreads_max - info->ksi_nthreads);
2755 }
2756
2757 for (i = 0; i < nthrs; i++) {
2758 long id;
2759 char name[20];
2760 ksock_sched_t *sched;
2761
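/*
 * The thread id packs the CPT and the scheduler slot;
 * KSOCK_THREAD_SID() recovers the slot below.
 */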
2762 id = KSOCK_THREAD_ID(info->ksi_cpt, info->ksi_nthreads + i);
2763 sched = &info->ksi_scheds[KSOCK_THREAD_SID(id)];
2764 snprintf(name, sizeof(name), "socknal_sd%02d_%02d",
2765 info->ksi_cpt, (int)(sched - &info->ksi_scheds[0]));
2766
2767 rc = ksocknal_thread_start(ksocknal_scheduler,
2768 (void *)id, name);
2769 if (!rc)
2770 continue;
2771
2772 CERROR("Can't spawn thread %d for scheduler[%d]: %d\n",
2773 info->ksi_cpt, info->ksi_nthreads + i, rc);
2774 break;
2775 }
2776
2777 info->ksi_nthreads += i;
2778 return rc;
2779 }
2780
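/*
 * Make sure every CPT this net spans has scheduler threads; CPTs that
 * already have them are only topped up when the net brings a new
 * interface.
 */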
2781 static int
2782 ksocknal_net_start_threads(ksock_net_t *net, __u32 *cpts, int ncpts)
2783 {
2784 int newif = ksocknal_search_new_ipif(net);
2785 int rc;
2786 int i;
2787
2788 LASSERT(ncpts > 0 && ncpts <= cfs_cpt_number(lnet_cpt_table()));
2789
2790 for (i = 0; i < ncpts; i++) {
2791 struct ksock_sched_info *info;
2792 int cpt = !cpts ? i : cpts[i];
2793
2794 LASSERT(cpt < cfs_cpt_number(lnet_cpt_table()));
2795 info = ksocknal_data.ksnd_sched_info[cpt];
2796
2797 if (!newif && info->ksi_nthreads > 0)
2798 continue;
2799
2800 rc = ksocknal_start_schedulers(info);
2801 if (rc)
2802 return rc;
2803 }
2804 return 0;
2805 }
2806
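/*
 * LND startup hook: bring up the base state on first use, allocate the
 * per-net state, bind the configured (or auto-detected) interfaces,
 * and derive ni_nid from the first interface's address.
 */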
2807 int
2808 ksocknal_startup(lnet_ni_t *ni)
2809 {
2810 ksock_net_t *net;
2811 int rc;
2812 int i;
2813
2814 LASSERT(ni->ni_lnd == &the_ksocklnd);
2815
2816 if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
2817 rc = ksocknal_base_startup();
2818 if (rc)
2819 return rc;
2820 }
2821
2822 LIBCFS_ALLOC(net, sizeof(*net));
2823 if (!net)
2824 goto fail_0;
2825
2826 spin_lock_init(&net->ksnn_lock);
2827 net->ksnn_incarnation = ksocknal_new_incarnation();
2828 ni->ni_data = net;
2829 ni->ni_peertimeout = *ksocknal_tunables.ksnd_peertimeout;
2830 ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
2831 ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peertxcredits;
2832 ni->ni_peerrtrcredits = *ksocknal_tunables.ksnd_peerrtrcredits;
2833
2834 if (!ni->ni_interfaces[0]) {
2835 rc = ksocknal_enumerate_interfaces(net);
2836 if (rc <= 0)
2837 goto fail_1;
2838
2839 net->ksnn_ninterfaces = 1;
2840 } else {
2841 for (i = 0; i < LNET_MAX_INTERFACES; i++) {
2842 int up;
2843
2844 if (!ni->ni_interfaces[i])
2845 break;
2846
2847 rc = lnet_ipif_query(ni->ni_interfaces[i], &up,
2848 &net->ksnn_interfaces[i].ksni_ipaddr,
2849 &net->ksnn_interfaces[i].ksni_netmask);
2850
2851 if (rc) {
2852 CERROR("Can't get interface %s info: %d\n",
2853 ni->ni_interfaces[i], rc);
2854 goto fail_1;
2855 }
2856
2857 if (!up) {
2858 CERROR("Interface %s is down\n",
2859 ni->ni_interfaces[i]);
2860 goto fail_1;
2861 }
2862
2863 strlcpy(net->ksnn_interfaces[i].ksni_name,
2864 ni->ni_interfaces[i],
2865 sizeof(net->ksnn_interfaces[i].ksni_name));
2866 }
2867 net->ksnn_ninterfaces = i;
2868 }
2869
2870 /* start the threads before adding the net to ksocknal_data.ksnd_nets */
2871 rc = ksocknal_net_start_threads(net, ni->ni_cpts, ni->ni_ncpts);
2872 if (rc)
2873 goto fail_1;
2874
2875 ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
2876 net->ksnn_interfaces[0].ksni_ipaddr);
2877 list_add(&net->ksnn_list, &ksocknal_data.ksnd_nets);
2878
2879 ksocknal_data.ksnd_nnets++;
2880
2881 return 0;
2882
2883 fail_1:
2884 LIBCFS_FREE(net, sizeof(*net));
2885 fail_0:
2886 if (!ksocknal_data.ksnd_nnets)
2887 ksocknal_base_shutdown();
2888
2889 return -ENETDOWN;
2890 }
2891
2892 static void __exit
2893 ksocknal_module_fini(void)
2894 {
2895 lnet_unregister_lnd(&the_ksocklnd);
2896 }
2897
2898 static int __init
2899 ksocknal_module_init(void)
2900 {
2901 int rc;
2902
2903 /* check that the ksnr_connected/connecting bitfields are large enough */
2904 CLASSERT(SOCKLND_CONN_NTYPES <= 4);
2905 CLASSERT(SOCKLND_CONN_ACK == SOCKLND_CONN_BULK_IN);
2906
2907 /* initialize the_ksocklnd */
2908 the_ksocklnd.lnd_type = SOCKLND;
2909 the_ksocklnd.lnd_startup = ksocknal_startup;
2910 the_ksocklnd.lnd_shutdown = ksocknal_shutdown;
2911 the_ksocklnd.lnd_ctl = ksocknal_ctl;
2912 the_ksocklnd.lnd_send = ksocknal_send;
2913 the_ksocklnd.lnd_recv = ksocknal_recv;
2914 the_ksocklnd.lnd_notify = ksocknal_notify;
2915 the_ksocklnd.lnd_query = ksocknal_query;
2916 the_ksocklnd.lnd_accept = ksocknal_accept;
2917
2918 rc = ksocknal_tunables_init();
2919 if (rc)
2920 return rc;
2921
2922 lnet_register_lnd(&the_ksocklnd);
2923
2924 return 0;
2925 }
2926
2927 MODULE_AUTHOR("OpenSFS, Inc. <http://www.lustre.org/>");
2928 MODULE_DESCRIPTION("Kernel TCP Socket LND v3.0.0");
2929 MODULE_VERSION("2.7.0");
2930 MODULE_LICENSE("GPL");
2931
2932 module_init(ksocknal_module_init);
2933 module_exit(ksocknal_module_fini);