]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_network.c
Merge pull request #11242 from patrasar/pimv6_issue_11233
[mirror_frr.git] / bgpd / bgp_network.c
1 /* BGP network related functions
2 * Copyright (C) 1999 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "thread.h"
24 #include "sockunion.h"
25 #include "sockopt.h"
26 #include "memory.h"
27 #include "log.h"
28 #include "if.h"
29 #include "prefix.h"
30 #include "command.h"
31 #include "privs.h"
32 #include "linklist.h"
33 #include "network.h"
34 #include "queue.h"
35 #include "hash.h"
36 #include "filter.h"
37 #include "ns.h"
38 #include "lib_errors.h"
39 #include "nexthop.h"
40
41 #include "bgpd/bgpd.h"
42 #include "bgpd/bgp_open.h"
43 #include "bgpd/bgp_fsm.h"
44 #include "bgpd/bgp_attr.h"
45 #include "bgpd/bgp_debug.h"
46 #include "bgpd/bgp_errors.h"
47 #include "bgpd/bgp_network.h"
48 #include "bgpd/bgp_zebra.h"
49 #include "bgpd/bgp_nht.h"
50
51 extern struct zebra_privs_t bgpd_privs;
52
53 static char *bgp_get_bound_name(struct peer *peer);
54
55 void bgp_dump_listener_info(struct vty *vty)
56 {
57 struct listnode *node;
58 struct bgp_listener *listener;
59
60 vty_out(vty, "Name fd Address\n");
61 vty_out(vty, "---------------------------\n");
62 for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
63 vty_out(vty, "%-16s %d %pSU\n",
64 listener->name ? listener->name : VRF_DEFAULT_NAME,
65 listener->fd, &listener->su);
66 }
67
68 /*
69 * Set MD5 key for the socket, for the given IPv4 peer address.
70 * If the password is NULL or zero-length, the option will be disabled.
71 */
72 static int bgp_md5_set_socket(int socket, union sockunion *su,
73 uint16_t prefixlen, const char *password)
74 {
75 int ret = -1;
76 int en = ENOSYS;
77 #if HAVE_DECL_TCP_MD5SIG
78 union sockunion su2;
79 #endif /* HAVE_TCP_MD5SIG */
80
81 assert(socket >= 0);
82
83 #if HAVE_DECL_TCP_MD5SIG
84 /* Ensure there is no extraneous port information. */
85 memcpy(&su2, su, sizeof(union sockunion));
86 if (su2.sa.sa_family == AF_INET)
87 su2.sin.sin_port = 0;
88 else
89 su2.sin6.sin6_port = 0;
90
91 /* For addresses, use the non-extended signature functionality */
92 if ((su2.sa.sa_family == AF_INET && prefixlen == IPV4_MAX_BITLEN)
93 || (su2.sa.sa_family == AF_INET6 && prefixlen == IPV6_MAX_BITLEN))
94 ret = sockopt_tcp_signature(socket, &su2, password);
95 else
96 ret = sockopt_tcp_signature_ext(socket, &su2, prefixlen,
97 password);
98 en = errno;
99 #endif /* HAVE_TCP_MD5SIG */
100
101 if (ret < 0) {
102 switch (ret) {
103 case -2:
104 flog_warn(
105 EC_BGP_NO_TCP_MD5,
106 "Unable to set TCP MD5 option on socket for peer %pSU (sock=%d): This platform does not support MD5 auth for prefixes",
107 su, socket);
108 break;
109 default:
110 flog_warn(
111 EC_BGP_NO_TCP_MD5,
112 "Unable to set TCP MD5 option on socket for peer %pSU (sock=%d): %s",
113 su, socket, safe_strerror(en));
114 }
115 }
116
117 return ret;
118 }
119
/*
 * Helper for bgp_connect(): set the MD5 key on an outbound socket by
 * calling bgp_md5_set_socket() inside an frr_with_privs(&bgpd_privs)
 * section.  Returns -1 without doing anything when the build has no
 * TCP_MD5SIG declaration.
 */
static int bgp_md5_set_connect(int socket, union sockunion *su,
			       uint16_t prefixlen, const char *password)
{
#if HAVE_DECL_TCP_MD5SIG
	int rc = -1;

	frr_with_privs(&bgpd_privs) {
		rc = bgp_md5_set_socket(socket, su, prefixlen, password);
	}

	return rc;
#else
	return -1;
#endif /* HAVE_DECL_TCP_MD5SIG */
}
134
135 static int bgp_md5_set_password(struct peer *peer, const char *password)
136 {
137 struct listnode *node;
138 int ret = 0;
139 struct bgp_listener *listener;
140
141 /*
142 * Set or unset the password on the listen socket(s). Outbound
143 * connections are taken care of in bgp_connect() below.
144 */
145 frr_with_privs(&bgpd_privs) {
146 for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
147 if (listener->su.sa.sa_family ==
148 peer->su.sa.sa_family) {
149 uint16_t prefixlen =
150 peer->su.sa.sa_family == AF_INET
151 ? IPV4_MAX_BITLEN
152 : IPV6_MAX_BITLEN;
153
154 /*
155 * if we have stored a BGP vrf instance in the
156 * listener it must match the bgp instance in
157 * the peer otherwise the peer bgp instance
158 * must be the default vrf or a view instance
159 */
160 if (!listener->bgp) {
161 if (peer->bgp->vrf_id != VRF_DEFAULT)
162 continue;
163 } else if (listener->bgp != peer->bgp)
164 continue;
165
166 ret = bgp_md5_set_socket(listener->fd,
167 &peer->su, prefixlen,
168 password);
169 break;
170 }
171 }
172 return ret;
173 }
174
175 int bgp_md5_set_prefix(struct bgp *bgp, struct prefix *p, const char *password)
176 {
177 int ret = 0;
178 union sockunion su;
179 struct listnode *node;
180 struct bgp_listener *listener;
181
182 /* Set or unset the password on the listen socket(s). */
183 frr_with_privs(&bgpd_privs) {
184 for (ALL_LIST_ELEMENTS_RO(bm->listen_sockets, node, listener))
185 if (listener->su.sa.sa_family == p->family
186 && ((bgp->vrf_id == VRF_DEFAULT)
187 || (listener->bgp == bgp))) {
188 prefix2sockunion(p, &su);
189 ret = bgp_md5_set_socket(listener->fd, &su,
190 p->prefixlen,
191 password);
192 break;
193 }
194 }
195
196 return ret;
197 }
198
/* Remove the MD5 key for prefix p by setting it with a NULL password. */
int bgp_md5_unset_prefix(struct bgp *bgp, struct prefix *p)
{
	const char *no_password = NULL;

	return bgp_md5_set_prefix(bgp, p, no_password);
}
203
204 int bgp_md5_set(struct peer *peer)
205 {
206 /* Set the password from listen socket. */
207 return bgp_md5_set_password(peer, peer->password);
208 }
209
/* Unset the peer's password on the listen socket (NULL password). */
int bgp_md5_unset(struct peer *peer)
{
	const char *no_password = NULL;

	return bgp_md5_set_password(peer, no_password);
}
215
216 int bgp_set_socket_ttl(struct peer *peer, int bgp_sock)
217 {
218 int ret = 0;
219
220 if (!peer->gtsm_hops) {
221 ret = sockopt_ttl(peer->su.sa.sa_family, bgp_sock, peer->ttl);
222 if (ret) {
223 flog_err(
224 EC_LIB_SOCKET,
225 "%s: Can't set TxTTL on peer (rtrid %pI4) socket, err = %d",
226 __func__, &peer->remote_id, errno);
227 return ret;
228 }
229 } else {
230 /* On Linux, setting minttl without setting ttl seems to mess
231 with the
232 outgoing ttl. Therefore setting both.
233 */
234 ret = sockopt_ttl(peer->su.sa.sa_family, bgp_sock, MAXTTL);
235 if (ret) {
236 flog_err(
237 EC_LIB_SOCKET,
238 "%s: Can't set TxTTL on peer (rtrid %pI4) socket, err = %d",
239 __func__, &peer->remote_id, errno);
240 return ret;
241 }
242 ret = sockopt_minttl(peer->su.sa.sa_family, bgp_sock,
243 MAXTTL + 1 - peer->gtsm_hops);
244 if (ret) {
245 flog_err(
246 EC_LIB_SOCKET,
247 "%s: Can't set MinTTL on peer (rtrid %pI4) socket, err = %d",
248 __func__, &peer->remote_id, errno);
249 return ret;
250 }
251 }
252
253 return ret;
254 }
255
/*
 * Obtain the BGP instance that an incoming connection should be processed
 * against.  This matters because more than one VRF could be using the same
 * IP address space.  The instance is found from the device the accepted
 * socket is bound to, which may be a VRF or an interface that in turn
 * determines the VRF.
 *
 * On return *bgp_inst holds the instance.  Returns 0 on success and
 * non-zero when no instance could be determined.
 */
static int bgp_get_instance_for_inc_conn(int sock, struct bgp **bgp_inst)
{
#ifdef SO_BINDTODEVICE
	char devname[VRF_NAMSIZ + 1];
	socklen_t devname_len = VRF_NAMSIZ;
	struct listnode *ln, *ln_next;
	struct bgp *inst;
	int rc;

	*bgp_inst = NULL;
	devname[0] = '\0';

	rc = getsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, devname,
			&devname_len);
	if (rc != 0) {
#if defined(HAVE_CUMULUS)
		flog_err(EC_LIB_SOCKET,
			 "[Error] BGP SO_BINDTODEVICE get failed (%s), sock %d",
			 safe_strerror(errno), sock);
		return -1;
#endif
	}

	/* No bound device name: use the default instance. */
	if (devname[0] == '\0') {
		*bgp_inst = bgp_get_default();
		return 0;
	}

	/* First try to match an instance by name ... */
	inst = bgp_lookup_by_name(devname);
	if (inst) {
		/* a named match with vrf id 0 is unexpected */
		if (!inst->vrf_id)
			return -1;

		*bgp_inst = inst;
		return 0;
	}

	/* ... then look for an interface of that name in each non-view
	 * instance's VRF.
	 * TODO - This will be optimized once interfaces move into the NS
	 */
	for (ALL_LIST_ELEMENTS(bm->bgp, ln, ln_next, inst)) {
		if (inst->inst_type == BGP_INSTANCE_TYPE_VIEW)
			continue;

		if (if_lookup_by_name(devname, inst->vrf_id)) {
			*bgp_inst = inst;
			return 0;
		}
	}

	/* Neither an instance nor an interface matched. */
	return -1;
#else
	/* Only Linux has SO_BINDTODEVICE, and the mechanism above relies on
	 * the name returned by getsockopt() identifying the VRF (notably
	 * Linux's VRF l3master device).  When other platforms add VRF
	 * support this will need handling here as well (or some
	 * restructuring).  Until then fall back to the default instance.
	 */
	*bgp_inst = bgp_get_default();
	return !*bgp_inst;
#endif
}
327
328 static void bgp_socket_set_buffer_size(const int fd)
329 {
330 if (getsockopt_so_sendbuf(fd) < (int)bm->socket_buffer)
331 setsockopt_so_sendbuf(fd, bm->socket_buffer);
332 if (getsockopt_so_recvbuf(fd) < (int)bm->socket_buffer)
333 setsockopt_so_recvbuf(fd, bm->socket_buffer);
334 }
335
336 /* Accept bgp connection. */
337 static void bgp_accept(struct thread *thread)
338 {
339 int bgp_sock;
340 int accept_sock;
341 union sockunion su;
342 struct bgp_listener *listener = THREAD_ARG(thread);
343 struct peer *peer;
344 struct peer *peer1;
345 char buf[SU_ADDRSTRLEN];
346 struct bgp *bgp = NULL;
347
348 sockunion_init(&su);
349
350 bgp = bgp_lookup_by_name(listener->name);
351
352 /* Register accept thread. */
353 accept_sock = THREAD_FD(thread);
354 if (accept_sock < 0) {
355 flog_err_sys(EC_LIB_SOCKET,
356 "[Error] BGP accept socket fd is negative: %d",
357 accept_sock);
358 return;
359 }
360
361 thread_add_read(bm->master, bgp_accept, listener, accept_sock,
362 &listener->thread);
363
364 /* Accept client connection. */
365 bgp_sock = sockunion_accept(accept_sock, &su);
366 int save_errno = errno;
367 if (bgp_sock < 0) {
368 if (save_errno == EINVAL) {
369 struct vrf *vrf =
370 bgp ? vrf_lookup_by_id(bgp->vrf_id) : NULL;
371
372 /*
373 * It appears that sometimes, when VRFs are deleted on
374 * the system, it takes a little while for us to get
375 * notified about that. In the meantime we endlessly
376 * loop on accept(), because the socket, having been
377 * bound to a now-deleted VRF device, is in some weird
378 * state which causes accept() to fail.
379 *
380 * To avoid this, if we see accept() fail with EINVAL,
381 * we cancel ourselves and trust that when the VRF
382 * deletion notification comes in the event handler for
383 * that will take care of cleaning us up.
384 */
385 flog_err_sys(
386 EC_LIB_SOCKET,
387 "[Error] accept() failed with error \"%s\" on BGP listener socket %d for BGP instance in VRF \"%s\"; refreshing socket",
388 safe_strerror(save_errno), accept_sock,
389 VRF_LOGNAME(vrf));
390 THREAD_OFF(listener->thread);
391 } else {
392 flog_err_sys(
393 EC_LIB_SOCKET,
394 "[Error] BGP socket accept failed (%s); retrying",
395 safe_strerror(save_errno));
396 }
397 return;
398 }
399 set_nonblocking(bgp_sock);
400
401 /* Obtain BGP instance this connection is meant for.
402 * - if it is a VRF netns sock, then BGP is in listener structure
403 * - otherwise, the bgp instance need to be demultiplexed
404 */
405 if (listener->bgp)
406 bgp = listener->bgp;
407 else if (bgp_get_instance_for_inc_conn(bgp_sock, &bgp)) {
408 if (bgp_debug_neighbor_events(NULL))
409 zlog_debug(
410 "[Event] Could not get instance for incoming conn from %s",
411 inet_sutop(&su, buf));
412 close(bgp_sock);
413 return;
414 }
415
416 bgp_socket_set_buffer_size(bgp_sock);
417
418 /* Check remote IP address */
419 peer1 = peer_lookup(bgp, &su);
420
421 if (!peer1) {
422 peer1 = peer_lookup_dynamic_neighbor(bgp, &su);
423 if (peer1) {
424 /* Dynamic neighbor has been created, let it proceed */
425 peer1->fd = bgp_sock;
426
427 /* Set the user configured MSS to TCP socket */
428 if (CHECK_FLAG(peer1->flags, PEER_FLAG_TCP_MSS))
429 sockopt_tcp_mss_set(bgp_sock, peer1->tcp_mss);
430
431 bgp_fsm_change_status(peer1, Active);
432 BGP_TIMER_OFF(
433 peer1->t_start); /* created in peer_create() */
434
435 if (peer_active(peer1)) {
436 if (CHECK_FLAG(peer1->flags,
437 PEER_FLAG_TIMER_DELAYOPEN))
438 BGP_EVENT_ADD(
439 peer1,
440 TCP_connection_open_w_delay);
441 else
442 BGP_EVENT_ADD(peer1,
443 TCP_connection_open);
444 }
445
446 return;
447 }
448 }
449
450 if (!peer1) {
451 if (bgp_debug_neighbor_events(NULL)) {
452 zlog_debug(
453 "[Event] %s connection rejected(%s:%u:%s) - not configured and not valid for dynamic",
454 inet_sutop(&su, buf), bgp->name_pretty, bgp->as,
455 VRF_LOGNAME(vrf_lookup_by_id(bgp->vrf_id)));
456 }
457 close(bgp_sock);
458 return;
459 }
460
461 if (CHECK_FLAG(peer1->flags, PEER_FLAG_SHUTDOWN)
462 || CHECK_FLAG(peer1->bgp->flags, BGP_FLAG_SHUTDOWN)) {
463 if (bgp_debug_neighbor_events(peer1))
464 zlog_debug(
465 "[Event] connection from %s rejected(%s:%u:%s) due to admin shutdown",
466 inet_sutop(&su, buf), bgp->name_pretty, bgp->as,
467 VRF_LOGNAME(vrf_lookup_by_id(bgp->vrf_id)));
468 close(bgp_sock);
469 return;
470 }
471
472 /*
473 * Do not accept incoming connections in Clearing state. This can result
474 * in incorect state transitions - e.g., the connection goes back to
475 * Established and then the Clearing_Completed event is generated. Also,
476 * block incoming connection in Deleted state.
477 */
478 if (peer1->status == Clearing || peer1->status == Deleted) {
479 if (bgp_debug_neighbor_events(peer1))
480 zlog_debug(
481 "[Event] Closing incoming conn for %s (%p) state %d",
482 peer1->host, peer1, peer1->status);
483 close(bgp_sock);
484 return;
485 }
486
487 /* Check that at least one AF is activated for the peer. */
488 if (!peer_active(peer1)) {
489 if (bgp_debug_neighbor_events(peer1))
490 zlog_debug(
491 "%s - incoming conn rejected - no AF activated for peer",
492 peer1->host);
493 close(bgp_sock);
494 return;
495 }
496
497 /* Do not try to reconnect if the peer reached maximum
498 * prefixes, restart timer is still running or the peer
499 * is shutdown.
500 */
501 if (BGP_PEER_START_SUPPRESSED(peer1)) {
502 if (bgp_debug_neighbor_events(peer1))
503 zlog_debug(
504 "[Event] Incoming BGP connection rejected from %s due to maximum-prefix or shutdown",
505 peer1->host);
506 close(bgp_sock);
507 return;
508 }
509
510 if (bgp_debug_neighbor_events(peer1))
511 zlog_debug("[Event] BGP connection from host %s fd %d",
512 inet_sutop(&su, buf), bgp_sock);
513
514 if (peer1->doppelganger) {
515 /* We have an existing connection. Kill the existing one and run
516 with this one.
517 */
518 if (bgp_debug_neighbor_events(peer1))
519 zlog_debug(
520 "[Event] New active connection from peer %s, Killing previous active connection",
521 peer1->host);
522 peer_delete(peer1->doppelganger);
523 }
524
525 if (bgp_set_socket_ttl(peer1, bgp_sock) < 0)
526 if (bgp_debug_neighbor_events(peer1))
527 zlog_debug(
528 "[Event] Unable to set min/max TTL on peer %s, Continuing",
529 peer1->host);
530
531 peer = peer_create(&su, peer1->conf_if, peer1->bgp, peer1->local_as,
532 peer1->as, peer1->as_type, NULL);
533 hash_release(peer->bgp->peerhash, peer);
534 (void)hash_get(peer->bgp->peerhash, peer, hash_alloc_intern);
535
536 peer_xfer_config(peer, peer1);
537 bgp_peer_gr_flags_update(peer);
538
539 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
540 peer->bgp->peer);
541
542 if (bgp_peer_gr_mode_get(peer) == PEER_DISABLE) {
543
544 UNSET_FLAG(peer->sflags, PEER_STATUS_NSF_MODE);
545
546 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT)) {
547 peer_nsf_stop(peer);
548 }
549 }
550
551 UNSET_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE);
552
553 peer->doppelganger = peer1;
554 peer1->doppelganger = peer;
555 peer->fd = bgp_sock;
556 frr_with_privs(&bgpd_privs) {
557 vrf_bind(peer->bgp->vrf_id, bgp_sock, bgp_get_bound_name(peer));
558 }
559 bgp_peer_reg_with_nht(peer);
560 bgp_fsm_change_status(peer, Active);
561 BGP_TIMER_OFF(peer->t_start); /* created in peer_create() */
562
563 SET_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER);
564 /* Make dummy peer until read Open packet. */
565 if (peer_established(peer1)
566 && CHECK_FLAG(peer1->sflags, PEER_STATUS_NSF_MODE)) {
567 /* If we have an existing established connection with graceful
568 * restart
569 * capability announced with one or more address families, then
570 * drop
571 * existing established connection and move state to connect.
572 */
573 peer1->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
574
575 if (CHECK_FLAG(peer1->flags, PEER_FLAG_GRACEFUL_RESTART)
576 || CHECK_FLAG(peer1->flags,
577 PEER_FLAG_GRACEFUL_RESTART_HELPER))
578 SET_FLAG(peer1->sflags, PEER_STATUS_NSF_WAIT);
579
580 bgp_event_update(peer1, TCP_connection_closed);
581 }
582
583 if (peer_active(peer)) {
584 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER_DELAYOPEN))
585 BGP_EVENT_ADD(peer, TCP_connection_open_w_delay);
586 else
587 BGP_EVENT_ADD(peer, TCP_connection_open);
588 }
589
590 /*
591 * If we are doing nht for a peer that is v6 LL based
592 * massage the event system to make things happy
593 */
594 bgp_nht_interface_events(peer);
595 }
596
597 /* BGP socket bind. */
598 static char *bgp_get_bound_name(struct peer *peer)
599 {
600 if (!peer)
601 return NULL;
602
603 if ((peer->bgp->vrf_id == VRF_DEFAULT) && !peer->ifname
604 && !peer->conf_if)
605 return NULL;
606
607 if (peer->su.sa.sa_family != AF_INET
608 && peer->su.sa.sa_family != AF_INET6)
609 return NULL; // unexpected
610
611 /* For IPv6 peering, interface (unnumbered or link-local with interface)
612 * takes precedence over VRF. For IPv4 peering, explicit interface or
613 * VRF are the situations to bind.
614 */
615 if (peer->su.sa.sa_family == AF_INET6 && peer->conf_if)
616 return peer->conf_if;
617
618 if (peer->ifname)
619 return peer->ifname;
620
621 if (peer->bgp->inst_type == BGP_INSTANCE_TYPE_VIEW)
622 return NULL;
623
624 return peer->bgp->name;
625 }
626
627 int bgp_update_address(struct interface *ifp, const union sockunion *dst,
628 union sockunion *addr)
629 {
630 struct prefix *p, *sel, d;
631 struct connected *connected;
632 struct listnode *node;
633 int common;
634
635 if (!sockunion2hostprefix(dst, &d))
636 return 1;
637
638 sel = NULL;
639 common = -1;
640
641 for (ALL_LIST_ELEMENTS_RO(ifp->connected, node, connected)) {
642 p = connected->address;
643 if (p->family != d.family)
644 continue;
645 if (prefix_common_bits(p, &d) > common) {
646 sel = p;
647 common = prefix_common_bits(sel, &d);
648 }
649 }
650
651 if (!sel)
652 return 1;
653
654 prefix2sockunion(sel, addr);
655 return 0;
656 }
657
658 /* Update source selection. */
659 static int bgp_update_source(struct peer *peer)
660 {
661 struct interface *ifp;
662 union sockunion addr;
663 int ret = 0;
664
665 sockunion_init(&addr);
666
667 /* Source is specified with interface name. */
668 if (peer->update_if) {
669 ifp = if_lookup_by_name(peer->update_if, peer->bgp->vrf_id);
670 if (!ifp)
671 return -1;
672
673 if (bgp_update_address(ifp, &peer->su, &addr))
674 return -1;
675
676 ret = sockunion_bind(peer->fd, &addr, 0, &addr);
677 }
678
679 /* Source is specified with IP address. */
680 if (peer->update_source)
681 ret = sockunion_bind(peer->fd, peer->update_source, 0,
682 peer->update_source);
683
684 return ret;
685 }
686
687 /* BGP try to connect to the peer. */
688 int bgp_connect(struct peer *peer)
689 {
690 assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_WRITES_ON));
691 assert(!CHECK_FLAG(peer->thread_flags, PEER_THREAD_READS_ON));
692 ifindex_t ifindex = 0;
693
694 if (peer->conf_if && BGP_PEER_SU_UNSPEC(peer)) {
695 if (bgp_debug_neighbor_events(peer))
696 zlog_debug("Peer address not learnt: Returning from connect");
697 return 0;
698 }
699 frr_with_privs(&bgpd_privs) {
700 /* Make socket for the peer. */
701 peer->fd = vrf_sockunion_socket(&peer->su, peer->bgp->vrf_id,
702 bgp_get_bound_name(peer));
703 }
704 if (peer->fd < 0) {
705 peer->last_reset = PEER_DOWN_SOCKET_ERROR;
706 if (bgp_debug_neighbor_events(peer))
707 zlog_debug("%s: Failure to create socket for connection to %s, error received: %s(%d)",
708 __func__, peer->host, safe_strerror(errno),
709 errno);
710 return -1;
711 }
712
713 set_nonblocking(peer->fd);
714
715 /* Set the user configured MSS to TCP socket */
716 if (CHECK_FLAG(peer->flags, PEER_FLAG_TCP_MSS))
717 sockopt_tcp_mss_set(peer->fd, peer->tcp_mss);
718
719 bgp_socket_set_buffer_size(peer->fd);
720
721 if (bgp_set_socket_ttl(peer, peer->fd) < 0) {
722 peer->last_reset = PEER_DOWN_SOCKET_ERROR;
723 if (bgp_debug_neighbor_events(peer))
724 zlog_debug("%s: Failure to set socket ttl for connection to %s, error received: %s(%d)",
725 __func__, peer->host, safe_strerror(errno),
726 errno);
727 return -1;
728 }
729
730 sockopt_reuseaddr(peer->fd);
731 sockopt_reuseport(peer->fd);
732
733 #ifdef IPTOS_PREC_INTERNETCONTROL
734 frr_with_privs(&bgpd_privs) {
735 if (sockunion_family(&peer->su) == AF_INET)
736 setsockopt_ipv4_tos(peer->fd, bm->tcp_dscp);
737 else if (sockunion_family(&peer->su) == AF_INET6)
738 setsockopt_ipv6_tclass(peer->fd, bm->tcp_dscp);
739 }
740 #endif
741
742 if (peer->password) {
743 uint16_t prefixlen = peer->su.sa.sa_family == AF_INET
744 ? IPV4_MAX_BITLEN
745 : IPV6_MAX_BITLEN;
746
747 bgp_md5_set_connect(peer->fd, &peer->su, prefixlen,
748 peer->password);
749 }
750
751 /* Update source bind. */
752 if (bgp_update_source(peer) < 0) {
753 peer->last_reset = PEER_DOWN_SOCKET_ERROR;
754 return connect_error;
755 }
756
757 if (peer->conf_if || peer->ifname)
758 ifindex = ifname2ifindex(peer->conf_if ? peer->conf_if
759 : peer->ifname,
760 peer->bgp->vrf_id);
761
762 if (bgp_debug_neighbor_events(peer))
763 zlog_debug("%s [Event] Connect start to %s fd %d", peer->host,
764 peer->host, peer->fd);
765
766 /* Connect to the remote peer. */
767 return sockunion_connect(peer->fd, &peer->su, htons(peer->port),
768 ifindex);
769 }
770
771 /* After TCP connection is established. Get local address and port. */
772 int bgp_getsockname(struct peer *peer)
773 {
774 if (peer->su_local) {
775 sockunion_free(peer->su_local);
776 peer->su_local = NULL;
777 }
778
779 if (peer->su_remote) {
780 sockunion_free(peer->su_remote);
781 peer->su_remote = NULL;
782 }
783
784 peer->su_local = sockunion_getsockname(peer->fd);
785 if (!peer->su_local)
786 return -1;
787 peer->su_remote = sockunion_getpeername(peer->fd);
788 if (!peer->su_remote)
789 return -1;
790
791 if (!bgp_zebra_nexthop_set(peer->su_local, peer->su_remote,
792 &peer->nexthop, peer)) {
793 flog_err(EC_BGP_NH_UPD,
794 "%s: nexthop_set failed, resetting connection - intf %p",
795 peer->host, peer->nexthop.ifp);
796 return -1;
797 }
798 return 0;
799 }
800
801
802 static int bgp_listener(int sock, struct sockaddr *sa, socklen_t salen,
803 struct bgp *bgp)
804 {
805 struct bgp_listener *listener;
806 int ret, en;
807
808 sockopt_reuseaddr(sock);
809 sockopt_reuseport(sock);
810
811 frr_with_privs(&bgpd_privs) {
812
813 #ifdef IPTOS_PREC_INTERNETCONTROL
814 if (sa->sa_family == AF_INET)
815 setsockopt_ipv4_tos(sock, bm->tcp_dscp);
816 else if (sa->sa_family == AF_INET6)
817 setsockopt_ipv6_tclass(sock, bm->tcp_dscp);
818 #endif
819
820 sockopt_v6only(sa->sa_family, sock);
821
822 ret = bind(sock, sa, salen);
823 en = errno;
824 }
825
826 if (ret < 0) {
827 flog_err_sys(EC_LIB_SOCKET, "bind: %s", safe_strerror(en));
828 return ret;
829 }
830
831 ret = listen(sock, SOMAXCONN);
832 if (ret < 0) {
833 flog_err_sys(EC_LIB_SOCKET, "listen: %s", safe_strerror(errno));
834 return ret;
835 }
836
837 listener = XCALLOC(MTYPE_BGP_LISTENER, sizeof(*listener));
838 listener->fd = sock;
839 listener->name = XSTRDUP(MTYPE_BGP_LISTENER, bgp->name);
840
841 /* this socket is in a vrf record bgp back pointer */
842 if (bgp->vrf_id != VRF_DEFAULT)
843 listener->bgp = bgp;
844
845 memcpy(&listener->su, sa, salen);
846 thread_add_read(bm->master, bgp_accept, listener, sock,
847 &listener->thread);
848 listnode_add(bm->listen_sockets, listener);
849
850 return 0;
851 }
852
853 /* IPv6 supported version of BGP server socket setup. */
854 int bgp_socket(struct bgp *bgp, unsigned short port, const char *address)
855 {
856 struct addrinfo *ainfo;
857 struct addrinfo *ainfo_save;
858 static const struct addrinfo req = {
859 .ai_family = AF_UNSPEC,
860 .ai_flags = AI_PASSIVE,
861 .ai_socktype = SOCK_STREAM,
862 };
863 int ret, count;
864 char port_str[BUFSIZ];
865
866 snprintf(port_str, sizeof(port_str), "%d", port);
867 port_str[sizeof(port_str) - 1] = '\0';
868
869 frr_with_privs(&bgpd_privs) {
870 ret = vrf_getaddrinfo(address, port_str, &req, &ainfo_save,
871 bgp->vrf_id);
872 }
873 if (ret != 0) {
874 flog_err_sys(EC_LIB_SOCKET, "getaddrinfo: %s",
875 gai_strerror(ret));
876 return -1;
877 }
878 if (bgp_option_check(BGP_OPT_NO_ZEBRA) &&
879 bgp->vrf_id != VRF_DEFAULT) {
880 freeaddrinfo(ainfo_save);
881 return -1;
882 }
883 count = 0;
884 for (ainfo = ainfo_save; ainfo; ainfo = ainfo->ai_next) {
885 int sock;
886
887 if (ainfo->ai_family != AF_INET && ainfo->ai_family != AF_INET6)
888 continue;
889
890 frr_with_privs(&bgpd_privs) {
891 sock = vrf_socket(ainfo->ai_family,
892 ainfo->ai_socktype,
893 ainfo->ai_protocol,
894 bgp->vrf_id,
895 (bgp->inst_type
896 == BGP_INSTANCE_TYPE_VRF
897 ? bgp->name : NULL));
898 }
899 if (sock < 0) {
900 flog_err_sys(EC_LIB_SOCKET, "socket: %s",
901 safe_strerror(errno));
902 continue;
903 }
904
905 /* if we intend to implement ttl-security, this socket needs
906 * ttl=255 */
907 sockopt_ttl(ainfo->ai_family, sock, MAXTTL);
908
909 ret = bgp_listener(sock, ainfo->ai_addr, ainfo->ai_addrlen,
910 bgp);
911 if (ret == 0)
912 ++count;
913 else
914 close(sock);
915 }
916 freeaddrinfo(ainfo_save);
917 if (count == 0 && bgp->inst_type != BGP_INSTANCE_TYPE_VRF) {
918 flog_err(
919 EC_LIB_SOCKET,
920 "%s: no usable addresses please check other programs usage of specified port %d",
921 __func__, port);
922 flog_err_sys(EC_LIB_SOCKET, "%s: Program cannot continue",
923 __func__);
924 exit(-1);
925 }
926
927 return 0;
928 }
929
930 /* this function closes vrf socket
931 * this should be called only for vrf socket with netns backend
932 */
933 void bgp_close_vrf_socket(struct bgp *bgp)
934 {
935 struct listnode *node, *next;
936 struct bgp_listener *listener;
937
938 if (!bgp)
939 return;
940
941 if (bm->listen_sockets == NULL)
942 return;
943
944 for (ALL_LIST_ELEMENTS(bm->listen_sockets, node, next, listener)) {
945 if (listener->bgp == bgp) {
946 THREAD_OFF(listener->thread);
947 close(listener->fd);
948 listnode_delete(bm->listen_sockets, listener);
949 XFREE(MTYPE_BGP_LISTENER, listener->name);
950 XFREE(MTYPE_BGP_LISTENER, listener);
951 }
952 }
953 }
954
955 /* this function closes main socket
956 */
957 void bgp_close(void)
958 {
959 struct listnode *node, *next;
960 struct bgp_listener *listener;
961
962 if (bm->listen_sockets == NULL)
963 return;
964
965 for (ALL_LIST_ELEMENTS(bm->listen_sockets, node, next, listener)) {
966 if (listener->bgp)
967 continue;
968 THREAD_OFF(listener->thread);
969 close(listener->fd);
970 listnode_delete(bm->listen_sockets, listener);
971 XFREE(MTYPE_BGP_LISTENER, listener->name);
972 XFREE(MTYPE_BGP_LISTENER, listener);
973 }
974 }