1 /* BGP network related functions
2 * Copyright (C) 1999 Kunihiro Ishiguro
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "sockunion.h"
38 #include "lib_errors.h"
40 #include "bgpd/bgpd.h"
41 #include "bgpd/bgp_open.h"
42 #include "bgpd/bgp_fsm.h"
43 #include "bgpd/bgp_attr.h"
44 #include "bgpd/bgp_debug.h"
45 #include "bgpd/bgp_errors.h"
46 #include "bgpd/bgp_network.h"
48 extern struct zebra_privs_t bgpd_privs
;
50 static char *bgp_get_bound_name(struct peer
*peer
);
52 /* BGP listening socket. */
56 struct thread
*thread
;
61 * Set MD5 key for the socket, for the given IPv4 peer address.
62 * If the password is NULL or zero-length, the option will be disabled.
64 static int bgp_md5_set_socket(int socket
, union sockunion
*su
,
69 #if HAVE_DECL_TCP_MD5SIG
71 #endif /* HAVE_DECL_TCP_MD5SIG */
75 #if HAVE_DECL_TCP_MD5SIG
76 /* Ensure there is no extraneous port information. */
77 memcpy(&su2
, su
, sizeof(union sockunion
));
78 if (su2
.sa
.sa_family
== AF_INET
)
81 su2
.sin6
.sin6_port
= 0;
82 ret
= sockopt_tcp_signature(socket
, &su2
, password
);
84 #endif /* HAVE_DECL_TCP_MD5SIG */
87 zlog_warn("can't set TCP_MD5SIG option on socket %d: %s",
88 socket
, safe_strerror(en
));
93 /* Helper for bgp_connect */
94 static int bgp_md5_set_connect(int socket
, union sockunion
*su
,
99 #if HAVE_DECL_TCP_MD5SIG
100 if (bgpd_privs
.change(ZPRIVS_RAISE
)) {
101 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not raise privs",
106 ret
= bgp_md5_set_socket(socket
, su
, password
);
108 if (bgpd_privs
.change(ZPRIVS_LOWER
))
109 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not lower privs",
111 #endif /* HAVE_DECL_TCP_MD5SIG */
116 static int bgp_md5_set_password(struct peer
*peer
, const char *password
)
118 struct listnode
*node
;
120 struct bgp_listener
*listener
;
122 if (bgpd_privs
.change(ZPRIVS_RAISE
)) {
123 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not raise privs",
128 /* Set or unset the password on the listen socket(s). Outbound
130 * are taken care of in bgp_connect() below.
132 for (ALL_LIST_ELEMENTS_RO(bm
->listen_sockets
, node
, listener
))
133 if (listener
->su
.sa
.sa_family
== peer
->su
.sa
.sa_family
) {
134 ret
= bgp_md5_set_socket(listener
->fd
, &peer
->su
,
139 if (bgpd_privs
.change(ZPRIVS_LOWER
))
140 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not lower privs",
146 int bgp_md5_set(struct peer
*peer
)
148 /* Set the password from listen socket. */
149 return bgp_md5_set_password(peer
, peer
->password
);
152 int bgp_md5_unset(struct peer
*peer
)
154 /* Unset the password from listen socket. */
155 return bgp_md5_set_password(peer
, NULL
);
158 int bgp_set_socket_ttl(struct peer
*peer
, int bgp_sock
)
160 char buf
[INET_ADDRSTRLEN
];
163 /* If the peer is EBGP, we should set the TTL for this connection. */
164 if (!peer
->gtsm_hops
&& (peer_sort(peer
) == BGP_PEER_EBGP
)) {
165 ret
= sockopt_ttl(peer
->su
.sa
.sa_family
, bgp_sock
, peer
->ttl
);
169 "%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
171 inet_ntop(AF_INET
, &peer
->remote_id
, buf
,
176 } else if (peer
->gtsm_hops
) {
177 /* On Linux, setting minttl without setting ttl seems to mess
179 outgoing ttl. Therefore setting both.
181 ret
= sockopt_ttl(peer
->su
.sa
.sa_family
, bgp_sock
, MAXTTL
);
185 "%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
187 inet_ntop(AF_INET
, &peer
->remote_id
, buf
,
192 ret
= sockopt_minttl(peer
->su
.sa
.sa_family
, bgp_sock
,
193 MAXTTL
+ 1 - peer
->gtsm_hops
);
197 "%s: Can't set MinTTL on peer (rtrid %s) socket, err = %d",
199 inet_ntop(AF_INET
, &peer
->remote_id
, buf
,
210 * Obtain the BGP instance that the incoming connection should be processed
211 * against. This is important because more than one VRF could be using the
212 * same IP address space. The instance is found by obtaining the device to
213 * which the incoming connection is bound. This could either be a VRF
214 * or it could be an interface, which in turn determines the VRF.
216 static int bgp_get_instance_for_inc_conn(int sock
, struct bgp
**bgp_inst
)
218 #ifndef SO_BINDTODEVICE
219 /* only Linux has SO_BINDTODEVICE, but we're in Linux-specific code here
220 * anyway since the assumption is that the interface name returned by
221 * getsockopt() is useful in identifying the VRF, particularly with
223 * VRF l3master device. The whole mechanism is specific to Linux, so...
224 * when other platforms add VRF support, this will need handling here as
225 * well. (or, some restructuring) */
226 *bgp_inst
= bgp_get_default();
230 char name
[VRF_NAMSIZ
+ 1];
231 socklen_t name_len
= VRF_NAMSIZ
;
234 struct listnode
*node
, *nnode
;
238 rc
= getsockopt(sock
, SOL_SOCKET
, SO_BINDTODEVICE
, name
, &name_len
);
240 #if defined(HAVE_CUMULUS)
243 "[Error] BGP SO_BINDTODEVICE get failed (%s), sock %d",
244 safe_strerror(errno
), sock
);
250 *bgp_inst
= bgp_get_default();
251 return 0; /* default instance. */
254 /* First try match to instance; if that fails, check for interfaces. */
255 bgp
= bgp_lookup_by_name(name
);
257 if (!bgp
->vrf_id
) // unexpected
263 /* TODO - This will be optimized once interfaces move into the NS */
264 for (ALL_LIST_ELEMENTS(bm
->bgp
, node
, nnode
, bgp
)) {
265 struct interface
*ifp
;
267 if (bgp
->inst_type
== BGP_INSTANCE_TYPE_VIEW
)
270 ifp
= if_lookup_by_name(name
, bgp
->vrf_id
);
277 /* We didn't match to either an instance or an interface. */
282 /* Accept bgp connection. */
283 static int bgp_accept(struct thread
*thread
)
288 struct bgp_listener
*listener
= THREAD_ARG(thread
);
291 char buf
[SU_ADDRSTRLEN
];
292 struct bgp
*bgp
= NULL
;
296 /* Register accept thread. */
297 accept_sock
= THREAD_FD(thread
);
298 if (accept_sock
< 0) {
299 zlog_ferr(BGP_ERR_SOCKET
, "accept_sock is nevative value %d",
303 listener
->thread
= NULL
;
305 thread_add_read(bm
->master
, bgp_accept
, listener
, accept_sock
,
308 /* Accept client connection. */
309 bgp_sock
= sockunion_accept(accept_sock
, &su
);
311 zlog_ferr(BGP_ERR_SOCKET
,
312 "[Error] BGP socket accept failed (%s)",
313 safe_strerror(errno
));
316 set_nonblocking(bgp_sock
);
318 /* Obtain BGP instance this connection is meant for.
319 * - if it is a VRF netns sock, then BGP is in listener structure
320 * - otherwise, the bgp instance needs to be demultiplexed
324 else if (bgp_get_instance_for_inc_conn(bgp_sock
, &bgp
)) {
325 if (bgp_debug_neighbor_events(NULL
))
327 "[Event] Could not get instance for incoming conn from %s",
328 inet_sutop(&su
, buf
));
333 /* Set socket send buffer size */
334 setsockopt_so_sendbuf(bgp_sock
, BGP_SOCKET_SNDBUF_SIZE
);
336 /* Check remote IP address */
337 peer1
= peer_lookup(bgp
, &su
);
340 peer1
= peer_lookup_dynamic_neighbor(bgp
, &su
);
342 /* Dynamic neighbor has been created, let it proceed */
343 peer1
->fd
= bgp_sock
;
344 bgp_fsm_change_status(peer1
, Active
);
346 peer1
->t_start
); /* created in peer_create() */
348 if (peer_active(peer1
))
349 BGP_EVENT_ADD(peer1
, TCP_connection_open
);
356 if (bgp_debug_neighbor_events(NULL
)) {
358 "[Event] %s connection rejected - not configured"
359 " and not valid for dynamic",
360 inet_sutop(&su
, buf
));
366 if (CHECK_FLAG(peer1
->flags
, PEER_FLAG_SHUTDOWN
)) {
367 if (bgp_debug_neighbor_events(peer1
))
369 "[Event] connection from %s rejected due to admin shutdown",
370 inet_sutop(&su
, buf
));
376 * Do not accept incoming connections in Clearing state. This can result
377 * in incorrect state transitions - e.g., the connection goes back to
378 * Established and then the Clearing_Completed event is generated. Also,
379 * block incoming connection in Deleted state.
381 if (peer1
->status
== Clearing
|| peer1
->status
== Deleted
) {
382 if (bgp_debug_neighbor_events(peer1
))
384 "[Event] Closing incoming conn for %s (%p) state %d",
385 peer1
->host
, peer1
, peer1
->status
);
390 /* Check that at least one AF is activated for the peer. */
391 if (!peer_active(peer1
)) {
392 if (bgp_debug_neighbor_events(peer1
))
394 "%s - incoming conn rejected - no AF activated for peer",
400 if (bgp_debug_neighbor_events(peer1
))
401 zlog_debug("[Event] BGP connection from host %s fd %d",
402 inet_sutop(&su
, buf
), bgp_sock
);
404 if (peer1
->doppelganger
) {
405 /* We have an existing connection. Kill the existing one and run
408 if (bgp_debug_neighbor_events(peer1
))
410 "[Event] New active connection from peer %s, Killing"
411 " previous active connection",
413 peer_delete(peer1
->doppelganger
);
416 if (bgp_set_socket_ttl(peer1
, bgp_sock
) < 0)
417 if (bgp_debug_neighbor_events(peer1
))
419 "[Event] Unable to set min/max TTL on peer %s, Continuing",
422 peer
= peer_create(&su
, peer1
->conf_if
, peer1
->bgp
, peer1
->local_as
,
423 peer1
->as
, peer1
->as_type
, 0, 0, NULL
);
425 hash_release(peer
->bgp
->peerhash
, peer
);
426 hash_get(peer
->bgp
->peerhash
, peer
, hash_alloc_intern
);
428 peer_xfer_config(peer
, peer1
);
429 UNSET_FLAG(peer
->flags
, PEER_FLAG_CONFIG_NODE
);
431 peer
->doppelganger
= peer1
;
432 peer1
->doppelganger
= peer
;
434 vrf_bind(peer
->bgp
->vrf_id
, bgp_sock
, bgp_get_bound_name(peer
));
435 bgp_fsm_change_status(peer
, Active
);
436 BGP_TIMER_OFF(peer
->t_start
); /* created in peer_create() */
438 SET_FLAG(peer
->sflags
, PEER_STATUS_ACCEPT_PEER
);
440 /* Make a dummy peer until the Open packet is read. */
441 if (peer1
->status
== Established
442 && CHECK_FLAG(peer1
->sflags
, PEER_STATUS_NSF_MODE
)) {
443 /* If we have an existing established connection with graceful
445 * capability announced with one or more address families, then
447 * existing established connection and move state to connect.
449 peer1
->last_reset
= PEER_DOWN_NSF_CLOSE_SESSION
;
450 SET_FLAG(peer1
->sflags
, PEER_STATUS_NSF_WAIT
);
451 bgp_event_update(peer1
, TCP_connection_closed
);
454 if (peer_active(peer
)) {
455 BGP_EVENT_ADD(peer
, TCP_connection_open
);
461 /* BGP socket bind. */
462 static char *bgp_get_bound_name(struct peer
*peer
)
469 if ((peer
->bgp
->vrf_id
== VRF_DEFAULT
) && !peer
->ifname
473 if (peer
->su
.sa
.sa_family
!= AF_INET
474 && peer
->su
.sa
.sa_family
!= AF_INET6
)
475 return NULL
; // unexpected
477 /* For IPv6 peering, interface (unnumbered or link-local with interface)
478 * takes precedence over VRF. For IPv4 peering, explicit interface or
479 * VRF are the situations to bind.
481 if (peer
->su
.sa
.sa_family
== AF_INET6
)
482 name
= (peer
->conf_if
? peer
->conf_if
483 : (peer
->ifname
? peer
->ifname
486 name
= peer
->ifname
? peer
->ifname
: peer
->bgp
->name
;
491 static int bgp_update_address(struct interface
*ifp
, const union sockunion
*dst
,
492 union sockunion
*addr
)
494 struct prefix
*p
, *sel
, d
;
495 struct connected
*connected
;
496 struct listnode
*node
;
499 sockunion2hostprefix(dst
, &d
);
503 for (ALL_LIST_ELEMENTS_RO(ifp
->connected
, node
, connected
)) {
504 p
= connected
->address
;
505 if (p
->family
!= d
.family
)
507 if (prefix_common_bits(p
, &d
) > common
) {
509 common
= prefix_common_bits(sel
, &d
);
516 prefix2sockunion(sel
, addr
);
520 /* Update source selection. */
521 static int bgp_update_source(struct peer
*peer
)
523 struct interface
*ifp
;
524 union sockunion addr
;
527 sockunion_init(&addr
);
529 /* Source is specified with interface name. */
530 if (peer
->update_if
) {
531 ifp
= if_lookup_by_name(peer
->update_if
, peer
->bgp
->vrf_id
);
535 if (bgp_update_address(ifp
, &peer
->su
, &addr
))
538 ret
= sockunion_bind(peer
->fd
, &addr
, 0, &addr
);
541 /* Source is specified with IP address. */
542 if (peer
->update_source
)
543 ret
= sockunion_bind(peer
->fd
, peer
->update_source
, 0,
544 peer
->update_source
);
549 #define DATAPLANE_MARK 254 /* main table ID */
551 /* BGP tries to connect to the peer. */
552 int bgp_connect(struct peer
*peer
)
554 assert(!CHECK_FLAG(peer
->thread_flags
, PEER_THREAD_WRITES_ON
));
555 assert(!CHECK_FLAG(peer
->thread_flags
, PEER_THREAD_READS_ON
));
556 ifindex_t ifindex
= 0;
558 if (peer
->conf_if
&& BGP_PEER_SU_UNSPEC(peer
)) {
559 zlog_debug("Peer address not learnt: Returning from connect");
562 if (bgpd_privs
.change(ZPRIVS_RAISE
))
563 zlog_ferr(LIB_ERR_PRIVILEGES
, "Can't raise privileges");
564 /* Make socket for the peer. */
565 peer
->fd
= vrf_sockunion_socket(&peer
->su
, peer
->bgp
->vrf_id
,
566 bgp_get_bound_name(peer
));
567 if (bgpd_privs
.change(ZPRIVS_LOWER
))
568 zlog_ferr(LIB_ERR_PRIVILEGES
, "Can't lower privileges");
572 set_nonblocking(peer
->fd
);
574 /* Set socket send buffer size */
575 setsockopt_so_sendbuf(peer
->fd
, BGP_SOCKET_SNDBUF_SIZE
);
577 if (bgp_set_socket_ttl(peer
, peer
->fd
) < 0)
580 sockopt_reuseaddr(peer
->fd
);
581 sockopt_reuseport(peer
->fd
);
582 if (sockopt_mark_default(peer
->fd
, DATAPLANE_MARK
, &bgpd_privs
) < 0)
583 zlog_warn("Unable to set mark on FD for peer %s, err=%s",
584 peer
->host
, safe_strerror(errno
));
586 #ifdef IPTOS_PREC_INTERNETCONTROL
587 if (bgpd_privs
.change(ZPRIVS_RAISE
))
588 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not raise privs",
590 if (sockunion_family(&peer
->su
) == AF_INET
)
591 setsockopt_ipv4_tos(peer
->fd
, IPTOS_PREC_INTERNETCONTROL
);
592 else if (sockunion_family(&peer
->su
) == AF_INET6
)
593 setsockopt_ipv6_tclass(peer
->fd
, IPTOS_PREC_INTERNETCONTROL
);
594 if (bgpd_privs
.change(ZPRIVS_LOWER
))
595 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not lower privs",
600 bgp_md5_set_connect(peer
->fd
, &peer
->su
, peer
->password
);
602 /* Update source bind. */
603 if (bgp_update_source(peer
) < 0) {
604 return connect_error
;
607 if (peer
->conf_if
|| peer
->ifname
)
608 ifindex
= ifname2ifindex(peer
->conf_if
? peer
->conf_if
612 if (bgp_debug_neighbor_events(peer
))
613 zlog_debug("%s [Event] Connect start to %s fd %d", peer
->host
,
614 peer
->host
, peer
->fd
);
616 /* Connect to the remote peer. */
617 return sockunion_connect(peer
->fd
, &peer
->su
, htons(peer
->port
),
621 /* After the TCP connection is established, get the local address and port. */
622 int bgp_getsockname(struct peer
*peer
)
624 if (peer
->su_local
) {
625 sockunion_free(peer
->su_local
);
626 peer
->su_local
= NULL
;
629 if (peer
->su_remote
) {
630 sockunion_free(peer
->su_remote
);
631 peer
->su_remote
= NULL
;
634 peer
->su_local
= sockunion_getsockname(peer
->fd
);
637 peer
->su_remote
= sockunion_getpeername(peer
->fd
);
638 if (!peer
->su_remote
)
641 if (bgp_nexthop_set(peer
->su_local
, peer
->su_remote
, &peer
->nexthop
,
643 #if defined(HAVE_CUMULUS)
646 "%s: nexthop_set failed, resetting connection - intf %p",
647 peer
->host
, peer
->nexthop
.ifp
);
655 static int bgp_listener(int sock
, struct sockaddr
*sa
, socklen_t salen
,
658 struct bgp_listener
*listener
;
661 sockopt_reuseaddr(sock
);
662 sockopt_reuseport(sock
);
664 if (bgpd_privs
.change(ZPRIVS_RAISE
))
665 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not raise privs",
668 #ifdef IPTOS_PREC_INTERNETCONTROL
669 if (sa
->sa_family
== AF_INET
)
670 setsockopt_ipv4_tos(sock
, IPTOS_PREC_INTERNETCONTROL
);
671 else if (sa
->sa_family
== AF_INET6
)
672 setsockopt_ipv6_tclass(sock
, IPTOS_PREC_INTERNETCONTROL
);
675 sockopt_v6only(sa
->sa_family
, sock
);
677 ret
= bind(sock
, sa
, salen
);
679 if (bgpd_privs
.change(ZPRIVS_LOWER
))
680 zlog_ferr(BGP_ERR_PRIVILEGES
, "%s: could not lower privs",
684 zlog_ferr(BGP_ERR_SOCKET
, "bind: %s", safe_strerror(en
));
688 ret
= listen(sock
, SOMAXCONN
);
690 zlog_ferr(BGP_ERR_SOCKET
, "listen: %s", safe_strerror(errno
));
694 listener
= XCALLOC(MTYPE_BGP_LISTENER
, sizeof(*listener
));
697 /* this socket needs a change of ns. record bgp back pointer */
698 if (bgp
->vrf_id
!= VRF_DEFAULT
&& vrf_is_mapped_on_netns(bgp
->vrf_id
))
701 memcpy(&listener
->su
, sa
, salen
);
702 listener
->thread
= NULL
;
703 thread_add_read(bm
->master
, bgp_accept
, listener
, sock
,
705 listnode_add(bm
->listen_sockets
, listener
);
710 /* IPv6 supported version of BGP server socket setup. */
711 int bgp_socket(struct bgp
*bgp
, unsigned short port
, const char *address
)
713 struct addrinfo
*ainfo
;
714 struct addrinfo
*ainfo_save
;
715 static const struct addrinfo req
= {
716 .ai_family
= AF_UNSPEC
,
717 .ai_flags
= AI_PASSIVE
,
718 .ai_socktype
= SOCK_STREAM
,
721 char port_str
[BUFSIZ
];
723 snprintf(port_str
, sizeof(port_str
), "%d", port
);
724 port_str
[sizeof(port_str
) - 1] = '\0';
726 if (bgpd_privs
.change(ZPRIVS_RAISE
))
727 zlog_ferr(LIB_ERR_PRIVILEGES
, "Can't raise privileges");
728 ret
= vrf_getaddrinfo(address
, port_str
, &req
, &ainfo_save
,
730 if (bgpd_privs
.change(ZPRIVS_LOWER
))
731 zlog_ferr(LIB_ERR_PRIVILEGES
, "Can't lower privileges");
733 zlog_ferr(BGP_ERR_SOCKET
, "getaddrinfo: %s", gai_strerror(ret
));
738 for (ainfo
= ainfo_save
; ainfo
; ainfo
= ainfo
->ai_next
) {
741 if (ainfo
->ai_family
!= AF_INET
&& ainfo
->ai_family
!= AF_INET6
)
744 if (bgpd_privs
.change(ZPRIVS_RAISE
))
745 zlog_ferr(LIB_ERR_PRIVILEGES
, "Can't raise privileges");
746 sock
= vrf_socket(ainfo
->ai_family
, ainfo
->ai_socktype
,
747 ainfo
->ai_protocol
, bgp
->vrf_id
,
748 (bgp
->inst_type
== BGP_INSTANCE_TYPE_VRF
?
750 if (bgpd_privs
.change(ZPRIVS_LOWER
))
751 zlog_ferr(LIB_ERR_PRIVILEGES
, "Can't lower privileges");
753 zlog_ferr(BGP_ERR_SOCKET
, "socket: %s",
754 safe_strerror(errno
));
758 /* if we intend to implement ttl-security, this socket needs
760 sockopt_ttl(ainfo
->ai_family
, sock
, MAXTTL
);
762 ret
= bgp_listener(sock
, ainfo
->ai_addr
, ainfo
->ai_addrlen
,
769 freeaddrinfo(ainfo_save
);
770 if (count
== 0 && bgp
->inst_type
!= BGP_INSTANCE_TYPE_VRF
) {
773 "%s: no usable addresses please check other programs usage of specified port %d",
775 zlog_ferr(BGP_ERR_SOCKET
, "%s: Program cannot continue",
783 /* This function closes the VRF socket.
784 * It should be called only for a VRF socket with a netns backend.
786 void bgp_close_vrf_socket(struct bgp
*bgp
)
788 struct listnode
*node
, *next
;
789 struct bgp_listener
*listener
;
794 if (bm
->listen_sockets
== NULL
)
797 for (ALL_LIST_ELEMENTS(bm
->listen_sockets
, node
, next
, listener
)) {
798 if (listener
->bgp
== bgp
) {
799 thread_cancel(listener
->thread
);
801 listnode_delete(bm
->listen_sockets
, listener
);
802 XFREE(MTYPE_BGP_LISTENER
, listener
);
807 /* This function closes the main socket.
811 struct listnode
*node
, *next
;
812 struct bgp_listener
*listener
;
814 if (bm
->listen_sockets
== NULL
)
817 for (ALL_LIST_ELEMENTS(bm
->listen_sockets
, node
, next
, listener
)) {
820 thread_cancel(listener
->thread
);
822 listnode_delete(bm
->listen_sockets
, listener
);
823 XFREE(MTYPE_BGP_LISTENER
, listener
);