1 /* BGP network related functions
2 Copyright (C) 1999 Kunihiro Ishiguro
4 This file is part of GNU Zebra.
6 GNU Zebra is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GNU Zebra is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Zebra; see the file COPYING. If not, write to the Free
18 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 #include "sockunion.h"
38 #include "bgpd/bgpd.h"
39 #include "bgpd/bgp_open.h"
40 #include "bgpd/bgp_fsm.h"
41 #include "bgpd/bgp_attr.h"
42 #include "bgpd/bgp_debug.h"
43 #include "bgpd/bgp_network.h"
45 extern struct zebra_privs_t bgpd_privs
;
47 static int bgp_bind(struct peer
*);
49 /* BGP listening socket. */
54 struct thread
*thread
;
58 * Set MD5 key for the socket, for the given IPv4 peer address.
59 * If the password is NULL or zero-length, the option will be disabled.
62 bgp_md5_set_socket (int socket
, union sockunion
*su
, const char *password
)
66 #if HAVE_DECL_TCP_MD5SIG
68 #endif /* HAVE_DECL_TCP_MD5SIG */
72 #if HAVE_DECL_TCP_MD5SIG
73 /* Ensure there is no extraneous port information. */
74 memcpy (&su2
, su
, sizeof (union sockunion
));
75 if (su2
.sa
.sa_family
== AF_INET
)
78 su2
.sin6
.sin6_port
= 0;
79 ret
= sockopt_tcp_signature (socket
, &su2
, password
);
81 #endif /* HAVE_DECL_TCP_MD5SIG */
84 zlog_warn ("can't set TCP_MD5SIG option on socket %d: %s", socket
, safe_strerror (en
));
89 /* Helper for bgp_connect */
91 bgp_md5_set_connect (int socket
, union sockunion
*su
, const char *password
)
95 #if HAVE_DECL_TCP_MD5SIG
96 if ( bgpd_privs
.change (ZPRIVS_RAISE
) )
98 zlog_err ("%s: could not raise privs", __func__
);
102 ret
= bgp_md5_set_socket (socket
, su
, password
);
104 if (bgpd_privs
.change (ZPRIVS_LOWER
) )
105 zlog_err ("%s: could not lower privs", __func__
);
106 #endif /* HAVE_DECL_TCP_MD5SIG */
112 bgp_md5_set_password (struct peer
*peer
, const char *password
)
114 struct listnode
*node
;
116 struct bgp_listener
*listener
;
118 if ( bgpd_privs
.change (ZPRIVS_RAISE
) )
120 zlog_err ("%s: could not raise privs", __func__
);
124 /* Set or unset the password on the listen socket(s). Outbound connections
125 * are taken care of in bgp_connect() below.
127 for (ALL_LIST_ELEMENTS_RO(bm
->listen_sockets
, node
, listener
))
128 if (listener
->su
.sa
.sa_family
== peer
->su
.sa
.sa_family
)
130 ret
= bgp_md5_set_socket (listener
->fd
, &peer
->su
, password
);
134 if (bgpd_privs
.change (ZPRIVS_LOWER
) )
135 zlog_err ("%s: could not lower privs", __func__
);
141 bgp_md5_set (struct peer
*peer
)
143 /* Set the password on the listen socket(s). */
144 return bgp_md5_set_password (peer
, peer
->password
);
148 bgp_md5_unset (struct peer
*peer
)
150 /* Unset the password on the listen socket(s). */
151 return bgp_md5_set_password (peer
, NULL
);
154 /* Update BGP socket send buffer size */
156 bgp_update_sock_send_buffer_size (int fd
)
158 int size
= BGP_SOCKET_SNDBUF_SIZE
;
160 socklen_t optlen
= sizeof(optval
);
162 if (getsockopt(fd
, SOL_SOCKET
, SO_SNDBUF
, &optval
, &optlen
) < 0)
164 zlog_err("getsockopt of SO_SNDBUF failed %s\n", safe_strerror(errno
));
169 if (setsockopt(fd
, SOL_SOCKET
, SO_SNDBUF
, &size
, sizeof(size
)) < 0)
171 zlog_err("Couldn't increase send buffer: %s\n", safe_strerror(errno
));
177 bgp_set_socket_ttl (struct peer
*peer
, int bgp_sock
)
179 char buf
[INET_ADDRSTRLEN
];
182 /* If the peer is EBGP, set the TTL for this connection. */
183 if (!peer
->gtsm_hops
&& (peer_sort (peer
) == BGP_PEER_EBGP
))
185 ret
= sockopt_ttl (peer
->su
.sa
.sa_family
, bgp_sock
, peer
->ttl
);
188 zlog_err ("%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
190 inet_ntop (AF_INET
, &peer
->remote_id
, buf
, sizeof(buf
)),
195 else if (peer
->gtsm_hops
)
197 /* On Linux, setting minttl without setting ttl seems to mess with the
198 outgoing ttl. Therefore setting both.
200 ret
= sockopt_ttl (peer
->su
.sa
.sa_family
, bgp_sock
, MAXTTL
);
203 zlog_err ("%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
205 inet_ntop (AF_INET
, &peer
->remote_id
, buf
, sizeof(buf
)),
209 ret
= sockopt_minttl (peer
->su
.sa
.sa_family
, bgp_sock
,
210 MAXTTL
+ 1 - peer
->gtsm_hops
);
213 zlog_err ("%s: Can't set MinTTL on peer (rtrid %s) socket, err = %d",
215 inet_ntop (AF_INET
, &peer
->remote_id
, buf
, sizeof(buf
)),
225 * Obtain the BGP instance that the incoming connection should be processed
226 * against. This is important because more than one VRF could be using the
227 * same IP address space. The instance is determined from the device to
228 * which the incoming connection is bound. This could either be a VRF
229 * or it could be an interface, which in turn determines the VRF.
232 bgp_get_instance_for_inc_conn (int sock
, struct bgp
**bgp_inst
)
234 #ifndef SO_BINDTODEVICE
235 /* only Linux has SO_BINDTODEVICE, but we're in Linux-specific code here
236 * anyway since the assumption is that the interface name returned by
237 * getsockopt() is useful in identifying the VRF, particularly with Linux's
238 * VRF l3master device. The whole mechanism is specific to Linux, so...
239 * when other platforms add VRF support, this will need handling here as
240 * well. (or, some restructuring) */
241 *bgp_inst
= bgp_get_default ();
245 char name
[VRF_NAMSIZ
+ 1];
246 socklen_t name_len
= VRF_NAMSIZ
;
249 struct listnode
*node
, *nnode
;
253 rc
= getsockopt(sock
, SOL_SOCKET
, SO_BINDTODEVICE
, name
, &name_len
);
256 #if defined (HAVE_CUMULUS)
257 zlog_err ("[Error] BGP SO_BINDTODEVICE get failed (%s), sock %d",
258 safe_strerror (errno
), sock
);
265 *bgp_inst
= bgp_get_default ();
266 return 0; /* default instance. */
269 /* First try match to instance; if that fails, check for interfaces. */
270 bgp
= bgp_lookup_by_name (name
);
273 if (!bgp
->vrf_id
) // unexpected
279 /* TODO - This will be optimized once interfaces move into the NS */
280 for (ALL_LIST_ELEMENTS (bm
->bgp
, node
, nnode
, bgp
))
282 struct interface
*ifp
;
284 if (bgp
->inst_type
== BGP_INSTANCE_TYPE_VIEW
)
287 ifp
= if_lookup_by_name_vrf (name
, bgp
->vrf_id
);
295 /* We didn't match to either an instance or an interface. */
300 /* Accept bgp connection. */
302 bgp_accept (struct thread
*thread
)
307 struct bgp_listener
*listener
= THREAD_ARG(thread
);
310 char buf
[SU_ADDRSTRLEN
];
311 struct bgp
*bgp
= NULL
;
313 sockunion_init (&su
);
315 /* Register accept thread. */
316 accept_sock
= THREAD_FD (thread
);
319 zlog_err ("accept_sock is nevative value %d", accept_sock
);
322 listener
->thread
= thread_add_read (bm
->master
, bgp_accept
, listener
, accept_sock
);
324 /* Accept client connection. */
325 bgp_sock
= sockunion_accept (accept_sock
, &su
);
328 zlog_err ("[Error] BGP socket accept failed (%s)", safe_strerror (errno
));
331 set_nonblocking (bgp_sock
);
333 /* Obtain BGP instance this connection is meant for. */
334 if (bgp_get_instance_for_inc_conn (bgp_sock
, &bgp
))
336 zlog_err ("[Error] Could not get instance for incoming conn from %s",
337 inet_sutop (&su
, buf
));
342 /* Set socket send buffer size */
343 bgp_update_sock_send_buffer_size(bgp_sock
);
345 /* Check remote IP address */
346 peer1
= peer_lookup (bgp
, &su
);
350 peer1
= peer_lookup_dynamic_neighbor (bgp
, &su
);
353 /* Dynamic neighbor has been created, let it proceed */
354 peer1
->fd
= bgp_sock
;
355 bgp_fsm_change_status(peer1
, Active
);
356 BGP_TIMER_OFF(peer1
->t_start
); /* created in peer_create() */
358 if (peer_active (peer1
))
359 BGP_EVENT_ADD (peer1
, TCP_connection_open
);
367 if (bgp_debug_neighbor_events(NULL
))
369 zlog_debug ("[Event] %s connection rejected - not configured"
370 " and not valid for dynamic",
371 inet_sutop (&su
, buf
));
377 if (CHECK_FLAG(peer1
->flags
, PEER_FLAG_SHUTDOWN
))
379 if (bgp_debug_neighbor_events(peer1
))
380 zlog_debug ("[Event] connection from %s rejected due to admin shutdown",
381 inet_sutop (&su
, buf
));
387 * Do not accept incoming connections in Clearing state. This can result
388 * in incorrect state transitions - e.g., the connection goes back to
389 * Established and then the Clearing_Completed event is generated. Also,
390 * block incoming connection in Deleted state.
392 if (peer1
->status
== Clearing
|| peer1
->status
== Deleted
)
394 if (bgp_debug_neighbor_events(peer1
))
395 zlog_debug("[Event] Closing incoming conn for %s (%p) state %d",
396 peer1
->host
, peer1
, peer1
->status
);
401 /* Check that at least one AF is activated for the peer. */
402 if (!peer_active (peer1
))
404 if (bgp_debug_neighbor_events(peer1
))
405 zlog_debug ("%s - incoming conn rejected - no AF activated for peer",
411 if (bgp_debug_neighbor_events(peer1
))
412 zlog_debug ("[Event] BGP connection from host %s fd %d",
413 inet_sutop (&su
, buf
), bgp_sock
);
415 if (peer1
->doppelganger
)
417 /* We have an existing connection. Kill the existing one and run
420 if (bgp_debug_neighbor_events(peer1
))
421 zlog_debug ("[Event] New active connection from peer %s, Killing"
422 " previous active connection", peer1
->host
);
423 peer_delete(peer1
->doppelganger
);
426 if (bgp_set_socket_ttl (peer1
, bgp_sock
) < 0)
427 if (bgp_debug_neighbor_events(peer1
))
428 zlog_debug ("[Event] Unable to set min/max TTL on peer %s, Continuing",
431 peer
= peer_create (&su
, peer1
->conf_if
, peer1
->bgp
, peer1
->local_as
,
432 peer1
->as
, peer1
->as_type
, 0, 0, NULL
);
434 hash_release(peer
->bgp
->peerhash
, peer
);
435 hash_get(peer
->bgp
->peerhash
, peer
, hash_alloc_intern
);
437 peer_xfer_config(peer
, peer1
);
438 UNSET_FLAG (peer
->flags
, PEER_FLAG_CONFIG_NODE
);
440 peer
->doppelganger
= peer1
;
441 peer1
->doppelganger
= peer
;
444 bgp_fsm_change_status(peer
, Active
);
445 BGP_TIMER_OFF(peer
->t_start
); /* created in peer_create() */
447 SET_FLAG (peer
->sflags
, PEER_STATUS_ACCEPT_PEER
);
449 /* Make dummy peer until read Open packet. */
450 if (peer1
->status
== Established
&&
451 CHECK_FLAG (peer1
->sflags
, PEER_STATUS_NSF_MODE
))
453 /* If we have an existing established connection with graceful restart
454 * capability announced with one or more address families, then drop
455 * existing established connection and move state to connect.
457 peer1
->last_reset
= PEER_DOWN_NSF_CLOSE_SESSION
;
458 SET_FLAG (peer1
->sflags
, PEER_STATUS_NSF_WAIT
);
459 bgp_event_update(peer1
, TCP_connection_closed
);
462 if (peer_active (peer
))
464 BGP_EVENT_ADD (peer
, TCP_connection_open
);
470 /* BGP socket bind. */
472 bgp_bind (struct peer
*peer
)
474 #ifdef SO_BINDTODEVICE
479 /* If not bound to an interface or part of a VRF, we don't care. */
480 if (!peer
->bgp
->vrf_id
&& ! peer
->ifname
&& !peer
->conf_if
)
483 if (peer
->su
.sa
.sa_family
!= AF_INET
&&
484 peer
->su
.sa
.sa_family
!= AF_INET6
)
485 return 0; // unexpected
487 /* For IPv6 peering, interface (unnumbered or link-local with interface)
488 * takes precedence over VRF. For IPv4 peering, explicit interface or
489 * VRF are the situations to bind.
491 if (peer
->su
.sa
.sa_family
== AF_INET6
)
492 name
= (peer
->conf_if
? peer
->conf_if
: \
493 (peer
->ifname
? peer
->ifname
: peer
->bgp
->name
));
495 name
= peer
->ifname
? peer
->ifname
: peer
->bgp
->name
;
500 if (bgp_debug_neighbor_events(peer
))
501 zlog_debug ("%s Binding to interface %s", peer
->host
, name
);
503 if ( bgpd_privs
.change (ZPRIVS_RAISE
) )
504 zlog_err ("bgp_bind: could not raise privs");
506 ret
= setsockopt (peer
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
,
510 if (bgpd_privs
.change (ZPRIVS_LOWER
) )
511 zlog_err ("bgp_bind: could not lower privs");
515 if (bgp_debug_neighbor_events (peer
))
516 zlog_debug ("bind to interface %s failed, errno=%d", name
, myerrno
);
519 #endif /* SO_BINDTODEVICE */
524 bgp_update_address (struct interface
*ifp
, const union sockunion
*dst
,
525 union sockunion
*addr
)
527 struct prefix
*p
, *sel
, d
;
528 struct connected
*connected
;
529 struct listnode
*node
;
532 sockunion2hostprefix (dst
, &d
);
536 for (ALL_LIST_ELEMENTS_RO (ifp
->connected
, node
, connected
))
538 p
= connected
->address
;
539 if (p
->family
!= d
.family
)
541 if (prefix_common_bits (p
, &d
) > common
)
544 common
= prefix_common_bits (sel
, &d
);
551 prefix2sockunion (sel
, addr
);
555 /* Update source selection. */
557 bgp_update_source (struct peer
*peer
)
559 struct interface
*ifp
;
560 union sockunion addr
;
563 sockunion_init (&addr
);
565 /* Source is specified with interface name. */
568 ifp
= if_lookup_by_name_vrf (peer
->update_if
, peer
->bgp
->vrf_id
);
572 if (bgp_update_address (ifp
, &peer
->su
, &addr
))
575 ret
= sockunion_bind (peer
->fd
, &addr
, 0, &addr
);
578 /* Source is specified with IP address. */
579 if (peer
->update_source
)
580 ret
= sockunion_bind (peer
->fd
, peer
->update_source
, 0, peer
->update_source
);
585 #define DATAPLANE_MARK 254 /* main table ID */
587 /* Try to connect to the BGP peer. */
589 bgp_connect (struct peer
*peer
)
591 ifindex_t ifindex
= 0;
593 if (peer
->conf_if
&& BGP_PEER_SU_UNSPEC(peer
))
595 zlog_debug("Peer address not learnt: Returning from connect");
598 /* Make socket for the peer. */
599 peer
->fd
= sockunion_socket (&peer
->su
);
603 set_nonblocking (peer
->fd
);
605 /* Set socket send buffer size */
606 bgp_update_sock_send_buffer_size(peer
->fd
);
608 if (bgp_set_socket_ttl (peer
, peer
->fd
) < 0)
611 sockopt_reuseaddr (peer
->fd
);
612 sockopt_reuseport (peer
->fd
);
613 if (sockopt_mark_default(peer
->fd
, DATAPLANE_MARK
, &bgpd_privs
) < 0)
614 zlog_warn("Unable to set mark on FD for peer %s, err=%s", peer
->host
,
615 safe_strerror(errno
));
617 #ifdef IPTOS_PREC_INTERNETCONTROL
618 if (bgpd_privs
.change (ZPRIVS_RAISE
))
619 zlog_err ("%s: could not raise privs", __func__
);
620 if (sockunion_family (&peer
->su
) == AF_INET
)
621 setsockopt_ipv4_tos (peer
->fd
, IPTOS_PREC_INTERNETCONTROL
);
623 else if (sockunion_family (&peer
->su
) == AF_INET6
)
624 setsockopt_ipv6_tclass (peer
->fd
, IPTOS_PREC_INTERNETCONTROL
);
626 if (bgpd_privs
.change (ZPRIVS_LOWER
))
627 zlog_err ("%s: could not lower privs", __func__
);
631 bgp_md5_set_connect (peer
->fd
, &peer
->su
, peer
->password
);
636 /* Update source bind. */
637 if (bgp_update_source (peer
) < 0)
639 return connect_error
;
643 if (peer
->conf_if
|| peer
->ifname
)
644 ifindex
= ifname2ifindex (peer
->conf_if
? peer
->conf_if
: peer
->ifname
);
645 #endif /* HAVE_IPV6 */
647 if (bgp_debug_neighbor_events(peer
))
648 zlog_debug ("%s [Event] Connect start to %s fd %d",
649 peer
->host
, peer
->host
, peer
->fd
);
651 /* Connect to the remote peer. */
652 return sockunion_connect (peer
->fd
, &peer
->su
, htons (peer
->port
), ifindex
);
655 /* After the TCP connection is established, get the local address and port. */
657 bgp_getsockname (struct peer
*peer
)
661 sockunion_free (peer
->su_local
);
662 peer
->su_local
= NULL
;
667 sockunion_free (peer
->su_remote
);
668 peer
->su_remote
= NULL
;
671 peer
->su_local
= sockunion_getsockname (peer
->fd
);
672 if (!peer
->su_local
) return -1;
673 peer
->su_remote
= sockunion_getpeername (peer
->fd
);
674 if (!peer
->su_remote
) return -1;
676 if (bgp_nexthop_set (peer
->su_local
, peer
->su_remote
,
677 &peer
->nexthop
, peer
))
679 #if defined (HAVE_CUMULUS)
680 zlog_err ("%s: nexthop_set failed, resetting connection - intf %p",
681 peer
->host
, peer
->nexthop
.ifp
);
691 bgp_listener (int sock
, struct sockaddr
*sa
, socklen_t salen
)
693 struct bgp_listener
*listener
;
696 sockopt_reuseaddr (sock
);
697 sockopt_reuseport (sock
);
699 if (bgpd_privs
.change (ZPRIVS_RAISE
))
700 zlog_err ("%s: could not raise privs", __func__
);
702 #ifdef IPTOS_PREC_INTERNETCONTROL
703 if (sa
->sa_family
== AF_INET
)
704 setsockopt_ipv4_tos (sock
, IPTOS_PREC_INTERNETCONTROL
);
706 else if (sa
->sa_family
== AF_INET6
)
707 setsockopt_ipv6_tclass (sock
, IPTOS_PREC_INTERNETCONTROL
);
711 sockopt_v6only (sa
->sa_family
, sock
);
713 ret
= bind (sock
, sa
, salen
);
715 if (bgpd_privs
.change (ZPRIVS_LOWER
))
716 zlog_err ("%s: could not lower privs", __func__
);
720 zlog_err ("bind: %s", safe_strerror (en
));
724 ret
= listen (sock
, 3);
727 zlog_err ("listen: %s", safe_strerror (errno
));
731 listener
= XMALLOC (MTYPE_BGP_LISTENER
, sizeof(*listener
));
733 memcpy(&listener
->su
, sa
, salen
);
734 listener
->thread
= thread_add_read (bm
->master
, bgp_accept
, listener
, sock
);
735 listnode_add (bm
->listen_sockets
, listener
);
740 /* IPv6 supported version of BGP server socket setup. */
743 bgp_socket (unsigned short port
, const char *address
)
745 struct addrinfo
*ainfo
;
746 struct addrinfo
*ainfo_save
;
747 static const struct addrinfo req
= {
748 .ai_family
= AF_UNSPEC
,
749 .ai_flags
= AI_PASSIVE
,
750 .ai_socktype
= SOCK_STREAM
,
753 char port_str
[BUFSIZ
];
755 snprintf (port_str
, sizeof(port_str
), "%d", port
);
756 port_str
[sizeof (port_str
) - 1] = '\0';
758 ret
= getaddrinfo (address
, port_str
, &req
, &ainfo_save
);
761 zlog_err ("getaddrinfo: %s", gai_strerror (ret
));
766 for (ainfo
= ainfo_save
; ainfo
; ainfo
= ainfo
->ai_next
)
770 if (ainfo
->ai_family
!= AF_INET
&& ainfo
->ai_family
!= AF_INET6
)
773 sock
= socket (ainfo
->ai_family
, ainfo
->ai_socktype
, ainfo
->ai_protocol
);
776 zlog_err ("socket: %s", safe_strerror (errno
));
780 /* if we intend to implement ttl-security, this socket needs ttl=255 */
781 sockopt_ttl (ainfo
->ai_family
, sock
, MAXTTL
);
783 ret
= bgp_listener (sock
, ainfo
->ai_addr
, ainfo
->ai_addrlen
);
789 freeaddrinfo (ainfo_save
);
792 zlog_err ("%s: no usable addresses", __func__
);
799 /* Traditional IPv4 only version. */
801 bgp_socket (unsigned short port
, const char *address
)
805 struct sockaddr_in sin
;
808 sock
= socket (AF_INET
, SOCK_STREAM
, 0);
811 zlog_err ("socket: %s", safe_strerror (errno
));
815 /* if we intend to implement ttl-security, this socket needs ttl=255 */
816 sockopt_ttl (AF_INET
, sock
, MAXTTL
);
818 memset (&sin
, 0, sizeof (struct sockaddr_in
));
819 sin
.sin_family
= AF_INET
;
820 sin
.sin_port
= htons (port
);
821 socklen
= sizeof (struct sockaddr_in
);
823 if (address
&& ((ret
= inet_aton(address
, &sin
.sin_addr
)) < 1))
825 zlog_err("bgp_socket: could not parse ip address %s: %s",
826 address
, safe_strerror (errno
));
829 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
830 sin
.sin_len
= socklen
;
831 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
833 ret
= bgp_listener (sock
, (struct sockaddr
*) &sin
, socklen
);
841 #endif /* HAVE_IPV6 */
846 struct listnode
*node
, *next
;
847 struct bgp_listener
*listener
;
849 if (bm
->listen_sockets
== NULL
)
852 for (ALL_LIST_ELEMENTS (bm
->listen_sockets
, node
, next
, listener
))
854 thread_cancel (listener
->thread
);
855 close (listener
->fd
);
856 listnode_delete (bm
->listen_sockets
, listener
);
857 XFREE (MTYPE_BGP_LISTENER
, listener
);