1 /* BGP network related functions
2 Copyright (C) 1999 Kunihiro Ishiguro
4 This file is part of GNU Zebra.
6 GNU Zebra is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GNU Zebra is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Zebra; see the file COPYING. If not, write to the Free
18 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 #include "sockunion.h"
38 #include "bgpd/bgpd.h"
39 #include "bgpd/bgp_open.h"
40 #include "bgpd/bgp_fsm.h"
41 #include "bgpd/bgp_attr.h"
42 #include "bgpd/bgp_debug.h"
43 #include "bgpd/bgp_network.h"
45 extern struct zebra_privs_t bgpd_privs
;
47 static int bgp_bind(struct peer
*);
49 /* BGP listening socket. */
54 struct thread
*thread
;
58 * Set MD5 key for the socket, for the given IPv4 peer address.
59 * If the password is NULL or zero-length, the option will be disabled.
62 bgp_md5_set_socket (int socket
, union sockunion
*su
, const char *password
)
70 #if HAVE_DECL_TCP_MD5SIG
71 /* Ensure there is no extraneous port information. */
72 memcpy (&su2
, su
, sizeof (union sockunion
));
73 if (su2
.sa
.sa_family
== AF_INET
)
76 su2
.sin6
.sin6_port
= 0;
77 ret
= sockopt_tcp_signature (socket
, &su2
, password
);
79 #endif /* HAVE_TCP_MD5SIG */
82 zlog_warn ("can't set TCP_MD5SIG option on socket %d: %s", socket
, safe_strerror (en
));
87 /* Helper for bgp_connect */
89 bgp_md5_set_connect (int socket
, union sockunion
*su
, const char *password
)
93 #if HAVE_DECL_TCP_MD5SIG
94 if ( bgpd_privs
.change (ZPRIVS_RAISE
) )
96 zlog_err ("%s: could not raise privs", __func__
);
100 ret
= bgp_md5_set_socket (socket
, su
, password
);
102 if (bgpd_privs
.change (ZPRIVS_LOWER
) )
103 zlog_err ("%s: could not lower privs", __func__
);
104 #endif /* HAVE_TCP_MD5SIG */
110 bgp_md5_set_password (struct peer
*peer
, const char *password
)
112 struct listnode
*node
;
114 struct bgp_listener
*listener
;
116 if ( bgpd_privs
.change (ZPRIVS_RAISE
) )
118 zlog_err ("%s: could not raise privs", __func__
);
122 /* Set or unset the password on the listen socket(s). Outbound connections
123 * are taken care of in bgp_connect() below.
125 for (ALL_LIST_ELEMENTS_RO(bm
->listen_sockets
, node
, listener
))
126 if (listener
->su
.sa
.sa_family
== peer
->su
.sa
.sa_family
)
128 ret
= bgp_md5_set_socket (listener
->fd
, &peer
->su
, password
);
132 if (bgpd_privs
.change (ZPRIVS_LOWER
) )
133 zlog_err ("%s: could not lower privs", __func__
);
139 bgp_md5_set (struct peer
*peer
)
141 /* Set the password from listen socket. */
142 return bgp_md5_set_password (peer
, peer
->password
);
146 bgp_md5_unset (struct peer
*peer
)
148 /* Unset the password from listen socket. */
149 return bgp_md5_set_password (peer
, NULL
);
152 /* Update BGP socket send buffer size */
154 bgp_update_sock_send_buffer_size (int fd
)
156 int size
= BGP_SOCKET_SNDBUF_SIZE
;
158 socklen_t optlen
= sizeof(optval
);
160 if (getsockopt(fd
, SOL_SOCKET
, SO_SNDBUF
, &optval
, &optlen
) < 0)
162 zlog_err("getsockopt of SO_SNDBUF failed %s\n", safe_strerror(errno
));
167 if (setsockopt(fd
, SOL_SOCKET
, SO_SNDBUF
, &size
, sizeof(size
)) < 0)
169 zlog_err("Couldn't increase send buffer: %s\n", safe_strerror(errno
));
175 bgp_set_socket_ttl (struct peer
*peer
, int bgp_sock
)
177 char buf
[INET_ADDRSTRLEN
];
180 /* If the peer is EBGP, we should set the TTL for this connection. */
181 if (!peer
->gtsm_hops
&& (peer_sort (peer
) == BGP_PEER_EBGP
))
183 ret
= sockopt_ttl (peer
->su
.sa
.sa_family
, bgp_sock
, peer
->ttl
);
186 zlog_err ("%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
188 inet_ntop (AF_INET
, &peer
->remote_id
, buf
, sizeof(buf
)),
193 else if (peer
->gtsm_hops
)
195 /* On Linux, setting minttl without setting ttl seems to mess with the
196 outgoing ttl. Therefore setting both.
198 ret
= sockopt_ttl (peer
->su
.sa
.sa_family
, bgp_sock
, MAXTTL
);
201 zlog_err ("%s: Can't set TxTTL on peer (rtrid %s) socket, err = %d",
203 inet_ntop (AF_INET
, &peer
->remote_id
, buf
, sizeof(buf
)),
207 ret
= sockopt_minttl (peer
->su
.sa
.sa_family
, bgp_sock
,
208 MAXTTL
+ 1 - peer
->gtsm_hops
);
211 zlog_err ("%s: Can't set MinTTL on peer (rtrid %s) socket, err = %d",
213 inet_ntop (AF_INET
, &peer
->remote_id
, buf
, sizeof(buf
)),
223 * Obtain the BGP instance that the incoming connection should be processed
224 * against. This is important because more than one VRF could be using the
225 * same IP address space. The instance is determined from the device to
226 * which the incoming connection is bound. This could either be a VRF
227 * or it could be an interface, which in turn determines the VRF.
230 bgp_get_instance_for_inc_conn (int sock
, struct bgp
**bgp_inst
)
232 #ifndef SO_BINDTODEVICE
233 /* only Linux has SO_BINDTODEVICE, but we're in Linux-specific code here
234 * anyway since the assumption is that the interface name returned by
235 * getsockopt() is useful in identifying the VRF, particularly with Linux's
236 * VRF l3master device. The whole mechanism is specific to Linux, so...
237 * when other platforms add VRF support, this will need handling here as
238 * well. (or, some restructuring) */
239 *bgp_inst
= bgp_get_default ();
243 char name
[VRF_NAMSIZ
+ 1];
244 socklen_t name_len
= VRF_NAMSIZ
;
247 struct listnode
*node
, *nnode
;
251 rc
= getsockopt(sock
, SOL_SOCKET
, SO_BINDTODEVICE
, name
, &name_len
);
254 zlog_err ("[Error] BGP SO_BINDTODEVICE get failed (%s), sock %d",
255 safe_strerror (errno
), sock
);
260 return 0; /* default instance. */
262 /* First try match to instance; if that fails, check for interfaces. */
263 bgp
= bgp_lookup_by_name (name
);
266 if (!bgp
->vrf_id
) // unexpected
272 /* TODO - This will be optimized once interfaces move into the NS */
273 for (ALL_LIST_ELEMENTS (bm
->bgp
, node
, nnode
, bgp
))
275 struct interface
*ifp
;
277 if (bgp
->inst_type
== BGP_INSTANCE_TYPE_VIEW
)
280 ifp
= if_lookup_by_name_vrf (name
, bgp
->vrf_id
);
288 /* We didn't match to either an instance or an interface. */
293 /* Accept bgp connection. */
295 bgp_accept (struct thread
*thread
)
300 struct bgp_listener
*listener
= THREAD_ARG(thread
);
303 char buf
[SU_ADDRSTRLEN
];
304 struct bgp
*bgp
= NULL
;
306 sockunion_init (&su
);
308 /* Register accept thread. */
309 accept_sock
= THREAD_FD (thread
);
312 zlog_err ("accept_sock is nevative value %d", accept_sock
);
315 listener
->thread
= thread_add_read (bm
->master
, bgp_accept
, listener
, accept_sock
);
317 /* Accept client connection. */
318 bgp_sock
= sockunion_accept (accept_sock
, &su
);
321 zlog_err ("[Error] BGP socket accept failed (%s)", safe_strerror (errno
));
324 set_nonblocking (bgp_sock
);
326 /* Obtain BGP instance this connection is meant for. */
327 if (bgp_get_instance_for_inc_conn (bgp_sock
, &bgp
))
329 zlog_err ("[Error] Could not get instance for incoming conn from %s",
330 inet_sutop (&su
, buf
));
335 /* Set socket send buffer size */
336 bgp_update_sock_send_buffer_size(bgp_sock
);
338 /* Check remote IP address */
339 peer1
= peer_lookup (bgp
, &su
);
343 peer1
= peer_lookup_dynamic_neighbor (bgp
, &su
);
346 /* Dynamic neighbor has been created, let it proceed */
347 peer1
->fd
= bgp_sock
;
348 bgp_fsm_change_status(peer1
, Active
);
349 BGP_TIMER_OFF(peer1
->t_start
); /* created in peer_create() */
351 if (peer_active (peer1
))
352 BGP_EVENT_ADD (peer1
, TCP_connection_open
);
360 if (bgp_debug_neighbor_events(NULL
))
362 zlog_debug ("[Event] %s connection rejected - not configured"
363 " and not valid for dynamic",
364 inet_sutop (&su
, buf
));
370 if (CHECK_FLAG(peer1
->flags
, PEER_FLAG_SHUTDOWN
))
372 if (bgp_debug_neighbor_events(peer1
))
373 zlog_debug ("[Event] connection from %s rejected due to admin shutdown",
374 inet_sutop (&su
, buf
));
380 * Do not accept incoming connections in Clearing state. This can result
381 * in incorrect state transitions - e.g., the connection goes back to
382 * Established and then the Clearing_Completed event is generated. Also,
383 * block incoming connection in Deleted state.
385 if (peer1
->status
== Clearing
|| peer1
->status
== Deleted
)
387 if (bgp_debug_neighbor_events(peer1
))
388 zlog_debug("[Event] Closing incoming conn for %s (%p) state %d",
389 peer1
->host
, peer1
, peer1
->status
);
394 /* Check that at least one AF is activated for the peer. */
395 if (!peer_active (peer1
))
397 if (bgp_debug_neighbor_events(peer1
))
398 zlog_debug ("%s - incoming conn rejected - no AF activated for peer",
404 if (bgp_debug_neighbor_events(peer1
))
405 zlog_debug ("[Event] BGP connection from host %s fd %d",
406 inet_sutop (&su
, buf
), bgp_sock
);
408 if (peer1
->doppelganger
)
410 /* We have an existing connection. Kill the existing one and run
413 if (bgp_debug_neighbor_events(peer1
))
414 zlog_debug ("[Event] New active connection from peer %s, Killing"
415 " previous active connection", peer1
->host
);
416 peer_delete(peer1
->doppelganger
);
419 if (bgp_set_socket_ttl (peer1
, bgp_sock
) < 0)
420 if (bgp_debug_neighbor_events(peer1
))
421 zlog_debug ("[Event] Unable to set min/max TTL on peer %s, Continuing",
424 peer
= peer_create (&su
, peer1
->conf_if
, peer1
->bgp
, peer1
->local_as
,
425 peer1
->as
, peer1
->as_type
, 0, 0, NULL
);
427 hash_release(peer
->bgp
->peerhash
, peer
);
428 hash_get(peer
->bgp
->peerhash
, peer
, hash_alloc_intern
);
430 peer_xfer_config(peer
, peer1
);
431 UNSET_FLAG (peer
->flags
, PEER_FLAG_CONFIG_NODE
);
433 peer
->doppelganger
= peer1
;
434 peer1
->doppelganger
= peer
;
437 bgp_fsm_change_status(peer
, Active
);
438 BGP_TIMER_OFF(peer
->t_start
); /* created in peer_create() */
440 SET_FLAG (peer
->sflags
, PEER_STATUS_ACCEPT_PEER
);
442 /* Make dummy peer until read Open packet. */
443 if (peer1
->status
== Established
&&
444 CHECK_FLAG (peer1
->sflags
, PEER_STATUS_NSF_MODE
))
446 /* If we have an existing established connection with graceful restart
447 * capability announced with one or more address families, then drop
448 * existing established connection and move state to connect.
450 peer1
->last_reset
= PEER_DOWN_NSF_CLOSE_SESSION
;
451 SET_FLAG (peer1
->sflags
, PEER_STATUS_NSF_WAIT
);
452 bgp_event_update(peer1
, TCP_connection_closed
);
455 if (peer_active (peer
))
457 BGP_EVENT_ADD (peer
, TCP_connection_open
);
463 /* BGP socket bind. */
465 bgp_bind (struct peer
*peer
)
467 #ifdef SO_BINDTODEVICE
471 /* If not bound to an interface or part of a VRF, we don't care. */
472 if (!peer
->bgp
->vrf_id
&& ! peer
->ifname
&& !peer
->conf_if
)
475 if (peer
->su
.sa
.sa_family
!= AF_INET
&&
476 peer
->su
.sa
.sa_family
!= AF_INET6
)
477 return 0; // unexpected
479 /* For IPv6 peering, interface (unnumbered or link-local with interface)
480 * takes precedence over VRF. For IPv4 peering, explicit interface or
481 * VRF are the situations to bind.
483 if (peer
->su
.sa
.sa_family
== AF_INET6
)
484 name
= (peer
->conf_if
? peer
->conf_if
: \
485 (peer
->ifname
? peer
->ifname
: peer
->bgp
->name
));
487 name
= peer
->ifname
? peer
->ifname
: peer
->bgp
->name
;
492 if (bgp_debug_neighbor_events(peer
))
493 zlog_debug ("%s Binding to interface %s", peer
->host
, name
);
495 if ( bgpd_privs
.change (ZPRIVS_RAISE
) )
496 zlog_err ("bgp_bind: could not raise privs");
498 ret
= setsockopt (peer
->fd
, SOL_SOCKET
, SO_BINDTODEVICE
,
501 if (bgpd_privs
.change (ZPRIVS_LOWER
) )
502 zlog_err ("bgp_bind: could not lower privs");
506 if (bgp_debug_neighbor_events (peer
))
507 zlog_debug ("bind to interface %s failed", name
);
510 #endif /* SO_BINDTODEVICE */
515 bgp_update_address (struct interface
*ifp
, const union sockunion
*dst
,
516 union sockunion
*addr
)
518 struct prefix
*p
, *sel
, d
;
519 struct connected
*connected
;
520 struct listnode
*node
;
523 sockunion2hostprefix (dst
, &d
);
527 for (ALL_LIST_ELEMENTS_RO (ifp
->connected
, node
, connected
))
529 p
= connected
->address
;
530 if (p
->family
!= d
.family
)
532 if (prefix_common_bits (p
, &d
) > common
)
535 common
= prefix_common_bits (sel
, &d
);
542 prefix2sockunion (sel
, addr
);
546 /* Update source selection. */
548 bgp_update_source (struct peer
*peer
)
550 struct interface
*ifp
;
551 union sockunion addr
;
554 sockunion_init (&addr
);
556 /* Source is specified with interface name. */
559 ifp
= if_lookup_by_name_vrf (peer
->update_if
, peer
->bgp
->vrf_id
);
563 if (bgp_update_address (ifp
, &peer
->su
, &addr
))
566 ret
= sockunion_bind (peer
->fd
, &addr
, 0, &addr
);
569 /* Source is specified with IP address. */
570 if (peer
->update_source
)
571 ret
= sockunion_bind (peer
->fd
, peer
->update_source
, 0, peer
->update_source
);
576 #define DATAPLANE_MARK 254 /* main table ID */
578 /* BGP tries to connect to the peer. */
580 bgp_connect (struct peer
*peer
)
582 ifindex_t ifindex
= 0;
584 if (peer
->conf_if
&& BGP_PEER_SU_UNSPEC(peer
))
586 zlog_debug("Peer address not learnt: Returning from connect");
589 /* Make socket for the peer. */
590 peer
->fd
= sockunion_socket (&peer
->su
);
594 set_nonblocking (peer
->fd
);
596 /* Set socket send buffer size */
597 bgp_update_sock_send_buffer_size(peer
->fd
);
599 if (bgp_set_socket_ttl (peer
, peer
->fd
) < 0)
602 sockopt_reuseaddr (peer
->fd
);
603 sockopt_reuseport (peer
->fd
);
604 if (sockopt_mark_default(peer
->fd
, DATAPLANE_MARK
, &bgpd_privs
) < 0)
605 zlog_warn("Unable to set mark on FD for peer %s, err=%s", peer
->host
,
606 safe_strerror(errno
));
608 #ifdef IPTOS_PREC_INTERNETCONTROL
609 if (bgpd_privs
.change (ZPRIVS_RAISE
))
610 zlog_err ("%s: could not raise privs", __func__
);
611 if (sockunion_family (&peer
->su
) == AF_INET
)
612 setsockopt_ipv4_tos (peer
->fd
, IPTOS_PREC_INTERNETCONTROL
);
614 else if (sockunion_family (&peer
->su
) == AF_INET6
)
615 setsockopt_ipv6_tclass (peer
->fd
, IPTOS_PREC_INTERNETCONTROL
);
617 if (bgpd_privs
.change (ZPRIVS_LOWER
))
618 zlog_err ("%s: could not lower privs", __func__
);
622 bgp_md5_set_connect (peer
->fd
, &peer
->su
, peer
->password
);
627 /* Update source bind. */
628 if (bgp_update_source (peer
) < 0)
630 return connect_error
;
634 if (peer
->conf_if
|| peer
->ifname
)
635 ifindex
= ifname2ifindex (peer
->conf_if
? peer
->conf_if
: peer
->ifname
);
636 #endif /* HAVE_IPV6 */
638 if (bgp_debug_neighbor_events(peer
))
639 zlog_debug ("%s [Event] Connect start to %s fd %d",
640 peer
->host
, peer
->host
, peer
->fd
);
642 /* Connect to the remote peer. */
643 return sockunion_connect (peer
->fd
, &peer
->su
, htons (peer
->port
), ifindex
);
646 /* After the TCP connection is established, get the local address and port. */
648 bgp_getsockname (struct peer
*peer
)
652 sockunion_free (peer
->su_local
);
653 peer
->su_local
= NULL
;
658 sockunion_free (peer
->su_remote
);
659 peer
->su_remote
= NULL
;
662 peer
->su_local
= sockunion_getsockname (peer
->fd
);
663 if (!peer
->su_local
) return -1;
664 peer
->su_remote
= sockunion_getpeername (peer
->fd
);
665 if (!peer
->su_remote
) return -1;
667 if (bgp_nexthop_set (peer
->su_local
, peer
->su_remote
,
668 &peer
->nexthop
, peer
))
670 zlog_err ("%s: nexthop_set failed, resetting connection - intf %p",
671 peer
->host
, peer
->nexthop
.ifp
);
680 bgp_listener (int sock
, struct sockaddr
*sa
, socklen_t salen
)
682 struct bgp_listener
*listener
;
685 sockopt_reuseaddr (sock
);
686 sockopt_reuseport (sock
);
688 if (bgpd_privs
.change (ZPRIVS_RAISE
))
689 zlog_err ("%s: could not raise privs", __func__
);
691 #ifdef IPTOS_PREC_INTERNETCONTROL
692 if (sa
->sa_family
== AF_INET
)
693 setsockopt_ipv4_tos (sock
, IPTOS_PREC_INTERNETCONTROL
);
695 else if (sa
->sa_family
== AF_INET6
)
696 setsockopt_ipv6_tclass (sock
, IPTOS_PREC_INTERNETCONTROL
);
700 sockopt_v6only (sa
->sa_family
, sock
);
702 ret
= bind (sock
, sa
, salen
);
704 if (bgpd_privs
.change (ZPRIVS_LOWER
))
705 zlog_err ("%s: could not lower privs", __func__
);
709 zlog_err ("bind: %s", safe_strerror (en
));
713 ret
= listen (sock
, 3);
716 zlog_err ("listen: %s", safe_strerror (errno
));
720 listener
= XMALLOC (MTYPE_BGP_LISTENER
, sizeof(*listener
));
722 memcpy(&listener
->su
, sa
, salen
);
723 listener
->thread
= thread_add_read (bm
->master
, bgp_accept
, listener
, sock
);
724 listnode_add (bm
->listen_sockets
, listener
);
729 /* IPv6 supported version of BGP server socket setup. */
732 bgp_socket (unsigned short port
, const char *address
)
734 struct addrinfo
*ainfo
;
735 struct addrinfo
*ainfo_save
;
736 static const struct addrinfo req
= {
737 .ai_family
= AF_UNSPEC
,
738 .ai_flags
= AI_PASSIVE
,
739 .ai_socktype
= SOCK_STREAM
,
742 char port_str
[BUFSIZ
];
744 snprintf (port_str
, sizeof(port_str
), "%d", port
);
745 port_str
[sizeof (port_str
) - 1] = '\0';
747 ret
= getaddrinfo (address
, port_str
, &req
, &ainfo_save
);
750 zlog_err ("getaddrinfo: %s", gai_strerror (ret
));
755 for (ainfo
= ainfo_save
; ainfo
; ainfo
= ainfo
->ai_next
)
759 if (ainfo
->ai_family
!= AF_INET
&& ainfo
->ai_family
!= AF_INET6
)
762 sock
= socket (ainfo
->ai_family
, ainfo
->ai_socktype
, ainfo
->ai_protocol
);
765 zlog_err ("socket: %s", safe_strerror (errno
));
769 /* if we intend to implement ttl-security, this socket needs ttl=255 */
770 sockopt_ttl (ainfo
->ai_family
, sock
, MAXTTL
);
772 ret
= bgp_listener (sock
, ainfo
->ai_addr
, ainfo
->ai_addrlen
);
778 freeaddrinfo (ainfo_save
);
781 zlog_err ("%s: no usable addresses", __func__
);
788 /* Traditional IPv4 only version. */
790 bgp_socket (unsigned short port
, const char *address
)
794 struct sockaddr_in sin
;
797 sock
= socket (AF_INET
, SOCK_STREAM
, 0);
800 zlog_err ("socket: %s", safe_strerror (errno
));
804 /* if we intend to implement ttl-security, this socket needs ttl=255 */
805 sockopt_ttl (AF_INET
, sock
, MAXTTL
);
807 memset (&sin
, 0, sizeof (struct sockaddr_in
));
808 sin
.sin_family
= AF_INET
;
809 sin
.sin_port
= htons (port
);
810 socklen
= sizeof (struct sockaddr_in
);
812 if (address
&& ((ret
= inet_aton(address
, &sin
.sin_addr
)) < 1))
814 zlog_err("bgp_socket: could not parse ip address %s: %s",
815 address
, safe_strerror (errno
));
818 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
819 sin
.sin_len
= socklen
;
820 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
822 ret
= bgp_listener (sock
, (struct sockaddr
*) &sin
, socklen
);
830 #endif /* HAVE_IPV6 */
835 struct listnode
*node
, *next
;
836 struct bgp_listener
*listener
;
838 if (bm
->listen_sockets
== NULL
)
841 for (ALL_LIST_ELEMENTS (bm
->listen_sockets
, node
, next
, listener
))
843 thread_cancel (listener
->thread
);
844 close (listener
->fd
);
845 listnode_delete (bm
->listen_sockets
, listener
);
846 XFREE (MTYPE_BGP_LISTENER
, listener
);