]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_packet.c
Merge pull request #12390 from sigeryang/vrrp-interop
[mirror_frr.git] / bgpd / bgp_packet.c
1 /* BGP packet management routine.
2 * Contains utility functions for constructing and consuming BGP messages.
3 * Copyright (C) 2017 Cumulus Networks
4 * Copyright (C) 1999 Kunihiro Ishiguro
5 *
6 * This file is part of GNU Zebra.
7 *
8 * GNU Zebra is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * GNU Zebra is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; see the file COPYING; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <zebra.h>
24 #include <sys/time.h>
25
26 #include "thread.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "prefix.h"
30 #include "command.h"
31 #include "log.h"
32 #include "memory.h"
33 #include "sockunion.h" /* for inet_ntop () */
34 #include "sockopt.h"
35 #include "linklist.h"
36 #include "plist.h"
37 #include "queue.h"
38 #include "filter.h"
39 #include "lib_errors.h"
40
41 #include "bgpd/bgpd.h"
42 #include "bgpd/bgp_table.h"
43 #include "bgpd/bgp_dump.h"
44 #include "bgpd/bgp_bmp.h"
45 #include "bgpd/bgp_attr.h"
46 #include "bgpd/bgp_debug.h"
47 #include "bgpd/bgp_errors.h"
48 #include "bgpd/bgp_fsm.h"
49 #include "bgpd/bgp_route.h"
50 #include "bgpd/bgp_packet.h"
51 #include "bgpd/bgp_open.h"
52 #include "bgpd/bgp_aspath.h"
53 #include "bgpd/bgp_community.h"
54 #include "bgpd/bgp_ecommunity.h"
55 #include "bgpd/bgp_lcommunity.h"
56 #include "bgpd/bgp_network.h"
57 #include "bgpd/bgp_mplsvpn.h"
58 #include "bgpd/bgp_evpn.h"
59 #include "bgpd/bgp_advertise.h"
60 #include "bgpd/bgp_vty.h"
61 #include "bgpd/bgp_updgrp.h"
62 #include "bgpd/bgp_label.h"
63 #include "bgpd/bgp_io.h"
64 #include "bgpd/bgp_keepalives.h"
65 #include "bgpd/bgp_flowspec.h"
66 #include "bgpd/bgp_trace.h"
67
/*
 * Hook "bgp_packet_dump": subscribers are handed the peer, the BGP message
 * type, a size and the stream holding the packet.
 * NOTE(review): no hook_call() for this hook is visible in this part of the
 * file - confirm where it is fired.
 */
DEFINE_HOOK(bgp_packet_dump,
	    (struct peer *peer, uint8_t type, bgp_size_t size,
	     struct stream *s),
	    (peer, type, size, s));

/*
 * Hook "bgp_packet_send": same argument list as bgp_packet_dump. It is fired
 * through hook_call() in bgp_open_send() just before the OPEN message is
 * queued on the peer.
 */
DEFINE_HOOK(bgp_packet_send,
	    (struct peer *peer, uint8_t type, bgp_size_t size,
	     struct stream *s),
	    (peer, type, size, s));
77
78 /**
79 * Sets marker and type fields for a BGP message.
80 *
81 * @param s the stream containing the packet
82 * @param type the packet type
83 * @return the size of the stream
84 */
85 int bgp_packet_set_marker(struct stream *s, uint8_t type)
86 {
87 int i;
88
89 /* Fill in marker. */
90 for (i = 0; i < BGP_MARKER_SIZE; i++)
91 stream_putc(s, 0xff);
92
93 /* Dummy total length. This field is should be filled in later on. */
94 stream_putw(s, 0);
95
96 /* BGP packet type. */
97 stream_putc(s, type);
98
99 /* Return current stream size. */
100 return stream_get_endp(s);
101 }
102
103 /**
104 * Sets size field for a BGP message.
105 *
106 * Size field is set to the size of the stream passed.
107 *
108 * @param s the stream containing the packet
109 */
110 void bgp_packet_set_size(struct stream *s)
111 {
112 int cp;
113
114 /* Preserve current pointer. */
115 cp = stream_get_endp(s);
116 stream_putw_at(s, BGP_MARKER_SIZE, cp);
117 }
118
119 /*
120 * Push a packet onto the beginning of the peer's output queue.
121 * This function acquires the peer's write mutex before proceeding.
122 */
123 static void bgp_packet_add(struct peer *peer, struct stream *s)
124 {
125 intmax_t delta;
126 uint32_t holdtime;
127 intmax_t sendholdtime;
128
129 frr_with_mutex (&peer->io_mtx) {
130 /* if the queue is empty, reset the "last OK" timestamp to
131 * now, otherwise if we write another packet immediately
132 * after it'll get confused
133 */
134 if (!stream_fifo_count_safe(peer->obuf))
135 peer->last_sendq_ok = monotime(NULL);
136
137 stream_fifo_push(peer->obuf, s);
138
139 delta = monotime(NULL) - peer->last_sendq_ok;
140
141 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
142 holdtime = atomic_load_explicit(&peer->holdtime,
143 memory_order_relaxed);
144 else
145 holdtime = peer->bgp->default_holdtime;
146
147 sendholdtime = holdtime * 2;
148
149 /* Note that when we're here, we're adding some packet to the
150 * OutQ. That includes keepalives when there is nothing to
151 * do, so there's a guarantee we pass by here once in a while.
152 *
153 * That implies there is no need to go set up another separate
154 * timer that ticks down SendHoldTime, as we'll be here sooner
155 * or later anyway and will see the checks below failing.
156 */
157 if (!holdtime) {
158 /* no holdtime, do nothing. */
159 } else if (delta > sendholdtime) {
160 flog_err(
161 EC_BGP_SENDQ_STUCK_PROPER,
162 "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
163 peer, sendholdtime);
164 BGP_EVENT_ADD(peer, TCP_fatal_error);
165 } else if (delta > (intmax_t)holdtime &&
166 monotime(NULL) - peer->last_sendq_warn > 5) {
167 flog_warn(
168 EC_BGP_SENDQ_STUCK_WARN,
169 "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
170 peer, holdtime);
171 peer->last_sendq_warn = monotime(NULL);
172 }
173 }
174 }
175
176 static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
177 safi_t safi)
178 {
179 struct stream *s;
180 iana_afi_t pkt_afi = IANA_AFI_IPV4;
181 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
182
183 if (DISABLE_BGP_ANNOUNCE)
184 return NULL;
185
186 if (bgp_debug_neighbor_events(peer))
187 zlog_debug("send End-of-RIB for %s to %s",
188 get_afi_safi_str(afi, safi, false), peer->host);
189
190 s = stream_new(peer->max_packet_size);
191
192 /* Make BGP update packet. */
193 bgp_packet_set_marker(s, BGP_MSG_UPDATE);
194
195 /* Unfeasible Routes Length */
196 stream_putw(s, 0);
197
198 if (afi == AFI_IP && safi == SAFI_UNICAST) {
199 /* Total Path Attribute Length */
200 stream_putw(s, 0);
201 } else {
202 /* Convert AFI, SAFI to values for packet. */
203 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
204
205 /* Total Path Attribute Length */
206 stream_putw(s, 6);
207 stream_putc(s, BGP_ATTR_FLAG_OPTIONAL);
208 stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI);
209 stream_putc(s, 3);
210 stream_putw(s, pkt_afi);
211 stream_putc(s, pkt_safi);
212 }
213
214 bgp_packet_set_size(s);
215 return s;
216 }
217
218 /* Called when there is a change in the EOR(implicit or explicit) status of a
219 * peer. Ends the update-delay if all expected peers are done with EORs. */
220 void bgp_check_update_delay(struct bgp *bgp)
221 {
222 struct listnode *node, *nnode;
223 struct peer *peer = NULL;
224
225 if (bgp_debug_neighbor_events(peer))
226 zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d",
227 bgp->established, bgp->restarted_peers,
228 bgp->implicit_eors, bgp->explicit_eors);
229
230 if (bgp->established
231 <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
232 /*
233 * This is an extra sanity check to make sure we wait for all
234 * the eligible configured peers. This check is performed if
235 * establish wait timer is on, or establish wait option is not
236 * given with the update-delay command
237 */
238 if (bgp->t_establish_wait
239 || (bgp->v_establish_wait == bgp->v_update_delay))
240 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
241 if (CHECK_FLAG(peer->flags,
242 PEER_FLAG_CONFIG_NODE)
243 && !CHECK_FLAG(peer->flags,
244 PEER_FLAG_SHUTDOWN)
245 && !CHECK_FLAG(peer->bgp->flags,
246 BGP_FLAG_SHUTDOWN)
247 && !peer->update_delay_over) {
248 if (bgp_debug_neighbor_events(peer))
249 zlog_debug(
250 " Peer %s pending, continuing read-only mode",
251 peer->host);
252 return;
253 }
254 }
255
256 zlog_info(
257 "Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d",
258 bgp->restarted_peers, bgp->implicit_eors,
259 bgp->explicit_eors);
260 bgp_update_delay_end(bgp);
261 }
262 }
263
264 /*
265 * Called if peer is known to have restarted. The restart-state bit in
266 * Graceful-Restart capability is used for that
267 */
268 void bgp_update_restarted_peers(struct peer *peer)
269 {
270 if (!bgp_update_delay_active(peer->bgp))
271 return; /* BGP update delay has ended */
272 if (peer->update_delay_over)
273 return; /* This peer has already been considered */
274
275 if (bgp_debug_neighbor_events(peer))
276 zlog_debug("Peer %s: Checking restarted", peer->host);
277
278 if (peer_established(peer)) {
279 peer->update_delay_over = 1;
280 peer->bgp->restarted_peers++;
281 bgp_check_update_delay(peer->bgp);
282 }
283 }
284
285 /*
286 * Called as peer receives a keep-alive. Determines if this occurence can be
287 * taken as an implicit EOR for this peer.
288 * NOTE: The very first keep-alive after the Established state of a peer is
289 * considered implicit EOR for the update-delay purposes
290 */
291 void bgp_update_implicit_eors(struct peer *peer)
292 {
293 if (!bgp_update_delay_active(peer->bgp))
294 return; /* BGP update delay has ended */
295 if (peer->update_delay_over)
296 return; /* This peer has already been considered */
297
298 if (bgp_debug_neighbor_events(peer))
299 zlog_debug("Peer %s: Checking implicit EORs", peer->host);
300
301 if (peer_established(peer)) {
302 peer->update_delay_over = 1;
303 peer->bgp->implicit_eors++;
304 bgp_check_update_delay(peer->bgp);
305 }
306 }
307
308 /*
309 * Should be called only when there is a change in the EOR_RECEIVED status
310 * for any afi/safi on a peer.
311 */
312 static void bgp_update_explicit_eors(struct peer *peer)
313 {
314 afi_t afi;
315 safi_t safi;
316
317 if (!bgp_update_delay_active(peer->bgp))
318 return; /* BGP update delay has ended */
319 if (peer->update_delay_over)
320 return; /* This peer has already been considered */
321
322 if (bgp_debug_neighbor_events(peer))
323 zlog_debug("Peer %s: Checking explicit EORs", peer->host);
324
325 FOREACH_AFI_SAFI (afi, safi) {
326 if (peer->afc_nego[afi][safi]
327 && !CHECK_FLAG(peer->af_sflags[afi][safi],
328 PEER_STATUS_EOR_RECEIVED)) {
329 if (bgp_debug_neighbor_events(peer))
330 zlog_debug(
331 " afi %d safi %d didn't receive EOR",
332 afi, safi);
333 return;
334 }
335 }
336
337 peer->update_delay_over = 1;
338 peer->bgp->explicit_eors++;
339 bgp_check_update_delay(peer->bgp);
340 }
341
342 /**
343 * Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers.
344 *
345 * mp_withdraw, if set, is used to nullify attr structure on most of the
346 * calling safi function and for evpn, passed as parameter
347 */
348 int bgp_nlri_parse(struct peer *peer, struct attr *attr,
349 struct bgp_nlri *packet, int mp_withdraw)
350 {
351 switch (packet->safi) {
352 case SAFI_UNICAST:
353 case SAFI_MULTICAST:
354 return bgp_nlri_parse_ip(peer, mp_withdraw ? NULL : attr,
355 packet);
356 case SAFI_LABELED_UNICAST:
357 return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr,
358 packet);
359 case SAFI_MPLS_VPN:
360 return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr,
361 packet);
362 case SAFI_EVPN:
363 return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw);
364 case SAFI_FLOWSPEC:
365 return bgp_nlri_parse_flowspec(peer, attr, packet, mp_withdraw);
366 }
367 return BGP_NLRI_PARSE_ERROR;
368 }
369
370
371 /*
372 * Check if route-refresh request from peer is pending (received before EoR),
373 * and process it now.
374 */
375 static void bgp_process_pending_refresh(struct peer *peer, afi_t afi,
376 safi_t safi)
377 {
378 if (CHECK_FLAG(peer->af_sflags[afi][safi],
379 PEER_STATUS_REFRESH_PENDING)) {
380 UNSET_FLAG(peer->af_sflags[afi][safi],
381 PEER_STATUS_REFRESH_PENDING);
382 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
383 BGP_ROUTE_REFRESH_BORR);
384 if (bgp_debug_neighbor_events(peer))
385 zlog_debug(
386 "%pBP sending route-refresh (BoRR) for %s/%s (for pending REQUEST)",
387 peer, afi2str(afi), safi2str(safi));
388
389 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_SEND);
390 UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_SEND);
391 bgp_announce_route(peer, afi, safi, true);
392 }
393 }
394
395 /*
396 * Checks a variety of conditions to determine whether the peer needs to be
397 * rescheduled for packet generation again, and does so if necessary.
398 *
399 * @param peer to check for rescheduling
400 */
401 static void bgp_write_proceed_actions(struct peer *peer)
402 {
403 afi_t afi;
404 safi_t safi;
405 struct peer_af *paf;
406 struct bpacket *next_pkt;
407 struct update_subgroup *subgrp;
408 enum bgp_af_index index;
409
410 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
411 paf = peer->peer_af_array[index];
412 if (!paf)
413 continue;
414
415 subgrp = paf->subgroup;
416 if (!subgrp)
417 continue;
418
419 next_pkt = paf->next_pkt_to_send;
420 if (next_pkt && next_pkt->buffer) {
421 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
422 bgp_generate_updgrp_packets, 0);
423 return;
424 }
425
426 /* No packets readily available for AFI/SAFI, are there
427 * subgroup packets
428 * that need to be generated? */
429 if (bpacket_queue_is_full(SUBGRP_INST(subgrp),
430 SUBGRP_PKTQ(subgrp))
431 || subgroup_packets_to_build(subgrp)) {
432 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
433 bgp_generate_updgrp_packets, 0);
434 return;
435 }
436
437 afi = paf->afi;
438 safi = paf->safi;
439
440 /* No packets to send, see if EOR is pending */
441 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)) {
442 if (!subgrp->t_coalesce && peer->afc_nego[afi][safi]
443 && peer->synctime
444 && !CHECK_FLAG(peer->af_sflags[afi][safi],
445 PEER_STATUS_EOR_SEND)
446 && safi != SAFI_MPLS_VPN) {
447 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
448 bgp_generate_updgrp_packets, 0);
449 return;
450 }
451 }
452 }
453 }
454
455 /*
456 * Generate advertisement information (withdraws, updates, EOR) from each
457 * update group a peer belongs to, encode this information into packets, and
458 * enqueue the packets onto the peer's output buffer.
459 */
460 void bgp_generate_updgrp_packets(struct thread *thread)
461 {
462 struct peer *peer = THREAD_ARG(thread);
463
464 struct stream *s;
465 struct peer_af *paf;
466 struct bpacket *next_pkt;
467 uint32_t wpq;
468 uint32_t generated = 0;
469 afi_t afi;
470 safi_t safi;
471
472 wpq = atomic_load_explicit(&peer->bgp->wpkt_quanta,
473 memory_order_relaxed);
474
475 /*
476 * The code beyond this part deals with update packets, proceed only
477 * if peer is Established and updates are not on hold (as part of
478 * update-delay processing).
479 */
480 if (!peer_established(peer))
481 return;
482
483 if ((peer->bgp->main_peers_update_hold)
484 || bgp_update_delay_active(peer->bgp))
485 return;
486
487 if (peer->t_routeadv)
488 return;
489
490 do {
491 enum bgp_af_index index;
492
493 s = NULL;
494 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
495 paf = peer->peer_af_array[index];
496 if (!paf || !PAF_SUBGRP(paf))
497 continue;
498
499 afi = paf->afi;
500 safi = paf->safi;
501 next_pkt = paf->next_pkt_to_send;
502
503 /*
504 * Try to generate a packet for the peer if we are at
505 * the end of the list. Always try to push out
506 * WITHDRAWs first.
507 */
508 if (!next_pkt || !next_pkt->buffer) {
509 next_pkt = subgroup_withdraw_packet(
510 PAF_SUBGRP(paf));
511 if (!next_pkt || !next_pkt->buffer)
512 subgroup_update_packet(PAF_SUBGRP(paf));
513 next_pkt = paf->next_pkt_to_send;
514 }
515
516 /*
517 * If we still don't have a packet to send to the peer,
518 * then try to find out out if we have to send eor or
519 * if not, skip to the next AFI, SAFI. Don't send the
520 * EOR prematurely; if the subgroup's coalesce timer is
521 * running, the adjacency-out structure is not created
522 * yet.
523 */
524 if (!next_pkt || !next_pkt->buffer) {
525 if (!paf->t_announce_route) {
526 /* Make sure we supress BGP UPDATES
527 * for normal processing later again.
528 */
529 UNSET_FLAG(paf->subgroup->sflags,
530 SUBGRP_STATUS_FORCE_UPDATES);
531
532 /* If route-refresh BoRR message was
533 * already sent and we are done with
534 * re-announcing tables for a decent
535 * afi/safi, we ready to send
536 * EoRR request.
537 */
538 if (CHECK_FLAG(
539 peer->af_sflags[afi][safi],
540 PEER_STATUS_BORR_SEND)) {
541 bgp_route_refresh_send(
542 peer, afi, safi, 0, 0,
543 0,
544 BGP_ROUTE_REFRESH_EORR);
545
546 SET_FLAG(peer->af_sflags[afi]
547 [safi],
548 PEER_STATUS_EORR_SEND);
549 UNSET_FLAG(
550 peer->af_sflags[afi]
551 [safi],
552 PEER_STATUS_BORR_SEND);
553
554 if (bgp_debug_neighbor_events(
555 peer))
556 zlog_debug(
557 "%pBP sending route-refresh (EoRR) for %s/%s",
558 peer,
559 afi2str(afi),
560 safi2str(safi));
561 }
562 }
563
564 if (CHECK_FLAG(peer->cap,
565 PEER_CAP_RESTART_RCV)) {
566 if (!(PAF_SUBGRP(paf))->t_coalesce
567 && peer->afc_nego[afi][safi]
568 && peer->synctime
569 && !CHECK_FLAG(
570 peer->af_sflags[afi][safi],
571 PEER_STATUS_EOR_SEND)) {
572 /* If EOR is disabled,
573 * the message is not sent
574 */
575 if (BGP_SEND_EOR(peer->bgp, afi,
576 safi)) {
577 SET_FLAG(
578 peer->af_sflags
579 [afi]
580 [safi],
581 PEER_STATUS_EOR_SEND);
582
583 /* Update EOR
584 * send time
585 */
586 peer->eor_stime[afi]
587 [safi] =
588 monotime(NULL);
589
590 BGP_UPDATE_EOR_PKT(
591 peer, afi, safi,
592 s);
593 bgp_process_pending_refresh(
594 peer, afi,
595 safi);
596 }
597 }
598 }
599 continue;
600 }
601
602 /* Update packet send time */
603 peer->pkt_stime[afi][safi] = monotime(NULL);
604
605 /* Found a packet template to send, overwrite
606 * packet with appropriate attributes from peer
607 * and advance peer */
608 s = bpacket_reformat_for_peer(next_pkt, paf);
609 bgp_packet_add(peer, s);
610 bpacket_queue_advance_peer(paf);
611 }
612 } while (s && (++generated < wpq));
613
614 if (generated)
615 bgp_writes_on(peer);
616
617 bgp_write_proceed_actions(peer);
618 }
619
620 /*
621 * Creates a BGP Keepalive packet and appends it to the peer's output queue.
622 */
623 void bgp_keepalive_send(struct peer *peer)
624 {
625 struct stream *s;
626
627 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
628
629 /* Make keepalive packet. */
630 bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
631
632 /* Set packet size. */
633 bgp_packet_set_size(s);
634
635 /* Dump packet if debug option is set. */
636 /* bgp_packet_dump (s); */
637
638 if (bgp_debug_keepalive(peer))
639 zlog_debug("%s sending KEEPALIVE", peer->host);
640
641 /* Add packet to the peer. */
642 bgp_packet_add(peer, s);
643
644 bgp_writes_on(peer);
645 }
646
647 /*
648 * Creates a BGP Open packet and appends it to the peer's output queue.
649 * Sets capabilities as necessary.
650 */
651 void bgp_open_send(struct peer *peer)
652 {
653 struct stream *s;
654 uint16_t send_holdtime;
655 as_t local_as;
656
657 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
658 send_holdtime = peer->holdtime;
659 else
660 send_holdtime = peer->bgp->default_holdtime;
661
662 /* local-as Change */
663 if (peer->change_local_as)
664 local_as = peer->change_local_as;
665 else
666 local_as = peer->local_as;
667
668 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
669
670 /* Make open packet. */
671 bgp_packet_set_marker(s, BGP_MSG_OPEN);
672
673 /* Set open packet values. */
674 stream_putc(s, BGP_VERSION_4); /* BGP version */
675 stream_putw(s, (local_as <= BGP_AS_MAX) ? (uint16_t)local_as
676 : BGP_AS_TRANS);
677 stream_putw(s, send_holdtime); /* Hold Time */
678 stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */
679
680 /* Set capabilities */
681 if (CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
682 (void)bgp_open_capability(s, peer, true);
683 } else {
684 struct stream *tmp = stream_new(STREAM_SIZE(s));
685
686 stream_copy(tmp, s);
687 if (bgp_open_capability(tmp, peer, false)
688 > BGP_OPEN_NON_EXT_OPT_LEN) {
689 stream_free(tmp);
690 (void)bgp_open_capability(s, peer, true);
691 } else {
692 stream_copy(s, tmp);
693 stream_free(tmp);
694 }
695 }
696
697 /* Set BGP packet length. */
698 bgp_packet_set_size(s);
699
700 if (bgp_debug_neighbor_events(peer))
701 zlog_debug(
702 "%s sending OPEN, version %d, my as %u, holdtime %d, id %pI4",
703 peer->host, BGP_VERSION_4, local_as, send_holdtime,
704 &peer->local_id);
705
706 /* Dump packet if debug option is set. */
707 /* bgp_packet_dump (s); */
708 hook_call(bgp_packet_send, peer, BGP_MSG_OPEN, stream_get_endp(s), s);
709
710 /* Add packet to the peer. */
711 bgp_packet_add(peer, s);
712
713 bgp_writes_on(peer);
714 }
715
716 /*
717 * Writes NOTIFICATION message directly to a peer socket without waiting for
718 * the I/O thread.
719 *
720 * There must be exactly one stream on the peer->obuf FIFO, and the data within
721 * this stream must match the format of a BGP NOTIFICATION message.
722 * Transmission is best-effort.
723 *
724 * @requires peer->io_mtx
725 * @param peer
726 * @return 0
727 */
728 static void bgp_write_notify(struct peer *peer)
729 {
730 int ret, val;
731 uint8_t type;
732 struct stream *s;
733
734 /* There should be at least one packet. */
735 s = stream_fifo_pop(peer->obuf);
736
737 if (!s)
738 return;
739
740 assert(stream_get_endp(s) >= BGP_HEADER_SIZE);
741
742 /*
743 * socket is in nonblocking mode, if we can't deliver the NOTIFY, well,
744 * we only care about getting a clean shutdown at this point.
745 */
746 ret = write(peer->fd, STREAM_DATA(s), stream_get_endp(s));
747
748 /*
749 * only connection reset/close gets counted as TCP_fatal_error, failure
750 * to write the entire NOTIFY doesn't get different FSM treatment
751 */
752 if (ret <= 0) {
753 stream_free(s);
754 BGP_EVENT_ADD(peer, TCP_fatal_error);
755 return;
756 }
757
758 /* Disable Nagle, make NOTIFY packet go out right away */
759 val = 1;
760 (void)setsockopt(peer->fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val,
761 sizeof(val));
762
763 /* Retrieve BGP packet type. */
764 stream_set_getp(s, BGP_MARKER_SIZE + 2);
765 type = stream_getc(s);
766
767 assert(type == BGP_MSG_NOTIFY);
768
769 /* Type should be notify. */
770 atomic_fetch_add_explicit(&peer->notify_out, 1, memory_order_relaxed);
771
772 /* Double start timer. */
773 peer->v_start *= 2;
774
775 /* Overflow check. */
776 if (peer->v_start >= (60 * 2))
777 peer->v_start = (60 * 2);
778
779 /*
780 * Handle Graceful Restart case where the state changes to
781 * Connect instead of Idle
782 */
783 BGP_EVENT_ADD(peer, BGP_Stop);
784
785 stream_free(s);
786 }
787
788 /*
789 * Encapsulate an original BGP CEASE Notification into Hard Reset
790 */
791 static uint8_t *bgp_notify_encapsulate_hard_reset(uint8_t code, uint8_t subcode,
792 uint8_t *data, size_t datalen)
793 {
794 uint8_t *message = XCALLOC(MTYPE_BGP_NOTIFICATION, datalen + 2);
795
796 /* ErrCode */
797 message[0] = code;
798 /* Subcode */
799 message[1] = subcode;
800 /* Data */
801 if (datalen)
802 memcpy(message + 2, data, datalen);
803
804 return message;
805 }
806
807 /*
808 * Decapsulate an original BGP CEASE Notification from Hard Reset
809 */
810 struct bgp_notify bgp_notify_decapsulate_hard_reset(struct bgp_notify *notify)
811 {
812 struct bgp_notify bn = {};
813
814 bn.code = notify->raw_data[0];
815 bn.subcode = notify->raw_data[1];
816 bn.length = notify->length - 2;
817
818 bn.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, bn.length);
819 memcpy(bn.raw_data, notify->raw_data + 2, bn.length);
820
821 return bn;
822 }
823
824 /* Check if Graceful-Restart N-bit is exchanged */
825 bool bgp_has_graceful_restart_notification(struct peer *peer)
826 {
827 return CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV) &&
828 CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV);
829 }
830
831 /*
832 * Check if to send BGP CEASE Notification/Hard Reset?
833 */
834 bool bgp_notify_send_hard_reset(struct peer *peer, uint8_t code,
835 uint8_t subcode)
836 {
837 /* When the "N" bit has been exchanged, a Hard Reset message is used to
838 * indicate to the peer that the session is to be fully terminated.
839 */
840 if (!bgp_has_graceful_restart_notification(peer))
841 return false;
842
843 /*
844 * https://datatracker.ietf.org/doc/html/rfc8538#section-5.1
845 */
846 if (code == BGP_NOTIFY_CEASE) {
847 switch (subcode) {
848 case BGP_NOTIFY_CEASE_MAX_PREFIX:
849 case BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN:
850 case BGP_NOTIFY_CEASE_PEER_UNCONFIG:
851 case BGP_NOTIFY_CEASE_HARD_RESET:
852 case BGP_NOTIFY_CEASE_BFD_DOWN:
853 return true;
854 case BGP_NOTIFY_CEASE_ADMIN_RESET:
855 /* Provide user control:
856 * `bgp hard-adminstrative-reset`
857 */
858 if (CHECK_FLAG(peer->bgp->flags,
859 BGP_FLAG_HARD_ADMIN_RESET))
860 return true;
861 else
862 return false;
863 default:
864 break;
865 }
866 }
867
868 return false;
869 }
870
871 /*
872 * Check if received BGP CEASE Notification/Hard Reset?
873 */
874 bool bgp_notify_received_hard_reset(struct peer *peer, uint8_t code,
875 uint8_t subcode)
876 {
877 /* When the "N" bit has been exchanged, a Hard Reset message is used to
878 * indicate to the peer that the session is to be fully terminated.
879 */
880 if (!bgp_has_graceful_restart_notification(peer))
881 return false;
882
883 if (code == BGP_NOTIFY_CEASE && subcode == BGP_NOTIFY_CEASE_HARD_RESET)
884 return true;
885
886 return false;
887 }
888
889 /*
890 * Creates a BGP Notify and appends it to the peer's output queue.
891 *
892 * This function attempts to write the packet from the thread it is called
893 * from, to ensure the packet gets out ASAP.
894 *
895 * This function may be called from multiple threads. Since the function
896 * modifies I/O buffer(s) in the peer, these are locked for the duration of the
897 * call to prevent tampering from other threads.
898 *
899 * Delivery of the NOTIFICATION is attempted once and is best-effort. After
900 * return, the peer structure *must* be reset; no assumptions about session
901 * state are valid.
902 *
903 * @param peer
904 * @param code BGP error code
905 * @param sub_code BGP error subcode
906 * @param data Data portion
907 * @param datalen length of data portion
908 */
909 static void bgp_notify_send_internal(struct peer *peer, uint8_t code,
910 uint8_t sub_code, uint8_t *data,
911 size_t datalen, bool use_curr)
912 {
913 struct stream *s;
914 bool hard_reset = bgp_notify_send_hard_reset(peer, code, sub_code);
915
916 /* Lock I/O mutex to prevent other threads from pushing packets */
917 frr_mutex_lock_autounlock(&peer->io_mtx);
918 /* ============================================== */
919
920 /* Allocate new stream. */
921 s = stream_new(peer->max_packet_size);
922
923 /* Make notify packet. */
924 bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
925
926 /* Check if we should send Hard Reset Notification or not */
927 if (hard_reset) {
928 uint8_t *hard_reset_message = bgp_notify_encapsulate_hard_reset(
929 code, sub_code, data, datalen);
930
931 /* Hard Reset encapsulates another NOTIFICATION message
932 * in its data portion.
933 */
934 stream_putc(s, BGP_NOTIFY_CEASE);
935 stream_putc(s, BGP_NOTIFY_CEASE_HARD_RESET);
936 stream_write(s, hard_reset_message, datalen + 2);
937
938 XFREE(MTYPE_BGP_NOTIFICATION, hard_reset_message);
939 } else {
940 stream_putc(s, code);
941 stream_putc(s, sub_code);
942 if (data)
943 stream_write(s, data, datalen);
944 }
945
946 /* Set BGP packet length. */
947 bgp_packet_set_size(s);
948
949 /* wipe output buffer */
950 stream_fifo_clean(peer->obuf);
951
952 /*
953 * If possible, store last packet for debugging purposes. This check is
954 * in place because we are sometimes called with a doppelganger peer,
955 * who tends to have a plethora of fields nulled out.
956 *
957 * Some callers should not attempt this - the io pthread for example
958 * should not touch internals of the peer struct.
959 */
960 if (use_curr && peer->curr) {
961 size_t packetsize = stream_get_endp(peer->curr);
962 assert(packetsize <= peer->max_packet_size);
963 memcpy(peer->last_reset_cause, peer->curr->data, packetsize);
964 peer->last_reset_cause_size = packetsize;
965 }
966
967 /* For debug */
968 {
969 struct bgp_notify bgp_notify;
970 int first = 0;
971 int i;
972 char c[4];
973
974 bgp_notify.code = code;
975 bgp_notify.subcode = sub_code;
976 bgp_notify.data = NULL;
977 bgp_notify.length = datalen;
978 bgp_notify.raw_data = data;
979
980 peer->notify.code = bgp_notify.code;
981 peer->notify.subcode = bgp_notify.subcode;
982 peer->notify.length = bgp_notify.length;
983
984 if (bgp_notify.length && data) {
985 bgp_notify.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
986 bgp_notify.length * 3);
987 for (i = 0; i < bgp_notify.length; i++)
988 if (first) {
989 snprintf(c, sizeof(c), " %02x",
990 data[i]);
991
992 strlcat(bgp_notify.data, c,
993 bgp_notify.length);
994
995 } else {
996 first = 1;
997 snprintf(c, sizeof(c), "%02x", data[i]);
998
999 strlcpy(bgp_notify.data, c,
1000 bgp_notify.length);
1001 }
1002 }
1003 bgp_notify_print(peer, &bgp_notify, "sending", hard_reset);
1004
1005 if (bgp_notify.data) {
1006 if (data) {
1007 XFREE(MTYPE_BGP_NOTIFICATION,
1008 peer->notify.data);
1009 peer->notify.data = XCALLOC(
1010 MTYPE_BGP_NOTIFICATION, datalen);
1011 memcpy(peer->notify.data, data, datalen);
1012 }
1013
1014 XFREE(MTYPE_BGP_NOTIFICATION, bgp_notify.data);
1015 bgp_notify.length = 0;
1016 }
1017 }
1018
1019 /* peer reset cause */
1020 if (code == BGP_NOTIFY_CEASE) {
1021 if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
1022 peer->last_reset = PEER_DOWN_USER_RESET;
1023 else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN) {
1024 if (CHECK_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN))
1025 peer->last_reset = PEER_DOWN_RTT_SHUTDOWN;
1026 else
1027 peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
1028 } else
1029 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1030 } else
1031 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1032
1033 /* Add packet to peer's output queue */
1034 stream_fifo_push(peer->obuf, s);
1035
1036 bgp_peer_gr_flags_update(peer);
1037 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
1038 peer->bgp->peer);
1039
1040 bgp_write_notify(peer);
1041 }
1042
/*
 * Send a NOTIFICATION without a data portion.
 *
 * The work is done by bgp_notify_send_internal(), which builds the message
 * and tries to write it from the calling thread; the peer's current packet
 * is recorded as the reset cause when available.
 *
 * @param peer
 * @param code BGP error code
 * @param sub_code BGP error subcode
 */
void bgp_notify_send(struct peer *peer, uint8_t code, uint8_t sub_code)
{
	uint8_t *no_data = NULL;
	const size_t no_data_len = 0;
	const bool save_current_packet = true;

	bgp_notify_send_internal(peer, code, sub_code, no_data, no_data_len,
				 save_current_packet);
}
1057
/*
 * Send a NOTIFICATION carrying a data portion. Meant to be called from the
 * main pthread, where accessing the peer object is fine; the peer's current
 * packet is recorded as the reset cause when available.
 */
void bgp_notify_send_with_data(struct peer *peer, uint8_t code,
			       uint8_t sub_code, uint8_t *data, size_t datalen)
{
	const bool save_current_packet = true;

	bgp_notify_send_internal(peer, code, sub_code, data, datalen,
				 save_current_packet);
}
1066
/*
 * Variant for the io pthread: send a NOTIFICATION with a data portion, but
 * ask bgp_notify_send_internal() not to save the peer's current packet
 * (peer->curr) as the reset cause.
 */
void bgp_notify_io_invalid(struct peer *peer, uint8_t code, uint8_t sub_code,
			   uint8_t *data, size_t datalen)
{
	/* Leave peer->curr alone. */
	const bool save_current_packet = false;

	bgp_notify_send_internal(peer, code, sub_code, data, datalen,
				 save_current_packet);
}
1077
1078 /*
1079 * Creates BGP Route Refresh packet and appends it to the peer's output queue.
1080 *
1081 * @param peer
1082 * @param afi Address Family Identifier
1083 * @param safi Subsequent Address Family Identifier
1084 * @param orf_type Outbound Route Filtering type
1085 * @param when_to_refresh Whether to refresh immediately or defer
1086 * @param remove Whether to remove ORF for specified AFI/SAFI
1087 */
1088 void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
1089 uint8_t orf_type, uint8_t when_to_refresh,
1090 int remove, uint8_t subtype)
1091 {
1092 struct stream *s;
1093 struct bgp_filter *filter;
1094 int orf_refresh = 0;
1095 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1096 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
1097
1098 if (DISABLE_BGP_ANNOUNCE)
1099 return;
1100
1101 filter = &peer->filter[afi][safi];
1102
1103 /* Convert AFI, SAFI to values for packet. */
1104 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1105
1106 s = stream_new(peer->max_packet_size);
1107
1108 /* Make BGP update packet. */
1109 if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
1110 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_NEW);
1111 else
1112 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_OLD);
1113
1114 /* Encode Route Refresh message. */
1115 stream_putw(s, pkt_afi);
1116 if (subtype)
1117 stream_putc(s, subtype);
1118 else
1119 stream_putc(s, 0);
1120 stream_putc(s, pkt_safi);
1121
1122 if (orf_type == ORF_TYPE_PREFIX || orf_type == ORF_TYPE_PREFIX_OLD)
1123 if (remove || filter->plist[FILTER_IN].plist) {
1124 uint16_t orf_len;
1125 unsigned long orfp;
1126
1127 orf_refresh = 1;
1128 stream_putc(s, when_to_refresh);
1129 stream_putc(s, orf_type);
1130 orfp = stream_get_endp(s);
1131 stream_putw(s, 0);
1132
1133 if (remove) {
1134 UNSET_FLAG(peer->af_sflags[afi][safi],
1135 PEER_STATUS_ORF_PREFIX_SEND);
1136 stream_putc(s, ORF_COMMON_PART_REMOVE_ALL);
1137 if (bgp_debug_neighbor_events(peer))
1138 zlog_debug(
1139 "%pBP sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %s/%s",
1140 peer, orf_type,
1141 (when_to_refresh ==
1142 REFRESH_DEFER
1143 ? "defer"
1144 : "immediate"),
1145 iana_afi2str(pkt_afi),
1146 iana_safi2str(pkt_safi));
1147 } else {
1148 SET_FLAG(peer->af_sflags[afi][safi],
1149 PEER_STATUS_ORF_PREFIX_SEND);
1150 prefix_bgp_orf_entry(
1151 s, filter->plist[FILTER_IN].plist,
1152 ORF_COMMON_PART_ADD,
1153 ORF_COMMON_PART_PERMIT,
1154 ORF_COMMON_PART_DENY);
1155 if (bgp_debug_neighbor_events(peer))
1156 zlog_debug(
1157 "%pBP sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %s/%s",
1158 peer, orf_type,
1159 (when_to_refresh ==
1160 REFRESH_DEFER
1161 ? "defer"
1162 : "immediate"),
1163 iana_afi2str(pkt_afi),
1164 iana_safi2str(pkt_safi));
1165 }
1166
1167 /* Total ORF Entry Len. */
1168 orf_len = stream_get_endp(s) - orfp - 2;
1169 stream_putw_at(s, orfp, orf_len);
1170 }
1171
1172 /* Set packet size. */
1173 bgp_packet_set_size(s);
1174
1175 if (bgp_debug_neighbor_events(peer)) {
1176 if (!orf_refresh)
1177 zlog_debug(
1178 "%pBP sending REFRESH_REQ for afi/safi: %s/%s",
1179 peer, iana_afi2str(pkt_afi),
1180 iana_safi2str(pkt_safi));
1181 }
1182
1183 /* Add packet to the peer. */
1184 bgp_packet_add(peer, s);
1185
1186 bgp_writes_on(peer);
1187 }
1188
1189 /*
1190 * Create a BGP Capability packet and append it to the peer's output queue.
1191 *
1192 * @param peer
1193 * @param afi Address Family Identifier
1194 * @param safi Subsequent Address Family Identifier
1195 * @param capability_code BGP Capability Code
1196 * @param action Set or Remove capability
1197 */
1198 void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
1199 int capability_code, int action)
1200 {
1201 struct stream *s;
1202 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1203 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
1204
1205 /* Convert AFI, SAFI to values for packet. */
1206 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1207
1208 s = stream_new(peer->max_packet_size);
1209
1210 /* Make BGP update packet. */
1211 bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);
1212
1213 /* Encode MP_EXT capability. */
1214 if (capability_code == CAPABILITY_CODE_MP) {
1215 stream_putc(s, action);
1216 stream_putc(s, CAPABILITY_CODE_MP);
1217 stream_putc(s, CAPABILITY_CODE_MP_LEN);
1218 stream_putw(s, pkt_afi);
1219 stream_putc(s, 0);
1220 stream_putc(s, pkt_safi);
1221
1222 if (bgp_debug_neighbor_events(peer))
1223 zlog_debug(
1224 "%pBP sending CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
1225 peer,
1226 action == CAPABILITY_ACTION_SET ? "Advertising"
1227 : "Removing",
1228 iana_afi2str(pkt_afi), iana_safi2str(pkt_safi));
1229 }
1230
1231 /* Set packet size. */
1232 bgp_packet_set_size(s);
1233
1234 /* Add packet to the peer. */
1235 bgp_packet_add(peer, s);
1236
1237 bgp_writes_on(peer);
1238 }
1239
1240 /* RFC1771 6.8 Connection collision detection. */
1241 static int bgp_collision_detect(struct peer *new, struct in_addr remote_id)
1242 {
1243 struct peer *peer;
1244
1245 /*
1246 * Upon receipt of an OPEN message, the local system must examine
1247 * all of its connections that are in the OpenConfirm state. A BGP
1248 * speaker may also examine connections in an OpenSent state if it
1249 * knows the BGP Identifier of the peer by means outside of the
1250 * protocol. If among these connections there is a connection to a
1251 * remote BGP speaker whose BGP Identifier equals the one in the
1252 * OPEN message, then the local system performs the following
1253 * collision resolution procedure:
1254 */
1255 peer = new->doppelganger;
1256 if (peer == NULL)
1257 return 0;
1258
1259 /*
1260 * Do not accept the new connection in Established or Clearing
1261 * states. Note that a peer GR is handled by closing the existing
1262 * connection upon receipt of new one.
1263 */
1264 if (peer_established(peer) || peer->status == Clearing) {
1265 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1266 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1267 return -1;
1268 }
1269
1270 if ((peer->status != OpenConfirm) && (peer->status != OpenSent))
1271 return 0;
1272
1273 /*
1274 * 1. The BGP Identifier of the local system is
1275 * compared to the BGP Identifier of the remote
1276 * system (as specified in the OPEN message).
1277 *
1278 * If the BGP Identifiers of the peers
1279 * involved in the connection collision
1280 * are identical, then the connection
1281 * initiated by the BGP speaker with the
1282 * larger AS number is preserved.
1283 */
1284 if (ntohl(peer->local_id.s_addr) < ntohl(remote_id.s_addr)
1285 || (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1286 && peer->local_as < peer->as))
1287 if (!CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1288 /*
1289 * 2. If the value of the local BGP
1290 * Identifier is less than the remote one,
1291 * the local system closes BGP connection
1292 * that already exists (the one that is
1293 * already in the OpenConfirm state),
1294 * and accepts BGP connection initiated by
1295 * the remote system.
1296 */
1297 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1298 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1299 return 1;
1300 } else {
1301 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1302 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1303 return -1;
1304 }
1305 else {
1306 if (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1307 && peer->local_as == peer->as)
1308 flog_err(EC_BGP_ROUTER_ID_SAME,
1309 "Peer's router-id %pI4 is the same as ours",
1310 &remote_id);
1311
1312 /*
1313 * 3. Otherwise, the local system closes newly
1314 * created BGP connection (the one associated with the
1315 * newly received OPEN message), and continues to use
1316 * the existing one (the one that is already in the
1317 * OpenConfirm state).
1318 */
1319 if (CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1320 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1321 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1322 return 1;
1323 } else {
1324 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1325 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1326 return -1;
1327 }
1328 }
1329 }
1330
1331 /* Packet processing routines ---------------------------------------------- */
1332 /*
1333 * This is a family of functions designed to be called from
1334 * bgp_process_packet(). These functions all share similar behavior and should
1335 * adhere to the following invariants and restrictions:
1336 *
1337 * Return codes
1338 * ------------
1339 * The return code of any one of those functions should be one of the FSM event
1340 * codes specified in bgpd.h. If a NOTIFY was sent, this event code MUST be
1341 * BGP_Stop. Otherwise, the code SHOULD correspond to the function's expected
1342 * packet type. For example, bgp_open_receive() should return BGP_Stop upon
1343 * error and Receive_OPEN_message otherwise.
1344 *
1345 * If no action is necessary, the correct return code is BGP_PACKET_NOOP as
1346 * defined below.
1347 *
1348 * Side effects
1349 * ------------
1350 * - May send NOTIFY messages
1351 * - May not modify peer->status
1352 * - May not call bgp_event_update()
1353 */
1354
/* Return code for the packet handlers when no action (FSM event) is needed. */
#define BGP_PACKET_NOOP 0
1356
1357 /**
1358 * Process BGP OPEN message for peer.
1359 *
1360 * If any errors are encountered in the OPEN message, immediately sends NOTIFY
1361 * and returns BGP_Stop.
1362 *
1363 * @param peer
1364 * @param size size of the packet
1365 * @return as in summary
1366 */
1367 static int bgp_open_receive(struct peer *peer, bgp_size_t size)
1368 {
1369 int ret;
1370 uint8_t version;
1371 uint16_t optlen;
1372 uint16_t holdtime;
1373 uint16_t send_holdtime;
1374 as_t remote_as;
1375 as_t as4 = 0, as4_be;
1376 struct in_addr remote_id;
1377 int mp_capability;
1378 uint8_t notify_data_remote_as[2];
1379 uint8_t notify_data_remote_as4[4];
1380 uint8_t notify_data_remote_id[4];
1381 uint16_t *holdtime_ptr;
1382
1383 /* Parse open packet. */
1384 version = stream_getc(peer->curr);
1385 memcpy(notify_data_remote_as, stream_pnt(peer->curr), 2);
1386 remote_as = stream_getw(peer->curr);
1387 holdtime_ptr = (uint16_t *)stream_pnt(peer->curr);
1388 holdtime = stream_getw(peer->curr);
1389 memcpy(notify_data_remote_id, stream_pnt(peer->curr), 4);
1390 remote_id.s_addr = stream_get_ipv4(peer->curr);
1391
1392 /* BEGIN to read the capability here, but dont do it yet */
1393 mp_capability = 0;
1394 optlen = stream_getc(peer->curr);
1395
1396 /* Extended Optional Parameters Length for BGP OPEN Message */
1397 if (optlen == BGP_OPEN_NON_EXT_OPT_LEN
1398 || CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
1399 uint8_t opttype;
1400
1401 if (STREAM_READABLE(peer->curr) < 1) {
1402 flog_err(
1403 EC_BGP_PKT_OPEN,
1404 "%s: stream does not have enough bytes for extended optional parameters",
1405 peer->host);
1406 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1407 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1408 return BGP_Stop;
1409 }
1410
1411 opttype = stream_getc(peer->curr);
1412 if (opttype == BGP_OPEN_NON_EXT_OPT_TYPE_EXTENDED_LENGTH) {
1413 if (STREAM_READABLE(peer->curr) < 2) {
1414 flog_err(
1415 EC_BGP_PKT_OPEN,
1416 "%s: stream does not have enough bytes to read the extended optional parameters optlen",
1417 peer->host);
1418 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1419 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1420 return BGP_Stop;
1421 }
1422 optlen = stream_getw(peer->curr);
1423 SET_FLAG(peer->sflags,
1424 PEER_STATUS_EXT_OPT_PARAMS_LENGTH);
1425 }
1426 }
1427
1428 /* Receive OPEN message log */
1429 if (bgp_debug_neighbor_events(peer))
1430 zlog_debug(
1431 "%s rcv OPEN%s, version %d, remote-as (in open) %u, holdtime %d, id %pI4",
1432 peer->host,
1433 CHECK_FLAG(peer->sflags,
1434 PEER_STATUS_EXT_OPT_PARAMS_LENGTH)
1435 ? " (Extended)"
1436 : "",
1437 version, remote_as, holdtime, &remote_id);
1438
1439 if (optlen != 0) {
1440 /* If not enough bytes, it is an error. */
1441 if (STREAM_READABLE(peer->curr) < optlen) {
1442 flog_err(EC_BGP_PKT_OPEN,
1443 "%s: stream has not enough bytes (%u)",
1444 peer->host, optlen);
1445 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1446 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1447 return BGP_Stop;
1448 }
1449
1450 /* We need the as4 capability value *right now* because
1451 * if it is there, we have not got the remote_as yet, and
1452 * without
1453 * that we do not know which peer is connecting to us now.
1454 */
1455 as4 = peek_for_as4_capability(peer, optlen);
1456 }
1457
1458 as4_be = htonl(as4);
1459 memcpy(notify_data_remote_as4, &as4_be, 4);
1460
1461 /* Just in case we have a silly peer who sends AS4 capability set to 0
1462 */
1463 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) {
1464 flog_err(EC_BGP_PKT_OPEN,
1465 "%s bad OPEN, got AS4 capability, but AS4 set to 0",
1466 peer->host);
1467 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1468 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1469 notify_data_remote_as4, 4);
1470 return BGP_Stop;
1471 }
1472
1473 /* Codification of AS 0 Processing */
1474 if (remote_as == BGP_AS_ZERO) {
1475 flog_err(EC_BGP_PKT_OPEN, "%s bad OPEN, got AS set to 0",
1476 peer->host);
1477 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1478 BGP_NOTIFY_OPEN_BAD_PEER_AS);
1479 return BGP_Stop;
1480 }
1481
1482 if (remote_as == BGP_AS_TRANS) {
1483 /* Take the AS4 from the capability. We must have received the
1484 * capability now! Otherwise we have a asn16 peer who uses
1485 * BGP_AS_TRANS, for some unknown reason.
1486 */
1487 if (as4 == BGP_AS_TRANS) {
1488 flog_err(
1489 EC_BGP_PKT_OPEN,
1490 "%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed",
1491 peer->host);
1492 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1493 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1494 notify_data_remote_as4, 4);
1495 return BGP_Stop;
1496 }
1497
1498 if (!as4 && BGP_DEBUG(as4, AS4))
1499 zlog_debug(
1500 "%s [AS4] OPEN remote_as is AS_TRANS, but no AS4. Odd, but proceeding.",
1501 peer->host);
1502 else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4))
1503 zlog_debug(
1504 "%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits in 2-bytes, very odd peer.",
1505 peer->host, as4);
1506 if (as4)
1507 remote_as = as4;
1508 } else {
1509 /* We may have a partner with AS4 who has an asno < BGP_AS_MAX
1510 */
1511 /* If we have got the capability, peer->as4cap must match
1512 * remote_as */
1513 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV)
1514 && as4 != remote_as) {
1515 /* raise error, log this, close session */
1516 flog_err(
1517 EC_BGP_PKT_OPEN,
1518 "%s bad OPEN, got AS4 capability, but remote_as %u mismatch with 16bit 'myasn' %u in open",
1519 peer->host, as4, remote_as);
1520 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1521 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1522 notify_data_remote_as4, 4);
1523 return BGP_Stop;
1524 }
1525 }
1526
1527 /* rfc6286:
1528 * If the BGP Identifier field of the OPEN message
1529 * is zero, or if it is the same as the BGP Identifier
1530 * of the local BGP speaker and the message is from an
1531 * internal peer, then the Error Subcode is set to
1532 * "Bad BGP Identifier".
1533 */
1534 if (remote_id.s_addr == INADDR_ANY
1535 || (peer->sort == BGP_PEER_IBGP
1536 && ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr))) {
1537 if (bgp_debug_neighbor_events(peer))
1538 zlog_debug("%s bad OPEN, wrong router identifier %pI4",
1539 peer->host, &remote_id);
1540 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1541 BGP_NOTIFY_OPEN_BAD_BGP_IDENT,
1542 notify_data_remote_id, 4);
1543 return BGP_Stop;
1544 }
1545
1546 /* Peer BGP version check. */
1547 if (version != BGP_VERSION_4) {
1548 uint16_t maxver = htons(BGP_VERSION_4);
1549 /* XXX this reply may not be correct if version < 4 XXX */
1550 if (bgp_debug_neighbor_events(peer))
1551 zlog_debug(
1552 "%s bad protocol version, remote requested %d, local request %d",
1553 peer->host, version, BGP_VERSION_4);
1554 /* Data must be in network byte order here */
1555 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1556 BGP_NOTIFY_OPEN_UNSUP_VERSION,
1557 (uint8_t *)&maxver, 2);
1558 return BGP_Stop;
1559 }
1560
1561 /* Check neighbor as number. */
1562 if (peer->as_type == AS_UNSPECIFIED) {
1563 if (bgp_debug_neighbor_events(peer))
1564 zlog_debug(
1565 "%s bad OPEN, remote AS is unspecified currently",
1566 peer->host);
1567 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1568 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1569 notify_data_remote_as, 2);
1570 return BGP_Stop;
1571 } else if (peer->as_type == AS_INTERNAL) {
1572 if (remote_as != peer->bgp->as) {
1573 if (bgp_debug_neighbor_events(peer))
1574 zlog_debug(
1575 "%s bad OPEN, remote AS is %u, internal specified",
1576 peer->host, remote_as);
1577 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1578 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1579 notify_data_remote_as, 2);
1580 return BGP_Stop;
1581 }
1582 peer->as = peer->local_as;
1583 } else if (peer->as_type == AS_EXTERNAL) {
1584 if (remote_as == peer->bgp->as) {
1585 if (bgp_debug_neighbor_events(peer))
1586 zlog_debug(
1587 "%s bad OPEN, remote AS is %u, external specified",
1588 peer->host, remote_as);
1589 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1590 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1591 notify_data_remote_as, 2);
1592 return BGP_Stop;
1593 }
1594 peer->as = remote_as;
1595 } else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) {
1596 if (bgp_debug_neighbor_events(peer))
1597 zlog_debug("%s bad OPEN, remote AS is %u, expected %u",
1598 peer->host, remote_as, peer->as);
1599 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1600 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1601 notify_data_remote_as, 2);
1602 return BGP_Stop;
1603 }
1604
1605 /*
1606 * When collision is detected and this peer is closed.
1607 * Return immediately.
1608 */
1609 ret = bgp_collision_detect(peer, remote_id);
1610 if (ret < 0)
1611 return BGP_Stop;
1612
1613 /* Get sockname. */
1614 if (bgp_getsockname(peer) < 0) {
1615 flog_err_sys(EC_LIB_SOCKET,
1616 "%s: bgp_getsockname() failed for peer: %s",
1617 __func__, peer->host);
1618 return BGP_Stop;
1619 }
1620
1621 /* Set remote router-id */
1622 peer->remote_id = remote_id;
1623
1624 /* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST
1625 calculate the value of the Hold Timer by using the smaller of its
1626 configured Hold Time and the Hold Time received in the OPEN message.
1627 The Hold Time MUST be either zero or at least three seconds. An
1628 implementation may reject connections on the basis of the Hold Time.
1629 */
1630
1631 if (holdtime < 3 && holdtime != 0) {
1632 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1633 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
1634 (uint8_t *)holdtime_ptr, 2);
1635 return BGP_Stop;
1636 }
1637
1638 /* Send notification message when Hold Time received in the OPEN message
1639 * is smaller than configured minimum Hold Time. */
1640 if (holdtime < peer->bgp->default_min_holdtime
1641 && peer->bgp->default_min_holdtime != 0) {
1642 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1643 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
1644 (uint8_t *)holdtime_ptr, 2);
1645 return BGP_Stop;
1646 }
1647
1648 /* From the rfc: A reasonable maximum time between KEEPALIVE messages
1649 would be one third of the Hold Time interval. KEEPALIVE messages
1650 MUST NOT be sent more frequently than one per second. An
1651 implementation MAY adjust the rate at which it sends KEEPALIVE
1652 messages as a function of the Hold Time interval. */
1653
1654 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
1655 send_holdtime = peer->holdtime;
1656 else
1657 send_holdtime = peer->bgp->default_holdtime;
1658
1659 if (holdtime < send_holdtime)
1660 peer->v_holdtime = holdtime;
1661 else
1662 peer->v_holdtime = send_holdtime;
1663
1664 /* Set effective keepalive to 1/3 the effective holdtime.
1665 * Use configured keeplive when < effective keepalive.
1666 */
1667 peer->v_keepalive = peer->v_holdtime / 3;
1668 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) {
1669 if (peer->keepalive && peer->keepalive < peer->v_keepalive)
1670 peer->v_keepalive = peer->keepalive;
1671 } else {
1672 if (peer->bgp->default_keepalive
1673 && peer->bgp->default_keepalive < peer->v_keepalive)
1674 peer->v_keepalive = peer->bgp->default_keepalive;
1675 }
1676
1677 /* Open option part parse. */
1678 if (optlen != 0) {
1679 if (bgp_open_option_parse(peer, optlen, &mp_capability) < 0)
1680 return BGP_Stop;
1681 } else {
1682 if (bgp_debug_neighbor_events(peer))
1683 zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0",
1684 peer->host);
1685 }
1686
1687 /*
1688 * Assume that the peer supports the locally configured set of
1689 * AFI/SAFIs if the peer did not send us any Mulitiprotocol
1690 * capabilities, or if 'override-capability' is configured.
1691 */
1692 if (!mp_capability
1693 || CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) {
1694 peer->afc_nego[AFI_IP][SAFI_UNICAST] =
1695 peer->afc[AFI_IP][SAFI_UNICAST];
1696 peer->afc_nego[AFI_IP][SAFI_MULTICAST] =
1697 peer->afc[AFI_IP][SAFI_MULTICAST];
1698 peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] =
1699 peer->afc[AFI_IP][SAFI_LABELED_UNICAST];
1700 peer->afc_nego[AFI_IP][SAFI_FLOWSPEC] =
1701 peer->afc[AFI_IP][SAFI_FLOWSPEC];
1702 peer->afc_nego[AFI_IP6][SAFI_UNICAST] =
1703 peer->afc[AFI_IP6][SAFI_UNICAST];
1704 peer->afc_nego[AFI_IP6][SAFI_MULTICAST] =
1705 peer->afc[AFI_IP6][SAFI_MULTICAST];
1706 peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] =
1707 peer->afc[AFI_IP6][SAFI_LABELED_UNICAST];
1708 peer->afc_nego[AFI_L2VPN][SAFI_EVPN] =
1709 peer->afc[AFI_L2VPN][SAFI_EVPN];
1710 peer->afc_nego[AFI_IP6][SAFI_FLOWSPEC] =
1711 peer->afc[AFI_IP6][SAFI_FLOWSPEC];
1712 }
1713
1714 /* Verify valid local address present based on negotiated
1715 * address-families. */
1716 if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
1717 || peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST]
1718 || peer->afc_nego[AFI_IP][SAFI_MULTICAST]
1719 || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
1720 || peer->afc_nego[AFI_IP][SAFI_ENCAP]) {
1721 if (peer->nexthop.v4.s_addr == INADDR_ANY) {
1722 #if defined(HAVE_CUMULUS)
1723 zlog_warn("%s: No local IPv4 addr, BGP routing may not work",
1724 peer->host);
1725 #endif
1726 }
1727 }
1728 if (peer->afc_nego[AFI_IP6][SAFI_UNICAST]
1729 || peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST]
1730 || peer->afc_nego[AFI_IP6][SAFI_MULTICAST]
1731 || peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN]
1732 || peer->afc_nego[AFI_IP6][SAFI_ENCAP]) {
1733 if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global)) {
1734 #if defined(HAVE_CUMULUS)
1735 zlog_warn("%s: No local IPv6 address, BGP routing may not work",
1736 peer->host);
1737 #endif
1738 }
1739 }
1740 peer->rtt = sockopt_tcp_rtt(peer->fd);
1741
1742 return Receive_OPEN_message;
1743 }
1744
1745 /**
1746 * Process BGP KEEPALIVE message for peer.
1747 *
1748 * @param peer
1749 * @param size size of the packet
1750 * @return as in summary
1751 */
1752 static int bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
1753 {
1754 if (bgp_debug_keepalive(peer))
1755 zlog_debug("%s KEEPALIVE rcvd", peer->host);
1756
1757 bgp_update_implicit_eors(peer);
1758
1759 peer->rtt = sockopt_tcp_rtt(peer->fd);
1760
1761 /* If the peer's RTT is higher than expected, shutdown
1762 * the peer automatically.
1763 */
1764 if (!CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN))
1765 return Receive_KEEPALIVE_message;
1766
1767 if (peer->rtt > peer->rtt_expected) {
1768 peer->rtt_keepalive_rcv++;
1769
1770 if (peer->rtt_keepalive_rcv > peer->rtt_keepalive_conf) {
1771 char rtt_shutdown_reason[BUFSIZ] = {};
1772
1773 snprintfrr(
1774 rtt_shutdown_reason,
1775 sizeof(rtt_shutdown_reason),
1776 "shutdown due to high round-trip-time (%dms > %dms, hit %u times)",
1777 peer->rtt, peer->rtt_expected,
1778 peer->rtt_keepalive_rcv);
1779 zlog_warn("%s %s", peer->host, rtt_shutdown_reason);
1780 SET_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN);
1781 peer_tx_shutdown_message_set(peer, rtt_shutdown_reason);
1782 peer_flag_set(peer, PEER_FLAG_SHUTDOWN);
1783 }
1784 } else {
1785 if (peer->rtt_keepalive_rcv)
1786 peer->rtt_keepalive_rcv--;
1787 }
1788
1789 return Receive_KEEPALIVE_message;
1790 }
1791
1792 static void bgp_refresh_stalepath_timer_expire(struct thread *thread)
1793 {
1794 struct peer_af *paf;
1795
1796 paf = THREAD_ARG(thread);
1797
1798 afi_t afi = paf->afi;
1799 safi_t safi = paf->safi;
1800 struct peer *peer = paf->peer;
1801
1802 peer->t_refresh_stalepath = NULL;
1803
1804 if (peer->nsf[afi][safi])
1805 bgp_clear_stale_route(peer, afi, safi);
1806
1807 if (bgp_debug_neighbor_events(peer))
1808 zlog_debug(
1809 "%pBP route-refresh (BoRR) timer expired for afi/safi: %d/%d",
1810 peer, afi, safi);
1811
1812 bgp_timer_set(peer);
1813 }
1814
1815 /**
1816 * Process BGP UPDATE message for peer.
1817 *
1818 * Parses UPDATE and creates attribute object.
1819 *
1820 * @param peer
1821 * @param size size of the packet
1822 * @return as in summary
1823 */
1824 static int bgp_update_receive(struct peer *peer, bgp_size_t size)
1825 {
1826 int ret, nlri_ret;
1827 uint8_t *end;
1828 struct stream *s;
1829 struct attr attr;
1830 bgp_size_t attribute_len;
1831 bgp_size_t update_len;
1832 bgp_size_t withdraw_len;
1833 bool restart = false;
1834
1835 enum NLRI_TYPES {
1836 NLRI_UPDATE,
1837 NLRI_WITHDRAW,
1838 NLRI_MP_UPDATE,
1839 NLRI_MP_WITHDRAW,
1840 NLRI_TYPE_MAX
1841 };
1842 struct bgp_nlri nlris[NLRI_TYPE_MAX];
1843
1844 /* Status must be Established. */
1845 if (!peer_established(peer)) {
1846 flog_err(EC_BGP_INVALID_STATUS,
1847 "%s [FSM] Update packet received under status %s",
1848 peer->host,
1849 lookup_msg(bgp_status_msg, peer->status, NULL));
1850 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
1851 bgp_fsm_error_subcode(peer->status));
1852 return BGP_Stop;
1853 }
1854
1855 /* Set initial values. */
1856 memset(&attr, 0, sizeof(attr));
1857 attr.label_index = BGP_INVALID_LABEL_INDEX;
1858 attr.label = MPLS_INVALID_LABEL;
1859 memset(&nlris, 0, sizeof(nlris));
1860 memset(peer->rcvd_attr_str, 0, BUFSIZ);
1861 peer->rcvd_attr_printed = 0;
1862
1863 s = peer->curr;
1864 end = stream_pnt(s) + size;
1865
1866 /* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute
1867 Length is too large (i.e., if Unfeasible Routes Length + Total
1868 Attribute Length + 23 exceeds the message Length), then the Error
1869 Subcode is set to Malformed Attribute List. */
1870 if (stream_pnt(s) + 2 > end) {
1871 flog_err(EC_BGP_UPDATE_RCV,
1872 "%s [Error] Update packet error (packet length is short for unfeasible length)",
1873 peer->host);
1874 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1875 BGP_NOTIFY_UPDATE_MAL_ATTR);
1876 return BGP_Stop;
1877 }
1878
1879 /* Unfeasible Route Length. */
1880 withdraw_len = stream_getw(s);
1881
1882 /* Unfeasible Route Length check. */
1883 if (stream_pnt(s) + withdraw_len > end) {
1884 flog_err(EC_BGP_UPDATE_RCV,
1885 "%s [Error] Update packet error (packet unfeasible length overflow %d)",
1886 peer->host, withdraw_len);
1887 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1888 BGP_NOTIFY_UPDATE_MAL_ATTR);
1889 return BGP_Stop;
1890 }
1891
1892 /* Unfeasible Route packet format check. */
1893 if (withdraw_len > 0) {
1894 nlris[NLRI_WITHDRAW].afi = AFI_IP;
1895 nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST;
1896 nlris[NLRI_WITHDRAW].nlri = stream_pnt(s);
1897 nlris[NLRI_WITHDRAW].length = withdraw_len;
1898 stream_forward_getp(s, withdraw_len);
1899 }
1900
1901 /* Attribute total length check. */
1902 if (stream_pnt(s) + 2 > end) {
1903 flog_warn(
1904 EC_BGP_UPDATE_PACKET_SHORT,
1905 "%s [Error] Packet Error (update packet is short for attribute length)",
1906 peer->host);
1907 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1908 BGP_NOTIFY_UPDATE_MAL_ATTR);
1909 return BGP_Stop;
1910 }
1911
1912 /* Fetch attribute total length. */
1913 attribute_len = stream_getw(s);
1914
1915 /* Attribute length check. */
1916 if (stream_pnt(s) + attribute_len > end) {
1917 flog_warn(
1918 EC_BGP_UPDATE_PACKET_LONG,
1919 "%s [Error] Packet Error (update packet attribute length overflow %d)",
1920 peer->host, attribute_len);
1921 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1922 BGP_NOTIFY_UPDATE_MAL_ATTR);
1923 return BGP_Stop;
1924 }
1925
1926 /* Certain attribute parsing errors should not be considered bad enough
1927 * to reset the session for, most particularly any partial/optional
1928 * attributes that have 'tunneled' over speakers that don't understand
1929 * them. Instead we withdraw only the prefix concerned.
1930 *
1931 * Complicates the flow a little though..
1932 */
1933 enum bgp_attr_parse_ret attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
1934 /* This define morphs the update case into a withdraw when lower levels
1935 * have signalled an error condition where this is best.
1936 */
1937 #define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
1938
1939 /* Parse attribute when it exists. */
1940 if (attribute_len) {
1941 attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len,
1942 &nlris[NLRI_MP_UPDATE],
1943 &nlris[NLRI_MP_WITHDRAW]);
1944 if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) {
1945 bgp_attr_unintern_sub(&attr);
1946 return BGP_Stop;
1947 }
1948 }
1949
1950 /* Logging the attribute. */
1951 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
1952 || BGP_DEBUG(update, UPDATE_IN)
1953 || BGP_DEBUG(update, UPDATE_PREFIX)) {
1954 ret = bgp_dump_attr(&attr, peer->rcvd_attr_str,
1955 sizeof(peer->rcvd_attr_str));
1956
1957 peer->stat_upd_7606++;
1958
1959 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
1960 flog_err(
1961 EC_BGP_UPDATE_RCV,
1962 "%pBP rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
1963 peer);
1964
1965 if (ret && bgp_debug_update(peer, NULL, NULL, 1)) {
1966 zlog_debug("%pBP rcvd UPDATE w/ attr: %s", peer,
1967 peer->rcvd_attr_str);
1968 peer->rcvd_attr_printed = 1;
1969 }
1970 }
1971
1972 /* Network Layer Reachability Information. */
1973 update_len = end - stream_pnt(s);
1974
1975 if (update_len) {
1976 /* Set NLRI portion to structure. */
1977 nlris[NLRI_UPDATE].afi = AFI_IP;
1978 nlris[NLRI_UPDATE].safi = SAFI_UNICAST;
1979 nlris[NLRI_UPDATE].nlri = stream_pnt(s);
1980 nlris[NLRI_UPDATE].length = update_len;
1981 stream_forward_getp(s, update_len);
1982
1983 if (CHECK_FLAG(attr.flag, ATTR_FLAG_BIT(BGP_ATTR_MP_REACH_NLRI))) {
1984 /*
1985 * We skipped nexthop attribute validation earlier so
1986 * validate the nexthop now.
1987 */
1988 if (bgp_attr_nexthop_valid(peer, &attr) < 0) {
1989 bgp_attr_unintern_sub(&attr);
1990 return BGP_Stop;
1991 }
1992 }
1993 }
1994
1995 if (BGP_DEBUG(update, UPDATE_IN))
1996 zlog_debug("%pBP rcvd UPDATE wlen %d attrlen %d alen %d", peer,
1997 withdraw_len, attribute_len, update_len);
1998
1999 /* Parse any given NLRIs */
2000 for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) {
2001 if (!nlris[i].nlri)
2002 continue;
2003
2004 /* NLRI is processed iff the peer if configured for the specific
2005 * afi/safi */
2006 if (!peer->afc[nlris[i].afi][nlris[i].safi]) {
2007 zlog_info(
2008 "%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u",
2009 peer->host, nlris[i].afi, nlris[i].safi);
2010 continue;
2011 }
2012
2013 /* EoR handled later */
2014 if (nlris[i].length == 0)
2015 continue;
2016
2017 switch (i) {
2018 case NLRI_UPDATE:
2019 case NLRI_MP_UPDATE:
2020 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
2021 &nlris[i], 0);
2022 break;
2023 case NLRI_WITHDRAW:
2024 case NLRI_MP_WITHDRAW:
2025 nlri_ret = bgp_nlri_parse(peer, &attr, &nlris[i], 1);
2026 break;
2027 default:
2028 nlri_ret = BGP_NLRI_PARSE_ERROR;
2029 }
2030
2031 if (nlri_ret < BGP_NLRI_PARSE_OK
2032 && nlri_ret != BGP_NLRI_PARSE_ERROR_PREFIX_OVERFLOW) {
2033 flog_err(EC_BGP_UPDATE_RCV,
2034 "%s [Error] Error parsing NLRI", peer->host);
2035 if (peer_established(peer))
2036 bgp_notify_send(
2037 peer, BGP_NOTIFY_UPDATE_ERR,
2038 i <= NLRI_WITHDRAW
2039 ? BGP_NOTIFY_UPDATE_INVAL_NETWORK
2040 : BGP_NOTIFY_UPDATE_OPT_ATTR_ERR);
2041 bgp_attr_unintern_sub(&attr);
2042 return BGP_Stop;
2043 }
2044 }
2045
2046 /* EoR checks
2047 *
2048 * Non-MP IPv4/Unicast EoR is a completely empty UPDATE
2049 * and MP EoR should have only an empty MP_UNREACH
2050 */
2051 if ((!update_len && !withdraw_len && nlris[NLRI_MP_UPDATE].length == 0)
2052 || (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
2053 afi_t afi = 0;
2054 safi_t safi;
2055 struct graceful_restart_info *gr_info;
2056
2057 /* Restarting router */
2058 if (BGP_PEER_GRACEFUL_RESTART_CAPABLE(peer)
2059 && BGP_PEER_RESTARTING_MODE(peer))
2060 restart = true;
2061
2062 /* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already
2063 * checked
2064 * update and withdraw NLRI lengths are 0.
2065 */
2066 if (!attribute_len) {
2067 afi = AFI_IP;
2068 safi = SAFI_UNICAST;
2069 } else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI)
2070 && nlris[NLRI_MP_WITHDRAW].length == 0) {
2071 afi = nlris[NLRI_MP_WITHDRAW].afi;
2072 safi = nlris[NLRI_MP_WITHDRAW].safi;
2073 } else if (attr_parse_ret == BGP_ATTR_PARSE_EOR) {
2074 afi = nlris[NLRI_MP_UPDATE].afi;
2075 safi = nlris[NLRI_MP_UPDATE].safi;
2076 }
2077
2078 if (afi && peer->afc[afi][safi]) {
2079 struct vrf *vrf = vrf_lookup_by_id(peer->bgp->vrf_id);
2080
2081 /* End-of-RIB received */
2082 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2083 PEER_STATUS_EOR_RECEIVED)) {
2084 SET_FLAG(peer->af_sflags[afi][safi],
2085 PEER_STATUS_EOR_RECEIVED);
2086 bgp_update_explicit_eors(peer);
2087 /* Update graceful restart information */
2088 gr_info = &(peer->bgp->gr_info[afi][safi]);
2089 if (restart)
2090 gr_info->eor_received++;
2091 /* If EOR received from all peers and selection
2092 * deferral timer is running, cancel the timer
2093 * and invoke the best path calculation
2094 */
2095 if (gr_info->eor_required
2096 == gr_info->eor_received) {
2097 if (bgp_debug_neighbor_events(peer))
2098 zlog_debug(
2099 "%s %d, %s %d",
2100 "EOR REQ",
2101 gr_info->eor_required,
2102 "EOR RCV",
2103 gr_info->eor_received);
2104 if (gr_info->t_select_deferral) {
2105 void *info = THREAD_ARG(
2106 gr_info->t_select_deferral);
2107 XFREE(MTYPE_TMP, info);
2108 }
2109 THREAD_OFF(gr_info->t_select_deferral);
2110 gr_info->eor_required = 0;
2111 gr_info->eor_received = 0;
2112 /* Best path selection */
2113 bgp_best_path_select_defer(peer->bgp,
2114 afi, safi);
2115 }
2116 }
2117
2118 /* NSF delete stale route */
2119 if (peer->nsf[afi][safi])
2120 bgp_clear_stale_route(peer, afi, safi);
2121
2122 zlog_info(
2123 "%s: rcvd End-of-RIB for %s from %s in vrf %s",
2124 __func__, get_afi_safi_str(afi, safi, false),
2125 peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
2126 }
2127 }
2128
2129 /* Everything is done. We unintern temporary structures which
2130 interned in bgp_attr_parse(). */
2131 bgp_attr_unintern_sub(&attr);
2132
2133 peer->update_time = monotime(NULL);
2134
2135 /* Notify BGP Conditional advertisement scanner process */
2136 peer->advmap_table_change = true;
2137
2138 return Receive_UPDATE_message;
2139 }
2140
2141 /**
2142 * Process BGP NOTIFY message for peer.
2143 *
2144 * @param peer
2145 * @param size size of the packet
2146 * @return as in summary
2147 */
2148 static int bgp_notify_receive(struct peer *peer, bgp_size_t size)
2149 {
2150 struct bgp_notify outer = {};
2151 struct bgp_notify inner = {};
2152 bool hard_reset = false;
2153
2154 if (peer->notify.data) {
2155 XFREE(MTYPE_BGP_NOTIFICATION, peer->notify.data);
2156 peer->notify.length = 0;
2157 peer->notify.hard_reset = false;
2158 }
2159
2160 outer.code = stream_getc(peer->curr);
2161 outer.subcode = stream_getc(peer->curr);
2162 outer.length = size - 2;
2163 outer.data = NULL;
2164 outer.raw_data = NULL;
2165 if (outer.length) {
2166 outer.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, outer.length);
2167 memcpy(outer.raw_data, stream_pnt(peer->curr), outer.length);
2168 }
2169
2170 hard_reset =
2171 bgp_notify_received_hard_reset(peer, outer.code, outer.subcode);
2172 if (hard_reset && outer.length) {
2173 inner = bgp_notify_decapsulate_hard_reset(&outer);
2174 peer->notify.hard_reset = true;
2175 } else {
2176 inner = outer;
2177 }
2178
2179 /* Preserv notify code and sub code. */
2180 peer->notify.code = inner.code;
2181 peer->notify.subcode = inner.subcode;
2182 /* For further diagnostic record returned Data. */
2183 if (inner.length) {
2184 peer->notify.length = inner.length;
2185 peer->notify.data =
2186 XMALLOC(MTYPE_BGP_NOTIFICATION, inner.length);
2187 memcpy(peer->notify.data, inner.raw_data, inner.length);
2188 }
2189
2190 /* For debug */
2191 {
2192 int i;
2193 int first = 0;
2194 char c[4];
2195
2196 if (inner.length) {
2197 inner.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
2198 inner.length * 3);
2199 for (i = 0; i < inner.length; i++)
2200 if (first) {
2201 snprintf(c, sizeof(c), " %02x",
2202 stream_getc(peer->curr));
2203
2204 strlcat(inner.data, c,
2205 inner.length * 3);
2206
2207 } else {
2208 first = 1;
2209 snprintf(c, sizeof(c), "%02x",
2210 stream_getc(peer->curr));
2211
2212 strlcpy(inner.data, c,
2213 inner.length * 3);
2214 }
2215 }
2216
2217 bgp_notify_print(peer, &inner, "received", hard_reset);
2218 if (inner.length) {
2219 XFREE(MTYPE_BGP_NOTIFICATION, inner.data);
2220 inner.length = 0;
2221 }
2222 if (outer.length) {
2223 XFREE(MTYPE_BGP_NOTIFICATION, outer.data);
2224 XFREE(MTYPE_BGP_NOTIFICATION, outer.raw_data);
2225
2226 /* If this is a Hard Reset notification, we MUST free
2227 * the inner (encapsulated) notification too.
2228 */
2229 if (hard_reset)
2230 XFREE(MTYPE_BGP_NOTIFICATION, inner.raw_data);
2231 outer.length = 0;
2232 }
2233 }
2234
2235 /* peer count update */
2236 atomic_fetch_add_explicit(&peer->notify_in, 1, memory_order_relaxed);
2237
2238 peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;
2239
2240 /* We have to check for Notify with Unsupported Optional Parameter.
2241 in that case we fallback to open without the capability option.
2242 But this done in bgp_stop. We just mark it here to avoid changing
2243 the fsm tables. */
2244 if (inner.code == BGP_NOTIFY_OPEN_ERR &&
2245 inner.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM)
2246 UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);
2247
2248 /* If Graceful-Restart N-bit (Notification) is exchanged,
2249 * and it's not a Hard Reset, let's retain the routes.
2250 */
2251 if (bgp_has_graceful_restart_notification(peer) && !hard_reset &&
2252 CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE))
2253 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
2254
2255 bgp_peer_gr_flags_update(peer);
2256 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
2257 peer->bgp->peer);
2258
2259 return Receive_NOTIFICATION_message;
2260 }
2261
2262 /**
2263 * Process BGP ROUTEREFRESH message for peer.
2264 *
2265 * @param peer
2266 * @param size size of the packet
2267 * @return as in summary
2268 */
2269 static int bgp_route_refresh_receive(struct peer *peer, bgp_size_t size)
2270 {
2271 iana_afi_t pkt_afi;
2272 afi_t afi;
2273 iana_safi_t pkt_safi;
2274 safi_t safi;
2275 struct stream *s;
2276 struct peer_af *paf;
2277 struct update_group *updgrp;
2278 struct peer *updgrp_peer;
2279 uint8_t subtype;
2280 bool force_update = false;
2281 bgp_size_t msg_length =
2282 size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE);
2283
2284 /* If peer does not have the capability, send notification. */
2285 if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) {
2286 flog_err(EC_BGP_NO_CAP,
2287 "%s [Error] BGP route refresh is not enabled",
2288 peer->host);
2289 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2290 BGP_NOTIFY_HEADER_BAD_MESTYPE);
2291 return BGP_Stop;
2292 }
2293
2294 /* Status must be Established. */
2295 if (!peer_established(peer)) {
2296 flog_err(
2297 EC_BGP_INVALID_STATUS,
2298 "%s [Error] Route refresh packet received under status %s",
2299 peer->host,
2300 lookup_msg(bgp_status_msg, peer->status, NULL));
2301 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
2302 bgp_fsm_error_subcode(peer->status));
2303 return BGP_Stop;
2304 }
2305
2306 s = peer->curr;
2307
2308 /* Parse packet. */
2309 pkt_afi = stream_getw(s);
2310 subtype = stream_getc(s);
2311 pkt_safi = stream_getc(s);
2312
2313 /* Convert AFI, SAFI to internal values and check. */
2314 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) {
2315 zlog_info(
2316 "%s REFRESH_REQ for unrecognized afi/safi: %s/%s - ignored",
2317 peer->host, iana_afi2str(pkt_afi),
2318 iana_safi2str(pkt_safi));
2319 return BGP_PACKET_NOOP;
2320 }
2321
2322 if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) {
2323 uint8_t *end;
2324 uint8_t when_to_refresh;
2325 uint8_t orf_type;
2326 uint16_t orf_len;
2327
2328 if (subtype) {
2329 /* If the length, excluding the fixed-size message
2330 * header, of the received ROUTE-REFRESH message with
2331 * Message Subtype 1 and 2 is not 4, then the BGP
2332 * speaker MUST send a NOTIFICATION message with the
2333 * Error Code of "ROUTE-REFRESH Message Error" and the
2334 * subcode of "Invalid Message Length".
2335 */
2336 if (msg_length != 4) {
2337 zlog_err(
2338 "%s Enhanced Route Refresh message length error",
2339 peer->host);
2340 bgp_notify_send(
2341 peer, BGP_NOTIFY_ROUTE_REFRESH_ERR,
2342 BGP_NOTIFY_ROUTE_REFRESH_INVALID_MSG_LEN);
2343 }
2344
2345 /* When the BGP speaker receives a ROUTE-REFRESH message
2346 * with a "Message Subtype" field other than 0, 1, or 2,
2347 * it MUST ignore the received ROUTE-REFRESH message.
2348 */
2349 if (subtype > 2)
2350 zlog_err(
2351 "%s Enhanced Route Refresh invalid subtype",
2352 peer->host);
2353 }
2354
2355 if (msg_length < 5) {
2356 zlog_info("%s ORF route refresh length error",
2357 peer->host);
2358 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2359 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2360 return BGP_Stop;
2361 }
2362
2363 when_to_refresh = stream_getc(s);
2364 end = stream_pnt(s) + (size - 5);
2365
2366 while ((stream_pnt(s) + 2) < end) {
2367 orf_type = stream_getc(s);
2368 orf_len = stream_getw(s);
2369
2370 /* orf_len in bounds? */
2371 if ((stream_pnt(s) + orf_len) > end)
2372 break; /* XXX: Notify instead?? */
2373 if (orf_type == ORF_TYPE_PREFIX
2374 || orf_type == ORF_TYPE_PREFIX_OLD) {
2375 uint8_t *p_pnt = stream_pnt(s);
2376 uint8_t *p_end = stream_pnt(s) + orf_len;
2377 struct orf_prefix orfp;
2378 uint8_t common = 0;
2379 uint32_t seq;
2380 int psize;
2381 char name[BUFSIZ];
2382 int ret = CMD_SUCCESS;
2383
2384 if (bgp_debug_neighbor_events(peer)) {
2385 zlog_debug(
2386 "%pBP rcvd Prefixlist ORF(%d) length %d",
2387 peer, orf_type, orf_len);
2388 }
2389
2390 /* ORF prefix-list name */
2391 snprintf(name, sizeof(name), "%s.%d.%d",
2392 peer->host, afi, safi);
2393
2394 /* we're going to read at least 1 byte of common
2395 * ORF header,
2396 * and 7 bytes of ORF Address-filter entry from
2397 * the stream
2398 */
2399 if (*p_pnt & ORF_COMMON_PART_REMOVE_ALL) {
2400 if (bgp_debug_neighbor_events(peer))
2401 zlog_debug(
2402 "%pBP rcvd Remove-All pfxlist ORF request",
2403 peer);
2404 prefix_bgp_orf_remove_all(afi, name);
2405 break;
2406 }
2407
2408 if (orf_len < 7)
2409 break;
2410
2411 while (p_pnt < p_end) {
2412 /* If the ORF entry is malformed, want
2413 * to read as much of it
2414 * as possible without going beyond the
2415 * bounds of the entry,
2416 * to maximise debug information.
2417 */
2418 int ok;
2419 memset(&orfp, 0, sizeof(orfp));
2420 common = *p_pnt++;
2421 /* after ++: p_pnt <= p_end */
2422 ok = ((uint32_t)(p_end - p_pnt)
2423 >= sizeof(uint32_t));
2424 if (ok) {
2425 memcpy(&seq, p_pnt,
2426 sizeof(uint32_t));
2427 p_pnt += sizeof(uint32_t);
2428 orfp.seq = ntohl(seq);
2429 } else
2430 p_pnt = p_end;
2431
2432 /* val checked in prefix_bgp_orf_set */
2433 if (p_pnt < p_end)
2434 orfp.ge = *p_pnt++;
2435
2436 /* val checked in prefix_bgp_orf_set */
2437 if (p_pnt < p_end)
2438 orfp.le = *p_pnt++;
2439
2440 if ((ok = (p_pnt < p_end)))
2441 orfp.p.prefixlen = *p_pnt++;
2442
2443 /* afi checked already */
2444 orfp.p.family = afi2family(afi);
2445
2446 /* 0 if not ok */
2447 psize = PSIZE(orfp.p.prefixlen);
2448 /* valid for family ? */
2449 if (psize > prefix_blen(&orfp.p)) {
2450 ok = 0;
2451 psize = prefix_blen(&orfp.p);
2452 }
2453 /* valid for packet ? */
2454 if (psize > (p_end - p_pnt)) {
2455 ok = 0;
2456 psize = p_end - p_pnt;
2457 }
2458
2459 if (psize > 0)
2460 memcpy(&orfp.p.u.prefix, p_pnt,
2461 psize);
2462 p_pnt += psize;
2463
2464 if (bgp_debug_neighbor_events(peer)) {
2465 char buf[INET6_BUFSIZ];
2466
2467 zlog_debug(
2468 "%pBP rcvd %s %s seq %u %s/%d ge %d le %d%s",
2469 peer,
2470 (common & ORF_COMMON_PART_REMOVE
2471 ? "Remove"
2472 : "Add"),
2473 (common & ORF_COMMON_PART_DENY
2474 ? "deny"
2475 : "permit"),
2476 orfp.seq,
2477 inet_ntop(
2478 orfp.p.family,
2479 &orfp.p.u.prefix,
2480 buf,
2481 INET6_BUFSIZ),
2482 orfp.p.prefixlen,
2483 orfp.ge, orfp.le,
2484 ok ? "" : " MALFORMED");
2485 }
2486
2487 if (ok)
2488 ret = prefix_bgp_orf_set(
2489 name, afi, &orfp,
2490 (common & ORF_COMMON_PART_DENY
2491 ? 0
2492 : 1),
2493 (common & ORF_COMMON_PART_REMOVE
2494 ? 0
2495 : 1));
2496
2497 if (!ok || (ok && ret != CMD_SUCCESS)) {
2498 zlog_info(
2499 "%pBP Received misformatted prefixlist ORF. Remove All pfxlist",
2500 peer);
2501 prefix_bgp_orf_remove_all(afi,
2502 name);
2503 break;
2504 }
2505 }
2506
2507 peer->orf_plist[afi][safi] =
2508 prefix_bgp_orf_lookup(afi, name);
2509 }
2510 stream_forward_getp(s, orf_len);
2511 }
2512 if (bgp_debug_neighbor_events(peer))
2513 zlog_debug("%pBP rcvd Refresh %s ORF request", peer,
2514 when_to_refresh == REFRESH_DEFER
2515 ? "Defer"
2516 : "Immediate");
2517 if (when_to_refresh == REFRESH_DEFER)
2518 return BGP_PACKET_NOOP;
2519 }
2520
2521 /* First update is deferred until ORF or ROUTE-REFRESH is received */
2522 if (CHECK_FLAG(peer->af_sflags[afi][safi],
2523 PEER_STATUS_ORF_WAIT_REFRESH))
2524 UNSET_FLAG(peer->af_sflags[afi][safi],
2525 PEER_STATUS_ORF_WAIT_REFRESH);
2526
2527 paf = peer_af_find(peer, afi, safi);
2528 if (paf && paf->subgroup) {
2529 if (peer->orf_plist[afi][safi]) {
2530 updgrp = PAF_UPDGRP(paf);
2531 updgrp_peer = UPDGRP_PEER(updgrp);
2532 updgrp_peer->orf_plist[afi][safi] =
2533 peer->orf_plist[afi][safi];
2534 }
2535
2536 /* Avoid supressing duplicate routes later
2537 * when processing in subgroup_announce_table().
2538 */
2539 force_update = true;
2540
2541 /* If the peer is configured for default-originate clear the
2542 * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will
2543 * re-advertise the
2544 * default
2545 */
2546 if (CHECK_FLAG(paf->subgroup->sflags,
2547 SUBGRP_STATUS_DEFAULT_ORIGINATE))
2548 UNSET_FLAG(paf->subgroup->sflags,
2549 SUBGRP_STATUS_DEFAULT_ORIGINATE);
2550 }
2551
2552 if (subtype == BGP_ROUTE_REFRESH_BORR) {
2553 /* A BGP speaker that has received the Graceful Restart
2554 * Capability from its neighbor MUST ignore any BoRRs for
2555 * an <AFI, SAFI> from the neighbor before the speaker
2556 * receives the EoR for the given <AFI, SAFI> from the
2557 * neighbor.
2558 */
2559 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)
2560 && !CHECK_FLAG(peer->af_sflags[afi][safi],
2561 PEER_STATUS_EOR_RECEIVED)) {
2562 if (bgp_debug_neighbor_events(peer))
2563 zlog_debug(
2564 "%pBP rcvd route-refresh (BoRR) for %s/%s before EoR",
2565 peer, afi2str(afi), safi2str(safi));
2566 return BGP_PACKET_NOOP;
2567 }
2568
2569 if (peer->t_refresh_stalepath) {
2570 if (bgp_debug_neighbor_events(peer))
2571 zlog_debug(
2572 "%pBP rcvd route-refresh (BoRR) for %s/%s, whereas BoRR already received",
2573 peer, afi2str(afi), safi2str(safi));
2574 return BGP_PACKET_NOOP;
2575 }
2576
2577 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_RECEIVED);
2578 UNSET_FLAG(peer->af_sflags[afi][safi],
2579 PEER_STATUS_EORR_RECEIVED);
2580
2581 /* When a BGP speaker receives a BoRR message from
2582 * a peer, it MUST mark all the routes with the given
2583 * Address Family Identifier and Subsequent Address
2584 * Family Identifier, <AFI, SAFI> [RFC2918], from
2585 * that peer as stale.
2586 */
2587 if (peer_active_nego(peer)) {
2588 SET_FLAG(peer->af_sflags[afi][safi],
2589 PEER_STATUS_ENHANCED_REFRESH);
2590 bgp_set_stale_route(peer, afi, safi);
2591 }
2592
2593 if (peer_established(peer))
2594 thread_add_timer(bm->master,
2595 bgp_refresh_stalepath_timer_expire,
2596 paf, peer->bgp->stalepath_time,
2597 &peer->t_refresh_stalepath);
2598
2599 if (bgp_debug_neighbor_events(peer))
2600 zlog_debug(
2601 "%pBP rcvd route-refresh (BoRR) for %s/%s, triggering timer for %u seconds",
2602 peer, afi2str(afi), safi2str(safi),
2603 peer->bgp->stalepath_time);
2604 } else if (subtype == BGP_ROUTE_REFRESH_EORR) {
2605 if (!peer->t_refresh_stalepath) {
2606 zlog_err(
2607 "%pBP rcvd route-refresh (EoRR) for %s/%s, whereas no BoRR received",
2608 peer, afi2str(afi), safi2str(safi));
2609 return BGP_PACKET_NOOP;
2610 }
2611
2612 THREAD_OFF(peer->t_refresh_stalepath);
2613
2614 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_RECEIVED);
2615 UNSET_FLAG(peer->af_sflags[afi][safi],
2616 PEER_STATUS_BORR_RECEIVED);
2617
2618 if (bgp_debug_neighbor_events(peer))
2619 zlog_debug(
2620 "%pBP rcvd route-refresh (EoRR) for %s/%s, stopping BoRR timer",
2621 peer, afi2str(afi), safi2str(safi));
2622
2623 if (peer->nsf[afi][safi])
2624 bgp_clear_stale_route(peer, afi, safi);
2625 } else {
2626 if (bgp_debug_neighbor_events(peer))
2627 zlog_debug(
2628 "%pBP rcvd route-refresh (REQUEST) for %s/%s",
2629 peer, afi2str(afi), safi2str(safi));
2630
2631 /* In response to a "normal route refresh request" from the
2632 * peer, the speaker MUST send a BoRR message.
2633 */
2634 if (CHECK_FLAG(peer->cap, PEER_CAP_ENHANCED_RR_RCV)) {
2635 /* For a BGP speaker that supports the BGP Graceful
2636 * Restart, it MUST NOT send a BoRR for an <AFI, SAFI>
2637 * to a neighbor before it sends the EoR for the
2638 * <AFI, SAFI> to the neighbor.
2639 */
2640 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2641 PEER_STATUS_EOR_SEND)) {
2642 if (bgp_debug_neighbor_events(peer))
2643 zlog_debug(
2644 "%pBP rcvd route-refresh (REQUEST) for %s/%s before EoR",
2645 peer, afi2str(afi),
2646 safi2str(safi));
2647 /* Can't send BoRR now, postpone after EoR */
2648 SET_FLAG(peer->af_sflags[afi][safi],
2649 PEER_STATUS_REFRESH_PENDING);
2650 return BGP_PACKET_NOOP;
2651 }
2652
2653 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
2654 BGP_ROUTE_REFRESH_BORR);
2655
2656 if (bgp_debug_neighbor_events(peer))
2657 zlog_debug(
2658 "%pBP sending route-refresh (BoRR) for %s/%s",
2659 peer, afi2str(afi), safi2str(safi));
2660
2661 /* Set flag Ready-To-Send to know when we can send EoRR
2662 * message.
2663 */
2664 SET_FLAG(peer->af_sflags[afi][safi],
2665 PEER_STATUS_BORR_SEND);
2666 UNSET_FLAG(peer->af_sflags[afi][safi],
2667 PEER_STATUS_EORR_SEND);
2668 }
2669 }
2670
2671 /* Perform route refreshment to the peer */
2672 bgp_announce_route(peer, afi, safi, force_update);
2673
2674 /* No FSM action necessary */
2675 return BGP_PACKET_NOOP;
2676 }
2677
2678 /**
2679 * Parse BGP CAPABILITY message for peer.
2680 *
2681 * @param peer
2682 * @param size size of the packet
2683 * @return as in summary
2684 */
2685 static int bgp_capability_msg_parse(struct peer *peer, uint8_t *pnt,
2686 bgp_size_t length)
2687 {
2688 uint8_t *end;
2689 struct capability_mp_data mpc;
2690 struct capability_header *hdr;
2691 uint8_t action;
2692 iana_afi_t pkt_afi;
2693 afi_t afi;
2694 iana_safi_t pkt_safi;
2695 safi_t safi;
2696
2697 end = pnt + length;
2698
2699 while (pnt < end) {
2700 /* We need at least action, capability code and capability
2701 * length. */
2702 if (pnt + 3 > end) {
2703 zlog_info("%s Capability length error", peer->host);
2704 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2705 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2706 return BGP_Stop;
2707 }
2708 action = *pnt;
2709 hdr = (struct capability_header *)(pnt + 1);
2710
2711 /* Action value check. */
2712 if (action != CAPABILITY_ACTION_SET
2713 && action != CAPABILITY_ACTION_UNSET) {
2714 zlog_info("%s Capability Action Value error %d",
2715 peer->host, action);
2716 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2717 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2718 return BGP_Stop;
2719 }
2720
2721 if (bgp_debug_neighbor_events(peer))
2722 zlog_debug(
2723 "%s CAPABILITY has action: %d, code: %u, length %u",
2724 peer->host, action, hdr->code, hdr->length);
2725
2726 if (hdr->length < sizeof(struct capability_mp_data)) {
2727 zlog_info(
2728 "%pBP Capability structure is not properly filled out, expected at least %zu bytes but header length specified is %d",
2729 peer, sizeof(struct capability_mp_data),
2730 hdr->length);
2731 return BGP_Stop;
2732 }
2733
2734 /* Capability length check. */
2735 if ((pnt + hdr->length + 3) > end) {
2736 zlog_info("%s Capability length error", peer->host);
2737 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2738 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2739 return BGP_Stop;
2740 }
2741
2742 /* Fetch structure to the byte stream. */
2743 memcpy(&mpc, pnt + 3, sizeof(struct capability_mp_data));
2744 pnt += hdr->length + 3;
2745
2746 /* We know MP Capability Code. */
2747 if (hdr->code == CAPABILITY_CODE_MP) {
2748 pkt_afi = ntohs(mpc.afi);
2749 pkt_safi = mpc.safi;
2750
2751 /* Ignore capability when override-capability is set. */
2752 if (CHECK_FLAG(peer->flags,
2753 PEER_FLAG_OVERRIDE_CAPABILITY))
2754 continue;
2755
2756 /* Convert AFI, SAFI to internal values. */
2757 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi,
2758 &safi)) {
2759 if (bgp_debug_neighbor_events(peer))
2760 zlog_debug(
2761 "%s Dynamic Capability MP_EXT afi/safi invalid (%s/%s)",
2762 peer->host,
2763 iana_afi2str(pkt_afi),
2764 iana_safi2str(pkt_safi));
2765 continue;
2766 }
2767
2768 /* Address family check. */
2769 if (bgp_debug_neighbor_events(peer))
2770 zlog_debug(
2771 "%s CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
2772 peer->host,
2773 action == CAPABILITY_ACTION_SET
2774 ? "Advertising"
2775 : "Removing",
2776 iana_afi2str(pkt_afi),
2777 iana_safi2str(pkt_safi));
2778
2779 if (action == CAPABILITY_ACTION_SET) {
2780 peer->afc_recv[afi][safi] = 1;
2781 if (peer->afc[afi][safi]) {
2782 peer->afc_nego[afi][safi] = 1;
2783 bgp_announce_route(peer, afi, safi,
2784 false);
2785 }
2786 } else {
2787 peer->afc_recv[afi][safi] = 0;
2788 peer->afc_nego[afi][safi] = 0;
2789
2790 if (peer_active_nego(peer))
2791 bgp_clear_route(peer, afi, safi);
2792 else
2793 return BGP_Stop;
2794 }
2795 } else {
2796 flog_warn(
2797 EC_BGP_UNRECOGNIZED_CAPABILITY,
2798 "%s unrecognized capability code: %d - ignored",
2799 peer->host, hdr->code);
2800 }
2801 }
2802
2803 /* No FSM action necessary */
2804 return BGP_PACKET_NOOP;
2805 }
2806
2807 /**
2808 * Parse BGP CAPABILITY message for peer.
2809 *
2810 * Exported for unit testing.
2811 *
2812 * @param peer
2813 * @param size size of the packet
2814 * @return as in summary
2815 */
2816 int bgp_capability_receive(struct peer *peer, bgp_size_t size)
2817 {
2818 uint8_t *pnt;
2819
2820 /* Fetch pointer. */
2821 pnt = stream_pnt(peer->curr);
2822
2823 if (bgp_debug_neighbor_events(peer))
2824 zlog_debug("%s rcv CAPABILITY", peer->host);
2825
2826 /* If peer does not have the capability, send notification. */
2827 if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) {
2828 flog_err(EC_BGP_NO_CAP,
2829 "%s [Error] BGP dynamic capability is not enabled",
2830 peer->host);
2831 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2832 BGP_NOTIFY_HEADER_BAD_MESTYPE);
2833 return BGP_Stop;
2834 }
2835
2836 /* Status must be Established. */
2837 if (!peer_established(peer)) {
2838 flog_err(
2839 EC_BGP_NO_CAP,
2840 "%s [Error] Dynamic capability packet received under status %s",
2841 peer->host,
2842 lookup_msg(bgp_status_msg, peer->status, NULL));
2843 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
2844 bgp_fsm_error_subcode(peer->status));
2845 return BGP_Stop;
2846 }
2847
2848 /* Parse packet. */
2849 return bgp_capability_msg_parse(peer, pnt, size);
2850 }
2851
2852 /**
2853 * Processes a peer's input buffer.
2854 *
2855 * This function sidesteps the event loop and directly calls bgp_event_update()
2856 * after processing each BGP message. This is necessary to ensure proper
2857 * ordering of FSM events and unifies the behavior that was present previously,
2858 * whereby some of the packet handling functions would update the FSM and some
2859 * would not, making event flow difficult to understand. Please think twice
2860 * before hacking this.
2861 *
2862 * Thread type: THREAD_EVENT
2863 * @param thread
2864 * @return 0
2865 */
2866 void bgp_process_packet(struct thread *thread)
2867 {
2868 /* Yes first of all get peer pointer. */
2869 struct peer *peer; // peer
2870 uint32_t rpkt_quanta_old; // how many packets to read
2871 int fsm_update_result; // return code of bgp_event_update()
2872 int mprc; // message processing return code
2873
2874 peer = THREAD_ARG(thread);
2875 rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
2876 memory_order_relaxed);
2877 fsm_update_result = 0;
2878
2879 /* Guard against scheduled events that occur after peer deletion. */
2880 if (peer->status == Deleted || peer->status == Clearing)
2881 return;
2882
2883 unsigned int processed = 0;
2884
2885 while (processed < rpkt_quanta_old) {
2886 uint8_t type = 0;
2887 bgp_size_t size;
2888 char notify_data_length[2];
2889
2890 frr_with_mutex (&peer->io_mtx) {
2891 peer->curr = stream_fifo_pop(peer->ibuf);
2892 }
2893
2894 if (peer->curr == NULL) // no packets to process, hmm...
2895 return;
2896
2897 /* skip the marker and copy the packet length */
2898 stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
2899 memcpy(notify_data_length, stream_pnt(peer->curr), 2);
2900
2901 /* read in the packet length and type */
2902 size = stream_getw(peer->curr);
2903 type = stream_getc(peer->curr);
2904
2905 hook_call(bgp_packet_dump, peer, type, size, peer->curr);
2906
2907 /* adjust size to exclude the marker + length + type */
2908 size -= BGP_HEADER_SIZE;
2909
2910 /* Read rest of the packet and call each sort of packet routine
2911 */
2912 switch (type) {
2913 case BGP_MSG_OPEN:
2914 frrtrace(2, frr_bgp, open_process, peer, size);
2915 atomic_fetch_add_explicit(&peer->open_in, 1,
2916 memory_order_relaxed);
2917 mprc = bgp_open_receive(peer, size);
2918 if (mprc == BGP_Stop)
2919 flog_err(
2920 EC_BGP_PKT_OPEN,
2921 "%s: BGP OPEN receipt failed for peer: %s",
2922 __func__, peer->host);
2923 break;
2924 case BGP_MSG_UPDATE:
2925 frrtrace(2, frr_bgp, update_process, peer, size);
2926 atomic_fetch_add_explicit(&peer->update_in, 1,
2927 memory_order_relaxed);
2928 peer->readtime = monotime(NULL);
2929 mprc = bgp_update_receive(peer, size);
2930 if (mprc == BGP_Stop)
2931 flog_err(
2932 EC_BGP_UPDATE_RCV,
2933 "%s: BGP UPDATE receipt failed for peer: %s",
2934 __func__, peer->host);
2935 break;
2936 case BGP_MSG_NOTIFY:
2937 frrtrace(2, frr_bgp, notification_process, peer, size);
2938 atomic_fetch_add_explicit(&peer->notify_in, 1,
2939 memory_order_relaxed);
2940 mprc = bgp_notify_receive(peer, size);
2941 if (mprc == BGP_Stop)
2942 flog_err(
2943 EC_BGP_NOTIFY_RCV,
2944 "%s: BGP NOTIFY receipt failed for peer: %s",
2945 __func__, peer->host);
2946 break;
2947 case BGP_MSG_KEEPALIVE:
2948 frrtrace(2, frr_bgp, keepalive_process, peer, size);
2949 peer->readtime = monotime(NULL);
2950 atomic_fetch_add_explicit(&peer->keepalive_in, 1,
2951 memory_order_relaxed);
2952 mprc = bgp_keepalive_receive(peer, size);
2953 if (mprc == BGP_Stop)
2954 flog_err(
2955 EC_BGP_KEEP_RCV,
2956 "%s: BGP KEEPALIVE receipt failed for peer: %s",
2957 __func__, peer->host);
2958 break;
2959 case BGP_MSG_ROUTE_REFRESH_NEW:
2960 case BGP_MSG_ROUTE_REFRESH_OLD:
2961 frrtrace(2, frr_bgp, refresh_process, peer, size);
2962 atomic_fetch_add_explicit(&peer->refresh_in, 1,
2963 memory_order_relaxed);
2964 mprc = bgp_route_refresh_receive(peer, size);
2965 if (mprc == BGP_Stop)
2966 flog_err(
2967 EC_BGP_RFSH_RCV,
2968 "%s: BGP ROUTEREFRESH receipt failed for peer: %s",
2969 __func__, peer->host);
2970 break;
2971 case BGP_MSG_CAPABILITY:
2972 frrtrace(2, frr_bgp, capability_process, peer, size);
2973 atomic_fetch_add_explicit(&peer->dynamic_cap_in, 1,
2974 memory_order_relaxed);
2975 mprc = bgp_capability_receive(peer, size);
2976 if (mprc == BGP_Stop)
2977 flog_err(
2978 EC_BGP_CAP_RCV,
2979 "%s: BGP CAPABILITY receipt failed for peer: %s",
2980 __func__, peer->host);
2981 break;
2982 default:
2983 /* Suppress uninitialized variable warning */
2984 mprc = 0;
2985 (void)mprc;
2986 /*
2987 * The message type should have been sanitized before
2988 * we ever got here. Receipt of a message with an
2989 * invalid header at this point is indicative of a
2990 * security issue.
2991 */
2992 assert (!"Message of invalid type received during input processing");
2993 }
2994
2995 /* delete processed packet */
2996 stream_free(peer->curr);
2997 peer->curr = NULL;
2998 processed++;
2999
3000 /* Update FSM */
3001 if (mprc != BGP_PACKET_NOOP)
3002 fsm_update_result = bgp_event_update(peer, mprc);
3003 else
3004 continue;
3005
3006 /*
3007 * If peer was deleted, do not process any more packets. This
3008 * is usually due to executing BGP_Stop or a stub deletion.
3009 */
3010 if (fsm_update_result == FSM_PEER_TRANSFERRED
3011 || fsm_update_result == FSM_PEER_STOPPED)
3012 break;
3013 }
3014
3015 if (fsm_update_result != FSM_PEER_TRANSFERRED
3016 && fsm_update_result != FSM_PEER_STOPPED) {
3017 frr_with_mutex (&peer->io_mtx) {
3018 // more work to do, come back later
3019 if (peer->ibuf->count > 0)
3020 thread_add_event(
3021 bm->master, bgp_process_packet, peer, 0,
3022 &peer->t_process_packet);
3023 }
3024 }
3025 }
3026
3027 /* Send EOR when routes are processed by selection deferral timer */
3028 void bgp_send_delayed_eor(struct bgp *bgp)
3029 {
3030 struct peer *peer;
3031 struct listnode *node, *nnode;
3032
3033 /* EOR message sent in bgp_write_proceed_actions */
3034 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer))
3035 bgp_write_proceed_actions(peer);
3036 }
3037
3038 /*
3039 * Task callback to handle socket error encountered in the io pthread. We avoid
3040 * having the io pthread try to enqueue fsm events or mess with the peer
3041 * struct.
3042 */
3043 void bgp_packet_process_error(struct thread *thread)
3044 {
3045 struct peer *peer;
3046 int code;
3047
3048 peer = THREAD_ARG(thread);
3049 code = THREAD_VAL(thread);
3050
3051 if (bgp_debug_neighbor_events(peer))
3052 zlog_debug("%s [Event] BGP error %d on fd %d",
3053 peer->host, code, peer->fd);
3054
3055 /* Closed connection or error on the socket */
3056 if (peer_established(peer)) {
3057 if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
3058 || CHECK_FLAG(peer->flags,
3059 PEER_FLAG_GRACEFUL_RESTART_HELPER))
3060 && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
3061 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
3062 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
3063 } else
3064 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
3065 }
3066
3067 bgp_event_update(peer, code);
3068 }