]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_packet.c
Merge pull request #12837 from donaldsharp/unlikely_routemap
[mirror_frr.git] / bgpd / bgp_packet.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* BGP packet management routine.
3 * Contains utility functions for constructing and consuming BGP messages.
4 * Copyright (C) 2017 Cumulus Networks
5 * Copyright (C) 1999 Kunihiro Ishiguro
6 */
7
8 #include <zebra.h>
9 #include <sys/time.h>
10
11 #include "frrevent.h"
12 #include "stream.h"
13 #include "network.h"
14 #include "prefix.h"
15 #include "command.h"
16 #include "log.h"
17 #include "memory.h"
18 #include "sockunion.h" /* for inet_ntop () */
19 #include "sockopt.h"
20 #include "linklist.h"
21 #include "plist.h"
22 #include "queue.h"
23 #include "filter.h"
24 #include "lib_errors.h"
25
26 #include "bgpd/bgpd.h"
27 #include "bgpd/bgp_table.h"
28 #include "bgpd/bgp_dump.h"
29 #include "bgpd/bgp_bmp.h"
30 #include "bgpd/bgp_attr.h"
31 #include "bgpd/bgp_debug.h"
32 #include "bgpd/bgp_errors.h"
33 #include "bgpd/bgp_fsm.h"
34 #include "bgpd/bgp_route.h"
35 #include "bgpd/bgp_packet.h"
36 #include "bgpd/bgp_open.h"
37 #include "bgpd/bgp_aspath.h"
38 #include "bgpd/bgp_community.h"
39 #include "bgpd/bgp_ecommunity.h"
40 #include "bgpd/bgp_lcommunity.h"
41 #include "bgpd/bgp_network.h"
42 #include "bgpd/bgp_mplsvpn.h"
43 #include "bgpd/bgp_evpn.h"
44 #include "bgpd/bgp_advertise.h"
45 #include "bgpd/bgp_vty.h"
46 #include "bgpd/bgp_updgrp.h"
47 #include "bgpd/bgp_label.h"
48 #include "bgpd/bgp_io.h"
49 #include "bgpd/bgp_keepalives.h"
50 #include "bgpd/bgp_flowspec.h"
51 #include "bgpd/bgp_trace.h"
52
53 DEFINE_HOOK(bgp_packet_dump,
54 (struct peer *peer, uint8_t type, bgp_size_t size,
55 struct stream *s),
56 (peer, type, size, s));
57
58 DEFINE_HOOK(bgp_packet_send,
59 (struct peer *peer, uint8_t type, bgp_size_t size,
60 struct stream *s),
61 (peer, type, size, s));
62
63 /**
64 * Sets marker and type fields for a BGP message.
65 *
66 * @param s the stream containing the packet
67 * @param type the packet type
68 * @return the size of the stream
69 */
70 int bgp_packet_set_marker(struct stream *s, uint8_t type)
71 {
72 int i;
73
74 /* Fill in marker. */
75 for (i = 0; i < BGP_MARKER_SIZE; i++)
76 stream_putc(s, 0xff);
77
78 /* Dummy total length. This field is should be filled in later on. */
79 stream_putw(s, 0);
80
81 /* BGP packet type. */
82 stream_putc(s, type);
83
84 /* Return current stream size. */
85 return stream_get_endp(s);
86 }
87
88 /**
89 * Sets size field for a BGP message.
90 *
91 * Size field is set to the size of the stream passed.
92 *
93 * @param s the stream containing the packet
94 */
95 void bgp_packet_set_size(struct stream *s)
96 {
97 int cp;
98
99 /* Preserve current pointer. */
100 cp = stream_get_endp(s);
101 stream_putw_at(s, BGP_MARKER_SIZE, cp);
102 }
103
104 /*
105 * Push a packet onto the beginning of the peer's output queue.
106 * This function acquires the peer's write mutex before proceeding.
107 */
108 static void bgp_packet_add(struct peer *peer, struct stream *s)
109 {
110 intmax_t delta;
111 uint32_t holdtime;
112 intmax_t sendholdtime;
113
114 frr_with_mutex (&peer->io_mtx) {
115 /* if the queue is empty, reset the "last OK" timestamp to
116 * now, otherwise if we write another packet immediately
117 * after it'll get confused
118 */
119 if (!stream_fifo_count_safe(peer->obuf))
120 peer->last_sendq_ok = monotime(NULL);
121
122 stream_fifo_push(peer->obuf, s);
123
124 delta = monotime(NULL) - peer->last_sendq_ok;
125
126 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
127 holdtime = atomic_load_explicit(&peer->holdtime,
128 memory_order_relaxed);
129 else
130 holdtime = peer->bgp->default_holdtime;
131
132 sendholdtime = holdtime * 2;
133
134 /* Note that when we're here, we're adding some packet to the
135 * OutQ. That includes keepalives when there is nothing to
136 * do, so there's a guarantee we pass by here once in a while.
137 *
138 * That implies there is no need to go set up another separate
139 * timer that ticks down SendHoldTime, as we'll be here sooner
140 * or later anyway and will see the checks below failing.
141 */
142 if (!holdtime) {
143 /* no holdtime, do nothing. */
144 } else if (delta > sendholdtime) {
145 flog_err(
146 EC_BGP_SENDQ_STUCK_PROPER,
147 "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
148 peer, sendholdtime);
149 BGP_EVENT_ADD(peer, TCP_fatal_error);
150 } else if (delta > (intmax_t)holdtime &&
151 monotime(NULL) - peer->last_sendq_warn > 5) {
152 flog_warn(
153 EC_BGP_SENDQ_STUCK_WARN,
154 "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
155 peer, holdtime);
156 peer->last_sendq_warn = monotime(NULL);
157 }
158 }
159 }
160
161 static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
162 safi_t safi)
163 {
164 struct stream *s;
165 iana_afi_t pkt_afi = IANA_AFI_IPV4;
166 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
167
168 if (DISABLE_BGP_ANNOUNCE)
169 return NULL;
170
171 if (bgp_debug_neighbor_events(peer))
172 zlog_debug("send End-of-RIB for %s to %s",
173 get_afi_safi_str(afi, safi, false), peer->host);
174
175 s = stream_new(peer->max_packet_size);
176
177 /* Make BGP update packet. */
178 bgp_packet_set_marker(s, BGP_MSG_UPDATE);
179
180 /* Unfeasible Routes Length */
181 stream_putw(s, 0);
182
183 if (afi == AFI_IP && safi == SAFI_UNICAST) {
184 /* Total Path Attribute Length */
185 stream_putw(s, 0);
186 } else {
187 /* Convert AFI, SAFI to values for packet. */
188 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
189
190 /* Total Path Attribute Length */
191 stream_putw(s, 6);
192 stream_putc(s, BGP_ATTR_FLAG_OPTIONAL);
193 stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI);
194 stream_putc(s, 3);
195 stream_putw(s, pkt_afi);
196 stream_putc(s, pkt_safi);
197 }
198
199 bgp_packet_set_size(s);
200 return s;
201 }
202
203 /* Called when there is a change in the EOR(implicit or explicit) status of a
204 * peer. Ends the update-delay if all expected peers are done with EORs. */
205 void bgp_check_update_delay(struct bgp *bgp)
206 {
207 struct listnode *node, *nnode;
208 struct peer *peer = NULL;
209
210 if (bgp_debug_neighbor_events(peer))
211 zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d",
212 bgp->established, bgp->restarted_peers,
213 bgp->implicit_eors, bgp->explicit_eors);
214
215 if (bgp->established
216 <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
217 /*
218 * This is an extra sanity check to make sure we wait for all
219 * the eligible configured peers. This check is performed if
220 * establish wait timer is on, or establish wait option is not
221 * given with the update-delay command
222 */
223 if (bgp->t_establish_wait
224 || (bgp->v_establish_wait == bgp->v_update_delay))
225 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
226 if (CHECK_FLAG(peer->flags,
227 PEER_FLAG_CONFIG_NODE)
228 && !CHECK_FLAG(peer->flags,
229 PEER_FLAG_SHUTDOWN)
230 && !CHECK_FLAG(peer->bgp->flags,
231 BGP_FLAG_SHUTDOWN)
232 && !peer->update_delay_over) {
233 if (bgp_debug_neighbor_events(peer))
234 zlog_debug(
235 " Peer %s pending, continuing read-only mode",
236 peer->host);
237 return;
238 }
239 }
240
241 zlog_info(
242 "Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d",
243 bgp->restarted_peers, bgp->implicit_eors,
244 bgp->explicit_eors);
245 bgp_update_delay_end(bgp);
246 }
247 }
248
249 /*
250 * Called if peer is known to have restarted. The restart-state bit in
251 * Graceful-Restart capability is used for that
252 */
253 void bgp_update_restarted_peers(struct peer *peer)
254 {
255 if (!bgp_update_delay_active(peer->bgp))
256 return; /* BGP update delay has ended */
257 if (peer->update_delay_over)
258 return; /* This peer has already been considered */
259
260 if (bgp_debug_neighbor_events(peer))
261 zlog_debug("Peer %s: Checking restarted", peer->host);
262
263 if (peer_established(peer)) {
264 peer->update_delay_over = 1;
265 peer->bgp->restarted_peers++;
266 bgp_check_update_delay(peer->bgp);
267 }
268 }
269
270 /*
271 * Called as peer receives a keep-alive. Determines if this occurence can be
272 * taken as an implicit EOR for this peer.
273 * NOTE: The very first keep-alive after the Established state of a peer is
274 * considered implicit EOR for the update-delay purposes
275 */
276 void bgp_update_implicit_eors(struct peer *peer)
277 {
278 if (!bgp_update_delay_active(peer->bgp))
279 return; /* BGP update delay has ended */
280 if (peer->update_delay_over)
281 return; /* This peer has already been considered */
282
283 if (bgp_debug_neighbor_events(peer))
284 zlog_debug("Peer %s: Checking implicit EORs", peer->host);
285
286 if (peer_established(peer)) {
287 peer->update_delay_over = 1;
288 peer->bgp->implicit_eors++;
289 bgp_check_update_delay(peer->bgp);
290 }
291 }
292
293 /*
294 * Should be called only when there is a change in the EOR_RECEIVED status
295 * for any afi/safi on a peer.
296 */
297 static void bgp_update_explicit_eors(struct peer *peer)
298 {
299 afi_t afi;
300 safi_t safi;
301
302 if (!bgp_update_delay_active(peer->bgp))
303 return; /* BGP update delay has ended */
304 if (peer->update_delay_over)
305 return; /* This peer has already been considered */
306
307 if (bgp_debug_neighbor_events(peer))
308 zlog_debug("Peer %s: Checking explicit EORs", peer->host);
309
310 FOREACH_AFI_SAFI (afi, safi) {
311 if (peer->afc_nego[afi][safi]
312 && !CHECK_FLAG(peer->af_sflags[afi][safi],
313 PEER_STATUS_EOR_RECEIVED)) {
314 if (bgp_debug_neighbor_events(peer))
315 zlog_debug(
316 " afi %d safi %d didn't receive EOR",
317 afi, safi);
318 return;
319 }
320 }
321
322 peer->update_delay_over = 1;
323 peer->bgp->explicit_eors++;
324 bgp_check_update_delay(peer->bgp);
325 }
326
327 /**
328 * Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers.
329 *
330 * mp_withdraw, if set, is used to nullify attr structure on most of the
331 * calling safi function and for evpn, passed as parameter
332 */
333 int bgp_nlri_parse(struct peer *peer, struct attr *attr,
334 struct bgp_nlri *packet, int mp_withdraw)
335 {
336 switch (packet->safi) {
337 case SAFI_UNICAST:
338 case SAFI_MULTICAST:
339 return bgp_nlri_parse_ip(peer, mp_withdraw ? NULL : attr,
340 packet);
341 case SAFI_LABELED_UNICAST:
342 return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr,
343 packet);
344 case SAFI_MPLS_VPN:
345 return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr,
346 packet);
347 case SAFI_EVPN:
348 return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw);
349 case SAFI_FLOWSPEC:
350 return bgp_nlri_parse_flowspec(peer, attr, packet, mp_withdraw);
351 }
352 return BGP_NLRI_PARSE_ERROR;
353 }
354
355
356 /*
357 * Check if route-refresh request from peer is pending (received before EoR),
358 * and process it now.
359 */
360 static void bgp_process_pending_refresh(struct peer *peer, afi_t afi,
361 safi_t safi)
362 {
363 if (CHECK_FLAG(peer->af_sflags[afi][safi],
364 PEER_STATUS_REFRESH_PENDING)) {
365 UNSET_FLAG(peer->af_sflags[afi][safi],
366 PEER_STATUS_REFRESH_PENDING);
367 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
368 BGP_ROUTE_REFRESH_BORR);
369 if (bgp_debug_neighbor_events(peer))
370 zlog_debug(
371 "%pBP sending route-refresh (BoRR) for %s/%s (for pending REQUEST)",
372 peer, afi2str(afi), safi2str(safi));
373
374 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_SEND);
375 UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_SEND);
376 bgp_announce_route(peer, afi, safi, true);
377 }
378 }
379
380 /*
381 * Checks a variety of conditions to determine whether the peer needs to be
382 * rescheduled for packet generation again, and does so if necessary.
383 *
384 * @param peer to check for rescheduling
385 */
386 static void bgp_write_proceed_actions(struct peer *peer)
387 {
388 afi_t afi;
389 safi_t safi;
390 struct peer_af *paf;
391 struct bpacket *next_pkt;
392 struct update_subgroup *subgrp;
393 enum bgp_af_index index;
394
395 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
396 paf = peer->peer_af_array[index];
397 if (!paf)
398 continue;
399
400 subgrp = paf->subgroup;
401 if (!subgrp)
402 continue;
403
404 next_pkt = paf->next_pkt_to_send;
405 if (next_pkt && next_pkt->buffer) {
406 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
407 bgp_generate_updgrp_packets, 0);
408 return;
409 }
410
411 /* No packets readily available for AFI/SAFI, are there
412 * subgroup packets
413 * that need to be generated? */
414 if (bpacket_queue_is_full(SUBGRP_INST(subgrp),
415 SUBGRP_PKTQ(subgrp))
416 || subgroup_packets_to_build(subgrp)) {
417 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
418 bgp_generate_updgrp_packets, 0);
419 return;
420 }
421
422 afi = paf->afi;
423 safi = paf->safi;
424
425 /* No packets to send, see if EOR is pending */
426 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)) {
427 if (!subgrp->t_coalesce && peer->afc_nego[afi][safi]
428 && peer->synctime
429 && !CHECK_FLAG(peer->af_sflags[afi][safi],
430 PEER_STATUS_EOR_SEND)
431 && safi != SAFI_MPLS_VPN) {
432 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
433 bgp_generate_updgrp_packets, 0);
434 return;
435 }
436 }
437 }
438 }
439
440 /*
441 * Generate advertisement information (withdraws, updates, EOR) from each
442 * update group a peer belongs to, encode this information into packets, and
443 * enqueue the packets onto the peer's output buffer.
444 */
445 void bgp_generate_updgrp_packets(struct event *thread)
446 {
447 struct peer *peer = EVENT_ARG(thread);
448
449 struct stream *s;
450 struct peer_af *paf;
451 struct bpacket *next_pkt;
452 uint32_t wpq;
453 uint32_t generated = 0;
454 afi_t afi;
455 safi_t safi;
456
457 wpq = atomic_load_explicit(&peer->bgp->wpkt_quanta,
458 memory_order_relaxed);
459
460 /*
461 * The code beyond this part deals with update packets, proceed only
462 * if peer is Established and updates are not on hold (as part of
463 * update-delay processing).
464 */
465 if (!peer_established(peer))
466 return;
467
468 if ((peer->bgp->main_peers_update_hold)
469 || bgp_update_delay_active(peer->bgp))
470 return;
471
472 if (peer->t_routeadv)
473 return;
474
475 /*
476 * Since the following is a do while loop
477 * let's stop adding to the outq if we are
478 * already at the limit.
479 */
480 if (peer->obuf->count >= bm->outq_limit) {
481 bgp_write_proceed_actions(peer);
482 return;
483 }
484
485 do {
486 enum bgp_af_index index;
487
488 s = NULL;
489 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
490 paf = peer->peer_af_array[index];
491 if (!paf || !PAF_SUBGRP(paf))
492 continue;
493
494 afi = paf->afi;
495 safi = paf->safi;
496 next_pkt = paf->next_pkt_to_send;
497
498 /*
499 * Try to generate a packet for the peer if we are at
500 * the end of the list. Always try to push out
501 * WITHDRAWs first.
502 */
503 if (!next_pkt || !next_pkt->buffer) {
504 next_pkt = subgroup_withdraw_packet(
505 PAF_SUBGRP(paf));
506 if (!next_pkt || !next_pkt->buffer)
507 subgroup_update_packet(PAF_SUBGRP(paf));
508 next_pkt = paf->next_pkt_to_send;
509 }
510
511 /*
512 * If we still don't have a packet to send to the peer,
513 * then try to find out out if we have to send eor or
514 * if not, skip to the next AFI, SAFI. Don't send the
515 * EOR prematurely; if the subgroup's coalesce timer is
516 * running, the adjacency-out structure is not created
517 * yet.
518 */
519 if (!next_pkt || !next_pkt->buffer) {
520 if (!paf->t_announce_route) {
521 /* Make sure we supress BGP UPDATES
522 * for normal processing later again.
523 */
524 UNSET_FLAG(paf->subgroup->sflags,
525 SUBGRP_STATUS_FORCE_UPDATES);
526
527 /* If route-refresh BoRR message was
528 * already sent and we are done with
529 * re-announcing tables for a decent
530 * afi/safi, we ready to send
531 * EoRR request.
532 */
533 if (CHECK_FLAG(
534 peer->af_sflags[afi][safi],
535 PEER_STATUS_BORR_SEND)) {
536 bgp_route_refresh_send(
537 peer, afi, safi, 0, 0,
538 0,
539 BGP_ROUTE_REFRESH_EORR);
540
541 SET_FLAG(peer->af_sflags[afi]
542 [safi],
543 PEER_STATUS_EORR_SEND);
544 UNSET_FLAG(
545 peer->af_sflags[afi]
546 [safi],
547 PEER_STATUS_BORR_SEND);
548
549 if (bgp_debug_neighbor_events(
550 peer))
551 zlog_debug(
552 "%pBP sending route-refresh (EoRR) for %s/%s",
553 peer,
554 afi2str(afi),
555 safi2str(safi));
556 }
557 }
558
559 if (CHECK_FLAG(peer->cap,
560 PEER_CAP_RESTART_RCV)) {
561 if (!(PAF_SUBGRP(paf))->t_coalesce
562 && peer->afc_nego[afi][safi]
563 && peer->synctime
564 && !CHECK_FLAG(
565 peer->af_sflags[afi][safi],
566 PEER_STATUS_EOR_SEND)) {
567 /* If EOR is disabled,
568 * the message is not sent
569 */
570 if (BGP_SEND_EOR(peer->bgp, afi,
571 safi)) {
572 SET_FLAG(
573 peer->af_sflags
574 [afi]
575 [safi],
576 PEER_STATUS_EOR_SEND);
577
578 /* Update EOR
579 * send time
580 */
581 peer->eor_stime[afi]
582 [safi] =
583 monotime(NULL);
584
585 BGP_UPDATE_EOR_PKT(
586 peer, afi, safi,
587 s);
588 bgp_process_pending_refresh(
589 peer, afi,
590 safi);
591 }
592 }
593 }
594 continue;
595 }
596
597 /* Update packet send time */
598 peer->pkt_stime[afi][safi] = monotime(NULL);
599
600 /* Found a packet template to send, overwrite
601 * packet with appropriate attributes from peer
602 * and advance peer */
603 s = bpacket_reformat_for_peer(next_pkt, paf);
604 bgp_packet_add(peer, s);
605 bpacket_queue_advance_peer(paf);
606 }
607 } while (s && (++generated < wpq) &&
608 (peer->obuf->count <= bm->outq_limit));
609
610 if (generated)
611 bgp_writes_on(peer);
612
613 bgp_write_proceed_actions(peer);
614 }
615
616 /*
617 * Creates a BGP Keepalive packet and appends it to the peer's output queue.
618 */
619 void bgp_keepalive_send(struct peer *peer)
620 {
621 struct stream *s;
622
623 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
624
625 /* Make keepalive packet. */
626 bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
627
628 /* Set packet size. */
629 bgp_packet_set_size(s);
630
631 /* Dump packet if debug option is set. */
632 /* bgp_packet_dump (s); */
633
634 if (bgp_debug_keepalive(peer))
635 zlog_debug("%s sending KEEPALIVE", peer->host);
636
637 /* Add packet to the peer. */
638 bgp_packet_add(peer, s);
639
640 bgp_writes_on(peer);
641 }
642
643 /*
644 * Creates a BGP Open packet and appends it to the peer's output queue.
645 * Sets capabilities as necessary.
646 */
647 void bgp_open_send(struct peer *peer)
648 {
649 struct stream *s;
650 uint16_t send_holdtime;
651 as_t local_as;
652
653 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
654 send_holdtime = peer->holdtime;
655 else
656 send_holdtime = peer->bgp->default_holdtime;
657
658 /* local-as Change */
659 if (peer->change_local_as)
660 local_as = peer->change_local_as;
661 else
662 local_as = peer->local_as;
663
664 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
665
666 /* Make open packet. */
667 bgp_packet_set_marker(s, BGP_MSG_OPEN);
668
669 /* Set open packet values. */
670 stream_putc(s, BGP_VERSION_4); /* BGP version */
671 stream_putw(s, (local_as <= BGP_AS_MAX) ? (uint16_t)local_as
672 : BGP_AS_TRANS);
673 stream_putw(s, send_holdtime); /* Hold Time */
674 stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */
675
676 /* Set capabilities */
677 if (CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
678 (void)bgp_open_capability(s, peer, true);
679 } else {
680 struct stream *tmp = stream_new(STREAM_SIZE(s));
681
682 stream_copy(tmp, s);
683 if (bgp_open_capability(tmp, peer, false)
684 > BGP_OPEN_NON_EXT_OPT_LEN) {
685 stream_free(tmp);
686 (void)bgp_open_capability(s, peer, true);
687 } else {
688 stream_copy(s, tmp);
689 stream_free(tmp);
690 }
691 }
692
693 /* Set BGP packet length. */
694 bgp_packet_set_size(s);
695
696 if (bgp_debug_neighbor_events(peer))
697 zlog_debug(
698 "%s sending OPEN, version %d, my as %u, holdtime %d, id %pI4",
699 peer->host, BGP_VERSION_4, local_as, send_holdtime,
700 &peer->local_id);
701
702 /* Dump packet if debug option is set. */
703 /* bgp_packet_dump (s); */
704 hook_call(bgp_packet_send, peer, BGP_MSG_OPEN, stream_get_endp(s), s);
705
706 /* Add packet to the peer. */
707 bgp_packet_add(peer, s);
708
709 bgp_writes_on(peer);
710 }
711
712 /*
713 * Writes NOTIFICATION message directly to a peer socket without waiting for
714 * the I/O thread.
715 *
716 * There must be exactly one stream on the peer->obuf FIFO, and the data within
717 * this stream must match the format of a BGP NOTIFICATION message.
718 * Transmission is best-effort.
719 *
720 * @requires peer->io_mtx
721 * @param peer
722 * @return 0
723 */
724 static void bgp_write_notify(struct peer *peer)
725 {
726 int ret, val;
727 uint8_t type;
728 struct stream *s;
729
730 /* There should be at least one packet. */
731 s = stream_fifo_pop(peer->obuf);
732
733 if (!s)
734 return;
735
736 assert(stream_get_endp(s) >= BGP_HEADER_SIZE);
737
738 /*
739 * socket is in nonblocking mode, if we can't deliver the NOTIFY, well,
740 * we only care about getting a clean shutdown at this point.
741 */
742 ret = write(peer->fd, STREAM_DATA(s), stream_get_endp(s));
743
744 /*
745 * only connection reset/close gets counted as TCP_fatal_error, failure
746 * to write the entire NOTIFY doesn't get different FSM treatment
747 */
748 if (ret <= 0) {
749 stream_free(s);
750 BGP_EVENT_ADD(peer, TCP_fatal_error);
751 return;
752 }
753
754 /* Disable Nagle, make NOTIFY packet go out right away */
755 val = 1;
756 (void)setsockopt(peer->fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val,
757 sizeof(val));
758
759 /* Retrieve BGP packet type. */
760 stream_set_getp(s, BGP_MARKER_SIZE + 2);
761 type = stream_getc(s);
762
763 assert(type == BGP_MSG_NOTIFY);
764
765 /* Type should be notify. */
766 atomic_fetch_add_explicit(&peer->notify_out, 1, memory_order_relaxed);
767
768 /* Double start timer. */
769 peer->v_start *= 2;
770
771 /* Overflow check. */
772 if (peer->v_start >= (60 * 2))
773 peer->v_start = (60 * 2);
774
775 /*
776 * Handle Graceful Restart case where the state changes to
777 * Connect instead of Idle
778 */
779 BGP_EVENT_ADD(peer, BGP_Stop);
780
781 stream_free(s);
782 }
783
784 /*
785 * Encapsulate an original BGP CEASE Notification into Hard Reset
786 */
787 static uint8_t *bgp_notify_encapsulate_hard_reset(uint8_t code, uint8_t subcode,
788 uint8_t *data, size_t datalen)
789 {
790 uint8_t *message = XCALLOC(MTYPE_BGP_NOTIFICATION, datalen + 2);
791
792 /* ErrCode */
793 message[0] = code;
794 /* Subcode */
795 message[1] = subcode;
796 /* Data */
797 if (datalen)
798 memcpy(message + 2, data, datalen);
799
800 return message;
801 }
802
803 /*
804 * Decapsulate an original BGP CEASE Notification from Hard Reset
805 */
806 struct bgp_notify bgp_notify_decapsulate_hard_reset(struct bgp_notify *notify)
807 {
808 struct bgp_notify bn = {};
809
810 bn.code = notify->raw_data[0];
811 bn.subcode = notify->raw_data[1];
812 bn.length = notify->length - 2;
813
814 bn.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, bn.length);
815 memcpy(bn.raw_data, notify->raw_data + 2, bn.length);
816
817 return bn;
818 }
819
820 /* Check if Graceful-Restart N-bit is exchanged */
821 bool bgp_has_graceful_restart_notification(struct peer *peer)
822 {
823 return CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV) &&
824 CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV);
825 }
826
827 /*
828 * Check if to send BGP CEASE Notification/Hard Reset?
829 */
830 bool bgp_notify_send_hard_reset(struct peer *peer, uint8_t code,
831 uint8_t subcode)
832 {
833 /* When the "N" bit has been exchanged, a Hard Reset message is used to
834 * indicate to the peer that the session is to be fully terminated.
835 */
836 if (!bgp_has_graceful_restart_notification(peer))
837 return false;
838
839 /*
840 * https://datatracker.ietf.org/doc/html/rfc8538#section-5.1
841 */
842 if (code == BGP_NOTIFY_CEASE) {
843 switch (subcode) {
844 case BGP_NOTIFY_CEASE_MAX_PREFIX:
845 case BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN:
846 case BGP_NOTIFY_CEASE_PEER_UNCONFIG:
847 case BGP_NOTIFY_CEASE_HARD_RESET:
848 case BGP_NOTIFY_CEASE_BFD_DOWN:
849 return true;
850 case BGP_NOTIFY_CEASE_ADMIN_RESET:
851 /* Provide user control:
852 * `bgp hard-adminstrative-reset`
853 */
854 if (CHECK_FLAG(peer->bgp->flags,
855 BGP_FLAG_HARD_ADMIN_RESET))
856 return true;
857 else
858 return false;
859 default:
860 break;
861 }
862 }
863
864 return false;
865 }
866
867 /*
868 * Check if received BGP CEASE Notification/Hard Reset?
869 */
870 bool bgp_notify_received_hard_reset(struct peer *peer, uint8_t code,
871 uint8_t subcode)
872 {
873 /* When the "N" bit has been exchanged, a Hard Reset message is used to
874 * indicate to the peer that the session is to be fully terminated.
875 */
876 if (!bgp_has_graceful_restart_notification(peer))
877 return false;
878
879 if (code == BGP_NOTIFY_CEASE && subcode == BGP_NOTIFY_CEASE_HARD_RESET)
880 return true;
881
882 return false;
883 }
884
885 /*
886 * Creates a BGP Notify and appends it to the peer's output queue.
887 *
888 * This function attempts to write the packet from the thread it is called
889 * from, to ensure the packet gets out ASAP.
890 *
891 * This function may be called from multiple threads. Since the function
892 * modifies I/O buffer(s) in the peer, these are locked for the duration of the
893 * call to prevent tampering from other threads.
894 *
895 * Delivery of the NOTIFICATION is attempted once and is best-effort. After
896 * return, the peer structure *must* be reset; no assumptions about session
897 * state are valid.
898 *
899 * @param peer
900 * @param code BGP error code
901 * @param sub_code BGP error subcode
902 * @param data Data portion
903 * @param datalen length of data portion
904 */
905 static void bgp_notify_send_internal(struct peer *peer, uint8_t code,
906 uint8_t sub_code, uint8_t *data,
907 size_t datalen, bool use_curr)
908 {
909 struct stream *s;
910 bool hard_reset = bgp_notify_send_hard_reset(peer, code, sub_code);
911
912 /* Lock I/O mutex to prevent other threads from pushing packets */
913 frr_mutex_lock_autounlock(&peer->io_mtx);
914 /* ============================================== */
915
916 /* Allocate new stream. */
917 s = stream_new(peer->max_packet_size);
918
919 /* Make notify packet. */
920 bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
921
922 /* Check if we should send Hard Reset Notification or not */
923 if (hard_reset) {
924 uint8_t *hard_reset_message = bgp_notify_encapsulate_hard_reset(
925 code, sub_code, data, datalen);
926
927 /* Hard Reset encapsulates another NOTIFICATION message
928 * in its data portion.
929 */
930 stream_putc(s, BGP_NOTIFY_CEASE);
931 stream_putc(s, BGP_NOTIFY_CEASE_HARD_RESET);
932 stream_write(s, hard_reset_message, datalen + 2);
933
934 XFREE(MTYPE_BGP_NOTIFICATION, hard_reset_message);
935 } else {
936 stream_putc(s, code);
937 stream_putc(s, sub_code);
938 if (data)
939 stream_write(s, data, datalen);
940 }
941
942 /* Set BGP packet length. */
943 bgp_packet_set_size(s);
944
945 /* wipe output buffer */
946 stream_fifo_clean(peer->obuf);
947
948 /*
949 * If possible, store last packet for debugging purposes. This check is
950 * in place because we are sometimes called with a doppelganger peer,
951 * who tends to have a plethora of fields nulled out.
952 *
953 * Some callers should not attempt this - the io pthread for example
954 * should not touch internals of the peer struct.
955 */
956 if (use_curr && peer->curr) {
957 size_t packetsize = stream_get_endp(peer->curr);
958 assert(packetsize <= peer->max_packet_size);
959 memcpy(peer->last_reset_cause, peer->curr->data, packetsize);
960 peer->last_reset_cause_size = packetsize;
961 }
962
963 /* For debug */
964 {
965 struct bgp_notify bgp_notify;
966 int first = 0;
967 int i;
968 char c[4];
969
970 bgp_notify.code = code;
971 bgp_notify.subcode = sub_code;
972 bgp_notify.data = NULL;
973 bgp_notify.length = datalen;
974 bgp_notify.raw_data = data;
975
976 peer->notify.code = bgp_notify.code;
977 peer->notify.subcode = bgp_notify.subcode;
978 peer->notify.length = bgp_notify.length;
979
980 if (bgp_notify.length && data) {
981 bgp_notify.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
982 bgp_notify.length * 3);
983 for (i = 0; i < bgp_notify.length; i++)
984 if (first) {
985 snprintf(c, sizeof(c), " %02x",
986 data[i]);
987
988 strlcat(bgp_notify.data, c,
989 bgp_notify.length);
990
991 } else {
992 first = 1;
993 snprintf(c, sizeof(c), "%02x", data[i]);
994
995 strlcpy(bgp_notify.data, c,
996 bgp_notify.length);
997 }
998 }
999 bgp_notify_print(peer, &bgp_notify, "sending", hard_reset);
1000
1001 if (bgp_notify.data) {
1002 if (data) {
1003 XFREE(MTYPE_BGP_NOTIFICATION,
1004 peer->notify.data);
1005 peer->notify.data = XCALLOC(
1006 MTYPE_BGP_NOTIFICATION, datalen);
1007 memcpy(peer->notify.data, data, datalen);
1008 }
1009
1010 XFREE(MTYPE_BGP_NOTIFICATION, bgp_notify.data);
1011 bgp_notify.length = 0;
1012 }
1013 }
1014
1015 /* peer reset cause */
1016 if (code == BGP_NOTIFY_CEASE) {
1017 if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
1018 peer->last_reset = PEER_DOWN_USER_RESET;
1019 else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN) {
1020 if (CHECK_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN))
1021 peer->last_reset = PEER_DOWN_RTT_SHUTDOWN;
1022 else
1023 peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
1024 } else
1025 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1026 } else
1027 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1028
1029 /* Add packet to peer's output queue */
1030 stream_fifo_push(peer->obuf, s);
1031
1032 bgp_peer_gr_flags_update(peer);
1033 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
1034 peer->bgp->peer);
1035
1036 bgp_write_notify(peer);
1037 }
1038
/*
 * Creates a BGP Notify without a data portion and appends it to the peer's
 * output queue.
 *
 * The packet is written from the calling thread so that it gets out ASAP;
 * see bgp_notify_send_internal() for details.
 *
 * @param peer
 * @param code BGP error code
 * @param sub_code BGP error subcode
 */
void bgp_notify_send(struct peer *peer, uint8_t code, uint8_t sub_code)
{
	/* No data; use_curr is true, so peer->curr may be recorded. */
	bgp_notify_send_internal(peer, code, sub_code, NULL, 0, true);
}
1053
/*
 * Enqueue a notification that carries a data portion. Called from the main
 * pthread, where access to the peer object is ok - hence use_curr is true.
 */
void bgp_notify_send_with_data(struct peer *peer, uint8_t code,
			       uint8_t sub_code, uint8_t *data, size_t datalen)
{
	bgp_notify_send_internal(peer, code, sub_code, data, datalen, true);
}
1062
/*
 * Variant for the io pthread: queues a notification while avoiding access
 * to the peer object, which is why use_curr is false (peer->curr is not
 * copied into last_reset_cause).
 */
void bgp_notify_io_invalid(struct peer *peer, uint8_t code, uint8_t sub_code,
			   uint8_t *data, size_t datalen)
{
	bgp_notify_send_internal(peer, code, sub_code, data, datalen, false);
}
1073
1074 /*
1075 * Creates BGP Route Refresh packet and appends it to the peer's output queue.
1076 *
1077 * @param peer
1078 * @param afi Address Family Identifier
1079 * @param safi Subsequent Address Family Identifier
1080 * @param orf_type Outbound Route Filtering type
1081 * @param when_to_refresh Whether to refresh immediately or defer
1082 * @param remove Whether to remove ORF for specified AFI/SAFI
1083 */
1084 void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
1085 uint8_t orf_type, uint8_t when_to_refresh,
1086 int remove, uint8_t subtype)
1087 {
1088 struct stream *s;
1089 struct bgp_filter *filter;
1090 int orf_refresh = 0;
1091 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1092 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
1093
1094 if (DISABLE_BGP_ANNOUNCE)
1095 return;
1096
1097 filter = &peer->filter[afi][safi];
1098
1099 /* Convert AFI, SAFI to values for packet. */
1100 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1101
1102 s = stream_new(peer->max_packet_size);
1103
1104 /* Make BGP update packet. */
1105 if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
1106 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_NEW);
1107 else
1108 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_OLD);
1109
1110 /* Encode Route Refresh message. */
1111 stream_putw(s, pkt_afi);
1112 if (subtype)
1113 stream_putc(s, subtype);
1114 else
1115 stream_putc(s, 0);
1116 stream_putc(s, pkt_safi);
1117
1118 if (orf_type == ORF_TYPE_PREFIX || orf_type == ORF_TYPE_PREFIX_OLD)
1119 if (remove || filter->plist[FILTER_IN].plist) {
1120 uint16_t orf_len;
1121 unsigned long orfp;
1122
1123 orf_refresh = 1;
1124 stream_putc(s, when_to_refresh);
1125 stream_putc(s, orf_type);
1126 orfp = stream_get_endp(s);
1127 stream_putw(s, 0);
1128
1129 if (remove) {
1130 UNSET_FLAG(peer->af_sflags[afi][safi],
1131 PEER_STATUS_ORF_PREFIX_SEND);
1132 stream_putc(s, ORF_COMMON_PART_REMOVE_ALL);
1133 if (bgp_debug_neighbor_events(peer))
1134 zlog_debug(
1135 "%pBP sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %s/%s",
1136 peer, orf_type,
1137 (when_to_refresh ==
1138 REFRESH_DEFER
1139 ? "defer"
1140 : "immediate"),
1141 iana_afi2str(pkt_afi),
1142 iana_safi2str(pkt_safi));
1143 } else {
1144 SET_FLAG(peer->af_sflags[afi][safi],
1145 PEER_STATUS_ORF_PREFIX_SEND);
1146 prefix_bgp_orf_entry(
1147 s, filter->plist[FILTER_IN].plist,
1148 ORF_COMMON_PART_ADD,
1149 ORF_COMMON_PART_PERMIT,
1150 ORF_COMMON_PART_DENY);
1151 if (bgp_debug_neighbor_events(peer))
1152 zlog_debug(
1153 "%pBP sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %s/%s",
1154 peer, orf_type,
1155 (when_to_refresh ==
1156 REFRESH_DEFER
1157 ? "defer"
1158 : "immediate"),
1159 iana_afi2str(pkt_afi),
1160 iana_safi2str(pkt_safi));
1161 }
1162
1163 /* Total ORF Entry Len. */
1164 orf_len = stream_get_endp(s) - orfp - 2;
1165 stream_putw_at(s, orfp, orf_len);
1166 }
1167
1168 /* Set packet size. */
1169 bgp_packet_set_size(s);
1170
1171 if (bgp_debug_neighbor_events(peer)) {
1172 if (!orf_refresh)
1173 zlog_debug(
1174 "%pBP sending REFRESH_REQ for afi/safi: %s/%s",
1175 peer, iana_afi2str(pkt_afi),
1176 iana_safi2str(pkt_safi));
1177 }
1178
1179 /* Add packet to the peer. */
1180 bgp_packet_add(peer, s);
1181
1182 bgp_writes_on(peer);
1183 }
1184
1185 /*
1186 * Create a BGP Capability packet and append it to the peer's output queue.
1187 *
1188 * @param peer
1189 * @param afi Address Family Identifier
1190 * @param safi Subsequent Address Family Identifier
1191 * @param capability_code BGP Capability Code
1192 * @param action Set or Remove capability
1193 */
1194 void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
1195 int capability_code, int action)
1196 {
1197 struct stream *s;
1198 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1199 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
1200
1201 /* Convert AFI, SAFI to values for packet. */
1202 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1203
1204 s = stream_new(peer->max_packet_size);
1205
1206 /* Make BGP update packet. */
1207 bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);
1208
1209 /* Encode MP_EXT capability. */
1210 if (capability_code == CAPABILITY_CODE_MP) {
1211 stream_putc(s, action);
1212 stream_putc(s, CAPABILITY_CODE_MP);
1213 stream_putc(s, CAPABILITY_CODE_MP_LEN);
1214 stream_putw(s, pkt_afi);
1215 stream_putc(s, 0);
1216 stream_putc(s, pkt_safi);
1217
1218 if (bgp_debug_neighbor_events(peer))
1219 zlog_debug(
1220 "%pBP sending CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
1221 peer,
1222 action == CAPABILITY_ACTION_SET ? "Advertising"
1223 : "Removing",
1224 iana_afi2str(pkt_afi), iana_safi2str(pkt_safi));
1225 }
1226
1227 /* Set packet size. */
1228 bgp_packet_set_size(s);
1229
1230 /* Add packet to the peer. */
1231 bgp_packet_add(peer, s);
1232
1233 bgp_writes_on(peer);
1234 }
1235
1236 /* RFC1771 6.8 Connection collision detection. */
1237 static int bgp_collision_detect(struct peer *new, struct in_addr remote_id)
1238 {
1239 struct peer *peer;
1240
1241 /*
1242 * Upon receipt of an OPEN message, the local system must examine
1243 * all of its connections that are in the OpenConfirm state. A BGP
1244 * speaker may also examine connections in an OpenSent state if it
1245 * knows the BGP Identifier of the peer by means outside of the
1246 * protocol. If among these connections there is a connection to a
1247 * remote BGP speaker whose BGP Identifier equals the one in the
1248 * OPEN message, then the local system performs the following
1249 * collision resolution procedure:
1250 */
1251 peer = new->doppelganger;
1252 if (peer == NULL)
1253 return 0;
1254
1255 /*
1256 * Do not accept the new connection in Established or Clearing
1257 * states. Note that a peer GR is handled by closing the existing
1258 * connection upon receipt of new one.
1259 */
1260 if (peer_established(peer) || peer->status == Clearing) {
1261 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1262 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1263 return -1;
1264 }
1265
1266 if ((peer->status != OpenConfirm) && (peer->status != OpenSent))
1267 return 0;
1268
1269 /*
1270 * 1. The BGP Identifier of the local system is
1271 * compared to the BGP Identifier of the remote
1272 * system (as specified in the OPEN message).
1273 *
1274 * If the BGP Identifiers of the peers
1275 * involved in the connection collision
1276 * are identical, then the connection
1277 * initiated by the BGP speaker with the
1278 * larger AS number is preserved.
1279 */
1280 if (ntohl(peer->local_id.s_addr) < ntohl(remote_id.s_addr)
1281 || (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1282 && peer->local_as < peer->as))
1283 if (!CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1284 /*
1285 * 2. If the value of the local BGP
1286 * Identifier is less than the remote one,
1287 * the local system closes BGP connection
1288 * that already exists (the one that is
1289 * already in the OpenConfirm state),
1290 * and accepts BGP connection initiated by
1291 * the remote system.
1292 */
1293 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1294 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1295 return 1;
1296 } else {
1297 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1298 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1299 return -1;
1300 }
1301 else {
1302 if (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1303 && peer->local_as == peer->as)
1304 flog_err(EC_BGP_ROUTER_ID_SAME,
1305 "Peer's router-id %pI4 is the same as ours",
1306 &remote_id);
1307
1308 /*
1309 * 3. Otherwise, the local system closes newly
1310 * created BGP connection (the one associated with the
1311 * newly received OPEN message), and continues to use
1312 * the existing one (the one that is already in the
1313 * OpenConfirm state).
1314 */
1315 if (CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1316 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1317 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1318 return 1;
1319 } else {
1320 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1321 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1322 return -1;
1323 }
1324 }
1325 }
1326
1327 /* Packet processing routines ---------------------------------------------- */
1328 /*
1329 * This is a family of functions designed to be called from
1330 * bgp_process_packet(). These functions all share similar behavior and should
1331 * adhere to the following invariants and restrictions:
1332 *
1333 * Return codes
1334 * ------------
1335 * The return code of any one of those functions should be one of the FSM event
1336 * codes specified in bgpd.h. If a NOTIFY was sent, this event code MUST be
1337 * BGP_Stop. Otherwise, the code SHOULD correspond to the function's expected
1338 * packet type. For example, bgp_open_receive() should return BGP_Stop upon
1339 * error and Receive_OPEN_message otherwise.
1340 *
1341 * If no action is necessary, the correct return code is BGP_PACKET_NOOP as
1342 * defined below.
1343 *
1344 * Side effects
1345 * ------------
1346 * - May send NOTIFY messages
1347 * - May not modify peer->status
1348 * - May not call bgp_event_update()
1349 */
1350
1351 #define BGP_PACKET_NOOP 0
1352
1353 /**
1354 * Process BGP OPEN message for peer.
1355 *
1356 * If any errors are encountered in the OPEN message, immediately sends NOTIFY
1357 * and returns BGP_Stop.
1358 *
1359 * @param peer
1360 * @param size size of the packet
1361 * @return as in summary
1362 */
1363 static int bgp_open_receive(struct peer *peer, bgp_size_t size)
1364 {
1365 int ret;
1366 uint8_t version;
1367 uint16_t optlen;
1368 uint16_t holdtime;
1369 uint16_t send_holdtime;
1370 as_t remote_as;
1371 as_t as4 = 0, as4_be;
1372 struct in_addr remote_id;
1373 int mp_capability;
1374 uint8_t notify_data_remote_as[2];
1375 uint8_t notify_data_remote_as4[4];
1376 uint8_t notify_data_remote_id[4];
1377 uint16_t *holdtime_ptr;
1378
1379 /* Parse open packet. */
1380 version = stream_getc(peer->curr);
1381 memcpy(notify_data_remote_as, stream_pnt(peer->curr), 2);
1382 remote_as = stream_getw(peer->curr);
1383 holdtime_ptr = (uint16_t *)stream_pnt(peer->curr);
1384 holdtime = stream_getw(peer->curr);
1385 memcpy(notify_data_remote_id, stream_pnt(peer->curr), 4);
1386 remote_id.s_addr = stream_get_ipv4(peer->curr);
1387
1388 /* BEGIN to read the capability here, but dont do it yet */
1389 mp_capability = 0;
1390 optlen = stream_getc(peer->curr);
1391
1392 /* Extended Optional Parameters Length for BGP OPEN Message */
1393 if (optlen == BGP_OPEN_NON_EXT_OPT_LEN
1394 || CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
1395 uint8_t opttype;
1396
1397 if (STREAM_READABLE(peer->curr) < 1) {
1398 flog_err(
1399 EC_BGP_PKT_OPEN,
1400 "%s: stream does not have enough bytes for extended optional parameters",
1401 peer->host);
1402 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1403 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1404 return BGP_Stop;
1405 }
1406
1407 opttype = stream_getc(peer->curr);
1408 if (opttype == BGP_OPEN_NON_EXT_OPT_TYPE_EXTENDED_LENGTH) {
1409 if (STREAM_READABLE(peer->curr) < 2) {
1410 flog_err(
1411 EC_BGP_PKT_OPEN,
1412 "%s: stream does not have enough bytes to read the extended optional parameters optlen",
1413 peer->host);
1414 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1415 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1416 return BGP_Stop;
1417 }
1418 optlen = stream_getw(peer->curr);
1419 SET_FLAG(peer->sflags,
1420 PEER_STATUS_EXT_OPT_PARAMS_LENGTH);
1421 }
1422 }
1423
1424 /* Receive OPEN message log */
1425 if (bgp_debug_neighbor_events(peer))
1426 zlog_debug(
1427 "%s rcv OPEN%s, version %d, remote-as (in open) %u, holdtime %d, id %pI4",
1428 peer->host,
1429 CHECK_FLAG(peer->sflags,
1430 PEER_STATUS_EXT_OPT_PARAMS_LENGTH)
1431 ? " (Extended)"
1432 : "",
1433 version, remote_as, holdtime, &remote_id);
1434
1435 if (optlen != 0) {
1436 /* If not enough bytes, it is an error. */
1437 if (STREAM_READABLE(peer->curr) < optlen) {
1438 flog_err(EC_BGP_PKT_OPEN,
1439 "%s: stream has not enough bytes (%u)",
1440 peer->host, optlen);
1441 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1442 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1443 return BGP_Stop;
1444 }
1445
1446 /* We need the as4 capability value *right now* because
1447 * if it is there, we have not got the remote_as yet, and
1448 * without
1449 * that we do not know which peer is connecting to us now.
1450 */
1451 as4 = peek_for_as4_capability(peer, optlen);
1452 }
1453
1454 as4_be = htonl(as4);
1455 memcpy(notify_data_remote_as4, &as4_be, 4);
1456
1457 /* Just in case we have a silly peer who sends AS4 capability set to 0
1458 */
1459 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) {
1460 flog_err(EC_BGP_PKT_OPEN,
1461 "%s bad OPEN, got AS4 capability, but AS4 set to 0",
1462 peer->host);
1463 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1464 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1465 notify_data_remote_as4, 4);
1466 return BGP_Stop;
1467 }
1468
1469 /* Codification of AS 0 Processing */
1470 if (remote_as == BGP_AS_ZERO) {
1471 flog_err(EC_BGP_PKT_OPEN, "%s bad OPEN, got AS set to 0",
1472 peer->host);
1473 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1474 BGP_NOTIFY_OPEN_BAD_PEER_AS);
1475 return BGP_Stop;
1476 }
1477
1478 if (remote_as == BGP_AS_TRANS) {
1479 /* Take the AS4 from the capability. We must have received the
1480 * capability now! Otherwise we have a asn16 peer who uses
1481 * BGP_AS_TRANS, for some unknown reason.
1482 */
1483 if (as4 == BGP_AS_TRANS) {
1484 flog_err(
1485 EC_BGP_PKT_OPEN,
1486 "%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed",
1487 peer->host);
1488 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1489 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1490 notify_data_remote_as4, 4);
1491 return BGP_Stop;
1492 }
1493
1494 if (!as4 && BGP_DEBUG(as4, AS4))
1495 zlog_debug(
1496 "%s [AS4] OPEN remote_as is AS_TRANS, but no AS4. Odd, but proceeding.",
1497 peer->host);
1498 else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4))
1499 zlog_debug(
1500 "%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits in 2-bytes, very odd peer.",
1501 peer->host, as4);
1502 if (as4)
1503 remote_as = as4;
1504 } else {
1505 /* We may have a partner with AS4 who has an asno < BGP_AS_MAX
1506 */
1507 /* If we have got the capability, peer->as4cap must match
1508 * remote_as */
1509 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV)
1510 && as4 != remote_as) {
1511 /* raise error, log this, close session */
1512 flog_err(
1513 EC_BGP_PKT_OPEN,
1514 "%s bad OPEN, got AS4 capability, but remote_as %u mismatch with 16bit 'myasn' %u in open",
1515 peer->host, as4, remote_as);
1516 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1517 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1518 notify_data_remote_as4, 4);
1519 return BGP_Stop;
1520 }
1521 }
1522
1523 /* rfc6286:
1524 * If the BGP Identifier field of the OPEN message
1525 * is zero, or if it is the same as the BGP Identifier
1526 * of the local BGP speaker and the message is from an
1527 * internal peer, then the Error Subcode is set to
1528 * "Bad BGP Identifier".
1529 */
1530 if (remote_id.s_addr == INADDR_ANY
1531 || (peer->sort == BGP_PEER_IBGP
1532 && ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr))) {
1533 if (bgp_debug_neighbor_events(peer))
1534 zlog_debug("%s bad OPEN, wrong router identifier %pI4",
1535 peer->host, &remote_id);
1536 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1537 BGP_NOTIFY_OPEN_BAD_BGP_IDENT,
1538 notify_data_remote_id, 4);
1539 return BGP_Stop;
1540 }
1541
1542 /* Peer BGP version check. */
1543 if (version != BGP_VERSION_4) {
1544 uint16_t maxver = htons(BGP_VERSION_4);
1545 /* XXX this reply may not be correct if version < 4 XXX */
1546 if (bgp_debug_neighbor_events(peer))
1547 zlog_debug(
1548 "%s bad protocol version, remote requested %d, local request %d",
1549 peer->host, version, BGP_VERSION_4);
1550 /* Data must be in network byte order here */
1551 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1552 BGP_NOTIFY_OPEN_UNSUP_VERSION,
1553 (uint8_t *)&maxver, 2);
1554 return BGP_Stop;
1555 }
1556
1557 /* Check neighbor as number. */
1558 if (peer->as_type == AS_UNSPECIFIED) {
1559 if (bgp_debug_neighbor_events(peer))
1560 zlog_debug(
1561 "%s bad OPEN, remote AS is unspecified currently",
1562 peer->host);
1563 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1564 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1565 notify_data_remote_as, 2);
1566 return BGP_Stop;
1567 } else if (peer->as_type == AS_INTERNAL) {
1568 if (remote_as != peer->bgp->as) {
1569 if (bgp_debug_neighbor_events(peer))
1570 zlog_debug(
1571 "%s bad OPEN, remote AS is %u, internal specified",
1572 peer->host, remote_as);
1573 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1574 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1575 notify_data_remote_as, 2);
1576 return BGP_Stop;
1577 }
1578 peer->as = peer->local_as;
1579 } else if (peer->as_type == AS_EXTERNAL) {
1580 if (remote_as == peer->bgp->as) {
1581 if (bgp_debug_neighbor_events(peer))
1582 zlog_debug(
1583 "%s bad OPEN, remote AS is %u, external specified",
1584 peer->host, remote_as);
1585 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1586 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1587 notify_data_remote_as, 2);
1588 return BGP_Stop;
1589 }
1590 peer->as = remote_as;
1591 } else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) {
1592 if (bgp_debug_neighbor_events(peer))
1593 zlog_debug("%s bad OPEN, remote AS is %u, expected %u",
1594 peer->host, remote_as, peer->as);
1595 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1596 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1597 notify_data_remote_as, 2);
1598 return BGP_Stop;
1599 }
1600
1601 /*
1602 * When collision is detected and this peer is closed.
1603 * Return immediately.
1604 */
1605 ret = bgp_collision_detect(peer, remote_id);
1606 if (ret < 0)
1607 return BGP_Stop;
1608
1609 /* Get sockname. */
1610 if (bgp_getsockname(peer) < 0) {
1611 flog_err_sys(EC_LIB_SOCKET,
1612 "%s: bgp_getsockname() failed for peer: %s",
1613 __func__, peer->host);
1614 return BGP_Stop;
1615 }
1616
1617 /* Set remote router-id */
1618 peer->remote_id = remote_id;
1619
1620 /* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST
1621 calculate the value of the Hold Timer by using the smaller of its
1622 configured Hold Time and the Hold Time received in the OPEN message.
1623 The Hold Time MUST be either zero or at least three seconds. An
1624 implementation may reject connections on the basis of the Hold Time.
1625 */
1626
1627 if (holdtime < 3 && holdtime != 0) {
1628 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1629 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
1630 (uint8_t *)holdtime_ptr, 2);
1631 return BGP_Stop;
1632 }
1633
1634 /* Send notification message when Hold Time received in the OPEN message
1635 * is smaller than configured minimum Hold Time. */
1636 if (holdtime < peer->bgp->default_min_holdtime
1637 && peer->bgp->default_min_holdtime != 0) {
1638 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1639 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
1640 (uint8_t *)holdtime_ptr, 2);
1641 return BGP_Stop;
1642 }
1643
1644 /* From the rfc: A reasonable maximum time between KEEPALIVE messages
1645 would be one third of the Hold Time interval. KEEPALIVE messages
1646 MUST NOT be sent more frequently than one per second. An
1647 implementation MAY adjust the rate at which it sends KEEPALIVE
1648 messages as a function of the Hold Time interval. */
1649
1650 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
1651 send_holdtime = peer->holdtime;
1652 else
1653 send_holdtime = peer->bgp->default_holdtime;
1654
1655 if (holdtime < send_holdtime)
1656 peer->v_holdtime = holdtime;
1657 else
1658 peer->v_holdtime = send_holdtime;
1659
1660 /* Set effective keepalive to 1/3 the effective holdtime.
1661 * Use configured keeplive when < effective keepalive.
1662 */
1663 peer->v_keepalive = peer->v_holdtime / 3;
1664 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) {
1665 if (peer->keepalive && peer->keepalive < peer->v_keepalive)
1666 peer->v_keepalive = peer->keepalive;
1667 } else {
1668 if (peer->bgp->default_keepalive
1669 && peer->bgp->default_keepalive < peer->v_keepalive)
1670 peer->v_keepalive = peer->bgp->default_keepalive;
1671 }
1672
1673 /* If another side disabled sending Software Version capability,
1674 * we MUST drop the previous from showing in the outputs to avoid
1675 * stale information and due to security reasons.
1676 */
1677 if (peer->soft_version)
1678 XFREE(MTYPE_BGP_SOFT_VERSION, peer->soft_version);
1679
1680 /* Open option part parse. */
1681 if (optlen != 0) {
1682 if (bgp_open_option_parse(peer, optlen, &mp_capability) < 0)
1683 return BGP_Stop;
1684 } else {
1685 if (bgp_debug_neighbor_events(peer))
1686 zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0",
1687 peer->host);
1688 }
1689
1690 /*
1691 * Assume that the peer supports the locally configured set of
1692 * AFI/SAFIs if the peer did not send us any Mulitiprotocol
1693 * capabilities, or if 'override-capability' is configured.
1694 */
1695 if (!mp_capability
1696 || CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) {
1697 peer->afc_nego[AFI_IP][SAFI_UNICAST] =
1698 peer->afc[AFI_IP][SAFI_UNICAST];
1699 peer->afc_nego[AFI_IP][SAFI_MULTICAST] =
1700 peer->afc[AFI_IP][SAFI_MULTICAST];
1701 peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] =
1702 peer->afc[AFI_IP][SAFI_LABELED_UNICAST];
1703 peer->afc_nego[AFI_IP][SAFI_FLOWSPEC] =
1704 peer->afc[AFI_IP][SAFI_FLOWSPEC];
1705 peer->afc_nego[AFI_IP6][SAFI_UNICAST] =
1706 peer->afc[AFI_IP6][SAFI_UNICAST];
1707 peer->afc_nego[AFI_IP6][SAFI_MULTICAST] =
1708 peer->afc[AFI_IP6][SAFI_MULTICAST];
1709 peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] =
1710 peer->afc[AFI_IP6][SAFI_LABELED_UNICAST];
1711 peer->afc_nego[AFI_L2VPN][SAFI_EVPN] =
1712 peer->afc[AFI_L2VPN][SAFI_EVPN];
1713 peer->afc_nego[AFI_IP6][SAFI_FLOWSPEC] =
1714 peer->afc[AFI_IP6][SAFI_FLOWSPEC];
1715 }
1716
1717 /* Verify valid local address present based on negotiated
1718 * address-families. */
1719 if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
1720 || peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST]
1721 || peer->afc_nego[AFI_IP][SAFI_MULTICAST]
1722 || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
1723 || peer->afc_nego[AFI_IP][SAFI_ENCAP]) {
1724 if (peer->nexthop.v4.s_addr == INADDR_ANY) {
1725 #if defined(HAVE_CUMULUS)
1726 zlog_warn("%s: No local IPv4 addr, BGP routing may not work",
1727 peer->host);
1728 #endif
1729 }
1730 }
1731 if (peer->afc_nego[AFI_IP6][SAFI_UNICAST]
1732 || peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST]
1733 || peer->afc_nego[AFI_IP6][SAFI_MULTICAST]
1734 || peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN]
1735 || peer->afc_nego[AFI_IP6][SAFI_ENCAP]) {
1736 if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global)) {
1737 #if defined(HAVE_CUMULUS)
1738 zlog_warn("%s: No local IPv6 address, BGP routing may not work",
1739 peer->host);
1740 #endif
1741 }
1742 }
1743 peer->rtt = sockopt_tcp_rtt(peer->fd);
1744
1745 return Receive_OPEN_message;
1746 }
1747
1748 /**
1749 * Process BGP KEEPALIVE message for peer.
1750 *
1751 * @param peer
1752 * @param size size of the packet
1753 * @return as in summary
1754 */
1755 static int bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
1756 {
1757 if (bgp_debug_keepalive(peer))
1758 zlog_debug("%s KEEPALIVE rcvd", peer->host);
1759
1760 bgp_update_implicit_eors(peer);
1761
1762 peer->rtt = sockopt_tcp_rtt(peer->fd);
1763
1764 /* If the peer's RTT is higher than expected, shutdown
1765 * the peer automatically.
1766 */
1767 if (!CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN))
1768 return Receive_KEEPALIVE_message;
1769
1770 if (peer->rtt > peer->rtt_expected) {
1771 peer->rtt_keepalive_rcv++;
1772
1773 if (peer->rtt_keepalive_rcv > peer->rtt_keepalive_conf) {
1774 char rtt_shutdown_reason[BUFSIZ] = {};
1775
1776 snprintfrr(
1777 rtt_shutdown_reason,
1778 sizeof(rtt_shutdown_reason),
1779 "shutdown due to high round-trip-time (%dms > %dms, hit %u times)",
1780 peer->rtt, peer->rtt_expected,
1781 peer->rtt_keepalive_rcv);
1782 zlog_warn("%s %s", peer->host, rtt_shutdown_reason);
1783 SET_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN);
1784 peer_tx_shutdown_message_set(peer, rtt_shutdown_reason);
1785 peer_flag_set(peer, PEER_FLAG_SHUTDOWN);
1786 }
1787 } else {
1788 if (peer->rtt_keepalive_rcv)
1789 peer->rtt_keepalive_rcv--;
1790 }
1791
1792 return Receive_KEEPALIVE_message;
1793 }
1794
1795 static void bgp_refresh_stalepath_timer_expire(struct event *thread)
1796 {
1797 struct peer_af *paf;
1798
1799 paf = EVENT_ARG(thread);
1800
1801 afi_t afi = paf->afi;
1802 safi_t safi = paf->safi;
1803 struct peer *peer = paf->peer;
1804
1805 peer->t_refresh_stalepath = NULL;
1806
1807 if (peer->nsf[afi][safi])
1808 bgp_clear_stale_route(peer, afi, safi);
1809
1810 if (bgp_debug_neighbor_events(peer))
1811 zlog_debug(
1812 "%pBP route-refresh (BoRR) timer expired for afi/safi: %d/%d",
1813 peer, afi, safi);
1814
1815 bgp_timer_set(peer);
1816 }
1817
1818 /**
1819 * Process BGP UPDATE message for peer.
1820 *
1821 * Parses UPDATE and creates attribute object.
1822 *
1823 * @param peer
1824 * @param size size of the packet
1825 * @return as in summary
1826 */
1827 static int bgp_update_receive(struct peer *peer, bgp_size_t size)
1828 {
1829 int ret, nlri_ret;
1830 uint8_t *end;
1831 struct stream *s;
1832 struct attr attr;
1833 bgp_size_t attribute_len;
1834 bgp_size_t update_len;
1835 bgp_size_t withdraw_len;
1836 bool restart = false;
1837
1838 enum NLRI_TYPES {
1839 NLRI_UPDATE,
1840 NLRI_WITHDRAW,
1841 NLRI_MP_UPDATE,
1842 NLRI_MP_WITHDRAW,
1843 NLRI_TYPE_MAX
1844 };
1845 struct bgp_nlri nlris[NLRI_TYPE_MAX];
1846
1847 /* Status must be Established. */
1848 if (!peer_established(peer)) {
1849 flog_err(EC_BGP_INVALID_STATUS,
1850 "%s [FSM] Update packet received under status %s",
1851 peer->host,
1852 lookup_msg(bgp_status_msg, peer->status, NULL));
1853 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
1854 bgp_fsm_error_subcode(peer->status));
1855 return BGP_Stop;
1856 }
1857
1858 /* Set initial values. */
1859 memset(&attr, 0, sizeof(attr));
1860 attr.label_index = BGP_INVALID_LABEL_INDEX;
1861 attr.label = MPLS_INVALID_LABEL;
1862 memset(&nlris, 0, sizeof(nlris));
1863 memset(peer->rcvd_attr_str, 0, BUFSIZ);
1864 peer->rcvd_attr_printed = 0;
1865
1866 s = peer->curr;
1867 end = stream_pnt(s) + size;
1868
1869 /* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute
1870 Length is too large (i.e., if Unfeasible Routes Length + Total
1871 Attribute Length + 23 exceeds the message Length), then the Error
1872 Subcode is set to Malformed Attribute List. */
1873 if (stream_pnt(s) + 2 > end) {
1874 flog_err(EC_BGP_UPDATE_RCV,
1875 "%s [Error] Update packet error (packet length is short for unfeasible length)",
1876 peer->host);
1877 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1878 BGP_NOTIFY_UPDATE_MAL_ATTR);
1879 return BGP_Stop;
1880 }
1881
1882 /* Unfeasible Route Length. */
1883 withdraw_len = stream_getw(s);
1884
1885 /* Unfeasible Route Length check. */
1886 if (stream_pnt(s) + withdraw_len > end) {
1887 flog_err(EC_BGP_UPDATE_RCV,
1888 "%s [Error] Update packet error (packet unfeasible length overflow %d)",
1889 peer->host, withdraw_len);
1890 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1891 BGP_NOTIFY_UPDATE_MAL_ATTR);
1892 return BGP_Stop;
1893 }
1894
1895 /* Unfeasible Route packet format check. */
1896 if (withdraw_len > 0) {
1897 nlris[NLRI_WITHDRAW].afi = AFI_IP;
1898 nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST;
1899 nlris[NLRI_WITHDRAW].nlri = stream_pnt(s);
1900 nlris[NLRI_WITHDRAW].length = withdraw_len;
1901 stream_forward_getp(s, withdraw_len);
1902 }
1903
1904 /* Attribute total length check. */
1905 if (stream_pnt(s) + 2 > end) {
1906 flog_warn(
1907 EC_BGP_UPDATE_PACKET_SHORT,
1908 "%s [Error] Packet Error (update packet is short for attribute length)",
1909 peer->host);
1910 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1911 BGP_NOTIFY_UPDATE_MAL_ATTR);
1912 return BGP_Stop;
1913 }
1914
1915 /* Fetch attribute total length. */
1916 attribute_len = stream_getw(s);
1917
1918 /* Attribute length check. */
1919 if (stream_pnt(s) + attribute_len > end) {
1920 flog_warn(
1921 EC_BGP_UPDATE_PACKET_LONG,
1922 "%s [Error] Packet Error (update packet attribute length overflow %d)",
1923 peer->host, attribute_len);
1924 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1925 BGP_NOTIFY_UPDATE_MAL_ATTR);
1926 return BGP_Stop;
1927 }
1928
1929 /* Certain attribute parsing errors should not be considered bad enough
1930 * to reset the session for, most particularly any partial/optional
1931 * attributes that have 'tunneled' over speakers that don't understand
1932 * them. Instead we withdraw only the prefix concerned.
1933 *
1934 * Complicates the flow a little though..
1935 */
1936 enum bgp_attr_parse_ret attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
1937 /* This define morphs the update case into a withdraw when lower levels
1938 * have signalled an error condition where this is best.
1939 */
1940 #define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
1941
1942 /* Parse attribute when it exists. */
1943 if (attribute_len) {
1944 attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len,
1945 &nlris[NLRI_MP_UPDATE],
1946 &nlris[NLRI_MP_WITHDRAW]);
1947 if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) {
1948 bgp_attr_unintern_sub(&attr);
1949 return BGP_Stop;
1950 }
1951 }
1952
1953 /* Logging the attribute. */
1954 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
1955 || BGP_DEBUG(update, UPDATE_IN)
1956 || BGP_DEBUG(update, UPDATE_PREFIX)) {
1957 ret = bgp_dump_attr(&attr, peer->rcvd_attr_str,
1958 sizeof(peer->rcvd_attr_str));
1959
1960 peer->stat_upd_7606++;
1961
1962 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
1963 flog_err(
1964 EC_BGP_UPDATE_RCV,
1965 "%pBP rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
1966 peer);
1967
1968 if (ret && bgp_debug_update(peer, NULL, NULL, 1)) {
1969 zlog_debug("%pBP rcvd UPDATE w/ attr: %s", peer,
1970 peer->rcvd_attr_str);
1971 peer->rcvd_attr_printed = 1;
1972 }
1973 }
1974
1975 /* Network Layer Reachability Information. */
1976 update_len = end - stream_pnt(s);
1977
1978 if (update_len) {
1979 /* Set NLRI portion to structure. */
1980 nlris[NLRI_UPDATE].afi = AFI_IP;
1981 nlris[NLRI_UPDATE].safi = SAFI_UNICAST;
1982 nlris[NLRI_UPDATE].nlri = stream_pnt(s);
1983 nlris[NLRI_UPDATE].length = update_len;
1984 stream_forward_getp(s, update_len);
1985
1986 if (CHECK_FLAG(attr.flag, ATTR_FLAG_BIT(BGP_ATTR_MP_REACH_NLRI))) {
1987 /*
1988 * We skipped nexthop attribute validation earlier so
1989 * validate the nexthop now.
1990 */
1991 if (bgp_attr_nexthop_valid(peer, &attr) < 0) {
1992 bgp_attr_unintern_sub(&attr);
1993 return BGP_Stop;
1994 }
1995 }
1996 }
1997
1998 if (BGP_DEBUG(update, UPDATE_IN))
1999 zlog_debug("%pBP rcvd UPDATE wlen %d attrlen %d alen %d", peer,
2000 withdraw_len, attribute_len, update_len);
2001
2002 /* Parse any given NLRIs */
2003 for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) {
2004 if (!nlris[i].nlri)
2005 continue;
2006
2007 /* NLRI is processed iff the peer if configured for the specific
2008 * afi/safi */
2009 if (!peer->afc[nlris[i].afi][nlris[i].safi]) {
2010 zlog_info(
2011 "%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u",
2012 peer->host, nlris[i].afi, nlris[i].safi);
2013 continue;
2014 }
2015
2016 /* EoR handled later */
2017 if (nlris[i].length == 0)
2018 continue;
2019
2020 switch (i) {
2021 case NLRI_UPDATE:
2022 case NLRI_MP_UPDATE:
2023 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
2024 &nlris[i], 0);
2025 break;
2026 case NLRI_WITHDRAW:
2027 case NLRI_MP_WITHDRAW:
2028 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
2029 &nlris[i], 1);
2030 break;
2031 default:
2032 nlri_ret = BGP_NLRI_PARSE_ERROR;
2033 }
2034
2035 if (nlri_ret < BGP_NLRI_PARSE_OK
2036 && nlri_ret != BGP_NLRI_PARSE_ERROR_PREFIX_OVERFLOW) {
2037 flog_err(EC_BGP_UPDATE_RCV,
2038 "%s [Error] Error parsing NLRI", peer->host);
2039 if (peer_established(peer))
2040 bgp_notify_send(
2041 peer, BGP_NOTIFY_UPDATE_ERR,
2042 i <= NLRI_WITHDRAW
2043 ? BGP_NOTIFY_UPDATE_INVAL_NETWORK
2044 : BGP_NOTIFY_UPDATE_OPT_ATTR_ERR);
2045 bgp_attr_unintern_sub(&attr);
2046 return BGP_Stop;
2047 }
2048 }
2049
2050 /* EoR checks
2051 *
2052 * Non-MP IPv4/Unicast EoR is a completely empty UPDATE
2053 * and MP EoR should have only an empty MP_UNREACH
2054 */
2055 if ((!update_len && !withdraw_len && nlris[NLRI_MP_UPDATE].length == 0)
2056 || (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
2057 afi_t afi = 0;
2058 safi_t safi;
2059 struct graceful_restart_info *gr_info;
2060
2061 /* Restarting router */
2062 if (BGP_PEER_GRACEFUL_RESTART_CAPABLE(peer)
2063 && BGP_PEER_RESTARTING_MODE(peer))
2064 restart = true;
2065
2066 /* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already
2067 * checked
2068 * update and withdraw NLRI lengths are 0.
2069 */
2070 if (!attribute_len) {
2071 afi = AFI_IP;
2072 safi = SAFI_UNICAST;
2073 } else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI)
2074 && nlris[NLRI_MP_WITHDRAW].length == 0) {
2075 afi = nlris[NLRI_MP_WITHDRAW].afi;
2076 safi = nlris[NLRI_MP_WITHDRAW].safi;
2077 } else if (attr_parse_ret == BGP_ATTR_PARSE_EOR) {
2078 afi = nlris[NLRI_MP_UPDATE].afi;
2079 safi = nlris[NLRI_MP_UPDATE].safi;
2080 }
2081
2082 if (afi && peer->afc[afi][safi]) {
2083 struct vrf *vrf = vrf_lookup_by_id(peer->bgp->vrf_id);
2084
2085 /* End-of-RIB received */
2086 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2087 PEER_STATUS_EOR_RECEIVED)) {
2088 SET_FLAG(peer->af_sflags[afi][safi],
2089 PEER_STATUS_EOR_RECEIVED);
2090 bgp_update_explicit_eors(peer);
2091 /* Update graceful restart information */
2092 gr_info = &(peer->bgp->gr_info[afi][safi]);
2093 if (restart)
2094 gr_info->eor_received++;
2095 /* If EOR received from all peers and selection
2096 * deferral timer is running, cancel the timer
2097 * and invoke the best path calculation
2098 */
2099 if (gr_info->eor_required
2100 == gr_info->eor_received) {
2101 if (bgp_debug_neighbor_events(peer))
2102 zlog_debug(
2103 "%s %d, %s %d",
2104 "EOR REQ",
2105 gr_info->eor_required,
2106 "EOR RCV",
2107 gr_info->eor_received);
2108 if (gr_info->t_select_deferral) {
2109 void *info = EVENT_ARG(
2110 gr_info->t_select_deferral);
2111 XFREE(MTYPE_TMP, info);
2112 }
2113 EVENT_OFF(gr_info->t_select_deferral);
2114 gr_info->eor_required = 0;
2115 gr_info->eor_received = 0;
2116 /* Best path selection */
2117 bgp_best_path_select_defer(peer->bgp,
2118 afi, safi);
2119 }
2120 }
2121
2122 /* NSF delete stale route */
2123 if (peer->nsf[afi][safi])
2124 bgp_clear_stale_route(peer, afi, safi);
2125
2126 zlog_info(
2127 "%s: rcvd End-of-RIB for %s from %s in vrf %s",
2128 __func__, get_afi_safi_str(afi, safi, false),
2129 peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
2130 }
2131 }
2132
2133 /* Everything is done. We unintern temporary structures which
2134 interned in bgp_attr_parse(). */
2135 bgp_attr_unintern_sub(&attr);
2136
2137 peer->update_time = monotime(NULL);
2138
2139 /* Notify BGP Conditional advertisement scanner process */
2140 peer->advmap_table_change = true;
2141
2142 return Receive_UPDATE_message;
2143 }
2144
2145 /**
2146 * Process BGP NOTIFY message for peer.
2147 *
2148 * @param peer
2149 * @param size size of the packet
2150 * @return as in summary
2151 */
2152 static int bgp_notify_receive(struct peer *peer, bgp_size_t size)
2153 {
2154 struct bgp_notify outer = {};
2155 struct bgp_notify inner = {};
2156 bool hard_reset = false;
2157
2158 if (peer->notify.data) {
2159 XFREE(MTYPE_BGP_NOTIFICATION, peer->notify.data);
2160 peer->notify.length = 0;
2161 peer->notify.hard_reset = false;
2162 }
2163
2164 outer.code = stream_getc(peer->curr);
2165 outer.subcode = stream_getc(peer->curr);
2166 outer.length = size - 2;
2167 outer.data = NULL;
2168 outer.raw_data = NULL;
2169 if (outer.length) {
2170 outer.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, outer.length);
2171 memcpy(outer.raw_data, stream_pnt(peer->curr), outer.length);
2172 }
2173
2174 hard_reset =
2175 bgp_notify_received_hard_reset(peer, outer.code, outer.subcode);
2176 if (hard_reset && outer.length) {
2177 inner = bgp_notify_decapsulate_hard_reset(&outer);
2178 peer->notify.hard_reset = true;
2179 } else {
2180 inner = outer;
2181 }
2182
2183 /* Preserv notify code and sub code. */
2184 peer->notify.code = inner.code;
2185 peer->notify.subcode = inner.subcode;
2186 /* For further diagnostic record returned Data. */
2187 if (inner.length) {
2188 peer->notify.length = inner.length;
2189 peer->notify.data =
2190 XMALLOC(MTYPE_BGP_NOTIFICATION, inner.length);
2191 memcpy(peer->notify.data, inner.raw_data, inner.length);
2192 }
2193
2194 /* For debug */
2195 {
2196 int i;
2197 int first = 0;
2198 char c[4];
2199
2200 if (inner.length) {
2201 inner.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
2202 inner.length * 3);
2203 for (i = 0; i < inner.length; i++)
2204 if (first) {
2205 snprintf(c, sizeof(c), " %02x",
2206 stream_getc(peer->curr));
2207
2208 strlcat(inner.data, c,
2209 inner.length * 3);
2210
2211 } else {
2212 first = 1;
2213 snprintf(c, sizeof(c), "%02x",
2214 stream_getc(peer->curr));
2215
2216 strlcpy(inner.data, c,
2217 inner.length * 3);
2218 }
2219 }
2220
2221 bgp_notify_print(peer, &inner, "received", hard_reset);
2222 if (inner.length) {
2223 XFREE(MTYPE_BGP_NOTIFICATION, inner.data);
2224 inner.length = 0;
2225 }
2226 if (outer.length) {
2227 XFREE(MTYPE_BGP_NOTIFICATION, outer.data);
2228 XFREE(MTYPE_BGP_NOTIFICATION, outer.raw_data);
2229
2230 /* If this is a Hard Reset notification, we MUST free
2231 * the inner (encapsulated) notification too.
2232 */
2233 if (hard_reset)
2234 XFREE(MTYPE_BGP_NOTIFICATION, inner.raw_data);
2235 outer.length = 0;
2236 }
2237 }
2238
2239 /* peer count update */
2240 atomic_fetch_add_explicit(&peer->notify_in, 1, memory_order_relaxed);
2241
2242 peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;
2243
2244 /* We have to check for Notify with Unsupported Optional Parameter.
2245 in that case we fallback to open without the capability option.
2246 But this done in bgp_stop. We just mark it here to avoid changing
2247 the fsm tables. */
2248 if (inner.code == BGP_NOTIFY_OPEN_ERR &&
2249 inner.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM)
2250 UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);
2251
2252 /* If Graceful-Restart N-bit (Notification) is exchanged,
2253 * and it's not a Hard Reset, let's retain the routes.
2254 */
2255 if (bgp_has_graceful_restart_notification(peer) && !hard_reset &&
2256 CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE))
2257 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
2258
2259 bgp_peer_gr_flags_update(peer);
2260 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
2261 peer->bgp->peer);
2262
2263 return Receive_NOTIFICATION_message;
2264 }
2265
2266 /**
2267 * Process BGP ROUTEREFRESH message for peer.
2268 *
2269 * @param peer
2270 * @param size size of the packet
2271 * @return as in summary
2272 */
2273 static int bgp_route_refresh_receive(struct peer *peer, bgp_size_t size)
2274 {
2275 iana_afi_t pkt_afi;
2276 afi_t afi;
2277 iana_safi_t pkt_safi;
2278 safi_t safi;
2279 struct stream *s;
2280 struct peer_af *paf;
2281 struct update_group *updgrp;
2282 struct peer *updgrp_peer;
2283 uint8_t subtype;
2284 bool force_update = false;
2285 bgp_size_t msg_length =
2286 size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE);
2287
2288 /* If peer does not have the capability, send notification. */
2289 if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) {
2290 flog_err(EC_BGP_NO_CAP,
2291 "%s [Error] BGP route refresh is not enabled",
2292 peer->host);
2293 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2294 BGP_NOTIFY_HEADER_BAD_MESTYPE);
2295 return BGP_Stop;
2296 }
2297
2298 /* Status must be Established. */
2299 if (!peer_established(peer)) {
2300 flog_err(
2301 EC_BGP_INVALID_STATUS,
2302 "%s [Error] Route refresh packet received under status %s",
2303 peer->host,
2304 lookup_msg(bgp_status_msg, peer->status, NULL));
2305 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
2306 bgp_fsm_error_subcode(peer->status));
2307 return BGP_Stop;
2308 }
2309
2310 s = peer->curr;
2311
2312 /* Parse packet. */
2313 pkt_afi = stream_getw(s);
2314 subtype = stream_getc(s);
2315 pkt_safi = stream_getc(s);
2316
2317 /* Convert AFI, SAFI to internal values and check. */
2318 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) {
2319 zlog_info(
2320 "%s REFRESH_REQ for unrecognized afi/safi: %s/%s - ignored",
2321 peer->host, iana_afi2str(pkt_afi),
2322 iana_safi2str(pkt_safi));
2323 return BGP_PACKET_NOOP;
2324 }
2325
2326 if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) {
2327 uint8_t *end;
2328 uint8_t when_to_refresh;
2329 uint8_t orf_type;
2330 uint16_t orf_len;
2331
2332 if (subtype) {
2333 /* If the length, excluding the fixed-size message
2334 * header, of the received ROUTE-REFRESH message with
2335 * Message Subtype 1 and 2 is not 4, then the BGP
2336 * speaker MUST send a NOTIFICATION message with the
2337 * Error Code of "ROUTE-REFRESH Message Error" and the
2338 * subcode of "Invalid Message Length".
2339 */
2340 if (msg_length != 4) {
2341 zlog_err(
2342 "%s Enhanced Route Refresh message length error",
2343 peer->host);
2344 bgp_notify_send(
2345 peer, BGP_NOTIFY_ROUTE_REFRESH_ERR,
2346 BGP_NOTIFY_ROUTE_REFRESH_INVALID_MSG_LEN);
2347 }
2348
2349 /* When the BGP speaker receives a ROUTE-REFRESH message
2350 * with a "Message Subtype" field other than 0, 1, or 2,
2351 * it MUST ignore the received ROUTE-REFRESH message.
2352 */
2353 if (subtype > 2)
2354 zlog_err(
2355 "%s Enhanced Route Refresh invalid subtype",
2356 peer->host);
2357 }
2358
2359 if (msg_length < 5) {
2360 zlog_info("%s ORF route refresh length error",
2361 peer->host);
2362 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2363 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2364 return BGP_Stop;
2365 }
2366
2367 when_to_refresh = stream_getc(s);
2368 end = stream_pnt(s) + (size - 5);
2369
2370 while ((stream_pnt(s) + 2) < end) {
2371 orf_type = stream_getc(s);
2372 orf_len = stream_getw(s);
2373
2374 /* orf_len in bounds? */
2375 if ((stream_pnt(s) + orf_len) > end)
2376 break; /* XXX: Notify instead?? */
2377 if (orf_type == ORF_TYPE_PREFIX
2378 || orf_type == ORF_TYPE_PREFIX_OLD) {
2379 uint8_t *p_pnt = stream_pnt(s);
2380 uint8_t *p_end = stream_pnt(s) + orf_len;
2381 struct orf_prefix orfp;
2382 uint8_t common = 0;
2383 uint32_t seq;
2384 int psize;
2385 char name[BUFSIZ];
2386 int ret = CMD_SUCCESS;
2387
2388 if (bgp_debug_neighbor_events(peer)) {
2389 zlog_debug(
2390 "%pBP rcvd Prefixlist ORF(%d) length %d",
2391 peer, orf_type, orf_len);
2392 }
2393
2394 /* ORF prefix-list name */
2395 snprintf(name, sizeof(name), "%s.%d.%d",
2396 peer->host, afi, safi);
2397
2398 /* we're going to read at least 1 byte of common
2399 * ORF header,
2400 * and 7 bytes of ORF Address-filter entry from
2401 * the stream
2402 */
2403 if (*p_pnt & ORF_COMMON_PART_REMOVE_ALL) {
2404 if (bgp_debug_neighbor_events(peer))
2405 zlog_debug(
2406 "%pBP rcvd Remove-All pfxlist ORF request",
2407 peer);
2408 prefix_bgp_orf_remove_all(afi, name);
2409 break;
2410 }
2411
2412 if (orf_len < 7)
2413 break;
2414
2415 while (p_pnt < p_end) {
2416 /* If the ORF entry is malformed, want
2417 * to read as much of it
2418 * as possible without going beyond the
2419 * bounds of the entry,
2420 * to maximise debug information.
2421 */
2422 int ok;
2423 memset(&orfp, 0, sizeof(orfp));
2424 common = *p_pnt++;
2425 /* after ++: p_pnt <= p_end */
2426 ok = ((uint32_t)(p_end - p_pnt)
2427 >= sizeof(uint32_t));
2428 if (ok) {
2429 memcpy(&seq, p_pnt,
2430 sizeof(uint32_t));
2431 p_pnt += sizeof(uint32_t);
2432 orfp.seq = ntohl(seq);
2433 } else
2434 p_pnt = p_end;
2435
2436 /* val checked in prefix_bgp_orf_set */
2437 if (p_pnt < p_end)
2438 orfp.ge = *p_pnt++;
2439
2440 /* val checked in prefix_bgp_orf_set */
2441 if (p_pnt < p_end)
2442 orfp.le = *p_pnt++;
2443
2444 if ((ok = (p_pnt < p_end)))
2445 orfp.p.prefixlen = *p_pnt++;
2446
2447 /* afi checked already */
2448 orfp.p.family = afi2family(afi);
2449
2450 /* 0 if not ok */
2451 psize = PSIZE(orfp.p.prefixlen);
2452 /* valid for family ? */
2453 if (psize > prefix_blen(&orfp.p)) {
2454 ok = 0;
2455 psize = prefix_blen(&orfp.p);
2456 }
2457 /* valid for packet ? */
2458 if (psize > (p_end - p_pnt)) {
2459 ok = 0;
2460 psize = p_end - p_pnt;
2461 }
2462
2463 if (psize > 0)
2464 memcpy(&orfp.p.u.prefix, p_pnt,
2465 psize);
2466 p_pnt += psize;
2467
2468 if (bgp_debug_neighbor_events(peer)) {
2469 char buf[INET6_BUFSIZ];
2470
2471 zlog_debug(
2472 "%pBP rcvd %s %s seq %u %s/%d ge %d le %d%s",
2473 peer,
2474 (common & ORF_COMMON_PART_REMOVE
2475 ? "Remove"
2476 : "Add"),
2477 (common & ORF_COMMON_PART_DENY
2478 ? "deny"
2479 : "permit"),
2480 orfp.seq,
2481 inet_ntop(
2482 orfp.p.family,
2483 &orfp.p.u.prefix,
2484 buf,
2485 INET6_BUFSIZ),
2486 orfp.p.prefixlen,
2487 orfp.ge, orfp.le,
2488 ok ? "" : " MALFORMED");
2489 }
2490
2491 if (ok)
2492 ret = prefix_bgp_orf_set(
2493 name, afi, &orfp,
2494 (common & ORF_COMMON_PART_DENY
2495 ? 0
2496 : 1),
2497 (common & ORF_COMMON_PART_REMOVE
2498 ? 0
2499 : 1));
2500
2501 if (!ok || (ok && ret != CMD_SUCCESS)) {
2502 zlog_info(
2503 "%pBP Received misformatted prefixlist ORF. Remove All pfxlist",
2504 peer);
2505 prefix_bgp_orf_remove_all(afi,
2506 name);
2507 break;
2508 }
2509 }
2510
2511 peer->orf_plist[afi][safi] =
2512 prefix_bgp_orf_lookup(afi, name);
2513 }
2514 stream_forward_getp(s, orf_len);
2515 }
2516 if (bgp_debug_neighbor_events(peer))
2517 zlog_debug("%pBP rcvd Refresh %s ORF request", peer,
2518 when_to_refresh == REFRESH_DEFER
2519 ? "Defer"
2520 : "Immediate");
2521 if (when_to_refresh == REFRESH_DEFER)
2522 return BGP_PACKET_NOOP;
2523 }
2524
2525 /* First update is deferred until ORF or ROUTE-REFRESH is received */
2526 if (CHECK_FLAG(peer->af_sflags[afi][safi],
2527 PEER_STATUS_ORF_WAIT_REFRESH))
2528 UNSET_FLAG(peer->af_sflags[afi][safi],
2529 PEER_STATUS_ORF_WAIT_REFRESH);
2530
2531 paf = peer_af_find(peer, afi, safi);
2532 if (paf && paf->subgroup) {
2533 if (peer->orf_plist[afi][safi]) {
2534 updgrp = PAF_UPDGRP(paf);
2535 updgrp_peer = UPDGRP_PEER(updgrp);
2536 updgrp_peer->orf_plist[afi][safi] =
2537 peer->orf_plist[afi][safi];
2538 }
2539
2540 /* Avoid supressing duplicate routes later
2541 * when processing in subgroup_announce_table().
2542 */
2543 force_update = true;
2544
2545 /* If the peer is configured for default-originate clear the
2546 * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will
2547 * re-advertise the
2548 * default
2549 */
2550 if (CHECK_FLAG(paf->subgroup->sflags,
2551 SUBGRP_STATUS_DEFAULT_ORIGINATE))
2552 UNSET_FLAG(paf->subgroup->sflags,
2553 SUBGRP_STATUS_DEFAULT_ORIGINATE);
2554 }
2555
2556 if (subtype == BGP_ROUTE_REFRESH_BORR) {
2557 /* A BGP speaker that has received the Graceful Restart
2558 * Capability from its neighbor MUST ignore any BoRRs for
2559 * an <AFI, SAFI> from the neighbor before the speaker
2560 * receives the EoR for the given <AFI, SAFI> from the
2561 * neighbor.
2562 */
2563 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)
2564 && !CHECK_FLAG(peer->af_sflags[afi][safi],
2565 PEER_STATUS_EOR_RECEIVED)) {
2566 if (bgp_debug_neighbor_events(peer))
2567 zlog_debug(
2568 "%pBP rcvd route-refresh (BoRR) for %s/%s before EoR",
2569 peer, afi2str(afi), safi2str(safi));
2570 return BGP_PACKET_NOOP;
2571 }
2572
2573 if (peer->t_refresh_stalepath) {
2574 if (bgp_debug_neighbor_events(peer))
2575 zlog_debug(
2576 "%pBP rcvd route-refresh (BoRR) for %s/%s, whereas BoRR already received",
2577 peer, afi2str(afi), safi2str(safi));
2578 return BGP_PACKET_NOOP;
2579 }
2580
2581 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_RECEIVED);
2582 UNSET_FLAG(peer->af_sflags[afi][safi],
2583 PEER_STATUS_EORR_RECEIVED);
2584
2585 /* When a BGP speaker receives a BoRR message from
2586 * a peer, it MUST mark all the routes with the given
2587 * Address Family Identifier and Subsequent Address
2588 * Family Identifier, <AFI, SAFI> [RFC2918], from
2589 * that peer as stale.
2590 */
2591 if (peer_active_nego(peer)) {
2592 SET_FLAG(peer->af_sflags[afi][safi],
2593 PEER_STATUS_ENHANCED_REFRESH);
2594 bgp_set_stale_route(peer, afi, safi);
2595 }
2596
2597 if (peer_established(peer))
2598 event_add_timer(bm->master,
2599 bgp_refresh_stalepath_timer_expire, paf,
2600 peer->bgp->stalepath_time,
2601 &peer->t_refresh_stalepath);
2602
2603 if (bgp_debug_neighbor_events(peer))
2604 zlog_debug(
2605 "%pBP rcvd route-refresh (BoRR) for %s/%s, triggering timer for %u seconds",
2606 peer, afi2str(afi), safi2str(safi),
2607 peer->bgp->stalepath_time);
2608 } else if (subtype == BGP_ROUTE_REFRESH_EORR) {
2609 if (!peer->t_refresh_stalepath) {
2610 zlog_err(
2611 "%pBP rcvd route-refresh (EoRR) for %s/%s, whereas no BoRR received",
2612 peer, afi2str(afi), safi2str(safi));
2613 return BGP_PACKET_NOOP;
2614 }
2615
2616 EVENT_OFF(peer->t_refresh_stalepath);
2617
2618 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_RECEIVED);
2619 UNSET_FLAG(peer->af_sflags[afi][safi],
2620 PEER_STATUS_BORR_RECEIVED);
2621
2622 if (bgp_debug_neighbor_events(peer))
2623 zlog_debug(
2624 "%pBP rcvd route-refresh (EoRR) for %s/%s, stopping BoRR timer",
2625 peer, afi2str(afi), safi2str(safi));
2626
2627 if (peer->nsf[afi][safi])
2628 bgp_clear_stale_route(peer, afi, safi);
2629 } else {
2630 if (bgp_debug_neighbor_events(peer))
2631 zlog_debug(
2632 "%pBP rcvd route-refresh (REQUEST) for %s/%s",
2633 peer, afi2str(afi), safi2str(safi));
2634
2635 /* In response to a "normal route refresh request" from the
2636 * peer, the speaker MUST send a BoRR message.
2637 */
2638 if (CHECK_FLAG(peer->cap, PEER_CAP_ENHANCED_RR_RCV)) {
2639 /* For a BGP speaker that supports the BGP Graceful
2640 * Restart, it MUST NOT send a BoRR for an <AFI, SAFI>
2641 * to a neighbor before it sends the EoR for the
2642 * <AFI, SAFI> to the neighbor.
2643 */
2644 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2645 PEER_STATUS_EOR_SEND)) {
2646 if (bgp_debug_neighbor_events(peer))
2647 zlog_debug(
2648 "%pBP rcvd route-refresh (REQUEST) for %s/%s before EoR",
2649 peer, afi2str(afi),
2650 safi2str(safi));
2651 /* Can't send BoRR now, postpone after EoR */
2652 SET_FLAG(peer->af_sflags[afi][safi],
2653 PEER_STATUS_REFRESH_PENDING);
2654 return BGP_PACKET_NOOP;
2655 }
2656
2657 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
2658 BGP_ROUTE_REFRESH_BORR);
2659
2660 if (bgp_debug_neighbor_events(peer))
2661 zlog_debug(
2662 "%pBP sending route-refresh (BoRR) for %s/%s",
2663 peer, afi2str(afi), safi2str(safi));
2664
2665 /* Set flag Ready-To-Send to know when we can send EoRR
2666 * message.
2667 */
2668 SET_FLAG(peer->af_sflags[afi][safi],
2669 PEER_STATUS_BORR_SEND);
2670 UNSET_FLAG(peer->af_sflags[afi][safi],
2671 PEER_STATUS_EORR_SEND);
2672 }
2673 }
2674
2675 /* Perform route refreshment to the peer */
2676 bgp_announce_route(peer, afi, safi, force_update);
2677
2678 /* No FSM action necessary */
2679 return BGP_PACKET_NOOP;
2680 }
2681
2682 /**
2683 * Parse BGP CAPABILITY message for peer.
2684 *
2685 * @param peer
2686 * @param size size of the packet
2687 * @return as in summary
2688 */
2689 static int bgp_capability_msg_parse(struct peer *peer, uint8_t *pnt,
2690 bgp_size_t length)
2691 {
2692 uint8_t *end;
2693 struct capability_mp_data mpc;
2694 struct capability_header *hdr;
2695 uint8_t action;
2696 iana_afi_t pkt_afi;
2697 afi_t afi;
2698 iana_safi_t pkt_safi;
2699 safi_t safi;
2700
2701 end = pnt + length;
2702
2703 while (pnt < end) {
2704 /* We need at least action, capability code and capability
2705 * length. */
2706 if (pnt + 3 > end) {
2707 zlog_info("%s Capability length error", peer->host);
2708 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2709 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2710 return BGP_Stop;
2711 }
2712 action = *pnt;
2713 hdr = (struct capability_header *)(pnt + 1);
2714
2715 /* Action value check. */
2716 if (action != CAPABILITY_ACTION_SET
2717 && action != CAPABILITY_ACTION_UNSET) {
2718 zlog_info("%s Capability Action Value error %d",
2719 peer->host, action);
2720 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2721 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2722 return BGP_Stop;
2723 }
2724
2725 if (bgp_debug_neighbor_events(peer))
2726 zlog_debug(
2727 "%s CAPABILITY has action: %d, code: %u, length %u",
2728 peer->host, action, hdr->code, hdr->length);
2729
2730 if (hdr->length < sizeof(struct capability_mp_data)) {
2731 zlog_info(
2732 "%pBP Capability structure is not properly filled out, expected at least %zu bytes but header length specified is %d",
2733 peer, sizeof(struct capability_mp_data),
2734 hdr->length);
2735 return BGP_Stop;
2736 }
2737
2738 /* Capability length check. */
2739 if ((pnt + hdr->length + 3) > end) {
2740 zlog_info("%s Capability length error", peer->host);
2741 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2742 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2743 return BGP_Stop;
2744 }
2745
2746 /* Fetch structure to the byte stream. */
2747 memcpy(&mpc, pnt + 3, sizeof(struct capability_mp_data));
2748 pnt += hdr->length + 3;
2749
2750 /* We know MP Capability Code. */
2751 if (hdr->code == CAPABILITY_CODE_MP) {
2752 pkt_afi = ntohs(mpc.afi);
2753 pkt_safi = mpc.safi;
2754
2755 /* Ignore capability when override-capability is set. */
2756 if (CHECK_FLAG(peer->flags,
2757 PEER_FLAG_OVERRIDE_CAPABILITY))
2758 continue;
2759
2760 /* Convert AFI, SAFI to internal values. */
2761 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi,
2762 &safi)) {
2763 if (bgp_debug_neighbor_events(peer))
2764 zlog_debug(
2765 "%s Dynamic Capability MP_EXT afi/safi invalid (%s/%s)",
2766 peer->host,
2767 iana_afi2str(pkt_afi),
2768 iana_safi2str(pkt_safi));
2769 continue;
2770 }
2771
2772 /* Address family check. */
2773 if (bgp_debug_neighbor_events(peer))
2774 zlog_debug(
2775 "%s CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
2776 peer->host,
2777 action == CAPABILITY_ACTION_SET
2778 ? "Advertising"
2779 : "Removing",
2780 iana_afi2str(pkt_afi),
2781 iana_safi2str(pkt_safi));
2782
2783 if (action == CAPABILITY_ACTION_SET) {
2784 peer->afc_recv[afi][safi] = 1;
2785 if (peer->afc[afi][safi]) {
2786 peer->afc_nego[afi][safi] = 1;
2787 bgp_announce_route(peer, afi, safi,
2788 false);
2789 }
2790 } else {
2791 peer->afc_recv[afi][safi] = 0;
2792 peer->afc_nego[afi][safi] = 0;
2793
2794 if (peer_active_nego(peer))
2795 bgp_clear_route(peer, afi, safi);
2796 else
2797 return BGP_Stop;
2798 }
2799 } else {
2800 flog_warn(
2801 EC_BGP_UNRECOGNIZED_CAPABILITY,
2802 "%s unrecognized capability code: %d - ignored",
2803 peer->host, hdr->code);
2804 }
2805 }
2806
2807 /* No FSM action necessary */
2808 return BGP_PACKET_NOOP;
2809 }
2810
2811 /**
2812 * Parse BGP CAPABILITY message for peer.
2813 *
2814 * Exported for unit testing.
2815 *
2816 * @param peer
2817 * @param size size of the packet
2818 * @return as in summary
2819 */
2820 int bgp_capability_receive(struct peer *peer, bgp_size_t size)
2821 {
2822 uint8_t *pnt;
2823
2824 /* Fetch pointer. */
2825 pnt = stream_pnt(peer->curr);
2826
2827 if (bgp_debug_neighbor_events(peer))
2828 zlog_debug("%s rcv CAPABILITY", peer->host);
2829
2830 /* If peer does not have the capability, send notification. */
2831 if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) {
2832 flog_err(EC_BGP_NO_CAP,
2833 "%s [Error] BGP dynamic capability is not enabled",
2834 peer->host);
2835 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2836 BGP_NOTIFY_HEADER_BAD_MESTYPE);
2837 return BGP_Stop;
2838 }
2839
2840 /* Status must be Established. */
2841 if (!peer_established(peer)) {
2842 flog_err(
2843 EC_BGP_NO_CAP,
2844 "%s [Error] Dynamic capability packet received under status %s",
2845 peer->host,
2846 lookup_msg(bgp_status_msg, peer->status, NULL));
2847 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
2848 bgp_fsm_error_subcode(peer->status));
2849 return BGP_Stop;
2850 }
2851
2852 /* Parse packet. */
2853 return bgp_capability_msg_parse(peer, pnt, size);
2854 }
2855
2856 /**
2857 * Processes a peer's input buffer.
2858 *
2859 * This function sidesteps the event loop and directly calls bgp_event_update()
2860 * after processing each BGP message. This is necessary to ensure proper
2861 * ordering of FSM events and unifies the behavior that was present previously,
2862 * whereby some of the packet handling functions would update the FSM and some
2863 * would not, making event flow difficult to understand. Please think twice
2864 * before hacking this.
2865 *
2866 * Thread type: EVENT_EVENT
2867 * @param thread
2868 * @return 0
2869 */
2870 void bgp_process_packet(struct event *thread)
2871 {
2872 /* Yes first of all get peer pointer. */
2873 struct peer *peer; // peer
2874 uint32_t rpkt_quanta_old; // how many packets to read
2875 int fsm_update_result; // return code of bgp_event_update()
2876 int mprc; // message processing return code
2877
2878 peer = EVENT_ARG(thread);
2879 rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
2880 memory_order_relaxed);
2881 fsm_update_result = 0;
2882
2883 /* Guard against scheduled events that occur after peer deletion. */
2884 if (peer->status == Deleted || peer->status == Clearing)
2885 return;
2886
2887 unsigned int processed = 0;
2888
2889 while (processed < rpkt_quanta_old) {
2890 uint8_t type = 0;
2891 bgp_size_t size;
2892 char notify_data_length[2];
2893
2894 frr_with_mutex (&peer->io_mtx) {
2895 peer->curr = stream_fifo_pop(peer->ibuf);
2896 }
2897
2898 if (peer->curr == NULL) // no packets to process, hmm...
2899 return;
2900
2901 /* skip the marker and copy the packet length */
2902 stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
2903 memcpy(notify_data_length, stream_pnt(peer->curr), 2);
2904
2905 /* read in the packet length and type */
2906 size = stream_getw(peer->curr);
2907 type = stream_getc(peer->curr);
2908
2909 hook_call(bgp_packet_dump, peer, type, size, peer->curr);
2910
2911 /* adjust size to exclude the marker + length + type */
2912 size -= BGP_HEADER_SIZE;
2913
2914 /* Read rest of the packet and call each sort of packet routine
2915 */
2916 switch (type) {
2917 case BGP_MSG_OPEN:
2918 frrtrace(2, frr_bgp, open_process, peer, size);
2919 atomic_fetch_add_explicit(&peer->open_in, 1,
2920 memory_order_relaxed);
2921 mprc = bgp_open_receive(peer, size);
2922 if (mprc == BGP_Stop)
2923 flog_err(
2924 EC_BGP_PKT_OPEN,
2925 "%s: BGP OPEN receipt failed for peer: %s",
2926 __func__, peer->host);
2927 break;
2928 case BGP_MSG_UPDATE:
2929 frrtrace(2, frr_bgp, update_process, peer, size);
2930 atomic_fetch_add_explicit(&peer->update_in, 1,
2931 memory_order_relaxed);
2932 peer->readtime = monotime(NULL);
2933 mprc = bgp_update_receive(peer, size);
2934 if (mprc == BGP_Stop)
2935 flog_err(
2936 EC_BGP_UPDATE_RCV,
2937 "%s: BGP UPDATE receipt failed for peer: %s",
2938 __func__, peer->host);
2939 break;
2940 case BGP_MSG_NOTIFY:
2941 frrtrace(2, frr_bgp, notification_process, peer, size);
2942 atomic_fetch_add_explicit(&peer->notify_in, 1,
2943 memory_order_relaxed);
2944 mprc = bgp_notify_receive(peer, size);
2945 if (mprc == BGP_Stop)
2946 flog_err(
2947 EC_BGP_NOTIFY_RCV,
2948 "%s: BGP NOTIFY receipt failed for peer: %s",
2949 __func__, peer->host);
2950 break;
2951 case BGP_MSG_KEEPALIVE:
2952 frrtrace(2, frr_bgp, keepalive_process, peer, size);
2953 peer->readtime = monotime(NULL);
2954 atomic_fetch_add_explicit(&peer->keepalive_in, 1,
2955 memory_order_relaxed);
2956 mprc = bgp_keepalive_receive(peer, size);
2957 if (mprc == BGP_Stop)
2958 flog_err(
2959 EC_BGP_KEEP_RCV,
2960 "%s: BGP KEEPALIVE receipt failed for peer: %s",
2961 __func__, peer->host);
2962 break;
2963 case BGP_MSG_ROUTE_REFRESH_NEW:
2964 case BGP_MSG_ROUTE_REFRESH_OLD:
2965 frrtrace(2, frr_bgp, refresh_process, peer, size);
2966 atomic_fetch_add_explicit(&peer->refresh_in, 1,
2967 memory_order_relaxed);
2968 mprc = bgp_route_refresh_receive(peer, size);
2969 if (mprc == BGP_Stop)
2970 flog_err(
2971 EC_BGP_RFSH_RCV,
2972 "%s: BGP ROUTEREFRESH receipt failed for peer: %s",
2973 __func__, peer->host);
2974 break;
2975 case BGP_MSG_CAPABILITY:
2976 frrtrace(2, frr_bgp, capability_process, peer, size);
2977 atomic_fetch_add_explicit(&peer->dynamic_cap_in, 1,
2978 memory_order_relaxed);
2979 mprc = bgp_capability_receive(peer, size);
2980 if (mprc == BGP_Stop)
2981 flog_err(
2982 EC_BGP_CAP_RCV,
2983 "%s: BGP CAPABILITY receipt failed for peer: %s",
2984 __func__, peer->host);
2985 break;
2986 default:
2987 /* Suppress uninitialized variable warning */
2988 mprc = 0;
2989 (void)mprc;
2990 /*
2991 * The message type should have been sanitized before
2992 * we ever got here. Receipt of a message with an
2993 * invalid header at this point is indicative of a
2994 * security issue.
2995 */
2996 assert (!"Message of invalid type received during input processing");
2997 }
2998
2999 /* delete processed packet */
3000 stream_free(peer->curr);
3001 peer->curr = NULL;
3002 processed++;
3003
3004 /* Update FSM */
3005 if (mprc != BGP_PACKET_NOOP)
3006 fsm_update_result = bgp_event_update(peer, mprc);
3007 else
3008 continue;
3009
3010 /*
3011 * If peer was deleted, do not process any more packets. This
3012 * is usually due to executing BGP_Stop or a stub deletion.
3013 */
3014 if (fsm_update_result == FSM_PEER_TRANSFERRED
3015 || fsm_update_result == FSM_PEER_STOPPED)
3016 break;
3017 }
3018
3019 if (fsm_update_result != FSM_PEER_TRANSFERRED
3020 && fsm_update_result != FSM_PEER_STOPPED) {
3021 frr_with_mutex (&peer->io_mtx) {
3022 // more work to do, come back later
3023 if (peer->ibuf->count > 0)
3024 event_add_event(bm->master, bgp_process_packet,
3025 peer, 0,
3026 &peer->t_process_packet);
3027 }
3028 }
3029 }
3030
3031 /* Send EOR when routes are processed by selection deferral timer */
3032 void bgp_send_delayed_eor(struct bgp *bgp)
3033 {
3034 struct peer *peer;
3035 struct listnode *node, *nnode;
3036
3037 /* EOR message sent in bgp_write_proceed_actions */
3038 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer))
3039 bgp_write_proceed_actions(peer);
3040 }
3041
3042 /*
3043 * Task callback to handle socket error encountered in the io pthread. We avoid
3044 * having the io pthread try to enqueue fsm events or mess with the peer
3045 * struct.
3046 */
3047 void bgp_packet_process_error(struct event *thread)
3048 {
3049 struct peer *peer;
3050 int code;
3051
3052 peer = EVENT_ARG(thread);
3053 code = EVENT_VAL(thread);
3054
3055 if (bgp_debug_neighbor_events(peer))
3056 zlog_debug("%s [Event] BGP error %d on fd %d",
3057 peer->host, code, peer->fd);
3058
3059 /* Closed connection or error on the socket */
3060 if (peer_established(peer)) {
3061 if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
3062 || CHECK_FLAG(peer->flags,
3063 PEER_FLAG_GRACEFUL_RESTART_HELPER))
3064 && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
3065 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
3066 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
3067 } else
3068 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
3069 }
3070
3071 bgp_event_update(peer, code);
3072 }