]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_packet.c
Merge pull request #12647 from anlancs/fix/bgpd-type-2
[mirror_frr.git] / bgpd / bgp_packet.c
1 /* BGP packet management routine.
2 * Contains utility functions for constructing and consuming BGP messages.
3 * Copyright (C) 2017 Cumulus Networks
4 * Copyright (C) 1999 Kunihiro Ishiguro
5 *
6 * This file is part of GNU Zebra.
7 *
8 * GNU Zebra is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * GNU Zebra is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; see the file COPYING; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include <zebra.h>
24 #include <sys/time.h>
25
26 #include "thread.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "prefix.h"
30 #include "command.h"
31 #include "log.h"
32 #include "memory.h"
33 #include "sockunion.h" /* for inet_ntop () */
34 #include "sockopt.h"
35 #include "linklist.h"
36 #include "plist.h"
37 #include "queue.h"
38 #include "filter.h"
39 #include "lib_errors.h"
40
41 #include "bgpd/bgpd.h"
42 #include "bgpd/bgp_table.h"
43 #include "bgpd/bgp_dump.h"
44 #include "bgpd/bgp_bmp.h"
45 #include "bgpd/bgp_attr.h"
46 #include "bgpd/bgp_debug.h"
47 #include "bgpd/bgp_errors.h"
48 #include "bgpd/bgp_fsm.h"
49 #include "bgpd/bgp_route.h"
50 #include "bgpd/bgp_packet.h"
51 #include "bgpd/bgp_open.h"
52 #include "bgpd/bgp_aspath.h"
53 #include "bgpd/bgp_community.h"
54 #include "bgpd/bgp_ecommunity.h"
55 #include "bgpd/bgp_lcommunity.h"
56 #include "bgpd/bgp_network.h"
57 #include "bgpd/bgp_mplsvpn.h"
58 #include "bgpd/bgp_evpn.h"
59 #include "bgpd/bgp_advertise.h"
60 #include "bgpd/bgp_vty.h"
61 #include "bgpd/bgp_updgrp.h"
62 #include "bgpd/bgp_label.h"
63 #include "bgpd/bgp_io.h"
64 #include "bgpd/bgp_keepalives.h"
65 #include "bgpd/bgp_flowspec.h"
66 #include "bgpd/bgp_trace.h"
67
68 DEFINE_HOOK(bgp_packet_dump,
69 (struct peer *peer, uint8_t type, bgp_size_t size,
70 struct stream *s),
71 (peer, type, size, s));
72
73 DEFINE_HOOK(bgp_packet_send,
74 (struct peer *peer, uint8_t type, bgp_size_t size,
75 struct stream *s),
76 (peer, type, size, s));
77
78 /**
79 * Sets marker and type fields for a BGP message.
80 *
81 * @param s the stream containing the packet
82 * @param type the packet type
83 * @return the size of the stream
84 */
85 int bgp_packet_set_marker(struct stream *s, uint8_t type)
86 {
87 int i;
88
89 /* Fill in marker. */
90 for (i = 0; i < BGP_MARKER_SIZE; i++)
91 stream_putc(s, 0xff);
92
93 /* Dummy total length. This field is should be filled in later on. */
94 stream_putw(s, 0);
95
96 /* BGP packet type. */
97 stream_putc(s, type);
98
99 /* Return current stream size. */
100 return stream_get_endp(s);
101 }
102
103 /**
104 * Sets size field for a BGP message.
105 *
106 * Size field is set to the size of the stream passed.
107 *
108 * @param s the stream containing the packet
109 */
110 void bgp_packet_set_size(struct stream *s)
111 {
112 int cp;
113
114 /* Preserve current pointer. */
115 cp = stream_get_endp(s);
116 stream_putw_at(s, BGP_MARKER_SIZE, cp);
117 }
118
119 /*
120 * Push a packet onto the beginning of the peer's output queue.
121 * This function acquires the peer's write mutex before proceeding.
122 */
123 static void bgp_packet_add(struct peer *peer, struct stream *s)
124 {
125 intmax_t delta;
126 uint32_t holdtime;
127 intmax_t sendholdtime;
128
129 frr_with_mutex (&peer->io_mtx) {
130 /* if the queue is empty, reset the "last OK" timestamp to
131 * now, otherwise if we write another packet immediately
132 * after it'll get confused
133 */
134 if (!stream_fifo_count_safe(peer->obuf))
135 peer->last_sendq_ok = monotime(NULL);
136
137 stream_fifo_push(peer->obuf, s);
138
139 delta = monotime(NULL) - peer->last_sendq_ok;
140
141 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
142 holdtime = atomic_load_explicit(&peer->holdtime,
143 memory_order_relaxed);
144 else
145 holdtime = peer->bgp->default_holdtime;
146
147 sendholdtime = holdtime * 2;
148
149 /* Note that when we're here, we're adding some packet to the
150 * OutQ. That includes keepalives when there is nothing to
151 * do, so there's a guarantee we pass by here once in a while.
152 *
153 * That implies there is no need to go set up another separate
154 * timer that ticks down SendHoldTime, as we'll be here sooner
155 * or later anyway and will see the checks below failing.
156 */
157 if (!holdtime) {
158 /* no holdtime, do nothing. */
159 } else if (delta > sendholdtime) {
160 flog_err(
161 EC_BGP_SENDQ_STUCK_PROPER,
162 "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
163 peer, sendholdtime);
164 BGP_EVENT_ADD(peer, TCP_fatal_error);
165 } else if (delta > (intmax_t)holdtime &&
166 monotime(NULL) - peer->last_sendq_warn > 5) {
167 flog_warn(
168 EC_BGP_SENDQ_STUCK_WARN,
169 "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
170 peer, holdtime);
171 peer->last_sendq_warn = monotime(NULL);
172 }
173 }
174 }
175
176 static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
177 safi_t safi)
178 {
179 struct stream *s;
180 iana_afi_t pkt_afi = IANA_AFI_IPV4;
181 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
182
183 if (DISABLE_BGP_ANNOUNCE)
184 return NULL;
185
186 if (bgp_debug_neighbor_events(peer))
187 zlog_debug("send End-of-RIB for %s to %s",
188 get_afi_safi_str(afi, safi, false), peer->host);
189
190 s = stream_new(peer->max_packet_size);
191
192 /* Make BGP update packet. */
193 bgp_packet_set_marker(s, BGP_MSG_UPDATE);
194
195 /* Unfeasible Routes Length */
196 stream_putw(s, 0);
197
198 if (afi == AFI_IP && safi == SAFI_UNICAST) {
199 /* Total Path Attribute Length */
200 stream_putw(s, 0);
201 } else {
202 /* Convert AFI, SAFI to values for packet. */
203 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
204
205 /* Total Path Attribute Length */
206 stream_putw(s, 6);
207 stream_putc(s, BGP_ATTR_FLAG_OPTIONAL);
208 stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI);
209 stream_putc(s, 3);
210 stream_putw(s, pkt_afi);
211 stream_putc(s, pkt_safi);
212 }
213
214 bgp_packet_set_size(s);
215 return s;
216 }
217
218 /* Called when there is a change in the EOR(implicit or explicit) status of a
219 * peer. Ends the update-delay if all expected peers are done with EORs. */
220 void bgp_check_update_delay(struct bgp *bgp)
221 {
222 struct listnode *node, *nnode;
223 struct peer *peer = NULL;
224
225 if (bgp_debug_neighbor_events(peer))
226 zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d",
227 bgp->established, bgp->restarted_peers,
228 bgp->implicit_eors, bgp->explicit_eors);
229
230 if (bgp->established
231 <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
232 /*
233 * This is an extra sanity check to make sure we wait for all
234 * the eligible configured peers. This check is performed if
235 * establish wait timer is on, or establish wait option is not
236 * given with the update-delay command
237 */
238 if (bgp->t_establish_wait
239 || (bgp->v_establish_wait == bgp->v_update_delay))
240 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
241 if (CHECK_FLAG(peer->flags,
242 PEER_FLAG_CONFIG_NODE)
243 && !CHECK_FLAG(peer->flags,
244 PEER_FLAG_SHUTDOWN)
245 && !CHECK_FLAG(peer->bgp->flags,
246 BGP_FLAG_SHUTDOWN)
247 && !peer->update_delay_over) {
248 if (bgp_debug_neighbor_events(peer))
249 zlog_debug(
250 " Peer %s pending, continuing read-only mode",
251 peer->host);
252 return;
253 }
254 }
255
256 zlog_info(
257 "Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d",
258 bgp->restarted_peers, bgp->implicit_eors,
259 bgp->explicit_eors);
260 bgp_update_delay_end(bgp);
261 }
262 }
263
264 /*
265 * Called if peer is known to have restarted. The restart-state bit in
266 * Graceful-Restart capability is used for that
267 */
268 void bgp_update_restarted_peers(struct peer *peer)
269 {
270 if (!bgp_update_delay_active(peer->bgp))
271 return; /* BGP update delay has ended */
272 if (peer->update_delay_over)
273 return; /* This peer has already been considered */
274
275 if (bgp_debug_neighbor_events(peer))
276 zlog_debug("Peer %s: Checking restarted", peer->host);
277
278 if (peer_established(peer)) {
279 peer->update_delay_over = 1;
280 peer->bgp->restarted_peers++;
281 bgp_check_update_delay(peer->bgp);
282 }
283 }
284
285 /*
286 * Called as peer receives a keep-alive. Determines if this occurence can be
287 * taken as an implicit EOR for this peer.
288 * NOTE: The very first keep-alive after the Established state of a peer is
289 * considered implicit EOR for the update-delay purposes
290 */
291 void bgp_update_implicit_eors(struct peer *peer)
292 {
293 if (!bgp_update_delay_active(peer->bgp))
294 return; /* BGP update delay has ended */
295 if (peer->update_delay_over)
296 return; /* This peer has already been considered */
297
298 if (bgp_debug_neighbor_events(peer))
299 zlog_debug("Peer %s: Checking implicit EORs", peer->host);
300
301 if (peer_established(peer)) {
302 peer->update_delay_over = 1;
303 peer->bgp->implicit_eors++;
304 bgp_check_update_delay(peer->bgp);
305 }
306 }
307
308 /*
309 * Should be called only when there is a change in the EOR_RECEIVED status
310 * for any afi/safi on a peer.
311 */
312 static void bgp_update_explicit_eors(struct peer *peer)
313 {
314 afi_t afi;
315 safi_t safi;
316
317 if (!bgp_update_delay_active(peer->bgp))
318 return; /* BGP update delay has ended */
319 if (peer->update_delay_over)
320 return; /* This peer has already been considered */
321
322 if (bgp_debug_neighbor_events(peer))
323 zlog_debug("Peer %s: Checking explicit EORs", peer->host);
324
325 FOREACH_AFI_SAFI (afi, safi) {
326 if (peer->afc_nego[afi][safi]
327 && !CHECK_FLAG(peer->af_sflags[afi][safi],
328 PEER_STATUS_EOR_RECEIVED)) {
329 if (bgp_debug_neighbor_events(peer))
330 zlog_debug(
331 " afi %d safi %d didn't receive EOR",
332 afi, safi);
333 return;
334 }
335 }
336
337 peer->update_delay_over = 1;
338 peer->bgp->explicit_eors++;
339 bgp_check_update_delay(peer->bgp);
340 }
341
342 /**
343 * Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers.
344 *
345 * mp_withdraw, if set, is used to nullify attr structure on most of the
346 * calling safi function and for evpn, passed as parameter
347 */
348 int bgp_nlri_parse(struct peer *peer, struct attr *attr,
349 struct bgp_nlri *packet, int mp_withdraw)
350 {
351 switch (packet->safi) {
352 case SAFI_UNICAST:
353 case SAFI_MULTICAST:
354 return bgp_nlri_parse_ip(peer, mp_withdraw ? NULL : attr,
355 packet);
356 case SAFI_LABELED_UNICAST:
357 return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr,
358 packet);
359 case SAFI_MPLS_VPN:
360 return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr,
361 packet);
362 case SAFI_EVPN:
363 return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw);
364 case SAFI_FLOWSPEC:
365 return bgp_nlri_parse_flowspec(peer, attr, packet, mp_withdraw);
366 }
367 return BGP_NLRI_PARSE_ERROR;
368 }
369
370
371 /*
372 * Check if route-refresh request from peer is pending (received before EoR),
373 * and process it now.
374 */
375 static void bgp_process_pending_refresh(struct peer *peer, afi_t afi,
376 safi_t safi)
377 {
378 if (CHECK_FLAG(peer->af_sflags[afi][safi],
379 PEER_STATUS_REFRESH_PENDING)) {
380 UNSET_FLAG(peer->af_sflags[afi][safi],
381 PEER_STATUS_REFRESH_PENDING);
382 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
383 BGP_ROUTE_REFRESH_BORR);
384 if (bgp_debug_neighbor_events(peer))
385 zlog_debug(
386 "%pBP sending route-refresh (BoRR) for %s/%s (for pending REQUEST)",
387 peer, afi2str(afi), safi2str(safi));
388
389 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_SEND);
390 UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_SEND);
391 bgp_announce_route(peer, afi, safi, true);
392 }
393 }
394
395 /*
396 * Checks a variety of conditions to determine whether the peer needs to be
397 * rescheduled for packet generation again, and does so if necessary.
398 *
399 * @param peer to check for rescheduling
400 */
401 static void bgp_write_proceed_actions(struct peer *peer)
402 {
403 afi_t afi;
404 safi_t safi;
405 struct peer_af *paf;
406 struct bpacket *next_pkt;
407 struct update_subgroup *subgrp;
408 enum bgp_af_index index;
409
410 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
411 paf = peer->peer_af_array[index];
412 if (!paf)
413 continue;
414
415 subgrp = paf->subgroup;
416 if (!subgrp)
417 continue;
418
419 next_pkt = paf->next_pkt_to_send;
420 if (next_pkt && next_pkt->buffer) {
421 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
422 bgp_generate_updgrp_packets, 0);
423 return;
424 }
425
426 /* No packets readily available for AFI/SAFI, are there
427 * subgroup packets
428 * that need to be generated? */
429 if (bpacket_queue_is_full(SUBGRP_INST(subgrp),
430 SUBGRP_PKTQ(subgrp))
431 || subgroup_packets_to_build(subgrp)) {
432 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
433 bgp_generate_updgrp_packets, 0);
434 return;
435 }
436
437 afi = paf->afi;
438 safi = paf->safi;
439
440 /* No packets to send, see if EOR is pending */
441 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)) {
442 if (!subgrp->t_coalesce && peer->afc_nego[afi][safi]
443 && peer->synctime
444 && !CHECK_FLAG(peer->af_sflags[afi][safi],
445 PEER_STATUS_EOR_SEND)
446 && safi != SAFI_MPLS_VPN) {
447 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
448 bgp_generate_updgrp_packets, 0);
449 return;
450 }
451 }
452 }
453 }
454
455 /*
456 * Generate advertisement information (withdraws, updates, EOR) from each
457 * update group a peer belongs to, encode this information into packets, and
458 * enqueue the packets onto the peer's output buffer.
459 */
460 void bgp_generate_updgrp_packets(struct thread *thread)
461 {
462 struct peer *peer = THREAD_ARG(thread);
463
464 struct stream *s;
465 struct peer_af *paf;
466 struct bpacket *next_pkt;
467 uint32_t wpq;
468 uint32_t generated = 0;
469 afi_t afi;
470 safi_t safi;
471
472 wpq = atomic_load_explicit(&peer->bgp->wpkt_quanta,
473 memory_order_relaxed);
474
475 /*
476 * The code beyond this part deals with update packets, proceed only
477 * if peer is Established and updates are not on hold (as part of
478 * update-delay processing).
479 */
480 if (!peer_established(peer))
481 return;
482
483 if ((peer->bgp->main_peers_update_hold)
484 || bgp_update_delay_active(peer->bgp))
485 return;
486
487 if (peer->t_routeadv)
488 return;
489
490 /*
491 * Since the following is a do while loop
492 * let's stop adding to the outq if we are
493 * already at the limit.
494 */
495 if (peer->obuf->count >= bm->outq_limit) {
496 bgp_write_proceed_actions(peer);
497 return;
498 }
499
500 do {
501 enum bgp_af_index index;
502
503 s = NULL;
504 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
505 paf = peer->peer_af_array[index];
506 if (!paf || !PAF_SUBGRP(paf))
507 continue;
508
509 afi = paf->afi;
510 safi = paf->safi;
511 next_pkt = paf->next_pkt_to_send;
512
513 /*
514 * Try to generate a packet for the peer if we are at
515 * the end of the list. Always try to push out
516 * WITHDRAWs first.
517 */
518 if (!next_pkt || !next_pkt->buffer) {
519 next_pkt = subgroup_withdraw_packet(
520 PAF_SUBGRP(paf));
521 if (!next_pkt || !next_pkt->buffer)
522 subgroup_update_packet(PAF_SUBGRP(paf));
523 next_pkt = paf->next_pkt_to_send;
524 }
525
526 /*
527 * If we still don't have a packet to send to the peer,
528 * then try to find out out if we have to send eor or
529 * if not, skip to the next AFI, SAFI. Don't send the
530 * EOR prematurely; if the subgroup's coalesce timer is
531 * running, the adjacency-out structure is not created
532 * yet.
533 */
534 if (!next_pkt || !next_pkt->buffer) {
535 if (!paf->t_announce_route) {
536 /* Make sure we supress BGP UPDATES
537 * for normal processing later again.
538 */
539 UNSET_FLAG(paf->subgroup->sflags,
540 SUBGRP_STATUS_FORCE_UPDATES);
541
542 /* If route-refresh BoRR message was
543 * already sent and we are done with
544 * re-announcing tables for a decent
545 * afi/safi, we ready to send
546 * EoRR request.
547 */
548 if (CHECK_FLAG(
549 peer->af_sflags[afi][safi],
550 PEER_STATUS_BORR_SEND)) {
551 bgp_route_refresh_send(
552 peer, afi, safi, 0, 0,
553 0,
554 BGP_ROUTE_REFRESH_EORR);
555
556 SET_FLAG(peer->af_sflags[afi]
557 [safi],
558 PEER_STATUS_EORR_SEND);
559 UNSET_FLAG(
560 peer->af_sflags[afi]
561 [safi],
562 PEER_STATUS_BORR_SEND);
563
564 if (bgp_debug_neighbor_events(
565 peer))
566 zlog_debug(
567 "%pBP sending route-refresh (EoRR) for %s/%s",
568 peer,
569 afi2str(afi),
570 safi2str(safi));
571 }
572 }
573
574 if (CHECK_FLAG(peer->cap,
575 PEER_CAP_RESTART_RCV)) {
576 if (!(PAF_SUBGRP(paf))->t_coalesce
577 && peer->afc_nego[afi][safi]
578 && peer->synctime
579 && !CHECK_FLAG(
580 peer->af_sflags[afi][safi],
581 PEER_STATUS_EOR_SEND)) {
582 /* If EOR is disabled,
583 * the message is not sent
584 */
585 if (BGP_SEND_EOR(peer->bgp, afi,
586 safi)) {
587 SET_FLAG(
588 peer->af_sflags
589 [afi]
590 [safi],
591 PEER_STATUS_EOR_SEND);
592
593 /* Update EOR
594 * send time
595 */
596 peer->eor_stime[afi]
597 [safi] =
598 monotime(NULL);
599
600 BGP_UPDATE_EOR_PKT(
601 peer, afi, safi,
602 s);
603 bgp_process_pending_refresh(
604 peer, afi,
605 safi);
606 }
607 }
608 }
609 continue;
610 }
611
612 /* Update packet send time */
613 peer->pkt_stime[afi][safi] = monotime(NULL);
614
615 /* Found a packet template to send, overwrite
616 * packet with appropriate attributes from peer
617 * and advance peer */
618 s = bpacket_reformat_for_peer(next_pkt, paf);
619 bgp_packet_add(peer, s);
620 bpacket_queue_advance_peer(paf);
621 }
622 } while (s && (++generated < wpq) &&
623 (peer->obuf->count <= bm->outq_limit));
624
625 if (generated)
626 bgp_writes_on(peer);
627
628 bgp_write_proceed_actions(peer);
629 }
630
631 /*
632 * Creates a BGP Keepalive packet and appends it to the peer's output queue.
633 */
634 void bgp_keepalive_send(struct peer *peer)
635 {
636 struct stream *s;
637
638 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
639
640 /* Make keepalive packet. */
641 bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
642
643 /* Set packet size. */
644 bgp_packet_set_size(s);
645
646 /* Dump packet if debug option is set. */
647 /* bgp_packet_dump (s); */
648
649 if (bgp_debug_keepalive(peer))
650 zlog_debug("%s sending KEEPALIVE", peer->host);
651
652 /* Add packet to the peer. */
653 bgp_packet_add(peer, s);
654
655 bgp_writes_on(peer);
656 }
657
658 /*
659 * Creates a BGP Open packet and appends it to the peer's output queue.
660 * Sets capabilities as necessary.
661 */
662 void bgp_open_send(struct peer *peer)
663 {
664 struct stream *s;
665 uint16_t send_holdtime;
666 as_t local_as;
667
668 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
669 send_holdtime = peer->holdtime;
670 else
671 send_holdtime = peer->bgp->default_holdtime;
672
673 /* local-as Change */
674 if (peer->change_local_as)
675 local_as = peer->change_local_as;
676 else
677 local_as = peer->local_as;
678
679 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
680
681 /* Make open packet. */
682 bgp_packet_set_marker(s, BGP_MSG_OPEN);
683
684 /* Set open packet values. */
685 stream_putc(s, BGP_VERSION_4); /* BGP version */
686 stream_putw(s, (local_as <= BGP_AS_MAX) ? (uint16_t)local_as
687 : BGP_AS_TRANS);
688 stream_putw(s, send_holdtime); /* Hold Time */
689 stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */
690
691 /* Set capabilities */
692 if (CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
693 (void)bgp_open_capability(s, peer, true);
694 } else {
695 struct stream *tmp = stream_new(STREAM_SIZE(s));
696
697 stream_copy(tmp, s);
698 if (bgp_open_capability(tmp, peer, false)
699 > BGP_OPEN_NON_EXT_OPT_LEN) {
700 stream_free(tmp);
701 (void)bgp_open_capability(s, peer, true);
702 } else {
703 stream_copy(s, tmp);
704 stream_free(tmp);
705 }
706 }
707
708 /* Set BGP packet length. */
709 bgp_packet_set_size(s);
710
711 if (bgp_debug_neighbor_events(peer))
712 zlog_debug(
713 "%s sending OPEN, version %d, my as %u, holdtime %d, id %pI4",
714 peer->host, BGP_VERSION_4, local_as, send_holdtime,
715 &peer->local_id);
716
717 /* Dump packet if debug option is set. */
718 /* bgp_packet_dump (s); */
719 hook_call(bgp_packet_send, peer, BGP_MSG_OPEN, stream_get_endp(s), s);
720
721 /* Add packet to the peer. */
722 bgp_packet_add(peer, s);
723
724 bgp_writes_on(peer);
725 }
726
727 /*
728 * Writes NOTIFICATION message directly to a peer socket without waiting for
729 * the I/O thread.
730 *
731 * There must be exactly one stream on the peer->obuf FIFO, and the data within
732 * this stream must match the format of a BGP NOTIFICATION message.
733 * Transmission is best-effort.
734 *
735 * @requires peer->io_mtx
736 * @param peer
737 * @return 0
738 */
739 static void bgp_write_notify(struct peer *peer)
740 {
741 int ret, val;
742 uint8_t type;
743 struct stream *s;
744
745 /* There should be at least one packet. */
746 s = stream_fifo_pop(peer->obuf);
747
748 if (!s)
749 return;
750
751 assert(stream_get_endp(s) >= BGP_HEADER_SIZE);
752
753 /*
754 * socket is in nonblocking mode, if we can't deliver the NOTIFY, well,
755 * we only care about getting a clean shutdown at this point.
756 */
757 ret = write(peer->fd, STREAM_DATA(s), stream_get_endp(s));
758
759 /*
760 * only connection reset/close gets counted as TCP_fatal_error, failure
761 * to write the entire NOTIFY doesn't get different FSM treatment
762 */
763 if (ret <= 0) {
764 stream_free(s);
765 BGP_EVENT_ADD(peer, TCP_fatal_error);
766 return;
767 }
768
769 /* Disable Nagle, make NOTIFY packet go out right away */
770 val = 1;
771 (void)setsockopt(peer->fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val,
772 sizeof(val));
773
774 /* Retrieve BGP packet type. */
775 stream_set_getp(s, BGP_MARKER_SIZE + 2);
776 type = stream_getc(s);
777
778 assert(type == BGP_MSG_NOTIFY);
779
780 /* Type should be notify. */
781 atomic_fetch_add_explicit(&peer->notify_out, 1, memory_order_relaxed);
782
783 /* Double start timer. */
784 peer->v_start *= 2;
785
786 /* Overflow check. */
787 if (peer->v_start >= (60 * 2))
788 peer->v_start = (60 * 2);
789
790 /*
791 * Handle Graceful Restart case where the state changes to
792 * Connect instead of Idle
793 */
794 BGP_EVENT_ADD(peer, BGP_Stop);
795
796 stream_free(s);
797 }
798
799 /*
800 * Encapsulate an original BGP CEASE Notification into Hard Reset
801 */
802 static uint8_t *bgp_notify_encapsulate_hard_reset(uint8_t code, uint8_t subcode,
803 uint8_t *data, size_t datalen)
804 {
805 uint8_t *message = XCALLOC(MTYPE_BGP_NOTIFICATION, datalen + 2);
806
807 /* ErrCode */
808 message[0] = code;
809 /* Subcode */
810 message[1] = subcode;
811 /* Data */
812 if (datalen)
813 memcpy(message + 2, data, datalen);
814
815 return message;
816 }
817
818 /*
819 * Decapsulate an original BGP CEASE Notification from Hard Reset
820 */
821 struct bgp_notify bgp_notify_decapsulate_hard_reset(struct bgp_notify *notify)
822 {
823 struct bgp_notify bn = {};
824
825 bn.code = notify->raw_data[0];
826 bn.subcode = notify->raw_data[1];
827 bn.length = notify->length - 2;
828
829 bn.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, bn.length);
830 memcpy(bn.raw_data, notify->raw_data + 2, bn.length);
831
832 return bn;
833 }
834
835 /* Check if Graceful-Restart N-bit is exchanged */
836 bool bgp_has_graceful_restart_notification(struct peer *peer)
837 {
838 return CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV) &&
839 CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV);
840 }
841
842 /*
843 * Check if to send BGP CEASE Notification/Hard Reset?
844 */
845 bool bgp_notify_send_hard_reset(struct peer *peer, uint8_t code,
846 uint8_t subcode)
847 {
848 /* When the "N" bit has been exchanged, a Hard Reset message is used to
849 * indicate to the peer that the session is to be fully terminated.
850 */
851 if (!bgp_has_graceful_restart_notification(peer))
852 return false;
853
854 /*
855 * https://datatracker.ietf.org/doc/html/rfc8538#section-5.1
856 */
857 if (code == BGP_NOTIFY_CEASE) {
858 switch (subcode) {
859 case BGP_NOTIFY_CEASE_MAX_PREFIX:
860 case BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN:
861 case BGP_NOTIFY_CEASE_PEER_UNCONFIG:
862 case BGP_NOTIFY_CEASE_HARD_RESET:
863 case BGP_NOTIFY_CEASE_BFD_DOWN:
864 return true;
865 case BGP_NOTIFY_CEASE_ADMIN_RESET:
866 /* Provide user control:
867 * `bgp hard-adminstrative-reset`
868 */
869 if (CHECK_FLAG(peer->bgp->flags,
870 BGP_FLAG_HARD_ADMIN_RESET))
871 return true;
872 else
873 return false;
874 default:
875 break;
876 }
877 }
878
879 return false;
880 }
881
882 /*
883 * Check if received BGP CEASE Notification/Hard Reset?
884 */
885 bool bgp_notify_received_hard_reset(struct peer *peer, uint8_t code,
886 uint8_t subcode)
887 {
888 /* When the "N" bit has been exchanged, a Hard Reset message is used to
889 * indicate to the peer that the session is to be fully terminated.
890 */
891 if (!bgp_has_graceful_restart_notification(peer))
892 return false;
893
894 if (code == BGP_NOTIFY_CEASE && subcode == BGP_NOTIFY_CEASE_HARD_RESET)
895 return true;
896
897 return false;
898 }
899
900 /*
901 * Creates a BGP Notify and appends it to the peer's output queue.
902 *
903 * This function attempts to write the packet from the thread it is called
904 * from, to ensure the packet gets out ASAP.
905 *
906 * This function may be called from multiple threads. Since the function
907 * modifies I/O buffer(s) in the peer, these are locked for the duration of the
908 * call to prevent tampering from other threads.
909 *
910 * Delivery of the NOTIFICATION is attempted once and is best-effort. After
911 * return, the peer structure *must* be reset; no assumptions about session
912 * state are valid.
913 *
914 * @param peer
915 * @param code BGP error code
916 * @param sub_code BGP error subcode
917 * @param data Data portion
918 * @param datalen length of data portion
919 */
920 static void bgp_notify_send_internal(struct peer *peer, uint8_t code,
921 uint8_t sub_code, uint8_t *data,
922 size_t datalen, bool use_curr)
923 {
924 struct stream *s;
925 bool hard_reset = bgp_notify_send_hard_reset(peer, code, sub_code);
926
927 /* Lock I/O mutex to prevent other threads from pushing packets */
928 frr_mutex_lock_autounlock(&peer->io_mtx);
929 /* ============================================== */
930
931 /* Allocate new stream. */
932 s = stream_new(peer->max_packet_size);
933
934 /* Make notify packet. */
935 bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
936
937 /* Check if we should send Hard Reset Notification or not */
938 if (hard_reset) {
939 uint8_t *hard_reset_message = bgp_notify_encapsulate_hard_reset(
940 code, sub_code, data, datalen);
941
942 /* Hard Reset encapsulates another NOTIFICATION message
943 * in its data portion.
944 */
945 stream_putc(s, BGP_NOTIFY_CEASE);
946 stream_putc(s, BGP_NOTIFY_CEASE_HARD_RESET);
947 stream_write(s, hard_reset_message, datalen + 2);
948
949 XFREE(MTYPE_BGP_NOTIFICATION, hard_reset_message);
950 } else {
951 stream_putc(s, code);
952 stream_putc(s, sub_code);
953 if (data)
954 stream_write(s, data, datalen);
955 }
956
957 /* Set BGP packet length. */
958 bgp_packet_set_size(s);
959
960 /* wipe output buffer */
961 stream_fifo_clean(peer->obuf);
962
963 /*
964 * If possible, store last packet for debugging purposes. This check is
965 * in place because we are sometimes called with a doppelganger peer,
966 * who tends to have a plethora of fields nulled out.
967 *
968 * Some callers should not attempt this - the io pthread for example
969 * should not touch internals of the peer struct.
970 */
971 if (use_curr && peer->curr) {
972 size_t packetsize = stream_get_endp(peer->curr);
973 assert(packetsize <= peer->max_packet_size);
974 memcpy(peer->last_reset_cause, peer->curr->data, packetsize);
975 peer->last_reset_cause_size = packetsize;
976 }
977
978 /* For debug */
979 {
980 struct bgp_notify bgp_notify;
981 int first = 0;
982 int i;
983 char c[4];
984
985 bgp_notify.code = code;
986 bgp_notify.subcode = sub_code;
987 bgp_notify.data = NULL;
988 bgp_notify.length = datalen;
989 bgp_notify.raw_data = data;
990
991 peer->notify.code = bgp_notify.code;
992 peer->notify.subcode = bgp_notify.subcode;
993 peer->notify.length = bgp_notify.length;
994
995 if (bgp_notify.length && data) {
996 bgp_notify.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
997 bgp_notify.length * 3);
998 for (i = 0; i < bgp_notify.length; i++)
999 if (first) {
1000 snprintf(c, sizeof(c), " %02x",
1001 data[i]);
1002
1003 strlcat(bgp_notify.data, c,
1004 bgp_notify.length);
1005
1006 } else {
1007 first = 1;
1008 snprintf(c, sizeof(c), "%02x", data[i]);
1009
1010 strlcpy(bgp_notify.data, c,
1011 bgp_notify.length);
1012 }
1013 }
1014 bgp_notify_print(peer, &bgp_notify, "sending", hard_reset);
1015
1016 if (bgp_notify.data) {
1017 if (data) {
1018 XFREE(MTYPE_BGP_NOTIFICATION,
1019 peer->notify.data);
1020 peer->notify.data = XCALLOC(
1021 MTYPE_BGP_NOTIFICATION, datalen);
1022 memcpy(peer->notify.data, data, datalen);
1023 }
1024
1025 XFREE(MTYPE_BGP_NOTIFICATION, bgp_notify.data);
1026 bgp_notify.length = 0;
1027 }
1028 }
1029
1030 /* peer reset cause */
1031 if (code == BGP_NOTIFY_CEASE) {
1032 if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
1033 peer->last_reset = PEER_DOWN_USER_RESET;
1034 else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN) {
1035 if (CHECK_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN))
1036 peer->last_reset = PEER_DOWN_RTT_SHUTDOWN;
1037 else
1038 peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
1039 } else
1040 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1041 } else
1042 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1043
1044 /* Add packet to peer's output queue */
1045 stream_fifo_push(peer->obuf, s);
1046
1047 bgp_peer_gr_flags_update(peer);
1048 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
1049 peer->bgp->peer);
1050
1051 bgp_write_notify(peer);
1052 }
1053
/*
 * Send a NOTIFICATION that carries no data portion.
 *
 * Thin wrapper around bgp_notify_send_internal(): NULL/0 is passed for the
 * data and saving of the current packet (use_curr) is requested.
 *
 * @param peer
 * @param code BGP error code
 * @param sub_code BGP error subcode
 */
void bgp_notify_send(struct peer *peer, uint8_t code, uint8_t sub_code)
{
	const bool save_current_packet = true;

	bgp_notify_send_internal(peer, code, sub_code, NULL, 0,
				 save_current_packet);
}
1068
/*
 * Send a NOTIFICATION with a data portion. Meant to be called from the main
 * pthread, where accessing the peer object is fine, so saving of the current
 * packet (use_curr) is requested.
 */
void bgp_notify_send_with_data(struct peer *peer, uint8_t code,
			       uint8_t sub_code, uint8_t *data, size_t datalen)
{
	const bool save_current_packet = true;

	bgp_notify_send_internal(peer, code, sub_code, data, datalen,
				 save_current_packet);
}
1077
/*
 * Variant for the io pthread: sends a notification like
 * bgp_notify_send_with_data(), but with use_curr disabled so that the
 * peer's current packet is not copied into last_reset_cause.
 */
void bgp_notify_io_invalid(struct peer *peer, uint8_t code, uint8_t sub_code,
			   uint8_t *data, size_t datalen)
{
	/* Keep away from peer->curr / last_reset_cause. */
	const bool save_current_packet = false;

	bgp_notify_send_internal(peer, code, sub_code, data, datalen,
				 save_current_packet);
}
1088
1089 /*
1090 * Creates BGP Route Refresh packet and appends it to the peer's output queue.
1091 *
1092 * @param peer
1093 * @param afi Address Family Identifier
1094 * @param safi Subsequent Address Family Identifier
1095 * @param orf_type Outbound Route Filtering type
1096 * @param when_to_refresh Whether to refresh immediately or defer
1097 * @param remove Whether to remove ORF for specified AFI/SAFI
1098 */
1099 void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
1100 uint8_t orf_type, uint8_t when_to_refresh,
1101 int remove, uint8_t subtype)
1102 {
1103 struct stream *s;
1104 struct bgp_filter *filter;
1105 int orf_refresh = 0;
1106 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1107 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
1108
1109 if (DISABLE_BGP_ANNOUNCE)
1110 return;
1111
1112 filter = &peer->filter[afi][safi];
1113
1114 /* Convert AFI, SAFI to values for packet. */
1115 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1116
1117 s = stream_new(peer->max_packet_size);
1118
1119 /* Make BGP update packet. */
1120 if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
1121 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_NEW);
1122 else
1123 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_OLD);
1124
1125 /* Encode Route Refresh message. */
1126 stream_putw(s, pkt_afi);
1127 if (subtype)
1128 stream_putc(s, subtype);
1129 else
1130 stream_putc(s, 0);
1131 stream_putc(s, pkt_safi);
1132
1133 if (orf_type == ORF_TYPE_PREFIX || orf_type == ORF_TYPE_PREFIX_OLD)
1134 if (remove || filter->plist[FILTER_IN].plist) {
1135 uint16_t orf_len;
1136 unsigned long orfp;
1137
1138 orf_refresh = 1;
1139 stream_putc(s, when_to_refresh);
1140 stream_putc(s, orf_type);
1141 orfp = stream_get_endp(s);
1142 stream_putw(s, 0);
1143
1144 if (remove) {
1145 UNSET_FLAG(peer->af_sflags[afi][safi],
1146 PEER_STATUS_ORF_PREFIX_SEND);
1147 stream_putc(s, ORF_COMMON_PART_REMOVE_ALL);
1148 if (bgp_debug_neighbor_events(peer))
1149 zlog_debug(
1150 "%pBP sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %s/%s",
1151 peer, orf_type,
1152 (when_to_refresh ==
1153 REFRESH_DEFER
1154 ? "defer"
1155 : "immediate"),
1156 iana_afi2str(pkt_afi),
1157 iana_safi2str(pkt_safi));
1158 } else {
1159 SET_FLAG(peer->af_sflags[afi][safi],
1160 PEER_STATUS_ORF_PREFIX_SEND);
1161 prefix_bgp_orf_entry(
1162 s, filter->plist[FILTER_IN].plist,
1163 ORF_COMMON_PART_ADD,
1164 ORF_COMMON_PART_PERMIT,
1165 ORF_COMMON_PART_DENY);
1166 if (bgp_debug_neighbor_events(peer))
1167 zlog_debug(
1168 "%pBP sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %s/%s",
1169 peer, orf_type,
1170 (when_to_refresh ==
1171 REFRESH_DEFER
1172 ? "defer"
1173 : "immediate"),
1174 iana_afi2str(pkt_afi),
1175 iana_safi2str(pkt_safi));
1176 }
1177
1178 /* Total ORF Entry Len. */
1179 orf_len = stream_get_endp(s) - orfp - 2;
1180 stream_putw_at(s, orfp, orf_len);
1181 }
1182
1183 /* Set packet size. */
1184 bgp_packet_set_size(s);
1185
1186 if (bgp_debug_neighbor_events(peer)) {
1187 if (!orf_refresh)
1188 zlog_debug(
1189 "%pBP sending REFRESH_REQ for afi/safi: %s/%s",
1190 peer, iana_afi2str(pkt_afi),
1191 iana_safi2str(pkt_safi));
1192 }
1193
1194 /* Add packet to the peer. */
1195 bgp_packet_add(peer, s);
1196
1197 bgp_writes_on(peer);
1198 }
1199
1200 /*
1201 * Create a BGP Capability packet and append it to the peer's output queue.
1202 *
1203 * @param peer
1204 * @param afi Address Family Identifier
1205 * @param safi Subsequent Address Family Identifier
1206 * @param capability_code BGP Capability Code
1207 * @param action Set or Remove capability
1208 */
1209 void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
1210 int capability_code, int action)
1211 {
1212 struct stream *s;
1213 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1214 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
1215
1216 /* Convert AFI, SAFI to values for packet. */
1217 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1218
1219 s = stream_new(peer->max_packet_size);
1220
1221 /* Make BGP update packet. */
1222 bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);
1223
1224 /* Encode MP_EXT capability. */
1225 if (capability_code == CAPABILITY_CODE_MP) {
1226 stream_putc(s, action);
1227 stream_putc(s, CAPABILITY_CODE_MP);
1228 stream_putc(s, CAPABILITY_CODE_MP_LEN);
1229 stream_putw(s, pkt_afi);
1230 stream_putc(s, 0);
1231 stream_putc(s, pkt_safi);
1232
1233 if (bgp_debug_neighbor_events(peer))
1234 zlog_debug(
1235 "%pBP sending CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
1236 peer,
1237 action == CAPABILITY_ACTION_SET ? "Advertising"
1238 : "Removing",
1239 iana_afi2str(pkt_afi), iana_safi2str(pkt_safi));
1240 }
1241
1242 /* Set packet size. */
1243 bgp_packet_set_size(s);
1244
1245 /* Add packet to the peer. */
1246 bgp_packet_add(peer, s);
1247
1248 bgp_writes_on(peer);
1249 }
1250
1251 /* RFC1771 6.8 Connection collision detection. */
1252 static int bgp_collision_detect(struct peer *new, struct in_addr remote_id)
1253 {
1254 struct peer *peer;
1255
1256 /*
1257 * Upon receipt of an OPEN message, the local system must examine
1258 * all of its connections that are in the OpenConfirm state. A BGP
1259 * speaker may also examine connections in an OpenSent state if it
1260 * knows the BGP Identifier of the peer by means outside of the
1261 * protocol. If among these connections there is a connection to a
1262 * remote BGP speaker whose BGP Identifier equals the one in the
1263 * OPEN message, then the local system performs the following
1264 * collision resolution procedure:
1265 */
1266 peer = new->doppelganger;
1267 if (peer == NULL)
1268 return 0;
1269
1270 /*
1271 * Do not accept the new connection in Established or Clearing
1272 * states. Note that a peer GR is handled by closing the existing
1273 * connection upon receipt of new one.
1274 */
1275 if (peer_established(peer) || peer->status == Clearing) {
1276 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1277 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1278 return -1;
1279 }
1280
1281 if ((peer->status != OpenConfirm) && (peer->status != OpenSent))
1282 return 0;
1283
1284 /*
1285 * 1. The BGP Identifier of the local system is
1286 * compared to the BGP Identifier of the remote
1287 * system (as specified in the OPEN message).
1288 *
1289 * If the BGP Identifiers of the peers
1290 * involved in the connection collision
1291 * are identical, then the connection
1292 * initiated by the BGP speaker with the
1293 * larger AS number is preserved.
1294 */
1295 if (ntohl(peer->local_id.s_addr) < ntohl(remote_id.s_addr)
1296 || (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1297 && peer->local_as < peer->as))
1298 if (!CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1299 /*
1300 * 2. If the value of the local BGP
1301 * Identifier is less than the remote one,
1302 * the local system closes BGP connection
1303 * that already exists (the one that is
1304 * already in the OpenConfirm state),
1305 * and accepts BGP connection initiated by
1306 * the remote system.
1307 */
1308 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1309 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1310 return 1;
1311 } else {
1312 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1313 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1314 return -1;
1315 }
1316 else {
1317 if (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1318 && peer->local_as == peer->as)
1319 flog_err(EC_BGP_ROUTER_ID_SAME,
1320 "Peer's router-id %pI4 is the same as ours",
1321 &remote_id);
1322
1323 /*
1324 * 3. Otherwise, the local system closes newly
1325 * created BGP connection (the one associated with the
1326 * newly received OPEN message), and continues to use
1327 * the existing one (the one that is already in the
1328 * OpenConfirm state).
1329 */
1330 if (CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1331 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1332 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1333 return 1;
1334 } else {
1335 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1336 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1337 return -1;
1338 }
1339 }
1340 }
1341
1342 /* Packet processing routines ---------------------------------------------- */
1343 /*
1344 * This is a family of functions designed to be called from
1345 * bgp_process_packet(). These functions all share similar behavior and should
1346 * adhere to the following invariants and restrictions:
1347 *
1348 * Return codes
1349 * ------------
1350 * The return code of any one of those functions should be one of the FSM event
1351 * codes specified in bgpd.h. If a NOTIFY was sent, this event code MUST be
1352 * BGP_Stop. Otherwise, the code SHOULD correspond to the function's expected
1353 * packet type. For example, bgp_open_receive() should return BGP_Stop upon
1354 * error and Receive_OPEN_message otherwise.
1355 *
1356 * If no action is necessary, the correct return code is BGP_PACKET_NOOP as
1357 * defined below.
1358 *
1359 * Side effects
1360 * ------------
1361 * - May send NOTIFY messages
1362 * - May not modify peer->status
1363 * - May not call bgp_event_update()
1364 */
1365
1366 #define BGP_PACKET_NOOP 0
1367
1368 /**
1369 * Process BGP OPEN message for peer.
1370 *
1371 * If any errors are encountered in the OPEN message, immediately sends NOTIFY
1372 * and returns BGP_Stop.
1373 *
1374 * @param peer
1375 * @param size size of the packet
1376 * @return as in summary
1377 */
1378 static int bgp_open_receive(struct peer *peer, bgp_size_t size)
1379 {
1380 int ret;
1381 uint8_t version;
1382 uint16_t optlen;
1383 uint16_t holdtime;
1384 uint16_t send_holdtime;
1385 as_t remote_as;
1386 as_t as4 = 0, as4_be;
1387 struct in_addr remote_id;
1388 int mp_capability;
1389 uint8_t notify_data_remote_as[2];
1390 uint8_t notify_data_remote_as4[4];
1391 uint8_t notify_data_remote_id[4];
1392 uint16_t *holdtime_ptr;
1393
1394 /* Parse open packet. */
1395 version = stream_getc(peer->curr);
1396 memcpy(notify_data_remote_as, stream_pnt(peer->curr), 2);
1397 remote_as = stream_getw(peer->curr);
1398 holdtime_ptr = (uint16_t *)stream_pnt(peer->curr);
1399 holdtime = stream_getw(peer->curr);
1400 memcpy(notify_data_remote_id, stream_pnt(peer->curr), 4);
1401 remote_id.s_addr = stream_get_ipv4(peer->curr);
1402
1403 /* BEGIN to read the capability here, but dont do it yet */
1404 mp_capability = 0;
1405 optlen = stream_getc(peer->curr);
1406
1407 /* Extended Optional Parameters Length for BGP OPEN Message */
1408 if (optlen == BGP_OPEN_NON_EXT_OPT_LEN
1409 || CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
1410 uint8_t opttype;
1411
1412 if (STREAM_READABLE(peer->curr) < 1) {
1413 flog_err(
1414 EC_BGP_PKT_OPEN,
1415 "%s: stream does not have enough bytes for extended optional parameters",
1416 peer->host);
1417 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1418 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1419 return BGP_Stop;
1420 }
1421
1422 opttype = stream_getc(peer->curr);
1423 if (opttype == BGP_OPEN_NON_EXT_OPT_TYPE_EXTENDED_LENGTH) {
1424 if (STREAM_READABLE(peer->curr) < 2) {
1425 flog_err(
1426 EC_BGP_PKT_OPEN,
1427 "%s: stream does not have enough bytes to read the extended optional parameters optlen",
1428 peer->host);
1429 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1430 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1431 return BGP_Stop;
1432 }
1433 optlen = stream_getw(peer->curr);
1434 SET_FLAG(peer->sflags,
1435 PEER_STATUS_EXT_OPT_PARAMS_LENGTH);
1436 }
1437 }
1438
1439 /* Receive OPEN message log */
1440 if (bgp_debug_neighbor_events(peer))
1441 zlog_debug(
1442 "%s rcv OPEN%s, version %d, remote-as (in open) %u, holdtime %d, id %pI4",
1443 peer->host,
1444 CHECK_FLAG(peer->sflags,
1445 PEER_STATUS_EXT_OPT_PARAMS_LENGTH)
1446 ? " (Extended)"
1447 : "",
1448 version, remote_as, holdtime, &remote_id);
1449
1450 if (optlen != 0) {
1451 /* If not enough bytes, it is an error. */
1452 if (STREAM_READABLE(peer->curr) < optlen) {
1453 flog_err(EC_BGP_PKT_OPEN,
1454 "%s: stream has not enough bytes (%u)",
1455 peer->host, optlen);
1456 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1457 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1458 return BGP_Stop;
1459 }
1460
1461 /* We need the as4 capability value *right now* because
1462 * if it is there, we have not got the remote_as yet, and
1463 * without
1464 * that we do not know which peer is connecting to us now.
1465 */
1466 as4 = peek_for_as4_capability(peer, optlen);
1467 }
1468
1469 as4_be = htonl(as4);
1470 memcpy(notify_data_remote_as4, &as4_be, 4);
1471
1472 /* Just in case we have a silly peer who sends AS4 capability set to 0
1473 */
1474 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) {
1475 flog_err(EC_BGP_PKT_OPEN,
1476 "%s bad OPEN, got AS4 capability, but AS4 set to 0",
1477 peer->host);
1478 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1479 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1480 notify_data_remote_as4, 4);
1481 return BGP_Stop;
1482 }
1483
1484 /* Codification of AS 0 Processing */
1485 if (remote_as == BGP_AS_ZERO) {
1486 flog_err(EC_BGP_PKT_OPEN, "%s bad OPEN, got AS set to 0",
1487 peer->host);
1488 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1489 BGP_NOTIFY_OPEN_BAD_PEER_AS);
1490 return BGP_Stop;
1491 }
1492
1493 if (remote_as == BGP_AS_TRANS) {
1494 /* Take the AS4 from the capability. We must have received the
1495 * capability now! Otherwise we have a asn16 peer who uses
1496 * BGP_AS_TRANS, for some unknown reason.
1497 */
1498 if (as4 == BGP_AS_TRANS) {
1499 flog_err(
1500 EC_BGP_PKT_OPEN,
1501 "%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed",
1502 peer->host);
1503 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1504 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1505 notify_data_remote_as4, 4);
1506 return BGP_Stop;
1507 }
1508
1509 if (!as4 && BGP_DEBUG(as4, AS4))
1510 zlog_debug(
1511 "%s [AS4] OPEN remote_as is AS_TRANS, but no AS4. Odd, but proceeding.",
1512 peer->host);
1513 else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4))
1514 zlog_debug(
1515 "%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits in 2-bytes, very odd peer.",
1516 peer->host, as4);
1517 if (as4)
1518 remote_as = as4;
1519 } else {
1520 /* We may have a partner with AS4 who has an asno < BGP_AS_MAX
1521 */
1522 /* If we have got the capability, peer->as4cap must match
1523 * remote_as */
1524 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV)
1525 && as4 != remote_as) {
1526 /* raise error, log this, close session */
1527 flog_err(
1528 EC_BGP_PKT_OPEN,
1529 "%s bad OPEN, got AS4 capability, but remote_as %u mismatch with 16bit 'myasn' %u in open",
1530 peer->host, as4, remote_as);
1531 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1532 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1533 notify_data_remote_as4, 4);
1534 return BGP_Stop;
1535 }
1536 }
1537
1538 /* rfc6286:
1539 * If the BGP Identifier field of the OPEN message
1540 * is zero, or if it is the same as the BGP Identifier
1541 * of the local BGP speaker and the message is from an
1542 * internal peer, then the Error Subcode is set to
1543 * "Bad BGP Identifier".
1544 */
1545 if (remote_id.s_addr == INADDR_ANY
1546 || (peer->sort == BGP_PEER_IBGP
1547 && ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr))) {
1548 if (bgp_debug_neighbor_events(peer))
1549 zlog_debug("%s bad OPEN, wrong router identifier %pI4",
1550 peer->host, &remote_id);
1551 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1552 BGP_NOTIFY_OPEN_BAD_BGP_IDENT,
1553 notify_data_remote_id, 4);
1554 return BGP_Stop;
1555 }
1556
1557 /* Peer BGP version check. */
1558 if (version != BGP_VERSION_4) {
1559 uint16_t maxver = htons(BGP_VERSION_4);
1560 /* XXX this reply may not be correct if version < 4 XXX */
1561 if (bgp_debug_neighbor_events(peer))
1562 zlog_debug(
1563 "%s bad protocol version, remote requested %d, local request %d",
1564 peer->host, version, BGP_VERSION_4);
1565 /* Data must be in network byte order here */
1566 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1567 BGP_NOTIFY_OPEN_UNSUP_VERSION,
1568 (uint8_t *)&maxver, 2);
1569 return BGP_Stop;
1570 }
1571
1572 /* Check neighbor as number. */
1573 if (peer->as_type == AS_UNSPECIFIED) {
1574 if (bgp_debug_neighbor_events(peer))
1575 zlog_debug(
1576 "%s bad OPEN, remote AS is unspecified currently",
1577 peer->host);
1578 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1579 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1580 notify_data_remote_as, 2);
1581 return BGP_Stop;
1582 } else if (peer->as_type == AS_INTERNAL) {
1583 if (remote_as != peer->bgp->as) {
1584 if (bgp_debug_neighbor_events(peer))
1585 zlog_debug(
1586 "%s bad OPEN, remote AS is %u, internal specified",
1587 peer->host, remote_as);
1588 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1589 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1590 notify_data_remote_as, 2);
1591 return BGP_Stop;
1592 }
1593 peer->as = peer->local_as;
1594 } else if (peer->as_type == AS_EXTERNAL) {
1595 if (remote_as == peer->bgp->as) {
1596 if (bgp_debug_neighbor_events(peer))
1597 zlog_debug(
1598 "%s bad OPEN, remote AS is %u, external specified",
1599 peer->host, remote_as);
1600 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1601 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1602 notify_data_remote_as, 2);
1603 return BGP_Stop;
1604 }
1605 peer->as = remote_as;
1606 } else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) {
1607 if (bgp_debug_neighbor_events(peer))
1608 zlog_debug("%s bad OPEN, remote AS is %u, expected %u",
1609 peer->host, remote_as, peer->as);
1610 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1611 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1612 notify_data_remote_as, 2);
1613 return BGP_Stop;
1614 }
1615
1616 /*
1617 * When collision is detected and this peer is closed.
1618 * Return immediately.
1619 */
1620 ret = bgp_collision_detect(peer, remote_id);
1621 if (ret < 0)
1622 return BGP_Stop;
1623
1624 /* Get sockname. */
1625 if (bgp_getsockname(peer) < 0) {
1626 flog_err_sys(EC_LIB_SOCKET,
1627 "%s: bgp_getsockname() failed for peer: %s",
1628 __func__, peer->host);
1629 return BGP_Stop;
1630 }
1631
1632 /* Set remote router-id */
1633 peer->remote_id = remote_id;
1634
1635 /* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST
1636 calculate the value of the Hold Timer by using the smaller of its
1637 configured Hold Time and the Hold Time received in the OPEN message.
1638 The Hold Time MUST be either zero or at least three seconds. An
1639 implementation may reject connections on the basis of the Hold Time.
1640 */
1641
1642 if (holdtime < 3 && holdtime != 0) {
1643 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1644 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
1645 (uint8_t *)holdtime_ptr, 2);
1646 return BGP_Stop;
1647 }
1648
1649 /* Send notification message when Hold Time received in the OPEN message
1650 * is smaller than configured minimum Hold Time. */
1651 if (holdtime < peer->bgp->default_min_holdtime
1652 && peer->bgp->default_min_holdtime != 0) {
1653 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1654 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
1655 (uint8_t *)holdtime_ptr, 2);
1656 return BGP_Stop;
1657 }
1658
1659 /* From the rfc: A reasonable maximum time between KEEPALIVE messages
1660 would be one third of the Hold Time interval. KEEPALIVE messages
1661 MUST NOT be sent more frequently than one per second. An
1662 implementation MAY adjust the rate at which it sends KEEPALIVE
1663 messages as a function of the Hold Time interval. */
1664
1665 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
1666 send_holdtime = peer->holdtime;
1667 else
1668 send_holdtime = peer->bgp->default_holdtime;
1669
1670 if (holdtime < send_holdtime)
1671 peer->v_holdtime = holdtime;
1672 else
1673 peer->v_holdtime = send_holdtime;
1674
1675 /* Set effective keepalive to 1/3 the effective holdtime.
1676 * Use configured keeplive when < effective keepalive.
1677 */
1678 peer->v_keepalive = peer->v_holdtime / 3;
1679 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) {
1680 if (peer->keepalive && peer->keepalive < peer->v_keepalive)
1681 peer->v_keepalive = peer->keepalive;
1682 } else {
1683 if (peer->bgp->default_keepalive
1684 && peer->bgp->default_keepalive < peer->v_keepalive)
1685 peer->v_keepalive = peer->bgp->default_keepalive;
1686 }
1687
1688 /* Open option part parse. */
1689 if (optlen != 0) {
1690 if (bgp_open_option_parse(peer, optlen, &mp_capability) < 0)
1691 return BGP_Stop;
1692 } else {
1693 if (bgp_debug_neighbor_events(peer))
1694 zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0",
1695 peer->host);
1696 }
1697
1698 /*
1699 * Assume that the peer supports the locally configured set of
1700 * AFI/SAFIs if the peer did not send us any Mulitiprotocol
1701 * capabilities, or if 'override-capability' is configured.
1702 */
1703 if (!mp_capability
1704 || CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) {
1705 peer->afc_nego[AFI_IP][SAFI_UNICAST] =
1706 peer->afc[AFI_IP][SAFI_UNICAST];
1707 peer->afc_nego[AFI_IP][SAFI_MULTICAST] =
1708 peer->afc[AFI_IP][SAFI_MULTICAST];
1709 peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] =
1710 peer->afc[AFI_IP][SAFI_LABELED_UNICAST];
1711 peer->afc_nego[AFI_IP][SAFI_FLOWSPEC] =
1712 peer->afc[AFI_IP][SAFI_FLOWSPEC];
1713 peer->afc_nego[AFI_IP6][SAFI_UNICAST] =
1714 peer->afc[AFI_IP6][SAFI_UNICAST];
1715 peer->afc_nego[AFI_IP6][SAFI_MULTICAST] =
1716 peer->afc[AFI_IP6][SAFI_MULTICAST];
1717 peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] =
1718 peer->afc[AFI_IP6][SAFI_LABELED_UNICAST];
1719 peer->afc_nego[AFI_L2VPN][SAFI_EVPN] =
1720 peer->afc[AFI_L2VPN][SAFI_EVPN];
1721 peer->afc_nego[AFI_IP6][SAFI_FLOWSPEC] =
1722 peer->afc[AFI_IP6][SAFI_FLOWSPEC];
1723 }
1724
1725 /* Verify valid local address present based on negotiated
1726 * address-families. */
1727 if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
1728 || peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST]
1729 || peer->afc_nego[AFI_IP][SAFI_MULTICAST]
1730 || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
1731 || peer->afc_nego[AFI_IP][SAFI_ENCAP]) {
1732 if (peer->nexthop.v4.s_addr == INADDR_ANY) {
1733 #if defined(HAVE_CUMULUS)
1734 zlog_warn("%s: No local IPv4 addr, BGP routing may not work",
1735 peer->host);
1736 #endif
1737 }
1738 }
1739 if (peer->afc_nego[AFI_IP6][SAFI_UNICAST]
1740 || peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST]
1741 || peer->afc_nego[AFI_IP6][SAFI_MULTICAST]
1742 || peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN]
1743 || peer->afc_nego[AFI_IP6][SAFI_ENCAP]) {
1744 if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global)) {
1745 #if defined(HAVE_CUMULUS)
1746 zlog_warn("%s: No local IPv6 address, BGP routing may not work",
1747 peer->host);
1748 #endif
1749 }
1750 }
1751 peer->rtt = sockopt_tcp_rtt(peer->fd);
1752
1753 return Receive_OPEN_message;
1754 }
1755
1756 /**
1757 * Process BGP KEEPALIVE message for peer.
1758 *
1759 * @param peer
1760 * @param size size of the packet
1761 * @return as in summary
1762 */
1763 static int bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
1764 {
1765 if (bgp_debug_keepalive(peer))
1766 zlog_debug("%s KEEPALIVE rcvd", peer->host);
1767
1768 bgp_update_implicit_eors(peer);
1769
1770 peer->rtt = sockopt_tcp_rtt(peer->fd);
1771
1772 /* If the peer's RTT is higher than expected, shutdown
1773 * the peer automatically.
1774 */
1775 if (!CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN))
1776 return Receive_KEEPALIVE_message;
1777
1778 if (peer->rtt > peer->rtt_expected) {
1779 peer->rtt_keepalive_rcv++;
1780
1781 if (peer->rtt_keepalive_rcv > peer->rtt_keepalive_conf) {
1782 char rtt_shutdown_reason[BUFSIZ] = {};
1783
1784 snprintfrr(
1785 rtt_shutdown_reason,
1786 sizeof(rtt_shutdown_reason),
1787 "shutdown due to high round-trip-time (%dms > %dms, hit %u times)",
1788 peer->rtt, peer->rtt_expected,
1789 peer->rtt_keepalive_rcv);
1790 zlog_warn("%s %s", peer->host, rtt_shutdown_reason);
1791 SET_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN);
1792 peer_tx_shutdown_message_set(peer, rtt_shutdown_reason);
1793 peer_flag_set(peer, PEER_FLAG_SHUTDOWN);
1794 }
1795 } else {
1796 if (peer->rtt_keepalive_rcv)
1797 peer->rtt_keepalive_rcv--;
1798 }
1799
1800 return Receive_KEEPALIVE_message;
1801 }
1802
1803 static void bgp_refresh_stalepath_timer_expire(struct thread *thread)
1804 {
1805 struct peer_af *paf;
1806
1807 paf = THREAD_ARG(thread);
1808
1809 afi_t afi = paf->afi;
1810 safi_t safi = paf->safi;
1811 struct peer *peer = paf->peer;
1812
1813 peer->t_refresh_stalepath = NULL;
1814
1815 if (peer->nsf[afi][safi])
1816 bgp_clear_stale_route(peer, afi, safi);
1817
1818 if (bgp_debug_neighbor_events(peer))
1819 zlog_debug(
1820 "%pBP route-refresh (BoRR) timer expired for afi/safi: %d/%d",
1821 peer, afi, safi);
1822
1823 bgp_timer_set(peer);
1824 }
1825
1826 /**
1827 * Process BGP UPDATE message for peer.
1828 *
1829 * Parses UPDATE and creates attribute object.
1830 *
1831 * @param peer
1832 * @param size size of the packet
1833 * @return as in summary
1834 */
1835 static int bgp_update_receive(struct peer *peer, bgp_size_t size)
1836 {
1837 int ret, nlri_ret;
1838 uint8_t *end;
1839 struct stream *s;
1840 struct attr attr;
1841 bgp_size_t attribute_len;
1842 bgp_size_t update_len;
1843 bgp_size_t withdraw_len;
1844 bool restart = false;
1845
1846 enum NLRI_TYPES {
1847 NLRI_UPDATE,
1848 NLRI_WITHDRAW,
1849 NLRI_MP_UPDATE,
1850 NLRI_MP_WITHDRAW,
1851 NLRI_TYPE_MAX
1852 };
1853 struct bgp_nlri nlris[NLRI_TYPE_MAX];
1854
1855 /* Status must be Established. */
1856 if (!peer_established(peer)) {
1857 flog_err(EC_BGP_INVALID_STATUS,
1858 "%s [FSM] Update packet received under status %s",
1859 peer->host,
1860 lookup_msg(bgp_status_msg, peer->status, NULL));
1861 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
1862 bgp_fsm_error_subcode(peer->status));
1863 return BGP_Stop;
1864 }
1865
1866 /* Set initial values. */
1867 memset(&attr, 0, sizeof(attr));
1868 attr.label_index = BGP_INVALID_LABEL_INDEX;
1869 attr.label = MPLS_INVALID_LABEL;
1870 memset(&nlris, 0, sizeof(nlris));
1871 memset(peer->rcvd_attr_str, 0, BUFSIZ);
1872 peer->rcvd_attr_printed = 0;
1873
1874 s = peer->curr;
1875 end = stream_pnt(s) + size;
1876
1877 /* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute
1878 Length is too large (i.e., if Unfeasible Routes Length + Total
1879 Attribute Length + 23 exceeds the message Length), then the Error
1880 Subcode is set to Malformed Attribute List. */
1881 if (stream_pnt(s) + 2 > end) {
1882 flog_err(EC_BGP_UPDATE_RCV,
1883 "%s [Error] Update packet error (packet length is short for unfeasible length)",
1884 peer->host);
1885 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1886 BGP_NOTIFY_UPDATE_MAL_ATTR);
1887 return BGP_Stop;
1888 }
1889
1890 /* Unfeasible Route Length. */
1891 withdraw_len = stream_getw(s);
1892
1893 /* Unfeasible Route Length check. */
1894 if (stream_pnt(s) + withdraw_len > end) {
1895 flog_err(EC_BGP_UPDATE_RCV,
1896 "%s [Error] Update packet error (packet unfeasible length overflow %d)",
1897 peer->host, withdraw_len);
1898 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1899 BGP_NOTIFY_UPDATE_MAL_ATTR);
1900 return BGP_Stop;
1901 }
1902
1903 /* Unfeasible Route packet format check. */
1904 if (withdraw_len > 0) {
1905 nlris[NLRI_WITHDRAW].afi = AFI_IP;
1906 nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST;
1907 nlris[NLRI_WITHDRAW].nlri = stream_pnt(s);
1908 nlris[NLRI_WITHDRAW].length = withdraw_len;
1909 stream_forward_getp(s, withdraw_len);
1910 }
1911
1912 /* Attribute total length check. */
1913 if (stream_pnt(s) + 2 > end) {
1914 flog_warn(
1915 EC_BGP_UPDATE_PACKET_SHORT,
1916 "%s [Error] Packet Error (update packet is short for attribute length)",
1917 peer->host);
1918 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1919 BGP_NOTIFY_UPDATE_MAL_ATTR);
1920 return BGP_Stop;
1921 }
1922
1923 /* Fetch attribute total length. */
1924 attribute_len = stream_getw(s);
1925
1926 /* Attribute length check. */
1927 if (stream_pnt(s) + attribute_len > end) {
1928 flog_warn(
1929 EC_BGP_UPDATE_PACKET_LONG,
1930 "%s [Error] Packet Error (update packet attribute length overflow %d)",
1931 peer->host, attribute_len);
1932 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1933 BGP_NOTIFY_UPDATE_MAL_ATTR);
1934 return BGP_Stop;
1935 }
1936
1937 /* Certain attribute parsing errors should not be considered bad enough
1938 * to reset the session for, most particularly any partial/optional
1939 * attributes that have 'tunneled' over speakers that don't understand
1940 * them. Instead we withdraw only the prefix concerned.
1941 *
1942 * Complicates the flow a little though..
1943 */
1944 enum bgp_attr_parse_ret attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
1945 /* This define morphs the update case into a withdraw when lower levels
1946 * have signalled an error condition where this is best.
1947 */
1948 #define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
1949
1950 /* Parse attribute when it exists. */
1951 if (attribute_len) {
1952 attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len,
1953 &nlris[NLRI_MP_UPDATE],
1954 &nlris[NLRI_MP_WITHDRAW]);
1955 if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) {
1956 bgp_attr_unintern_sub(&attr);
1957 return BGP_Stop;
1958 }
1959 }
1960
1961 /* Logging the attribute. */
1962 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
1963 || BGP_DEBUG(update, UPDATE_IN)
1964 || BGP_DEBUG(update, UPDATE_PREFIX)) {
1965 ret = bgp_dump_attr(&attr, peer->rcvd_attr_str,
1966 sizeof(peer->rcvd_attr_str));
1967
1968 peer->stat_upd_7606++;
1969
1970 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
1971 flog_err(
1972 EC_BGP_UPDATE_RCV,
1973 "%pBP rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
1974 peer);
1975
1976 if (ret && bgp_debug_update(peer, NULL, NULL, 1)) {
1977 zlog_debug("%pBP rcvd UPDATE w/ attr: %s", peer,
1978 peer->rcvd_attr_str);
1979 peer->rcvd_attr_printed = 1;
1980 }
1981 }
1982
1983 /* Network Layer Reachability Information. */
1984 update_len = end - stream_pnt(s);
1985
1986 if (update_len) {
1987 /* Set NLRI portion to structure. */
1988 nlris[NLRI_UPDATE].afi = AFI_IP;
1989 nlris[NLRI_UPDATE].safi = SAFI_UNICAST;
1990 nlris[NLRI_UPDATE].nlri = stream_pnt(s);
1991 nlris[NLRI_UPDATE].length = update_len;
1992 stream_forward_getp(s, update_len);
1993
1994 if (CHECK_FLAG(attr.flag, ATTR_FLAG_BIT(BGP_ATTR_MP_REACH_NLRI))) {
1995 /*
1996 * We skipped nexthop attribute validation earlier so
1997 * validate the nexthop now.
1998 */
1999 if (bgp_attr_nexthop_valid(peer, &attr) < 0) {
2000 bgp_attr_unintern_sub(&attr);
2001 return BGP_Stop;
2002 }
2003 }
2004 }
2005
2006 if (BGP_DEBUG(update, UPDATE_IN))
2007 zlog_debug("%pBP rcvd UPDATE wlen %d attrlen %d alen %d", peer,
2008 withdraw_len, attribute_len, update_len);
2009
2010 /* Parse any given NLRIs */
2011 for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) {
2012 if (!nlris[i].nlri)
2013 continue;
2014
2015 /* NLRI is processed iff the peer if configured for the specific
2016 * afi/safi */
2017 if (!peer->afc[nlris[i].afi][nlris[i].safi]) {
2018 zlog_info(
2019 "%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u",
2020 peer->host, nlris[i].afi, nlris[i].safi);
2021 continue;
2022 }
2023
2024 /* EoR handled later */
2025 if (nlris[i].length == 0)
2026 continue;
2027
2028 switch (i) {
2029 case NLRI_UPDATE:
2030 case NLRI_MP_UPDATE:
2031 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
2032 &nlris[i], 0);
2033 break;
2034 case NLRI_WITHDRAW:
2035 case NLRI_MP_WITHDRAW:
2036 nlri_ret = bgp_nlri_parse(peer, &attr, &nlris[i], 1);
2037 break;
2038 default:
2039 nlri_ret = BGP_NLRI_PARSE_ERROR;
2040 }
2041
2042 if (nlri_ret < BGP_NLRI_PARSE_OK
2043 && nlri_ret != BGP_NLRI_PARSE_ERROR_PREFIX_OVERFLOW) {
2044 flog_err(EC_BGP_UPDATE_RCV,
2045 "%s [Error] Error parsing NLRI", peer->host);
2046 if (peer_established(peer))
2047 bgp_notify_send(
2048 peer, BGP_NOTIFY_UPDATE_ERR,
2049 i <= NLRI_WITHDRAW
2050 ? BGP_NOTIFY_UPDATE_INVAL_NETWORK
2051 : BGP_NOTIFY_UPDATE_OPT_ATTR_ERR);
2052 bgp_attr_unintern_sub(&attr);
2053 return BGP_Stop;
2054 }
2055 }
2056
2057 /* EoR checks
2058 *
2059 * Non-MP IPv4/Unicast EoR is a completely empty UPDATE
2060 * and MP EoR should have only an empty MP_UNREACH
2061 */
2062 if ((!update_len && !withdraw_len && nlris[NLRI_MP_UPDATE].length == 0)
2063 || (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
2064 afi_t afi = 0;
2065 safi_t safi;
2066 struct graceful_restart_info *gr_info;
2067
2068 /* Restarting router */
2069 if (BGP_PEER_GRACEFUL_RESTART_CAPABLE(peer)
2070 && BGP_PEER_RESTARTING_MODE(peer))
2071 restart = true;
2072
2073 /* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already
2074 * checked
2075 * update and withdraw NLRI lengths are 0.
2076 */
2077 if (!attribute_len) {
2078 afi = AFI_IP;
2079 safi = SAFI_UNICAST;
2080 } else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI)
2081 && nlris[NLRI_MP_WITHDRAW].length == 0) {
2082 afi = nlris[NLRI_MP_WITHDRAW].afi;
2083 safi = nlris[NLRI_MP_WITHDRAW].safi;
2084 } else if (attr_parse_ret == BGP_ATTR_PARSE_EOR) {
2085 afi = nlris[NLRI_MP_UPDATE].afi;
2086 safi = nlris[NLRI_MP_UPDATE].safi;
2087 }
2088
2089 if (afi && peer->afc[afi][safi]) {
2090 struct vrf *vrf = vrf_lookup_by_id(peer->bgp->vrf_id);
2091
2092 /* End-of-RIB received */
2093 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2094 PEER_STATUS_EOR_RECEIVED)) {
2095 SET_FLAG(peer->af_sflags[afi][safi],
2096 PEER_STATUS_EOR_RECEIVED);
2097 bgp_update_explicit_eors(peer);
2098 /* Update graceful restart information */
2099 gr_info = &(peer->bgp->gr_info[afi][safi]);
2100 if (restart)
2101 gr_info->eor_received++;
2102 /* If EOR received from all peers and selection
2103 * deferral timer is running, cancel the timer
2104 * and invoke the best path calculation
2105 */
2106 if (gr_info->eor_required
2107 == gr_info->eor_received) {
2108 if (bgp_debug_neighbor_events(peer))
2109 zlog_debug(
2110 "%s %d, %s %d",
2111 "EOR REQ",
2112 gr_info->eor_required,
2113 "EOR RCV",
2114 gr_info->eor_received);
2115 if (gr_info->t_select_deferral) {
2116 void *info = THREAD_ARG(
2117 gr_info->t_select_deferral);
2118 XFREE(MTYPE_TMP, info);
2119 }
2120 THREAD_OFF(gr_info->t_select_deferral);
2121 gr_info->eor_required = 0;
2122 gr_info->eor_received = 0;
2123 /* Best path selection */
2124 bgp_best_path_select_defer(peer->bgp,
2125 afi, safi);
2126 }
2127 }
2128
2129 /* NSF delete stale route */
2130 if (peer->nsf[afi][safi])
2131 bgp_clear_stale_route(peer, afi, safi);
2132
2133 zlog_info(
2134 "%s: rcvd End-of-RIB for %s from %s in vrf %s",
2135 __func__, get_afi_safi_str(afi, safi, false),
2136 peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
2137 }
2138 }
2139
2140 /* Everything is done. We unintern temporary structures which
2141 interned in bgp_attr_parse(). */
2142 bgp_attr_unintern_sub(&attr);
2143
2144 peer->update_time = monotime(NULL);
2145
2146 /* Notify BGP Conditional advertisement scanner process */
2147 peer->advmap_table_change = true;
2148
2149 return Receive_UPDATE_message;
2150 }
2151
2152 /**
2153 * Process BGP NOTIFY message for peer.
2154 *
2155 * @param peer
2156 * @param size size of the packet
2157 * @return as in summary
2158 */
2159 static int bgp_notify_receive(struct peer *peer, bgp_size_t size)
2160 {
2161 struct bgp_notify outer = {};
2162 struct bgp_notify inner = {};
2163 bool hard_reset = false;
2164
2165 if (peer->notify.data) {
2166 XFREE(MTYPE_BGP_NOTIFICATION, peer->notify.data);
2167 peer->notify.length = 0;
2168 peer->notify.hard_reset = false;
2169 }
2170
2171 outer.code = stream_getc(peer->curr);
2172 outer.subcode = stream_getc(peer->curr);
2173 outer.length = size - 2;
2174 outer.data = NULL;
2175 outer.raw_data = NULL;
2176 if (outer.length) {
2177 outer.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, outer.length);
2178 memcpy(outer.raw_data, stream_pnt(peer->curr), outer.length);
2179 }
2180
2181 hard_reset =
2182 bgp_notify_received_hard_reset(peer, outer.code, outer.subcode);
2183 if (hard_reset && outer.length) {
2184 inner = bgp_notify_decapsulate_hard_reset(&outer);
2185 peer->notify.hard_reset = true;
2186 } else {
2187 inner = outer;
2188 }
2189
2190 /* Preserv notify code and sub code. */
2191 peer->notify.code = inner.code;
2192 peer->notify.subcode = inner.subcode;
2193 /* For further diagnostic record returned Data. */
2194 if (inner.length) {
2195 peer->notify.length = inner.length;
2196 peer->notify.data =
2197 XMALLOC(MTYPE_BGP_NOTIFICATION, inner.length);
2198 memcpy(peer->notify.data, inner.raw_data, inner.length);
2199 }
2200
2201 /* For debug */
2202 {
2203 int i;
2204 int first = 0;
2205 char c[4];
2206
2207 if (inner.length) {
2208 inner.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
2209 inner.length * 3);
2210 for (i = 0; i < inner.length; i++)
2211 if (first) {
2212 snprintf(c, sizeof(c), " %02x",
2213 stream_getc(peer->curr));
2214
2215 strlcat(inner.data, c,
2216 inner.length * 3);
2217
2218 } else {
2219 first = 1;
2220 snprintf(c, sizeof(c), "%02x",
2221 stream_getc(peer->curr));
2222
2223 strlcpy(inner.data, c,
2224 inner.length * 3);
2225 }
2226 }
2227
2228 bgp_notify_print(peer, &inner, "received", hard_reset);
2229 if (inner.length) {
2230 XFREE(MTYPE_BGP_NOTIFICATION, inner.data);
2231 inner.length = 0;
2232 }
2233 if (outer.length) {
2234 XFREE(MTYPE_BGP_NOTIFICATION, outer.data);
2235 XFREE(MTYPE_BGP_NOTIFICATION, outer.raw_data);
2236
2237 /* If this is a Hard Reset notification, we MUST free
2238 * the inner (encapsulated) notification too.
2239 */
2240 if (hard_reset)
2241 XFREE(MTYPE_BGP_NOTIFICATION, inner.raw_data);
2242 outer.length = 0;
2243 }
2244 }
2245
2246 /* peer count update */
2247 atomic_fetch_add_explicit(&peer->notify_in, 1, memory_order_relaxed);
2248
2249 peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;
2250
2251 /* We have to check for Notify with Unsupported Optional Parameter.
2252 in that case we fallback to open without the capability option.
2253 But this done in bgp_stop. We just mark it here to avoid changing
2254 the fsm tables. */
2255 if (inner.code == BGP_NOTIFY_OPEN_ERR &&
2256 inner.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM)
2257 UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);
2258
2259 /* If Graceful-Restart N-bit (Notification) is exchanged,
2260 * and it's not a Hard Reset, let's retain the routes.
2261 */
2262 if (bgp_has_graceful_restart_notification(peer) && !hard_reset &&
2263 CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE))
2264 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
2265
2266 bgp_peer_gr_flags_update(peer);
2267 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
2268 peer->bgp->peer);
2269
2270 return Receive_NOTIFICATION_message;
2271 }
2272
2273 /**
2274 * Process BGP ROUTEREFRESH message for peer.
2275 *
2276 * @param peer
2277 * @param size size of the packet
2278 * @return as in summary
2279 */
2280 static int bgp_route_refresh_receive(struct peer *peer, bgp_size_t size)
2281 {
2282 iana_afi_t pkt_afi;
2283 afi_t afi;
2284 iana_safi_t pkt_safi;
2285 safi_t safi;
2286 struct stream *s;
2287 struct peer_af *paf;
2288 struct update_group *updgrp;
2289 struct peer *updgrp_peer;
2290 uint8_t subtype;
2291 bool force_update = false;
2292 bgp_size_t msg_length =
2293 size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE);
2294
2295 /* If peer does not have the capability, send notification. */
2296 if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) {
2297 flog_err(EC_BGP_NO_CAP,
2298 "%s [Error] BGP route refresh is not enabled",
2299 peer->host);
2300 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2301 BGP_NOTIFY_HEADER_BAD_MESTYPE);
2302 return BGP_Stop;
2303 }
2304
2305 /* Status must be Established. */
2306 if (!peer_established(peer)) {
2307 flog_err(
2308 EC_BGP_INVALID_STATUS,
2309 "%s [Error] Route refresh packet received under status %s",
2310 peer->host,
2311 lookup_msg(bgp_status_msg, peer->status, NULL));
2312 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
2313 bgp_fsm_error_subcode(peer->status));
2314 return BGP_Stop;
2315 }
2316
2317 s = peer->curr;
2318
2319 /* Parse packet. */
2320 pkt_afi = stream_getw(s);
2321 subtype = stream_getc(s);
2322 pkt_safi = stream_getc(s);
2323
2324 /* Convert AFI, SAFI to internal values and check. */
2325 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) {
2326 zlog_info(
2327 "%s REFRESH_REQ for unrecognized afi/safi: %s/%s - ignored",
2328 peer->host, iana_afi2str(pkt_afi),
2329 iana_safi2str(pkt_safi));
2330 return BGP_PACKET_NOOP;
2331 }
2332
2333 if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) {
2334 uint8_t *end;
2335 uint8_t when_to_refresh;
2336 uint8_t orf_type;
2337 uint16_t orf_len;
2338
2339 if (subtype) {
2340 /* If the length, excluding the fixed-size message
2341 * header, of the received ROUTE-REFRESH message with
2342 * Message Subtype 1 and 2 is not 4, then the BGP
2343 * speaker MUST send a NOTIFICATION message with the
2344 * Error Code of "ROUTE-REFRESH Message Error" and the
2345 * subcode of "Invalid Message Length".
2346 */
2347 if (msg_length != 4) {
2348 zlog_err(
2349 "%s Enhanced Route Refresh message length error",
2350 peer->host);
2351 bgp_notify_send(
2352 peer, BGP_NOTIFY_ROUTE_REFRESH_ERR,
2353 BGP_NOTIFY_ROUTE_REFRESH_INVALID_MSG_LEN);
2354 }
2355
2356 /* When the BGP speaker receives a ROUTE-REFRESH message
2357 * with a "Message Subtype" field other than 0, 1, or 2,
2358 * it MUST ignore the received ROUTE-REFRESH message.
2359 */
2360 if (subtype > 2)
2361 zlog_err(
2362 "%s Enhanced Route Refresh invalid subtype",
2363 peer->host);
2364 }
2365
2366 if (msg_length < 5) {
2367 zlog_info("%s ORF route refresh length error",
2368 peer->host);
2369 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2370 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2371 return BGP_Stop;
2372 }
2373
2374 when_to_refresh = stream_getc(s);
2375 end = stream_pnt(s) + (size - 5);
2376
2377 while ((stream_pnt(s) + 2) < end) {
2378 orf_type = stream_getc(s);
2379 orf_len = stream_getw(s);
2380
2381 /* orf_len in bounds? */
2382 if ((stream_pnt(s) + orf_len) > end)
2383 break; /* XXX: Notify instead?? */
2384 if (orf_type == ORF_TYPE_PREFIX
2385 || orf_type == ORF_TYPE_PREFIX_OLD) {
2386 uint8_t *p_pnt = stream_pnt(s);
2387 uint8_t *p_end = stream_pnt(s) + orf_len;
2388 struct orf_prefix orfp;
2389 uint8_t common = 0;
2390 uint32_t seq;
2391 int psize;
2392 char name[BUFSIZ];
2393 int ret = CMD_SUCCESS;
2394
2395 if (bgp_debug_neighbor_events(peer)) {
2396 zlog_debug(
2397 "%pBP rcvd Prefixlist ORF(%d) length %d",
2398 peer, orf_type, orf_len);
2399 }
2400
2401 /* ORF prefix-list name */
2402 snprintf(name, sizeof(name), "%s.%d.%d",
2403 peer->host, afi, safi);
2404
2405 /* we're going to read at least 1 byte of common
2406 * ORF header,
2407 * and 7 bytes of ORF Address-filter entry from
2408 * the stream
2409 */
2410 if (*p_pnt & ORF_COMMON_PART_REMOVE_ALL) {
2411 if (bgp_debug_neighbor_events(peer))
2412 zlog_debug(
2413 "%pBP rcvd Remove-All pfxlist ORF request",
2414 peer);
2415 prefix_bgp_orf_remove_all(afi, name);
2416 break;
2417 }
2418
2419 if (orf_len < 7)
2420 break;
2421
2422 while (p_pnt < p_end) {
2423 /* If the ORF entry is malformed, want
2424 * to read as much of it
2425 * as possible without going beyond the
2426 * bounds of the entry,
2427 * to maximise debug information.
2428 */
2429 int ok;
2430 memset(&orfp, 0, sizeof(orfp));
2431 common = *p_pnt++;
2432 /* after ++: p_pnt <= p_end */
2433 ok = ((uint32_t)(p_end - p_pnt)
2434 >= sizeof(uint32_t));
2435 if (ok) {
2436 memcpy(&seq, p_pnt,
2437 sizeof(uint32_t));
2438 p_pnt += sizeof(uint32_t);
2439 orfp.seq = ntohl(seq);
2440 } else
2441 p_pnt = p_end;
2442
2443 /* val checked in prefix_bgp_orf_set */
2444 if (p_pnt < p_end)
2445 orfp.ge = *p_pnt++;
2446
2447 /* val checked in prefix_bgp_orf_set */
2448 if (p_pnt < p_end)
2449 orfp.le = *p_pnt++;
2450
2451 if ((ok = (p_pnt < p_end)))
2452 orfp.p.prefixlen = *p_pnt++;
2453
2454 /* afi checked already */
2455 orfp.p.family = afi2family(afi);
2456
2457 /* 0 if not ok */
2458 psize = PSIZE(orfp.p.prefixlen);
2459 /* valid for family ? */
2460 if (psize > prefix_blen(&orfp.p)) {
2461 ok = 0;
2462 psize = prefix_blen(&orfp.p);
2463 }
2464 /* valid for packet ? */
2465 if (psize > (p_end - p_pnt)) {
2466 ok = 0;
2467 psize = p_end - p_pnt;
2468 }
2469
2470 if (psize > 0)
2471 memcpy(&orfp.p.u.prefix, p_pnt,
2472 psize);
2473 p_pnt += psize;
2474
2475 if (bgp_debug_neighbor_events(peer)) {
2476 char buf[INET6_BUFSIZ];
2477
2478 zlog_debug(
2479 "%pBP rcvd %s %s seq %u %s/%d ge %d le %d%s",
2480 peer,
2481 (common & ORF_COMMON_PART_REMOVE
2482 ? "Remove"
2483 : "Add"),
2484 (common & ORF_COMMON_PART_DENY
2485 ? "deny"
2486 : "permit"),
2487 orfp.seq,
2488 inet_ntop(
2489 orfp.p.family,
2490 &orfp.p.u.prefix,
2491 buf,
2492 INET6_BUFSIZ),
2493 orfp.p.prefixlen,
2494 orfp.ge, orfp.le,
2495 ok ? "" : " MALFORMED");
2496 }
2497
2498 if (ok)
2499 ret = prefix_bgp_orf_set(
2500 name, afi, &orfp,
2501 (common & ORF_COMMON_PART_DENY
2502 ? 0
2503 : 1),
2504 (common & ORF_COMMON_PART_REMOVE
2505 ? 0
2506 : 1));
2507
2508 if (!ok || (ok && ret != CMD_SUCCESS)) {
2509 zlog_info(
2510 "%pBP Received misformatted prefixlist ORF. Remove All pfxlist",
2511 peer);
2512 prefix_bgp_orf_remove_all(afi,
2513 name);
2514 break;
2515 }
2516 }
2517
2518 peer->orf_plist[afi][safi] =
2519 prefix_bgp_orf_lookup(afi, name);
2520 }
2521 stream_forward_getp(s, orf_len);
2522 }
2523 if (bgp_debug_neighbor_events(peer))
2524 zlog_debug("%pBP rcvd Refresh %s ORF request", peer,
2525 when_to_refresh == REFRESH_DEFER
2526 ? "Defer"
2527 : "Immediate");
2528 if (when_to_refresh == REFRESH_DEFER)
2529 return BGP_PACKET_NOOP;
2530 }
2531
2532 /* First update is deferred until ORF or ROUTE-REFRESH is received */
2533 if (CHECK_FLAG(peer->af_sflags[afi][safi],
2534 PEER_STATUS_ORF_WAIT_REFRESH))
2535 UNSET_FLAG(peer->af_sflags[afi][safi],
2536 PEER_STATUS_ORF_WAIT_REFRESH);
2537
2538 paf = peer_af_find(peer, afi, safi);
2539 if (paf && paf->subgroup) {
2540 if (peer->orf_plist[afi][safi]) {
2541 updgrp = PAF_UPDGRP(paf);
2542 updgrp_peer = UPDGRP_PEER(updgrp);
2543 updgrp_peer->orf_plist[afi][safi] =
2544 peer->orf_plist[afi][safi];
2545 }
2546
2547 /* Avoid supressing duplicate routes later
2548 * when processing in subgroup_announce_table().
2549 */
2550 force_update = true;
2551
2552 /* If the peer is configured for default-originate clear the
2553 * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will
2554 * re-advertise the
2555 * default
2556 */
2557 if (CHECK_FLAG(paf->subgroup->sflags,
2558 SUBGRP_STATUS_DEFAULT_ORIGINATE))
2559 UNSET_FLAG(paf->subgroup->sflags,
2560 SUBGRP_STATUS_DEFAULT_ORIGINATE);
2561 }
2562
2563 if (subtype == BGP_ROUTE_REFRESH_BORR) {
2564 /* A BGP speaker that has received the Graceful Restart
2565 * Capability from its neighbor MUST ignore any BoRRs for
2566 * an <AFI, SAFI> from the neighbor before the speaker
2567 * receives the EoR for the given <AFI, SAFI> from the
2568 * neighbor.
2569 */
2570 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)
2571 && !CHECK_FLAG(peer->af_sflags[afi][safi],
2572 PEER_STATUS_EOR_RECEIVED)) {
2573 if (bgp_debug_neighbor_events(peer))
2574 zlog_debug(
2575 "%pBP rcvd route-refresh (BoRR) for %s/%s before EoR",
2576 peer, afi2str(afi), safi2str(safi));
2577 return BGP_PACKET_NOOP;
2578 }
2579
2580 if (peer->t_refresh_stalepath) {
2581 if (bgp_debug_neighbor_events(peer))
2582 zlog_debug(
2583 "%pBP rcvd route-refresh (BoRR) for %s/%s, whereas BoRR already received",
2584 peer, afi2str(afi), safi2str(safi));
2585 return BGP_PACKET_NOOP;
2586 }
2587
2588 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_RECEIVED);
2589 UNSET_FLAG(peer->af_sflags[afi][safi],
2590 PEER_STATUS_EORR_RECEIVED);
2591
2592 /* When a BGP speaker receives a BoRR message from
2593 * a peer, it MUST mark all the routes with the given
2594 * Address Family Identifier and Subsequent Address
2595 * Family Identifier, <AFI, SAFI> [RFC2918], from
2596 * that peer as stale.
2597 */
2598 if (peer_active_nego(peer)) {
2599 SET_FLAG(peer->af_sflags[afi][safi],
2600 PEER_STATUS_ENHANCED_REFRESH);
2601 bgp_set_stale_route(peer, afi, safi);
2602 }
2603
2604 if (peer_established(peer))
2605 thread_add_timer(bm->master,
2606 bgp_refresh_stalepath_timer_expire,
2607 paf, peer->bgp->stalepath_time,
2608 &peer->t_refresh_stalepath);
2609
2610 if (bgp_debug_neighbor_events(peer))
2611 zlog_debug(
2612 "%pBP rcvd route-refresh (BoRR) for %s/%s, triggering timer for %u seconds",
2613 peer, afi2str(afi), safi2str(safi),
2614 peer->bgp->stalepath_time);
2615 } else if (subtype == BGP_ROUTE_REFRESH_EORR) {
2616 if (!peer->t_refresh_stalepath) {
2617 zlog_err(
2618 "%pBP rcvd route-refresh (EoRR) for %s/%s, whereas no BoRR received",
2619 peer, afi2str(afi), safi2str(safi));
2620 return BGP_PACKET_NOOP;
2621 }
2622
2623 THREAD_OFF(peer->t_refresh_stalepath);
2624
2625 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_RECEIVED);
2626 UNSET_FLAG(peer->af_sflags[afi][safi],
2627 PEER_STATUS_BORR_RECEIVED);
2628
2629 if (bgp_debug_neighbor_events(peer))
2630 zlog_debug(
2631 "%pBP rcvd route-refresh (EoRR) for %s/%s, stopping BoRR timer",
2632 peer, afi2str(afi), safi2str(safi));
2633
2634 if (peer->nsf[afi][safi])
2635 bgp_clear_stale_route(peer, afi, safi);
2636 } else {
2637 if (bgp_debug_neighbor_events(peer))
2638 zlog_debug(
2639 "%pBP rcvd route-refresh (REQUEST) for %s/%s",
2640 peer, afi2str(afi), safi2str(safi));
2641
2642 /* In response to a "normal route refresh request" from the
2643 * peer, the speaker MUST send a BoRR message.
2644 */
2645 if (CHECK_FLAG(peer->cap, PEER_CAP_ENHANCED_RR_RCV)) {
2646 /* For a BGP speaker that supports the BGP Graceful
2647 * Restart, it MUST NOT send a BoRR for an <AFI, SAFI>
2648 * to a neighbor before it sends the EoR for the
2649 * <AFI, SAFI> to the neighbor.
2650 */
2651 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2652 PEER_STATUS_EOR_SEND)) {
2653 if (bgp_debug_neighbor_events(peer))
2654 zlog_debug(
2655 "%pBP rcvd route-refresh (REQUEST) for %s/%s before EoR",
2656 peer, afi2str(afi),
2657 safi2str(safi));
2658 /* Can't send BoRR now, postpone after EoR */
2659 SET_FLAG(peer->af_sflags[afi][safi],
2660 PEER_STATUS_REFRESH_PENDING);
2661 return BGP_PACKET_NOOP;
2662 }
2663
2664 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
2665 BGP_ROUTE_REFRESH_BORR);
2666
2667 if (bgp_debug_neighbor_events(peer))
2668 zlog_debug(
2669 "%pBP sending route-refresh (BoRR) for %s/%s",
2670 peer, afi2str(afi), safi2str(safi));
2671
2672 /* Set flag Ready-To-Send to know when we can send EoRR
2673 * message.
2674 */
2675 SET_FLAG(peer->af_sflags[afi][safi],
2676 PEER_STATUS_BORR_SEND);
2677 UNSET_FLAG(peer->af_sflags[afi][safi],
2678 PEER_STATUS_EORR_SEND);
2679 }
2680 }
2681
2682 /* Perform route refreshment to the peer */
2683 bgp_announce_route(peer, afi, safi, force_update);
2684
2685 /* No FSM action necessary */
2686 return BGP_PACKET_NOOP;
2687 }
2688
2689 /**
2690 * Parse BGP CAPABILITY message for peer.
2691 *
2692 * @param peer
2693 * @param size size of the packet
2694 * @return as in summary
2695 */
2696 static int bgp_capability_msg_parse(struct peer *peer, uint8_t *pnt,
2697 bgp_size_t length)
2698 {
2699 uint8_t *end;
2700 struct capability_mp_data mpc;
2701 struct capability_header *hdr;
2702 uint8_t action;
2703 iana_afi_t pkt_afi;
2704 afi_t afi;
2705 iana_safi_t pkt_safi;
2706 safi_t safi;
2707
2708 end = pnt + length;
2709
2710 while (pnt < end) {
2711 /* We need at least action, capability code and capability
2712 * length. */
2713 if (pnt + 3 > end) {
2714 zlog_info("%s Capability length error", peer->host);
2715 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2716 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2717 return BGP_Stop;
2718 }
2719 action = *pnt;
2720 hdr = (struct capability_header *)(pnt + 1);
2721
2722 /* Action value check. */
2723 if (action != CAPABILITY_ACTION_SET
2724 && action != CAPABILITY_ACTION_UNSET) {
2725 zlog_info("%s Capability Action Value error %d",
2726 peer->host, action);
2727 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2728 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2729 return BGP_Stop;
2730 }
2731
2732 if (bgp_debug_neighbor_events(peer))
2733 zlog_debug(
2734 "%s CAPABILITY has action: %d, code: %u, length %u",
2735 peer->host, action, hdr->code, hdr->length);
2736
2737 if (hdr->length < sizeof(struct capability_mp_data)) {
2738 zlog_info(
2739 "%pBP Capability structure is not properly filled out, expected at least %zu bytes but header length specified is %d",
2740 peer, sizeof(struct capability_mp_data),
2741 hdr->length);
2742 return BGP_Stop;
2743 }
2744
2745 /* Capability length check. */
2746 if ((pnt + hdr->length + 3) > end) {
2747 zlog_info("%s Capability length error", peer->host);
2748 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2749 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
2750 return BGP_Stop;
2751 }
2752
2753 /* Fetch structure to the byte stream. */
2754 memcpy(&mpc, pnt + 3, sizeof(struct capability_mp_data));
2755 pnt += hdr->length + 3;
2756
2757 /* We know MP Capability Code. */
2758 if (hdr->code == CAPABILITY_CODE_MP) {
2759 pkt_afi = ntohs(mpc.afi);
2760 pkt_safi = mpc.safi;
2761
2762 /* Ignore capability when override-capability is set. */
2763 if (CHECK_FLAG(peer->flags,
2764 PEER_FLAG_OVERRIDE_CAPABILITY))
2765 continue;
2766
2767 /* Convert AFI, SAFI to internal values. */
2768 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi,
2769 &safi)) {
2770 if (bgp_debug_neighbor_events(peer))
2771 zlog_debug(
2772 "%s Dynamic Capability MP_EXT afi/safi invalid (%s/%s)",
2773 peer->host,
2774 iana_afi2str(pkt_afi),
2775 iana_safi2str(pkt_safi));
2776 continue;
2777 }
2778
2779 /* Address family check. */
2780 if (bgp_debug_neighbor_events(peer))
2781 zlog_debug(
2782 "%s CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
2783 peer->host,
2784 action == CAPABILITY_ACTION_SET
2785 ? "Advertising"
2786 : "Removing",
2787 iana_afi2str(pkt_afi),
2788 iana_safi2str(pkt_safi));
2789
2790 if (action == CAPABILITY_ACTION_SET) {
2791 peer->afc_recv[afi][safi] = 1;
2792 if (peer->afc[afi][safi]) {
2793 peer->afc_nego[afi][safi] = 1;
2794 bgp_announce_route(peer, afi, safi,
2795 false);
2796 }
2797 } else {
2798 peer->afc_recv[afi][safi] = 0;
2799 peer->afc_nego[afi][safi] = 0;
2800
2801 if (peer_active_nego(peer))
2802 bgp_clear_route(peer, afi, safi);
2803 else
2804 return BGP_Stop;
2805 }
2806 } else {
2807 flog_warn(
2808 EC_BGP_UNRECOGNIZED_CAPABILITY,
2809 "%s unrecognized capability code: %d - ignored",
2810 peer->host, hdr->code);
2811 }
2812 }
2813
2814 /* No FSM action necessary */
2815 return BGP_PACKET_NOOP;
2816 }
2817
2818 /**
2819 * Parse BGP CAPABILITY message for peer.
2820 *
2821 * Exported for unit testing.
2822 *
2823 * @param peer
2824 * @param size size of the packet
2825 * @return as in summary
2826 */
2827 int bgp_capability_receive(struct peer *peer, bgp_size_t size)
2828 {
2829 uint8_t *pnt;
2830
2831 /* Fetch pointer. */
2832 pnt = stream_pnt(peer->curr);
2833
2834 if (bgp_debug_neighbor_events(peer))
2835 zlog_debug("%s rcv CAPABILITY", peer->host);
2836
2837 /* If peer does not have the capability, send notification. */
2838 if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) {
2839 flog_err(EC_BGP_NO_CAP,
2840 "%s [Error] BGP dynamic capability is not enabled",
2841 peer->host);
2842 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2843 BGP_NOTIFY_HEADER_BAD_MESTYPE);
2844 return BGP_Stop;
2845 }
2846
2847 /* Status must be Established. */
2848 if (!peer_established(peer)) {
2849 flog_err(
2850 EC_BGP_NO_CAP,
2851 "%s [Error] Dynamic capability packet received under status %s",
2852 peer->host,
2853 lookup_msg(bgp_status_msg, peer->status, NULL));
2854 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
2855 bgp_fsm_error_subcode(peer->status));
2856 return BGP_Stop;
2857 }
2858
2859 /* Parse packet. */
2860 return bgp_capability_msg_parse(peer, pnt, size);
2861 }
2862
2863 /**
2864 * Processes a peer's input buffer.
2865 *
2866 * This function sidesteps the event loop and directly calls bgp_event_update()
2867 * after processing each BGP message. This is necessary to ensure proper
2868 * ordering of FSM events and unifies the behavior that was present previously,
2869 * whereby some of the packet handling functions would update the FSM and some
2870 * would not, making event flow difficult to understand. Please think twice
2871 * before hacking this.
2872 *
2873 * Thread type: THREAD_EVENT
2874 * @param thread
2875 * @return 0
2876 */
2877 void bgp_process_packet(struct thread *thread)
2878 {
2879 /* Yes first of all get peer pointer. */
2880 struct peer *peer; // peer
2881 uint32_t rpkt_quanta_old; // how many packets to read
2882 int fsm_update_result; // return code of bgp_event_update()
2883 int mprc; // message processing return code
2884
2885 peer = THREAD_ARG(thread);
2886 rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
2887 memory_order_relaxed);
2888 fsm_update_result = 0;
2889
2890 /* Guard against scheduled events that occur after peer deletion. */
2891 if (peer->status == Deleted || peer->status == Clearing)
2892 return;
2893
2894 unsigned int processed = 0;
2895
2896 while (processed < rpkt_quanta_old) {
2897 uint8_t type = 0;
2898 bgp_size_t size;
2899 char notify_data_length[2];
2900
2901 frr_with_mutex (&peer->io_mtx) {
2902 peer->curr = stream_fifo_pop(peer->ibuf);
2903 }
2904
2905 if (peer->curr == NULL) // no packets to process, hmm...
2906 return;
2907
2908 /* skip the marker and copy the packet length */
2909 stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
2910 memcpy(notify_data_length, stream_pnt(peer->curr), 2);
2911
2912 /* read in the packet length and type */
2913 size = stream_getw(peer->curr);
2914 type = stream_getc(peer->curr);
2915
2916 hook_call(bgp_packet_dump, peer, type, size, peer->curr);
2917
2918 /* adjust size to exclude the marker + length + type */
2919 size -= BGP_HEADER_SIZE;
2920
2921 /* Read rest of the packet and call each sort of packet routine
2922 */
2923 switch (type) {
2924 case BGP_MSG_OPEN:
2925 frrtrace(2, frr_bgp, open_process, peer, size);
2926 atomic_fetch_add_explicit(&peer->open_in, 1,
2927 memory_order_relaxed);
2928 mprc = bgp_open_receive(peer, size);
2929 if (mprc == BGP_Stop)
2930 flog_err(
2931 EC_BGP_PKT_OPEN,
2932 "%s: BGP OPEN receipt failed for peer: %s",
2933 __func__, peer->host);
2934 break;
2935 case BGP_MSG_UPDATE:
2936 frrtrace(2, frr_bgp, update_process, peer, size);
2937 atomic_fetch_add_explicit(&peer->update_in, 1,
2938 memory_order_relaxed);
2939 peer->readtime = monotime(NULL);
2940 mprc = bgp_update_receive(peer, size);
2941 if (mprc == BGP_Stop)
2942 flog_err(
2943 EC_BGP_UPDATE_RCV,
2944 "%s: BGP UPDATE receipt failed for peer: %s",
2945 __func__, peer->host);
2946 break;
2947 case BGP_MSG_NOTIFY:
2948 frrtrace(2, frr_bgp, notification_process, peer, size);
2949 atomic_fetch_add_explicit(&peer->notify_in, 1,
2950 memory_order_relaxed);
2951 mprc = bgp_notify_receive(peer, size);
2952 if (mprc == BGP_Stop)
2953 flog_err(
2954 EC_BGP_NOTIFY_RCV,
2955 "%s: BGP NOTIFY receipt failed for peer: %s",
2956 __func__, peer->host);
2957 break;
2958 case BGP_MSG_KEEPALIVE:
2959 frrtrace(2, frr_bgp, keepalive_process, peer, size);
2960 peer->readtime = monotime(NULL);
2961 atomic_fetch_add_explicit(&peer->keepalive_in, 1,
2962 memory_order_relaxed);
2963 mprc = bgp_keepalive_receive(peer, size);
2964 if (mprc == BGP_Stop)
2965 flog_err(
2966 EC_BGP_KEEP_RCV,
2967 "%s: BGP KEEPALIVE receipt failed for peer: %s",
2968 __func__, peer->host);
2969 break;
2970 case BGP_MSG_ROUTE_REFRESH_NEW:
2971 case BGP_MSG_ROUTE_REFRESH_OLD:
2972 frrtrace(2, frr_bgp, refresh_process, peer, size);
2973 atomic_fetch_add_explicit(&peer->refresh_in, 1,
2974 memory_order_relaxed);
2975 mprc = bgp_route_refresh_receive(peer, size);
2976 if (mprc == BGP_Stop)
2977 flog_err(
2978 EC_BGP_RFSH_RCV,
2979 "%s: BGP ROUTEREFRESH receipt failed for peer: %s",
2980 __func__, peer->host);
2981 break;
2982 case BGP_MSG_CAPABILITY:
2983 frrtrace(2, frr_bgp, capability_process, peer, size);
2984 atomic_fetch_add_explicit(&peer->dynamic_cap_in, 1,
2985 memory_order_relaxed);
2986 mprc = bgp_capability_receive(peer, size);
2987 if (mprc == BGP_Stop)
2988 flog_err(
2989 EC_BGP_CAP_RCV,
2990 "%s: BGP CAPABILITY receipt failed for peer: %s",
2991 __func__, peer->host);
2992 break;
2993 default:
2994 /* Suppress uninitialized variable warning */
2995 mprc = 0;
2996 (void)mprc;
2997 /*
2998 * The message type should have been sanitized before
2999 * we ever got here. Receipt of a message with an
3000 * invalid header at this point is indicative of a
3001 * security issue.
3002 */
3003 assert (!"Message of invalid type received during input processing");
3004 }
3005
3006 /* delete processed packet */
3007 stream_free(peer->curr);
3008 peer->curr = NULL;
3009 processed++;
3010
3011 /* Update FSM */
3012 if (mprc != BGP_PACKET_NOOP)
3013 fsm_update_result = bgp_event_update(peer, mprc);
3014 else
3015 continue;
3016
3017 /*
3018 * If peer was deleted, do not process any more packets. This
3019 * is usually due to executing BGP_Stop or a stub deletion.
3020 */
3021 if (fsm_update_result == FSM_PEER_TRANSFERRED
3022 || fsm_update_result == FSM_PEER_STOPPED)
3023 break;
3024 }
3025
3026 if (fsm_update_result != FSM_PEER_TRANSFERRED
3027 && fsm_update_result != FSM_PEER_STOPPED) {
3028 frr_with_mutex (&peer->io_mtx) {
3029 // more work to do, come back later
3030 if (peer->ibuf->count > 0)
3031 thread_add_event(
3032 bm->master, bgp_process_packet, peer, 0,
3033 &peer->t_process_packet);
3034 }
3035 }
3036 }
3037
3038 /* Send EOR when routes are processed by selection deferral timer */
3039 void bgp_send_delayed_eor(struct bgp *bgp)
3040 {
3041 struct peer *peer;
3042 struct listnode *node, *nnode;
3043
3044 /* EOR message sent in bgp_write_proceed_actions */
3045 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer))
3046 bgp_write_proceed_actions(peer);
3047 }
3048
3049 /*
3050 * Task callback to handle socket error encountered in the io pthread. We avoid
3051 * having the io pthread try to enqueue fsm events or mess with the peer
3052 * struct.
3053 */
3054 void bgp_packet_process_error(struct thread *thread)
3055 {
3056 struct peer *peer;
3057 int code;
3058
3059 peer = THREAD_ARG(thread);
3060 code = THREAD_VAL(thread);
3061
3062 if (bgp_debug_neighbor_events(peer))
3063 zlog_debug("%s [Event] BGP error %d on fd %d",
3064 peer->host, code, peer->fd);
3065
3066 /* Closed connection or error on the socket */
3067 if (peer_established(peer)) {
3068 if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
3069 || CHECK_FLAG(peer->flags,
3070 PEER_FLAG_GRACEFUL_RESTART_HELPER))
3071 && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
3072 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
3073 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
3074 } else
3075 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
3076 }
3077
3078 bgp_event_update(peer, code);
3079 }