]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_packet.c
*: Rename thread.[ch] to event.[ch]
[mirror_frr.git] / bgpd / bgp_packet.c
CommitLineData
acddc0ed 1// SPDX-License-Identifier: GPL-2.0-or-later
718e3744 2/* BGP packet management routine.
56257a44
QY
3 * Contains utility functions for constructing and consuming BGP messages.
4 * Copyright (C) 2017 Cumulus Networks
896014f4 5 * Copyright (C) 1999 Kunihiro Ishiguro
896014f4 6 */
718e3744 7
8#include <zebra.h>
d3ecc69e 9#include <sys/time.h>
718e3744 10
cb37cb33 11#include "event.h"
718e3744 12#include "stream.h"
13#include "network.h"
14#include "prefix.h"
15#include "command.h"
16#include "log.h"
17#include "memory.h"
d62a17ae 18#include "sockunion.h" /* for inet_ntop () */
baa376fc 19#include "sockopt.h"
718e3744 20#include "linklist.h"
21#include "plist.h"
3f9c7369 22#include "queue.h"
039f3a34 23#include "filter.h"
02705213 24#include "lib_errors.h"
718e3744 25
26#include "bgpd/bgpd.h"
27#include "bgpd/bgp_table.h"
28#include "bgpd/bgp_dump.h"
6c29258c 29#include "bgpd/bgp_bmp.h"
718e3744 30#include "bgpd/bgp_attr.h"
31#include "bgpd/bgp_debug.h"
14454c9f 32#include "bgpd/bgp_errors.h"
718e3744 33#include "bgpd/bgp_fsm.h"
34#include "bgpd/bgp_route.h"
35#include "bgpd/bgp_packet.h"
36#include "bgpd/bgp_open.h"
37#include "bgpd/bgp_aspath.h"
38#include "bgpd/bgp_community.h"
39#include "bgpd/bgp_ecommunity.h"
57d187bc 40#include "bgpd/bgp_lcommunity.h"
718e3744 41#include "bgpd/bgp_network.h"
42#include "bgpd/bgp_mplsvpn.h"
7ef5a232 43#include "bgpd/bgp_evpn.h"
718e3744 44#include "bgpd/bgp_advertise.h"
93406d87 45#include "bgpd/bgp_vty.h"
3f9c7369 46#include "bgpd/bgp_updgrp.h"
cd1964ff 47#include "bgpd/bgp_label.h"
56257a44 48#include "bgpd/bgp_io.h"
934af458 49#include "bgpd/bgp_keepalives.h"
7c40bf39 50#include "bgpd/bgp_flowspec.h"
d9a03c57 51#include "bgpd/bgp_trace.h"
718e3744 52
584470fb
DL
53DEFINE_HOOK(bgp_packet_dump,
54 (struct peer *peer, uint8_t type, bgp_size_t size,
55 struct stream *s),
8451921b 56 (peer, type, size, s));
584470fb 57
6fd04594
DL
58DEFINE_HOOK(bgp_packet_send,
59 (struct peer *peer, uint8_t type, bgp_size_t size,
60 struct stream *s),
8451921b 61 (peer, type, size, s));
6fd04594 62
d8151687
QY
63/**
64 * Sets marker and type fields for a BGP message.
65 *
66 * @param s the stream containing the packet
67 * @param type the packet type
68 * @return the size of the stream
69 */
d7c0a89a 70int bgp_packet_set_marker(struct stream *s, uint8_t type)
718e3744 71{
d62a17ae 72 int i;
718e3744 73
d62a17ae 74 /* Fill in marker. */
75 for (i = 0; i < BGP_MARKER_SIZE; i++)
76 stream_putc(s, 0xff);
718e3744 77
d62a17ae 78 /* Dummy total length. This field is should be filled in later on. */
79 stream_putw(s, 0);
718e3744 80
d62a17ae 81 /* BGP packet type. */
82 stream_putc(s, type);
718e3744 83
d62a17ae 84 /* Return current stream size. */
85 return stream_get_endp(s);
718e3744 86}
87
d8151687
QY
88/**
89 * Sets size field for a BGP message.
90 *
91 * Size field is set to the size of the stream passed.
92 *
93 * @param s the stream containing the packet
d8151687 94 */
65baedca 95void bgp_packet_set_size(struct stream *s)
718e3744 96{
d62a17ae 97 int cp;
718e3744 98
d62a17ae 99 /* Preserve current pointer. */
100 cp = stream_get_endp(s);
101 stream_putw_at(s, BGP_MARKER_SIZE, cp);
718e3744 102}
103
d3ecc69e
QY
104/*
105 * Push a packet onto the beginning of the peer's output queue.
106 * This function acquires the peer's write mutex before proceeding.
107 */
108static void bgp_packet_add(struct peer *peer, struct stream *s)
109{
bd9fb6f3
DL
110 intmax_t delta;
111 uint32_t holdtime;
460ed839 112 intmax_t sendholdtime;
bd9fb6f3 113
cb1991af 114 frr_with_mutex (&peer->io_mtx) {
bd9fb6f3
DL
115 /* if the queue is empty, reset the "last OK" timestamp to
116 * now, otherwise if we write another packet immediately
117 * after it'll get confused
118 */
119 if (!stream_fifo_count_safe(peer->obuf))
083ec940 120 peer->last_sendq_ok = monotime(NULL);
bd9fb6f3 121
00dffa8c 122 stream_fifo_push(peer->obuf, s);
bd9fb6f3 123
083ec940 124 delta = monotime(NULL) - peer->last_sendq_ok;
460ed839
DA
125
126 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
127 holdtime = atomic_load_explicit(&peer->holdtime,
128 memory_order_relaxed);
129 else
130 holdtime = peer->bgp->default_holdtime;
131
132 sendholdtime = holdtime * 2;
bd9fb6f3
DL
133
134 /* Note that when we're here, we're adding some packet to the
135 * OutQ. That includes keepalives when there is nothing to
136 * do, so there's a guarantee we pass by here once in a while.
137 *
138 * That implies there is no need to go set up another separate
139 * timer that ticks down SendHoldTime, as we'll be here sooner
140 * or later anyway and will see the checks below failing.
141 */
382268f4
DL
142 if (!holdtime) {
143 /* no holdtime, do nothing. */
460ed839 144 } else if (delta > sendholdtime) {
bd9fb6f3
DL
145 flog_err(
146 EC_BGP_SENDQ_STUCK_PROPER,
460ed839
DA
147 "%pBP has not made any SendQ progress for 2 holdtimes (%jds), terminating session",
148 peer, sendholdtime);
bd9fb6f3
DL
149 BGP_EVENT_ADD(peer, TCP_fatal_error);
150 } else if (delta > (intmax_t)holdtime &&
083ec940 151 monotime(NULL) - peer->last_sendq_warn > 5) {
bd9fb6f3
DL
152 flog_warn(
153 EC_BGP_SENDQ_STUCK_WARN,
460ed839
DA
154 "%pBP has not made any SendQ progress for 1 holdtime (%us), peer overloaded?",
155 peer, holdtime);
083ec940 156 peer->last_sendq_warn = monotime(NULL);
bd9fb6f3 157 }
00dffa8c 158 }
718e3744 159}
160
d62a17ae 161static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
162 safi_t safi)
93406d87 163{
d62a17ae 164 struct stream *s;
617975d1
DS
165 iana_afi_t pkt_afi = IANA_AFI_IPV4;
166 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
d62a17ae 167
168 if (DISABLE_BGP_ANNOUNCE)
169 return NULL;
170
171 if (bgp_debug_neighbor_events(peer))
172 zlog_debug("send End-of-RIB for %s to %s",
5cb5f4d0 173 get_afi_safi_str(afi, safi, false), peer->host);
d62a17ae 174
ef56aee4 175 s = stream_new(peer->max_packet_size);
d62a17ae 176
177 /* Make BGP update packet. */
178 bgp_packet_set_marker(s, BGP_MSG_UPDATE);
179
180 /* Unfeasible Routes Length */
181 stream_putw(s, 0);
182
183 if (afi == AFI_IP && safi == SAFI_UNICAST) {
184 /* Total Path Attribute Length */
185 stream_putw(s, 0);
186 } else {
187 /* Convert AFI, SAFI to values for packet. */
188 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
189
190 /* Total Path Attribute Length */
191 stream_putw(s, 6);
192 stream_putc(s, BGP_ATTR_FLAG_OPTIONAL);
193 stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI);
194 stream_putc(s, 3);
195 stream_putw(s, pkt_afi);
196 stream_putc(s, pkt_safi);
197 }
198
199 bgp_packet_set_size(s);
d62a17ae 200 return s;
718e3744 201}
202
d8151687
QY
203/* Called when there is a change in the EOR(implicit or explicit) status of a
204 * peer. Ends the update-delay if all expected peers are done with EORs. */
205void bgp_check_update_delay(struct bgp *bgp)
206{
207 struct listnode *node, *nnode;
208 struct peer *peer = NULL;
209
210 if (bgp_debug_neighbor_events(peer))
211 zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d",
212 bgp->established, bgp->restarted_peers,
213 bgp->implicit_eors, bgp->explicit_eors);
214
215 if (bgp->established
216 <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
becedef6
QY
217 /*
218 * This is an extra sanity check to make sure we wait for all
219 * the eligible configured peers. This check is performed if
220 * establish wait timer is on, or establish wait option is not
221 * given with the update-delay command
222 */
d8151687
QY
223 if (bgp->t_establish_wait
224 || (bgp->v_establish_wait == bgp->v_update_delay))
225 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
226 if (CHECK_FLAG(peer->flags,
227 PEER_FLAG_CONFIG_NODE)
228 && !CHECK_FLAG(peer->flags,
229 PEER_FLAG_SHUTDOWN)
cb9196e7
DS
230 && !CHECK_FLAG(peer->bgp->flags,
231 BGP_FLAG_SHUTDOWN)
d8151687
QY
232 && !peer->update_delay_over) {
233 if (bgp_debug_neighbor_events(peer))
234 zlog_debug(
235 " Peer %s pending, continuing read-only mode",
236 peer->host);
237 return;
238 }
239 }
240
241 zlog_info(
242 "Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d",
243 bgp->restarted_peers, bgp->implicit_eors,
244 bgp->explicit_eors);
245 bgp_update_delay_end(bgp);
246 }
247}
248
becedef6
QY
249/*
250 * Called if peer is known to have restarted. The restart-state bit in
251 * Graceful-Restart capability is used for that
252 */
d8151687
QY
253void bgp_update_restarted_peers(struct peer *peer)
254{
255 if (!bgp_update_delay_active(peer->bgp))
256 return; /* BGP update delay has ended */
257 if (peer->update_delay_over)
258 return; /* This peer has already been considered */
259
260 if (bgp_debug_neighbor_events(peer))
261 zlog_debug("Peer %s: Checking restarted", peer->host);
262
feb17238 263 if (peer_established(peer)) {
d8151687
QY
264 peer->update_delay_over = 1;
265 peer->bgp->restarted_peers++;
266 bgp_check_update_delay(peer->bgp);
267 }
268}
269
becedef6
QY
270/*
271 * Called as peer receives a keep-alive. Determines if this occurence can be
272 * taken as an implicit EOR for this peer.
273 * NOTE: The very first keep-alive after the Established state of a peer is
274 * considered implicit EOR for the update-delay purposes
275 */
d8151687
QY
276void bgp_update_implicit_eors(struct peer *peer)
277{
278 if (!bgp_update_delay_active(peer->bgp))
279 return; /* BGP update delay has ended */
280 if (peer->update_delay_over)
281 return; /* This peer has already been considered */
282
283 if (bgp_debug_neighbor_events(peer))
284 zlog_debug("Peer %s: Checking implicit EORs", peer->host);
285
feb17238 286 if (peer_established(peer)) {
d8151687
QY
287 peer->update_delay_over = 1;
288 peer->bgp->implicit_eors++;
289 bgp_check_update_delay(peer->bgp);
290 }
291}
292
becedef6
QY
293/*
294 * Should be called only when there is a change in the EOR_RECEIVED status
295 * for any afi/safi on a peer.
296 */
d8151687
QY
297static void bgp_update_explicit_eors(struct peer *peer)
298{
299 afi_t afi;
300 safi_t safi;
301
302 if (!bgp_update_delay_active(peer->bgp))
303 return; /* BGP update delay has ended */
304 if (peer->update_delay_over)
305 return; /* This peer has already been considered */
306
307 if (bgp_debug_neighbor_events(peer))
308 zlog_debug("Peer %s: Checking explicit EORs", peer->host);
309
f18ba3cd
DS
310 FOREACH_AFI_SAFI (afi, safi) {
311 if (peer->afc_nego[afi][safi]
312 && !CHECK_FLAG(peer->af_sflags[afi][safi],
313 PEER_STATUS_EOR_RECEIVED)) {
314 if (bgp_debug_neighbor_events(peer))
315 zlog_debug(
316 " afi %d safi %d didn't receive EOR",
317 afi, safi);
318 return;
d8151687 319 }
f18ba3cd 320 }
d8151687
QY
321
322 peer->update_delay_over = 1;
323 peer->bgp->explicit_eors++;
324 bgp_check_update_delay(peer->bgp);
325}
326
327/**
328 * Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers.
329 *
330 * mp_withdraw, if set, is used to nullify attr structure on most of the
331 * calling safi function and for evpn, passed as parameter
332 */
333int bgp_nlri_parse(struct peer *peer, struct attr *attr,
334 struct bgp_nlri *packet, int mp_withdraw)
335{
336 switch (packet->safi) {
337 case SAFI_UNICAST:
338 case SAFI_MULTICAST:
339 return bgp_nlri_parse_ip(peer, mp_withdraw ? NULL : attr,
340 packet);
341 case SAFI_LABELED_UNICAST:
342 return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr,
343 packet);
344 case SAFI_MPLS_VPN:
345 return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr,
346 packet);
347 case SAFI_EVPN:
348 return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw);
7c40bf39 349 case SAFI_FLOWSPEC:
350 return bgp_nlri_parse_flowspec(peer, attr, packet, mp_withdraw);
d8151687 351 }
513386b5 352 return BGP_NLRI_PARSE_ERROR;
d8151687
QY
353}
354
a783cc05
XL
355
356/*
357 * Check if route-refresh request from peer is pending (received before EoR),
358 * and process it now.
359 */
360static void bgp_process_pending_refresh(struct peer *peer, afi_t afi,
361 safi_t safi)
362{
363 if (CHECK_FLAG(peer->af_sflags[afi][safi],
364 PEER_STATUS_REFRESH_PENDING)) {
365 UNSET_FLAG(peer->af_sflags[afi][safi],
366 PEER_STATUS_REFRESH_PENDING);
367 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
368 BGP_ROUTE_REFRESH_BORR);
369 if (bgp_debug_neighbor_events(peer))
370 zlog_debug(
371 "%pBP sending route-refresh (BoRR) for %s/%s (for pending REQUEST)",
372 peer, afi2str(afi), safi2str(safi));
373
374 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_SEND);
375 UNSET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_SEND);
376 bgp_announce_route(peer, afi, safi, true);
377 }
378}
379
6ec98a2f
QY
380/*
381 * Checks a variety of conditions to determine whether the peer needs to be
382 * rescheduled for packet generation again, and does so if necessary.
383 *
384 * @param peer to check for rescheduling
385 */
af1e1dc6
QY
386static void bgp_write_proceed_actions(struct peer *peer)
387{
388 afi_t afi;
389 safi_t safi;
390 struct peer_af *paf;
391 struct bpacket *next_pkt;
392 struct update_subgroup *subgrp;
0e5cdd59 393 enum bgp_af_index index;
af1e1dc6 394
0e5cdd59
DS
395 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
396 paf = peer->peer_af_array[index];
c58b0f46
QY
397 if (!paf)
398 continue;
0e5cdd59 399
c58b0f46
QY
400 subgrp = paf->subgroup;
401 if (!subgrp)
402 continue;
403
404 next_pkt = paf->next_pkt_to_send;
405 if (next_pkt && next_pkt->buffer) {
406 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
407 bgp_generate_updgrp_packets, 0);
408 return;
409 }
af1e1dc6 410
c58b0f46
QY
411 /* No packets readily available for AFI/SAFI, are there
412 * subgroup packets
413 * that need to be generated? */
414 if (bpacket_queue_is_full(SUBGRP_INST(subgrp),
415 SUBGRP_PKTQ(subgrp))
416 || subgroup_packets_to_build(subgrp)) {
417 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
418 bgp_generate_updgrp_packets, 0);
419 return;
420 }
af1e1dc6 421
0e5cdd59
DS
422 afi = paf->afi;
423 safi = paf->safi;
424
c58b0f46
QY
425 /* No packets to send, see if EOR is pending */
426 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)) {
427 if (!subgrp->t_coalesce && peer->afc_nego[afi][safi]
428 && peer->synctime
429 && !CHECK_FLAG(peer->af_sflags[afi][safi],
430 PEER_STATUS_EOR_SEND)
431 && safi != SAFI_MPLS_VPN) {
af1e1dc6
QY
432 BGP_TIMER_ON(peer->t_generate_updgrp_packets,
433 bgp_generate_updgrp_packets, 0);
434 return;
435 }
af1e1dc6 436 }
c58b0f46 437 }
af1e1dc6
QY
438}
439
6ec98a2f
QY
440/*
441 * Generate advertisement information (withdraws, updates, EOR) from each
442 * update group a peer belongs to, encode this information into packets, and
443 * enqueue the packets onto the peer's output buffer.
56257a44 444 */
cc9f21da 445void bgp_generate_updgrp_packets(struct thread *thread)
718e3744 446{
56257a44 447 struct peer *peer = THREAD_ARG(thread);
56257a44
QY
448
449 struct stream *s;
d62a17ae 450 struct peer_af *paf;
451 struct bpacket *next_pkt;
d0ad6d8e
QY
452 uint32_t wpq;
453 uint32_t generated = 0;
d62a17ae 454 afi_t afi;
455 safi_t safi;
456
d0ad6d8e
QY
457 wpq = atomic_load_explicit(&peer->bgp->wpkt_quanta,
458 memory_order_relaxed);
459
d62a17ae 460 /*
461 * The code beyond this part deals with update packets, proceed only
462 * if peer is Established and updates are not on hold (as part of
f4d2dd84 463 * update-delay processing).
3f9c7369 464 */
feb17238 465 if (!peer_established(peer))
cc9f21da 466 return;
d62a17ae 467
f4d2dd84
DS
468 if ((peer->bgp->main_peers_update_hold)
469 || bgp_update_delay_active(peer->bgp))
cc9f21da 470 return;
d62a17ae 471
b10b6d52 472 if (peer->t_routeadv)
cc9f21da 473 return;
b10b6d52 474
963b7ee4
DS
475 /*
476 * Since the following is a do while loop
477 * let's stop adding to the outq if we are
478 * already at the limit.
479 */
480 if (peer->obuf->count >= bm->outq_limit) {
481 bgp_write_proceed_actions(peer);
482 return;
483 }
484
56257a44 485 do {
0e5cdd59
DS
486 enum bgp_af_index index;
487
56257a44 488 s = NULL;
0e5cdd59
DS
489 for (index = BGP_AF_START; index < BGP_AF_MAX; index++) {
490 paf = peer->peer_af_array[index];
c58b0f46
QY
491 if (!paf || !PAF_SUBGRP(paf))
492 continue;
0e5cdd59
DS
493
494 afi = paf->afi;
495 safi = paf->safi;
c58b0f46 496 next_pkt = paf->next_pkt_to_send;
80bd61c4 497
c58b0f46
QY
498 /*
499 * Try to generate a packet for the peer if we are at
500 * the end of the list. Always try to push out
501 * WITHDRAWs first.
502 */
503 if (!next_pkt || !next_pkt->buffer) {
504 next_pkt = subgroup_withdraw_packet(
505 PAF_SUBGRP(paf));
506 if (!next_pkt || !next_pkt->buffer)
507 subgroup_update_packet(PAF_SUBGRP(paf));
508 next_pkt = paf->next_pkt_to_send;
509 }
80bd61c4 510
c58b0f46
QY
511 /*
512 * If we still don't have a packet to send to the peer,
513 * then try to find out out if we have to send eor or
514 * if not, skip to the next AFI, SAFI. Don't send the
515 * EOR prematurely; if the subgroup's coalesce timer is
516 * running, the adjacency-out structure is not created
517 * yet.
518 */
519 if (!next_pkt || !next_pkt->buffer) {
9af52ccf
DA
520 if (!paf->t_announce_route) {
521 /* Make sure we supress BGP UPDATES
522 * for normal processing later again.
523 */
2adac256
DA
524 UNSET_FLAG(paf->subgroup->sflags,
525 SUBGRP_STATUS_FORCE_UPDATES);
526
9af52ccf
DA
527 /* If route-refresh BoRR message was
528 * already sent and we are done with
529 * re-announcing tables for a decent
530 * afi/safi, we ready to send
531 * EoRR request.
532 */
533 if (CHECK_FLAG(
534 peer->af_sflags[afi][safi],
535 PEER_STATUS_BORR_SEND)) {
536 bgp_route_refresh_send(
537 peer, afi, safi, 0, 0,
538 0,
539 BGP_ROUTE_REFRESH_EORR);
540
541 SET_FLAG(peer->af_sflags[afi]
542 [safi],
543 PEER_STATUS_EORR_SEND);
544 UNSET_FLAG(
545 peer->af_sflags[afi]
546 [safi],
547 PEER_STATUS_BORR_SEND);
548
549 if (bgp_debug_neighbor_events(
550 peer))
551 zlog_debug(
f70c91dc
DA
552 "%pBP sending route-refresh (EoRR) for %s/%s",
553 peer,
9af52ccf
DA
554 afi2str(afi),
555 safi2str(safi));
556 }
557 }
558
c58b0f46 559 if (CHECK_FLAG(peer->cap,
36235319 560 PEER_CAP_RESTART_RCV)) {
c58b0f46 561 if (!(PAF_SUBGRP(paf))->t_coalesce
36235319
QY
562 && peer->afc_nego[afi][safi]
563 && peer->synctime
564 && !CHECK_FLAG(
565 peer->af_sflags[afi][safi],
566 PEER_STATUS_EOR_SEND)) {
d6e3c15b 567 /* If EOR is disabled,
568 * the message is not sent
569 */
36235319
QY
570 if (BGP_SEND_EOR(peer->bgp, afi,
571 safi)) {
d6e3c15b 572 SET_FLAG(
36235319
QY
573 peer->af_sflags
574 [afi]
575 [safi],
576 PEER_STATUS_EOR_SEND);
d6e3c15b 577
9e3b51a7 578 /* Update EOR
579 * send time
580 */
36235319
QY
581 peer->eor_stime[afi]
582 [safi] =
583 monotime(NULL);
9e3b51a7 584
d6e3c15b 585 BGP_UPDATE_EOR_PKT(
36235319
QY
586 peer, afi, safi,
587 s);
a783cc05
XL
588 bgp_process_pending_refresh(
589 peer, afi,
590 safi);
56257a44 591 }
80bd61c4 592 }
d62a17ae 593 }
c58b0f46
QY
594 continue;
595 }
9e3b51a7 596
597 /* Update packet send time */
598 peer->pkt_stime[afi][safi] = monotime(NULL);
599
c58b0f46
QY
600 /* Found a packet template to send, overwrite
601 * packet with appropriate attributes from peer
602 * and advance peer */
603 s = bpacket_reformat_for_peer(next_pkt, paf);
604 bgp_packet_add(peer, s);
c58b0f46
QY
605 bpacket_queue_advance_peer(paf);
606 }
963b7ee4
DS
607 } while (s && (++generated < wpq) &&
608 (peer->obuf->count <= bm->outq_limit));
80bd61c4 609
6ec98a2f
QY
610 if (generated)
611 bgp_writes_on(peer);
612
af1e1dc6 613 bgp_write_proceed_actions(peer);
80bd61c4
QY
614}
615
d3ecc69e
QY
616/*
617 * Creates a BGP Keepalive packet and appends it to the peer's output queue.
618 */
d62a17ae 619void bgp_keepalive_send(struct peer *peer)
718e3744 620{
d62a17ae 621 struct stream *s;
622
556beacf 623 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
718e3744 624
d62a17ae 625 /* Make keepalive packet. */
626 bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
718e3744 627
d62a17ae 628 /* Set packet size. */
65baedca 629 bgp_packet_set_size(s);
718e3744 630
d62a17ae 631 /* Dump packet if debug option is set. */
632 /* bgp_packet_dump (s); */
718e3744 633
d62a17ae 634 if (bgp_debug_keepalive(peer))
635 zlog_debug("%s sending KEEPALIVE", peer->host);
718e3744 636
d62a17ae 637 /* Add packet to the peer. */
638 bgp_packet_add(peer, s);
424ab01d
QY
639
640 bgp_writes_on(peer);
718e3744 641}
642
d3ecc69e
QY
643/*
644 * Creates a BGP Open packet and appends it to the peer's output queue.
645 * Sets capabilities as necessary.
646 */
d62a17ae 647void bgp_open_send(struct peer *peer)
718e3744 648{
d62a17ae 649 struct stream *s;
d7c0a89a 650 uint16_t send_holdtime;
d62a17ae 651 as_t local_as;
718e3744 652
b90a8e13 653 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
d62a17ae 654 send_holdtime = peer->holdtime;
655 else
656 send_holdtime = peer->bgp->default_holdtime;
718e3744 657
d62a17ae 658 /* local-as Change */
659 if (peer->change_local_as)
660 local_as = peer->change_local_as;
661 else
662 local_as = peer->local_as;
718e3744 663
556beacf 664 s = stream_new(BGP_STANDARD_MESSAGE_MAX_PACKET_SIZE);
718e3744 665
d62a17ae 666 /* Make open packet. */
667 bgp_packet_set_marker(s, BGP_MSG_OPEN);
718e3744 668
d62a17ae 669 /* Set open packet values. */
670 stream_putc(s, BGP_VERSION_4); /* BGP version */
d7c0a89a
QY
671 stream_putw(s, (local_as <= BGP_AS_MAX) ? (uint16_t)local_as
672 : BGP_AS_TRANS);
d62a17ae 673 stream_putw(s, send_holdtime); /* Hold Time */
674 stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */
718e3744 675
d08c0c80
DA
676 /* Set capabilities */
677 if (CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
678 (void)bgp_open_capability(s, peer, true);
679 } else {
680 struct stream *tmp = stream_new(STREAM_SIZE(s));
681
682 stream_copy(tmp, s);
683 if (bgp_open_capability(tmp, peer, false)
684 > BGP_OPEN_NON_EXT_OPT_LEN) {
685 stream_free(tmp);
686 (void)bgp_open_capability(s, peer, true);
687 } else {
688 stream_copy(s, tmp);
689 stream_free(tmp);
690 }
691 }
718e3744 692
d62a17ae 693 /* Set BGP packet length. */
65baedca 694 bgp_packet_set_size(s);
718e3744 695
d62a17ae 696 if (bgp_debug_neighbor_events(peer))
697 zlog_debug(
23d0a753 698 "%s sending OPEN, version %d, my as %u, holdtime %d, id %pI4",
d62a17ae 699 peer->host, BGP_VERSION_4, local_as, send_holdtime,
23d0a753 700 &peer->local_id);
718e3744 701
d62a17ae 702 /* Dump packet if debug option is set. */
703 /* bgp_packet_dump (s); */
6fd04594 704 hook_call(bgp_packet_send, peer, BGP_MSG_OPEN, stream_get_endp(s), s);
718e3744 705
d62a17ae 706 /* Add packet to the peer. */
707 bgp_packet_add(peer, s);
424ab01d
QY
708
709 bgp_writes_on(peer);
710}
711
a127f33b
QY
712/*
713 * Writes NOTIFICATION message directly to a peer socket without waiting for
714 * the I/O thread.
715 *
716 * There must be exactly one stream on the peer->obuf FIFO, and the data within
717 * this stream must match the format of a BGP NOTIFICATION message.
718 * Transmission is best-effort.
719 *
720 * @requires peer->io_mtx
721 * @param peer
722 * @return 0
723 */
3dc339cd 724static void bgp_write_notify(struct peer *peer)
424ab01d
QY
725{
726 int ret, val;
d7c0a89a 727 uint8_t type;
424ab01d
QY
728 struct stream *s;
729
a127f33b
QY
730 /* There should be at least one packet. */
731 s = stream_fifo_pop(peer->obuf);
424ab01d 732
8ec586b0 733 if (!s)
3dc339cd 734 return;
8ec586b0
QY
735
736 assert(stream_get_endp(s) >= BGP_HEADER_SIZE);
737
becedef6
QY
738 /*
739 * socket is in nonblocking mode, if we can't deliver the NOTIFY, well,
740 * we only care about getting a clean shutdown at this point.
741 */
424ab01d
QY
742 ret = write(peer->fd, STREAM_DATA(s), stream_get_endp(s));
743
becedef6
QY
744 /*
745 * only connection reset/close gets counted as TCP_fatal_error, failure
746 * to write the entire NOTIFY doesn't get different FSM treatment
747 */
424ab01d 748 if (ret <= 0) {
3735936b 749 stream_free(s);
424ab01d 750 BGP_EVENT_ADD(peer, TCP_fatal_error);
3dc339cd 751 return;
424ab01d
QY
752 }
753
754 /* Disable Nagle, make NOTIFY packet go out right away */
755 val = 1;
756 (void)setsockopt(peer->fd, IPPROTO_TCP, TCP_NODELAY, (char *)&val,
757 sizeof(val));
758
759 /* Retrieve BGP packet type. */
760 stream_set_getp(s, BGP_MARKER_SIZE + 2);
761 type = stream_getc(s);
762
763 assert(type == BGP_MSG_NOTIFY);
764
765 /* Type should be notify. */
0112e9e0 766 atomic_fetch_add_explicit(&peer->notify_out, 1, memory_order_relaxed);
424ab01d
QY
767
768 /* Double start timer. */
769 peer->v_start *= 2;
770
771 /* Overflow check. */
772 if (peer->v_start >= (60 * 2))
773 peer->v_start = (60 * 2);
774
becedef6
QY
775 /*
776 * Handle Graceful Restart case where the state changes to
777 * Connect instead of Idle
778 */
424ab01d
QY
779 BGP_EVENT_ADD(peer, BGP_Stop);
780
3735936b 781 stream_free(s);
718e3744 782}
783
eea685b6
DA
784/*
785 * Encapsulate an original BGP CEASE Notification into Hard Reset
786 */
787static uint8_t *bgp_notify_encapsulate_hard_reset(uint8_t code, uint8_t subcode,
788 uint8_t *data, size_t datalen)
789{
790 uint8_t *message = XCALLOC(MTYPE_BGP_NOTIFICATION, datalen + 2);
791
792 /* ErrCode */
793 message[0] = code;
794 /* Subcode */
795 message[1] = subcode;
796 /* Data */
797 if (datalen)
798 memcpy(message + 2, data, datalen);
799
800 return message;
801}
802
803/*
804 * Decapsulate an original BGP CEASE Notification from Hard Reset
805 */
806struct bgp_notify bgp_notify_decapsulate_hard_reset(struct bgp_notify *notify)
807{
808 struct bgp_notify bn = {};
809
810 bn.code = notify->raw_data[0];
811 bn.subcode = notify->raw_data[1];
812 bn.length = notify->length - 2;
813
c73d2363 814 bn.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, bn.length);
eea685b6
DA
815 memcpy(bn.raw_data, notify->raw_data + 2, bn.length);
816
817 return bn;
818}
819
20170775
DA
820/* Check if Graceful-Restart N-bit is exchanged */
821bool bgp_has_graceful_restart_notification(struct peer *peer)
822{
823 return CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_RCV) &&
824 CHECK_FLAG(peer->cap, PEER_CAP_GRACEFUL_RESTART_N_BIT_ADV);
825}
826
eea685b6
DA
827/*
828 * Check if to send BGP CEASE Notification/Hard Reset?
829 */
1ae314be
DA
830bool bgp_notify_send_hard_reset(struct peer *peer, uint8_t code,
831 uint8_t subcode)
eea685b6
DA
832{
833 /* When the "N" bit has been exchanged, a Hard Reset message is used to
834 * indicate to the peer that the session is to be fully terminated.
835 */
20170775 836 if (!bgp_has_graceful_restart_notification(peer))
eea685b6
DA
837 return false;
838
839 /*
840 * https://datatracker.ietf.org/doc/html/rfc8538#section-5.1
841 */
1ae314be 842 if (code == BGP_NOTIFY_CEASE) {
eea685b6
DA
843 switch (subcode) {
844 case BGP_NOTIFY_CEASE_MAX_PREFIX:
845 case BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN:
846 case BGP_NOTIFY_CEASE_PEER_UNCONFIG:
847 case BGP_NOTIFY_CEASE_HARD_RESET:
aebe2e37 848 case BGP_NOTIFY_CEASE_BFD_DOWN:
eea685b6 849 return true;
1ae314be
DA
850 case BGP_NOTIFY_CEASE_ADMIN_RESET:
851 /* Provide user control:
852 * `bgp hard-adminstrative-reset`
853 */
854 if (CHECK_FLAG(peer->bgp->flags,
855 BGP_FLAG_HARD_ADMIN_RESET))
856 return true;
857 else
858 return false;
eea685b6
DA
859 default:
860 break;
861 }
862 }
863
864 return false;
865}
866
1ae314be
DA
867/*
868 * Check if received BGP CEASE Notification/Hard Reset?
869 */
870bool bgp_notify_received_hard_reset(struct peer *peer, uint8_t code,
871 uint8_t subcode)
872{
873 /* When the "N" bit has been exchanged, a Hard Reset message is used to
874 * indicate to the peer that the session is to be fully terminated.
875 */
20170775 876 if (!bgp_has_graceful_restart_notification(peer))
1ae314be
DA
877 return false;
878
879 if (code == BGP_NOTIFY_CEASE && subcode == BGP_NOTIFY_CEASE_HARD_RESET)
880 return true;
881
882 return false;
883}
884
d3ecc69e
QY
885/*
886 * Creates a BGP Notify and appends it to the peer's output queue.
887 *
becedef6
QY
888 * This function attempts to write the packet from the thread it is called
889 * from, to ensure the packet gets out ASAP.
d3ecc69e 890 *
a127f33b
QY
891 * This function may be called from multiple threads. Since the function
892 * modifies I/O buffer(s) in the peer, these are locked for the duration of the
893 * call to prevent tampering from other threads.
894 *
895 * Delivery of the NOTIFICATION is attempted once and is best-effort. After
896 * return, the peer structure *must* be reset; no assumptions about session
897 * state are valid.
898 *
d3ecc69e
QY
899 * @param peer
900 * @param code BGP error code
901 * @param sub_code BGP error subcode
902 * @param data Data portion
903 * @param datalen length of data portion
904 */
71ca5b09
MS
905static void bgp_notify_send_internal(struct peer *peer, uint8_t code,
906 uint8_t sub_code, uint8_t *data,
907 size_t datalen, bool use_curr)
718e3744 908{
d62a17ae 909 struct stream *s;
1ae314be 910 bool hard_reset = bgp_notify_send_hard_reset(peer, code, sub_code);
d62a17ae 911
a127f33b 912 /* Lock I/O mutex to prevent other threads from pushing packets */
00dffa8c 913 frr_mutex_lock_autounlock(&peer->io_mtx);
a127f33b
QY
914 /* ============================================== */
915
d62a17ae 916 /* Allocate new stream. */
ef56aee4 917 s = stream_new(peer->max_packet_size);
d62a17ae 918
d3ecc69e 919 /* Make notify packet. */
d62a17ae 920 bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
921
eea685b6
DA
922 /* Check if we should send Hard Reset Notification or not */
923 if (hard_reset) {
924 uint8_t *hard_reset_message = bgp_notify_encapsulate_hard_reset(
925 code, sub_code, data, datalen);
d62a17ae 926
eea685b6
DA
927 /* Hard Reset encapsulates another NOTIFICATION message
928 * in its data portion.
929 */
930 stream_putc(s, BGP_NOTIFY_CEASE);
931 stream_putc(s, BGP_NOTIFY_CEASE_HARD_RESET);
932 stream_write(s, hard_reset_message, datalen + 2);
933
934 XFREE(MTYPE_BGP_NOTIFICATION, hard_reset_message);
935 } else {
936 stream_putc(s, code);
937 stream_putc(s, sub_code);
938 if (data)
939 stream_write(s, data, datalen);
940 }
d62a17ae 941
942 /* Set BGP packet length. */
bd6b2706 943 bgp_packet_set_size(s);
d62a17ae 944
424ab01d 945 /* wipe output buffer */
a127f33b 946 stream_fifo_clean(peer->obuf);
d62a17ae 947
becedef6
QY
948 /*
949 * If possible, store last packet for debugging purposes. This check is
950 * in place because we are sometimes called with a doppelganger peer,
951 * who tends to have a plethora of fields nulled out.
71ca5b09
MS
952 *
953 * Some callers should not attempt this - the io pthread for example
954 * should not touch internals of the peer struct.
becedef6 955 */
71ca5b09 956 if (use_curr && peer->curr) {
d8151687 957 size_t packetsize = stream_get_endp(peer->curr);
556beacf 958 assert(packetsize <= peer->max_packet_size);
d8151687
QY
959 memcpy(peer->last_reset_cause, peer->curr->data, packetsize);
960 peer->last_reset_cause_size = packetsize;
961 }
962
d62a17ae 963 /* For debug */
964 {
965 struct bgp_notify bgp_notify;
966 int first = 0;
967 int i;
968 char c[4];
969
970 bgp_notify.code = code;
971 bgp_notify.subcode = sub_code;
972 bgp_notify.data = NULL;
e0981960 973 bgp_notify.length = datalen;
d62a17ae 974 bgp_notify.raw_data = data;
975
976 peer->notify.code = bgp_notify.code;
977 peer->notify.subcode = bgp_notify.subcode;
3573b171 978 peer->notify.length = bgp_notify.length;
d62a17ae 979
e0981960 980 if (bgp_notify.length && data) {
3573b171
DA
981 bgp_notify.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
982 bgp_notify.length * 3);
d62a17ae 983 for (i = 0; i < bgp_notify.length; i++)
984 if (first) {
552d6491
QY
985 snprintf(c, sizeof(c), " %02x",
986 data[i]);
f009ff26 987
552d6491 988 strlcat(bgp_notify.data, c,
2ba1fe69 989 bgp_notify.length);
f009ff26 990
d62a17ae 991 } else {
992 first = 1;
552d6491 993 snprintf(c, sizeof(c), "%02x", data[i]);
f009ff26 994
552d6491 995 strlcpy(bgp_notify.data, c,
2ba1fe69 996 bgp_notify.length);
d62a17ae 997 }
998 }
eea685b6 999 bgp_notify_print(peer, &bgp_notify, "sending", hard_reset);
d62a17ae 1000
1001 if (bgp_notify.data) {
3573b171
DA
1002 if (data) {
1003 XFREE(MTYPE_BGP_NOTIFICATION,
1004 peer->notify.data);
1005 peer->notify.data = XCALLOC(
1006 MTYPE_BGP_NOTIFICATION, datalen);
1007 memcpy(peer->notify.data, data, datalen);
1008 }
1009
1010 XFREE(MTYPE_BGP_NOTIFICATION, bgp_notify.data);
d62a17ae 1011 bgp_notify.length = 0;
1012 }
1013 }
1014
1015 /* peer reset cause */
1016 if (code == BGP_NOTIFY_CEASE) {
1017 if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
1018 peer->last_reset = PEER_DOWN_USER_RESET;
5597214c
DA
1019 else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN) {
1020 if (CHECK_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN))
1021 peer->last_reset = PEER_DOWN_RTT_SHUTDOWN;
1022 else
1023 peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
1024 } else
d62a17ae 1025 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1026 } else
1027 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
1028
d3ecc69e 1029 /* Add packet to peer's output queue */
a127f33b 1030 stream_fifo_push(peer->obuf, s);
424ab01d 1031
5cce3f05 1032 bgp_peer_gr_flags_update(peer);
36235319
QY
1033 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
1034 peer->bgp->peer);
5cce3f05 1035
424ab01d 1036 bgp_write_notify(peer);
718e3744 1037}
1038
/*
 * Build a BGP NOTIFICATION without a data payload and place it on the peer's
 * output queue.
 *
 * The packet is written from the calling thread so that it leaves as soon as
 * possible. This is a convenience wrapper around bgp_notify_send_internal()
 * that supplies an empty payload and permits use of peer->curr.
 *
 * @param peer
 * @param code      BGP error code
 * @param sub_code  BGP error subcode
 */
void bgp_notify_send(struct peer *peer, uint8_t code, uint8_t sub_code)
{
	uint8_t *no_data = NULL;
	size_t no_datalen = 0;

	bgp_notify_send_internal(peer, code, sub_code, no_data, no_datalen,
				 true);
}
1053
/*
 * Queue a NOTIFICATION that carries caller-supplied data.
 *
 * Meant to be called from the main pthread, where reading the peer object
 * (including peer->curr) is acceptable.
 *
 * @param peer
 * @param code      BGP error code
 * @param sub_code  BGP error subcode
 * @param data      bytes to attach to the NOTIFICATION
 * @param datalen   number of bytes in data
 */
void bgp_notify_send_with_data(struct peer *peer, uint8_t code,
			       uint8_t sub_code, uint8_t *data, size_t datalen)
{
	const bool may_use_curr = true;

	bgp_notify_send_internal(peer, code, sub_code, data, datalen,
				 may_use_curr);
}
1062
/*
 * Variant for the io pthread: queues a NOTIFICATION while steering clear of
 * the peer object's internals (peer->curr is not consulted).
 *
 * @param peer
 * @param code      BGP error code
 * @param sub_code  BGP error subcode
 * @param data      bytes to attach to the NOTIFICATION
 * @param datalen   number of bytes in data
 */
void bgp_notify_io_invalid(struct peer *peer, uint8_t code, uint8_t sub_code,
			   uint8_t *data, size_t datalen)
{
	/* The io pthread must not touch the peer object. */
	const bool may_use_curr = false;

	bgp_notify_send_internal(peer, code, sub_code, data, datalen,
				 may_use_curr);
}
1073
d3ecc69e
QY
1074/*
1075 * Creates BGP Route Refresh packet and appends it to the peer's output queue.
1076 *
1077 * @param peer
1078 * @param afi Address Family Identifier
1079 * @param safi Subsequent Address Family Identifier
1080 * @param orf_type Outbound Route Filtering type
1081 * @param when_to_refresh Whether to refresh immediately or defer
1082 * @param remove Whether to remove ORF for specified AFI/SAFI
1083 */
d62a17ae 1084void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
d7c0a89a 1085 uint8_t orf_type, uint8_t when_to_refresh,
9af52ccf 1086 int remove, uint8_t subtype)
718e3744 1087{
d62a17ae 1088 struct stream *s;
1089 struct bgp_filter *filter;
1090 int orf_refresh = 0;
617975d1
DS
1091 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1092 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
d62a17ae 1093
1094 if (DISABLE_BGP_ANNOUNCE)
1095 return;
1096
1097 filter = &peer->filter[afi][safi];
1098
1099 /* Convert AFI, SAFI to values for packet. */
1100 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1101
ef56aee4 1102 s = stream_new(peer->max_packet_size);
d62a17ae 1103
1104 /* Make BGP update packet. */
1105 if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
1106 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_NEW);
718e3744 1107 else
d62a17ae 1108 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_OLD);
1109
1110 /* Encode Route Refresh message. */
1111 stream_putw(s, pkt_afi);
9af52ccf
DA
1112 if (subtype)
1113 stream_putc(s, subtype);
1114 else
1115 stream_putc(s, 0);
d62a17ae 1116 stream_putc(s, pkt_safi);
1117
1118 if (orf_type == ORF_TYPE_PREFIX || orf_type == ORF_TYPE_PREFIX_OLD)
1119 if (remove || filter->plist[FILTER_IN].plist) {
d7c0a89a 1120 uint16_t orf_len;
d62a17ae 1121 unsigned long orfp;
1122
1123 orf_refresh = 1;
1124 stream_putc(s, when_to_refresh);
1125 stream_putc(s, orf_type);
1126 orfp = stream_get_endp(s);
1127 stream_putw(s, 0);
1128
1129 if (remove) {
1130 UNSET_FLAG(peer->af_sflags[afi][safi],
1131 PEER_STATUS_ORF_PREFIX_SEND);
1132 stream_putc(s, ORF_COMMON_PART_REMOVE_ALL);
1133 if (bgp_debug_neighbor_events(peer))
1134 zlog_debug(
f70c91dc
DA
1135 "%pBP sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %s/%s",
1136 peer, orf_type,
a7d91a8c
DA
1137 (when_to_refresh ==
1138 REFRESH_DEFER
d62a17ae 1139 ? "defer"
1140 : "immediate"),
748a041f
DS
1141 iana_afi2str(pkt_afi),
1142 iana_safi2str(pkt_safi));
d62a17ae 1143 } else {
1144 SET_FLAG(peer->af_sflags[afi][safi],
1145 PEER_STATUS_ORF_PREFIX_SEND);
1146 prefix_bgp_orf_entry(
1147 s, filter->plist[FILTER_IN].plist,
1148 ORF_COMMON_PART_ADD,
1149 ORF_COMMON_PART_PERMIT,
1150 ORF_COMMON_PART_DENY);
1151 if (bgp_debug_neighbor_events(peer))
1152 zlog_debug(
f70c91dc
DA
1153 "%pBP sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %s/%s",
1154 peer, orf_type,
a7d91a8c
DA
1155 (when_to_refresh ==
1156 REFRESH_DEFER
d62a17ae 1157 ? "defer"
1158 : "immediate"),
748a041f
DS
1159 iana_afi2str(pkt_afi),
1160 iana_safi2str(pkt_safi));
d62a17ae 1161 }
1162
1163 /* Total ORF Entry Len. */
1164 orf_len = stream_get_endp(s) - orfp - 2;
1165 stream_putw_at(s, orfp, orf_len);
1166 }
1167
1168 /* Set packet size. */
65baedca 1169 bgp_packet_set_size(s);
d62a17ae 1170
1171 if (bgp_debug_neighbor_events(peer)) {
1172 if (!orf_refresh)
a7d91a8c 1173 zlog_debug(
f70c91dc
DA
1174 "%pBP sending REFRESH_REQ for afi/safi: %s/%s",
1175 peer, iana_afi2str(pkt_afi),
1176 iana_safi2str(pkt_safi));
d62a17ae 1177 }
1178
1179 /* Add packet to the peer. */
1180 bgp_packet_add(peer, s);
424ab01d
QY
1181
1182 bgp_writes_on(peer);
718e3744 1183}
1184
d3ecc69e
QY
1185/*
1186 * Create a BGP Capability packet and append it to the peer's output queue.
1187 *
1188 * @param peer
1189 * @param afi Address Family Identifier
1190 * @param safi Subsequent Address Family Identifier
1191 * @param capability_code BGP Capability Code
1192 * @param action Set or Remove capability
1193 */
d62a17ae 1194void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
1195 int capability_code, int action)
718e3744 1196{
d62a17ae 1197 struct stream *s;
617975d1
DS
1198 iana_afi_t pkt_afi = IANA_AFI_IPV4;
1199 iana_safi_t pkt_safi = IANA_SAFI_UNICAST;
d62a17ae 1200
1201 /* Convert AFI, SAFI to values for packet. */
1202 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
1203
ef56aee4 1204 s = stream_new(peer->max_packet_size);
d62a17ae 1205
1206 /* Make BGP update packet. */
1207 bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);
1208
1209 /* Encode MP_EXT capability. */
1210 if (capability_code == CAPABILITY_CODE_MP) {
1211 stream_putc(s, action);
1212 stream_putc(s, CAPABILITY_CODE_MP);
1213 stream_putc(s, CAPABILITY_CODE_MP_LEN);
1214 stream_putw(s, pkt_afi);
1215 stream_putc(s, 0);
1216 stream_putc(s, pkt_safi);
1217
1218 if (bgp_debug_neighbor_events(peer))
1219 zlog_debug(
f70c91dc
DA
1220 "%pBP sending CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
1221 peer,
d62a17ae 1222 action == CAPABILITY_ACTION_SET ? "Advertising"
1223 : "Removing",
748a041f 1224 iana_afi2str(pkt_afi), iana_safi2str(pkt_safi));
d62a17ae 1225 }
1226
1227 /* Set packet size. */
65baedca 1228 bgp_packet_set_size(s);
718e3744 1229
d62a17ae 1230 /* Add packet to the peer. */
1231 bgp_packet_add(peer, s);
424ab01d
QY
1232
1233 bgp_writes_on(peer);
d62a17ae 1234}
718e3744 1235
d62a17ae 1236/* RFC1771 6.8 Connection collision detection. */
1237static int bgp_collision_detect(struct peer *new, struct in_addr remote_id)
1238{
1239 struct peer *peer;
1240
f88221f3
DS
1241 /*
1242 * Upon receipt of an OPEN message, the local system must examine
1243 * all of its connections that are in the OpenConfirm state. A BGP
1244 * speaker may also examine connections in an OpenSent state if it
1245 * knows the BGP Identifier of the peer by means outside of the
1246 * protocol. If among these connections there is a connection to a
1247 * remote BGP speaker whose BGP Identifier equals the one in the
1248 * OPEN message, then the local system performs the following
1249 * collision resolution procedure:
1250 */
1251 peer = new->doppelganger;
1252 if (peer == NULL)
1253 return 0;
1254
1255 /*
1256 * Do not accept the new connection in Established or Clearing
1257 * states. Note that a peer GR is handled by closing the existing
1258 * connection upon receipt of new one.
1259 */
feb17238 1260 if (peer_established(peer) || peer->status == Clearing) {
f88221f3
DS
1261 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1262 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1263 return -1;
1264 }
1265
1266 if ((peer->status != OpenConfirm) && (peer->status != OpenSent))
1267 return 0;
1268
1269 /*
1270 * 1. The BGP Identifier of the local system is
1271 * compared to the BGP Identifier of the remote
1272 * system (as specified in the OPEN message).
1273 *
1274 * If the BGP Identifiers of the peers
1275 * involved in the connection collision
1276 * are identical, then the connection
1277 * initiated by the BGP speaker with the
1278 * larger AS number is preserved.
1279 */
1280 if (ntohl(peer->local_id.s_addr) < ntohl(remote_id.s_addr)
1281 || (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1282 && peer->local_as < peer->as))
1283 if (!CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1284 /*
1285 * 2. If the value of the local BGP
1286 * Identifier is less than the remote one,
1287 * the local system closes BGP connection
1288 * that already exists (the one that is
1289 * already in the OpenConfirm state),
1290 * and accepts BGP connection initiated by
1291 * the remote system.
1292 */
1293 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1294 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1295 return 1;
1296 } else {
1297 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1298 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1299 return -1;
1300 }
1301 else {
1302 if (ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)
1303 && peer->local_as == peer->as)
1304 flog_err(EC_BGP_ROUTER_ID_SAME,
1305 "Peer's router-id %pI4 is the same as ours",
1306 &remote_id);
1307
1308 /*
1309 * 3. Otherwise, the local system closes newly
1310 * created BGP connection (the one associated with the
1311 * newly received OPEN message), and continues to use
1312 * the existing one (the one that is already in the
1313 * OpenConfirm state).
d62a17ae 1314 */
f88221f3
DS
1315 if (CHECK_FLAG(peer->sflags, PEER_STATUS_ACCEPT_PEER)) {
1316 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1317 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
1318 return 1;
1319 } else {
d62a17ae 1320 bgp_notify_send(new, BGP_NOTIFY_CEASE,
1321 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
95f7965d 1322 return -1;
d62a17ae 1323 }
1324 }
d62a17ae 1325}
718e3744 1326
/* Packet processing routines ---------------------------------------------- */
/*
 * The functions below are meant to be invoked from bgp_process_packet().
 * They behave alike and are expected to respect these rules:
 *
 * Return codes
 * ------------
 * Each function returns one of the FSM event codes specified in bgpd.h.
 * Whenever a NOTIFY has been sent the code MUST be BGP_Stop. In all other
 * cases the code SHOULD match the packet type the function handles; for
 * instance bgp_open_receive() returns BGP_Stop on error and
 * Receive_OPEN_message otherwise.
 *
 * When there is nothing for the FSM to do, return BGP_PACKET_NOOP, defined
 * below.
 *
 * Side effects
 * ------------
 * - May send NOTIFY messages
 * - May not modify peer->status
 * - May not call bgp_event_update()
 */

#define BGP_PACKET_NOOP 0
1352
1353/**
1354 * Process BGP OPEN message for peer.
1355 *
1356 * If any errors are encountered in the OPEN message, immediately sends NOTIFY
1357 * and returns BGP_Stop.
1358 *
1359 * @param peer
1360 * @param size size of the packet
1361 * @return as in summary
1362 */
d62a17ae 1363static int bgp_open_receive(struct peer *peer, bgp_size_t size)
1364{
1365 int ret;
d7c0a89a 1366 uint8_t version;
d08c0c80 1367 uint16_t optlen;
d7c0a89a
QY
1368 uint16_t holdtime;
1369 uint16_t send_holdtime;
d62a17ae 1370 as_t remote_as;
6dcef54c 1371 as_t as4 = 0, as4_be;
d62a17ae 1372 struct in_addr remote_id;
1373 int mp_capability;
d7c0a89a
QY
1374 uint8_t notify_data_remote_as[2];
1375 uint8_t notify_data_remote_as4[4];
1376 uint8_t notify_data_remote_id[4];
1377 uint16_t *holdtime_ptr;
d62a17ae 1378
1379 /* Parse open packet. */
424ab01d
QY
1380 version = stream_getc(peer->curr);
1381 memcpy(notify_data_remote_as, stream_pnt(peer->curr), 2);
1382 remote_as = stream_getw(peer->curr);
d7c0a89a 1383 holdtime_ptr = (uint16_t *)stream_pnt(peer->curr);
424ab01d
QY
1384 holdtime = stream_getw(peer->curr);
1385 memcpy(notify_data_remote_id, stream_pnt(peer->curr), 4);
1386 remote_id.s_addr = stream_get_ipv4(peer->curr);
d62a17ae 1387
d62a17ae 1388 /* BEGIN to read the capability here, but dont do it yet */
1389 mp_capability = 0;
424ab01d 1390 optlen = stream_getc(peer->curr);
d62a17ae 1391
d08c0c80
DA
1392 /* Extended Optional Parameters Length for BGP OPEN Message */
1393 if (optlen == BGP_OPEN_NON_EXT_OPT_LEN
1394 || CHECK_FLAG(peer->flags, PEER_FLAG_EXTENDED_OPT_PARAMS)) {
1395 uint8_t opttype;
1396
766eec1b
DS
1397 if (STREAM_READABLE(peer->curr) < 1) {
1398 flog_err(
1399 EC_BGP_PKT_OPEN,
1400 "%s: stream does not have enough bytes for extended optional parameters",
1401 peer->host);
1402 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1403 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1404 return BGP_Stop;
1405 }
1406
d08c0c80
DA
1407 opttype = stream_getc(peer->curr);
1408 if (opttype == BGP_OPEN_NON_EXT_OPT_TYPE_EXTENDED_LENGTH) {
766eec1b
DS
1409 if (STREAM_READABLE(peer->curr) < 2) {
1410 flog_err(
1411 EC_BGP_PKT_OPEN,
1412 "%s: stream does not have enough bytes to read the extended optional parameters optlen",
1413 peer->host);
1414 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1415 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
1416 return BGP_Stop;
1417 }
d08c0c80
DA
1418 optlen = stream_getw(peer->curr);
1419 SET_FLAG(peer->sflags,
1420 PEER_STATUS_EXT_OPT_PARAMS_LENGTH);
1421 }
1422 }
1423
1424 /* Receive OPEN message log */
1425 if (bgp_debug_neighbor_events(peer))
1426 zlog_debug(
1427 "%s rcv OPEN%s, version %d, remote-as (in open) %u, holdtime %d, id %pI4",
1428 peer->host,
1429 CHECK_FLAG(peer->sflags,
1430 PEER_STATUS_EXT_OPT_PARAMS_LENGTH)
1431 ? " (Extended)"
1432 : "",
1433 version, remote_as, holdtime, &remote_id);
1434
d62a17ae 1435 if (optlen != 0) {
1436 /* If not enough bytes, it is an error. */
424ab01d 1437 if (STREAM_READABLE(peer->curr) < optlen) {
d08c0c80
DA
1438 flog_err(EC_BGP_PKT_OPEN,
1439 "%s: stream has not enough bytes (%u)",
1440 peer->host, optlen);
d62a17ae 1441 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1442 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
d8151687 1443 return BGP_Stop;
d62a17ae 1444 }
718e3744 1445
d62a17ae 1446 /* We need the as4 capability value *right now* because
1447 * if it is there, we have not got the remote_as yet, and
1448 * without
1449 * that we do not know which peer is connecting to us now.
1450 */
1451 as4 = peek_for_as4_capability(peer, optlen);
d62a17ae 1452 }
718e3744 1453
6dcef54c
DL
1454 as4_be = htonl(as4);
1455 memcpy(notify_data_remote_as4, &as4_be, 4);
1456
d62a17ae 1457 /* Just in case we have a silly peer who sends AS4 capability set to 0
1458 */
1459 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) {
e50f7cfd 1460 flog_err(EC_BGP_PKT_OPEN,
1c50c1c0
QY
1461 "%s bad OPEN, got AS4 capability, but AS4 set to 0",
1462 peer->host);
d62a17ae 1463 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1464 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1465 notify_data_remote_as4, 4);
d8151687 1466 return BGP_Stop;
d62a17ae 1467 }
718e3744 1468
33d022bc
DA
1469 /* Codification of AS 0 Processing */
1470 if (remote_as == BGP_AS_ZERO) {
1471 flog_err(EC_BGP_PKT_OPEN, "%s bad OPEN, got AS set to 0",
1472 peer->host);
1473 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
1474 BGP_NOTIFY_OPEN_BAD_PEER_AS);
1475 return BGP_Stop;
1476 }
1477
d62a17ae 1478 if (remote_as == BGP_AS_TRANS) {
1479 /* Take the AS4 from the capability. We must have received the
1480 * capability now! Otherwise we have a asn16 peer who uses
1481 * BGP_AS_TRANS, for some unknown reason.
1482 */
1483 if (as4 == BGP_AS_TRANS) {
af4c2728 1484 flog_err(
e50f7cfd 1485 EC_BGP_PKT_OPEN,
d62a17ae 1486 "%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed",
1487 peer->host);
1488 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1489 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1490 notify_data_remote_as4, 4);
d8151687 1491 return BGP_Stop;
d62a17ae 1492 }
718e3744 1493
d62a17ae 1494 if (!as4 && BGP_DEBUG(as4, AS4))
1495 zlog_debug(
3efd0893 1496 "%s [AS4] OPEN remote_as is AS_TRANS, but no AS4. Odd, but proceeding.",
d62a17ae 1497 peer->host);
1498 else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4))
1499 zlog_debug(
3efd0893 1500 "%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits in 2-bytes, very odd peer.",
d62a17ae 1501 peer->host, as4);
1502 if (as4)
1503 remote_as = as4;
1504 } else {
1505 /* We may have a partner with AS4 who has an asno < BGP_AS_MAX
1506 */
1507 /* If we have got the capability, peer->as4cap must match
1508 * remote_as */
1509 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV)
1510 && as4 != remote_as) {
1511 /* raise error, log this, close session */
af4c2728 1512 flog_err(
e50f7cfd 1513 EC_BGP_PKT_OPEN,
3efd0893 1514 "%s bad OPEN, got AS4 capability, but remote_as %u mismatch with 16bit 'myasn' %u in open",
d62a17ae 1515 peer->host, as4, remote_as);
1516 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1517 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1518 notify_data_remote_as4, 4);
d8151687 1519 return BGP_Stop;
d62a17ae 1520 }
1521 }
6b0655a2 1522
787c3020
DA
1523 /* rfc6286:
1524 * If the BGP Identifier field of the OPEN message
1525 * is zero, or if it is the same as the BGP Identifier
1526 * of the local BGP speaker and the message is from an
1527 * internal peer, then the Error Subcode is set to
1528 * "Bad BGP Identifier".
1529 */
975a328e 1530 if (remote_id.s_addr == INADDR_ANY
787c3020
DA
1531 || (peer->sort == BGP_PEER_IBGP
1532 && ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr))) {
d62a17ae 1533 if (bgp_debug_neighbor_events(peer))
23d0a753
DA
1534 zlog_debug("%s bad OPEN, wrong router identifier %pI4",
1535 peer->host, &remote_id);
d62a17ae 1536 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1537 BGP_NOTIFY_OPEN_BAD_BGP_IDENT,
1538 notify_data_remote_id, 4);
d8151687 1539 return BGP_Stop;
d62a17ae 1540 }
1541
d62a17ae 1542 /* Peer BGP version check. */
1543 if (version != BGP_VERSION_4) {
d7c0a89a 1544 uint16_t maxver = htons(BGP_VERSION_4);
d62a17ae 1545 /* XXX this reply may not be correct if version < 4 XXX */
1546 if (bgp_debug_neighbor_events(peer))
1547 zlog_debug(
1548 "%s bad protocol version, remote requested %d, local request %d",
1549 peer->host, version, BGP_VERSION_4);
1550 /* Data must be in network byte order here */
1551 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1552 BGP_NOTIFY_OPEN_UNSUP_VERSION,
d7c0a89a 1553 (uint8_t *)&maxver, 2);
d8151687 1554 return BGP_Stop;
d62a17ae 1555 }
1556
1557 /* Check neighbor as number. */
1558 if (peer->as_type == AS_UNSPECIFIED) {
1559 if (bgp_debug_neighbor_events(peer))
1560 zlog_debug(
1561 "%s bad OPEN, remote AS is unspecified currently",
1562 peer->host);
1563 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1564 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1565 notify_data_remote_as, 2);
d8151687 1566 return BGP_Stop;
d62a17ae 1567 } else if (peer->as_type == AS_INTERNAL) {
1568 if (remote_as != peer->bgp->as) {
1569 if (bgp_debug_neighbor_events(peer))
1570 zlog_debug(
1571 "%s bad OPEN, remote AS is %u, internal specified",
1572 peer->host, remote_as);
1573 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1574 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1575 notify_data_remote_as, 2);
d8151687 1576 return BGP_Stop;
1ff9a340 1577 }
d62a17ae 1578 peer->as = peer->local_as;
1579 } else if (peer->as_type == AS_EXTERNAL) {
1580 if (remote_as == peer->bgp->as) {
1581 if (bgp_debug_neighbor_events(peer))
1582 zlog_debug(
1583 "%s bad OPEN, remote AS is %u, external specified",
1584 peer->host, remote_as);
1585 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1586 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1587 notify_data_remote_as, 2);
d8151687 1588 return BGP_Stop;
1ff9a340 1589 }
d62a17ae 1590 peer->as = remote_as;
1591 } else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) {
1592 if (bgp_debug_neighbor_events(peer))
1593 zlog_debug("%s bad OPEN, remote AS is %u, expected %u",
1594 peer->host, remote_as, peer->as);
1595 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1596 BGP_NOTIFY_OPEN_BAD_PEER_AS,
1597 notify_data_remote_as, 2);
d8151687 1598 return BGP_Stop;
eb821189 1599 }
718e3744 1600
7a75470f
DS
1601 /*
1602 * When collision is detected and this peer is closed.
1603 * Return immediately.
1604 */
1605 ret = bgp_collision_detect(peer, remote_id);
1606 if (ret < 0)
1607 return BGP_Stop;
1608
1609 /* Get sockname. */
1610 if (bgp_getsockname(peer) < 0) {
1611 flog_err_sys(EC_LIB_SOCKET,
1612 "%s: bgp_getsockname() failed for peer: %s",
1613 __func__, peer->host);
1614 return BGP_Stop;
1615 }
1616
1617 /* Set remote router-id */
1618 peer->remote_id = remote_id;
1619
d62a17ae 1620 /* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST
1621 calculate the value of the Hold Timer by using the smaller of its
1622 configured Hold Time and the Hold Time received in the OPEN message.
1623 The Hold Time MUST be either zero or at least three seconds. An
1624 implementation may reject connections on the basis of the Hold Time.
0b2aa3a0 1625 */
d62a17ae 1626
1627 if (holdtime < 3 && holdtime != 0) {
b042667a
TI
1628 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1629 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
1630 (uint8_t *)holdtime_ptr, 2);
1631 return BGP_Stop;
1632 }
1633
1634 /* Send notification message when Hold Time received in the OPEN message
1635 * is smaller than configured minimum Hold Time. */
1636 if (holdtime < peer->bgp->default_min_holdtime
1637 && peer->bgp->default_min_holdtime != 0) {
d62a17ae 1638 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
1639 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
d7c0a89a 1640 (uint8_t *)holdtime_ptr, 2);
d8151687 1641 return BGP_Stop;
0b2aa3a0 1642 }
d62a17ae 1643
1644 /* From the rfc: A reasonable maximum time between KEEPALIVE messages
1645 would be one third of the Hold Time interval. KEEPALIVE messages
1646 MUST NOT be sent more frequently than one per second. An
1647 implementation MAY adjust the rate at which it sends KEEPALIVE
1648 messages as a function of the Hold Time interval. */
1649
b90a8e13 1650 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER))
d62a17ae 1651 send_holdtime = peer->holdtime;
1652 else
1653 send_holdtime = peer->bgp->default_holdtime;
1654
1655 if (holdtime < send_holdtime)
1656 peer->v_holdtime = holdtime;
1657 else
1658 peer->v_holdtime = send_holdtime;
1659
7aa4fd5b
TA
1660 /* Set effective keepalive to 1/3 the effective holdtime.
1661 * Use configured keeplive when < effective keepalive.
1662 */
1663 peer->v_keepalive = peer->v_holdtime / 3;
1664 if (CHECK_FLAG(peer->flags, PEER_FLAG_TIMER)) {
1665 if (peer->keepalive && peer->keepalive < peer->v_keepalive)
1666 peer->v_keepalive = peer->keepalive;
1667 } else {
1668 if (peer->bgp->default_keepalive
1669 && peer->bgp->default_keepalive < peer->v_keepalive)
1670 peer->v_keepalive = peer->bgp->default_keepalive;
1671 }
d62a17ae 1672
234f6fd4
DA
1673 /* If another side disabled sending Software Version capability,
1674 * we MUST drop the previous from showing in the outputs to avoid
1675 * stale information and due to security reasons.
1676 */
1677 if (peer->soft_version)
1678 XFREE(MTYPE_BGP_SOFT_VERSION, peer->soft_version);
1679
d62a17ae 1680 /* Open option part parse. */
1681 if (optlen != 0) {
1bb379bf 1682 if (bgp_open_option_parse(peer, optlen, &mp_capability) < 0)
d8151687 1683 return BGP_Stop;
d62a17ae 1684 } else {
1685 if (bgp_debug_neighbor_events(peer))
1686 zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0",
1687 peer->host);
0299c004 1688 }
d62a17ae 1689
1690 /*
1691 * Assume that the peer supports the locally configured set of
1692 * AFI/SAFIs if the peer did not send us any Mulitiprotocol
1693 * capabilities, or if 'override-capability' is configured.
1694 */
1695 if (!mp_capability
1696 || CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) {
1697 peer->afc_nego[AFI_IP][SAFI_UNICAST] =
1698 peer->afc[AFI_IP][SAFI_UNICAST];
1699 peer->afc_nego[AFI_IP][SAFI_MULTICAST] =
1700 peer->afc[AFI_IP][SAFI_MULTICAST];
1701 peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] =
1702 peer->afc[AFI_IP][SAFI_LABELED_UNICAST];
7c40bf39 1703 peer->afc_nego[AFI_IP][SAFI_FLOWSPEC] =
1704 peer->afc[AFI_IP][SAFI_FLOWSPEC];
d62a17ae 1705 peer->afc_nego[AFI_IP6][SAFI_UNICAST] =
1706 peer->afc[AFI_IP6][SAFI_UNICAST];
1707 peer->afc_nego[AFI_IP6][SAFI_MULTICAST] =
1708 peer->afc[AFI_IP6][SAFI_MULTICAST];
1709 peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] =
1710 peer->afc[AFI_IP6][SAFI_LABELED_UNICAST];
1711 peer->afc_nego[AFI_L2VPN][SAFI_EVPN] =
1712 peer->afc[AFI_L2VPN][SAFI_EVPN];
7c40bf39 1713 peer->afc_nego[AFI_IP6][SAFI_FLOWSPEC] =
1714 peer->afc[AFI_IP6][SAFI_FLOWSPEC];
0299c004 1715 }
d62a17ae 1716
d62a17ae 1717 /* Verify valid local address present based on negotiated
1718 * address-families. */
1719 if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
e5f22b30 1720 || peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST]
d62a17ae 1721 || peer->afc_nego[AFI_IP][SAFI_MULTICAST]
1722 || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
1723 || peer->afc_nego[AFI_IP][SAFI_ENCAP]) {
975a328e 1724 if (peer->nexthop.v4.s_addr == INADDR_ANY) {
d62a17ae 1725#if defined(HAVE_CUMULUS)
50121ac0
DS
1726 zlog_warn("%s: No local IPv4 addr, BGP routing may not work",
1727 peer->host);
1d808091 1728#endif
d62a17ae 1729 }
1730 }
1731 if (peer->afc_nego[AFI_IP6][SAFI_UNICAST]
e5f22b30 1732 || peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST]
d62a17ae 1733 || peer->afc_nego[AFI_IP6][SAFI_MULTICAST]
1734 || peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN]
1735 || peer->afc_nego[AFI_IP6][SAFI_ENCAP]) {
1736 if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global)) {
1737#if defined(HAVE_CUMULUS)
50121ac0
DS
1738 zlog_warn("%s: No local IPv6 address, BGP routing may not work",
1739 peer->host);
1d808091 1740#endif
d62a17ae 1741 }
1742 }
1743 peer->rtt = sockopt_tcp_rtt(peer->fd);
1744
d8151687 1745 return Receive_OPEN_message;
718e3744 1746}
1747
d8151687
QY
1748/**
1749 * Process BGP KEEPALIVE message for peer.
1750 *
1751 * @param peer
1752 * @param size size of the packet
1753 * @return as in summary
1754 */
1755static int bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
f188f2c4 1756{
d8151687
QY
1757 if (bgp_debug_keepalive(peer))
1758 zlog_debug("%s KEEPALIVE rcvd", peer->host);
d62a17ae 1759
d8151687 1760 bgp_update_implicit_eors(peer);
d62a17ae 1761
e410d563
DA
1762 peer->rtt = sockopt_tcp_rtt(peer->fd);
1763
8336c896
DA
1764 /* If the peer's RTT is higher than expected, shutdown
1765 * the peer automatically.
1766 */
5597214c
DA
1767 if (!CHECK_FLAG(peer->flags, PEER_FLAG_RTT_SHUTDOWN))
1768 return Receive_KEEPALIVE_message;
8336c896 1769
5597214c 1770 if (peer->rtt > peer->rtt_expected) {
8336c896
DA
1771 peer->rtt_keepalive_rcv++;
1772
1773 if (peer->rtt_keepalive_rcv > peer->rtt_keepalive_conf) {
5597214c
DA
1774 char rtt_shutdown_reason[BUFSIZ] = {};
1775
1776 snprintfrr(
1777 rtt_shutdown_reason,
1778 sizeof(rtt_shutdown_reason),
1779 "shutdown due to high round-trip-time (%dms > %dms, hit %u times)",
1780 peer->rtt, peer->rtt_expected,
1781 peer->rtt_keepalive_rcv);
1782 zlog_warn("%s %s", peer->host, rtt_shutdown_reason);
1783 SET_FLAG(peer->sflags, PEER_STATUS_RTT_SHUTDOWN);
1784 peer_tx_shutdown_message_set(peer, rtt_shutdown_reason);
8336c896
DA
1785 peer_flag_set(peer, PEER_FLAG_SHUTDOWN);
1786 }
1787 } else {
1788 if (peer->rtt_keepalive_rcv)
1789 peer->rtt_keepalive_rcv--;
1790 }
1791
d8151687 1792 return Receive_KEEPALIVE_message;
f188f2c4
DS
1793}
1794
cc9f21da 1795static void bgp_refresh_stalepath_timer_expire(struct thread *thread)
9af52ccf
DA
1796{
1797 struct peer_af *paf;
1798
1799 paf = THREAD_ARG(thread);
1800
1801 afi_t afi = paf->afi;
1802 safi_t safi = paf->safi;
1803 struct peer *peer = paf->peer;
1804
1805 peer->t_refresh_stalepath = NULL;
1806
1807 if (peer->nsf[afi][safi])
1808 bgp_clear_stale_route(peer, afi, safi);
1809
1810 if (bgp_debug_neighbor_events(peer))
a7d91a8c 1811 zlog_debug(
f70c91dc
DA
1812 "%pBP route-refresh (BoRR) timer expired for afi/safi: %d/%d",
1813 peer, afi, safi);
9af52ccf
DA
1814
1815 bgp_timer_set(peer);
9af52ccf 1816}
d62a17ae 1817
d8151687
QY
1818/**
1819 * Process BGP UPDATE message for peer.
1820 *
1821 * Parses UPDATE and creates attribute object.
1822 *
1823 * @param peer
1824 * @param size size of the packet
1825 * @return as in summary
7ef5a232 1826 */
d62a17ae 1827static int bgp_update_receive(struct peer *peer, bgp_size_t size)
718e3744 1828{
d62a17ae 1829 int ret, nlri_ret;
d7c0a89a 1830 uint8_t *end;
d62a17ae 1831 struct stream *s;
1832 struct attr attr;
1833 bgp_size_t attribute_len;
1834 bgp_size_t update_len;
1835 bgp_size_t withdraw_len;
f009ff26 1836 bool restart = false;
d62a17ae 1837
1838 enum NLRI_TYPES {
1839 NLRI_UPDATE,
1840 NLRI_WITHDRAW,
1841 NLRI_MP_UPDATE,
1842 NLRI_MP_WITHDRAW,
1843 NLRI_TYPE_MAX
1844 };
1845 struct bgp_nlri nlris[NLRI_TYPE_MAX];
1846
1847 /* Status must be Established. */
feb17238 1848 if (!peer_established(peer)) {
e50f7cfd 1849 flog_err(EC_BGP_INVALID_STATUS,
1c50c1c0
QY
1850 "%s [FSM] Update packet received under status %s",
1851 peer->host,
1852 lookup_msg(bgp_status_msg, peer->status, NULL));
0e35025e 1853 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
3893aeee 1854 bgp_fsm_error_subcode(peer->status));
d8151687 1855 return BGP_Stop;
d62a17ae 1856 }
1857
1858 /* Set initial values. */
6006b807 1859 memset(&attr, 0, sizeof(attr));
d62a17ae 1860 attr.label_index = BGP_INVALID_LABEL_INDEX;
1861 attr.label = MPLS_INVALID_LABEL;
1862 memset(&nlris, 0, sizeof(nlris));
1863 memset(peer->rcvd_attr_str, 0, BUFSIZ);
1864 peer->rcvd_attr_printed = 0;
1865
424ab01d 1866 s = peer->curr;
d62a17ae 1867 end = stream_pnt(s) + size;
1868
1869 /* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute
1870 Length is too large (i.e., if Unfeasible Routes Length + Total
1871 Attribute Length + 23 exceeds the message Length), then the Error
1872 Subcode is set to Malformed Attribute List. */
1873 if (stream_pnt(s) + 2 > end) {
e50f7cfd 1874 flog_err(EC_BGP_UPDATE_RCV,
3efd0893 1875 "%s [Error] Update packet error (packet length is short for unfeasible length)",
1c50c1c0 1876 peer->host);
d62a17ae 1877 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1878 BGP_NOTIFY_UPDATE_MAL_ATTR);
d8151687 1879 return BGP_Stop;
d62a17ae 1880 }
1881
1882 /* Unfeasible Route Length. */
1883 withdraw_len = stream_getw(s);
1884
1885 /* Unfeasible Route Length check. */
1886 if (stream_pnt(s) + withdraw_len > end) {
e50f7cfd 1887 flog_err(EC_BGP_UPDATE_RCV,
3efd0893 1888 "%s [Error] Update packet error (packet unfeasible length overflow %d)",
1c50c1c0 1889 peer->host, withdraw_len);
d62a17ae 1890 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1891 BGP_NOTIFY_UPDATE_MAL_ATTR);
d8151687 1892 return BGP_Stop;
d62a17ae 1893 }
1894
1895 /* Unfeasible Route packet format check. */
1896 if (withdraw_len > 0) {
1897 nlris[NLRI_WITHDRAW].afi = AFI_IP;
1898 nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST;
1899 nlris[NLRI_WITHDRAW].nlri = stream_pnt(s);
1900 nlris[NLRI_WITHDRAW].length = withdraw_len;
1901 stream_forward_getp(s, withdraw_len);
1902 }
1903
1904 /* Attribute total length check. */
1905 if (stream_pnt(s) + 2 > end) {
ade6974d 1906 flog_warn(
e50f7cfd 1907 EC_BGP_UPDATE_PACKET_SHORT,
ade6974d
QY
1908 "%s [Error] Packet Error (update packet is short for attribute length)",
1909 peer->host);
d62a17ae 1910 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1911 BGP_NOTIFY_UPDATE_MAL_ATTR);
d8151687 1912 return BGP_Stop;
d62a17ae 1913 }
1914
1915 /* Fetch attribute total length. */
1916 attribute_len = stream_getw(s);
1917
1918 /* Attribute length check. */
1919 if (stream_pnt(s) + attribute_len > end) {
ade6974d 1920 flog_warn(
e50f7cfd 1921 EC_BGP_UPDATE_PACKET_LONG,
ade6974d
QY
1922 "%s [Error] Packet Error (update packet attribute length overflow %d)",
1923 peer->host, attribute_len);
d62a17ae 1924 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1925 BGP_NOTIFY_UPDATE_MAL_ATTR);
d8151687 1926 return BGP_Stop;
d62a17ae 1927 }
1928
1929 /* Certain attribute parsing errors should not be considered bad enough
1930 * to reset the session for, most particularly any partial/optional
1931 * attributes that have 'tunneled' over speakers that don't understand
1932 * them. Instead we withdraw only the prefix concerned.
1933 *
1934 * Complicates the flow a little though..
1935 */
79288e4c 1936 enum bgp_attr_parse_ret attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
d62a17ae 1937/* This define morphs the update case into a withdraw when lower levels
1938 * have signalled an error condition where this is best.
1939 */
b881c707 1940#define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
718e3744 1941
d62a17ae 1942 /* Parse attribute when it exists. */
1943 if (attribute_len) {
1944 attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len,
1945 &nlris[NLRI_MP_UPDATE],
1946 &nlris[NLRI_MP_WITHDRAW]);
1947 if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) {
1948 bgp_attr_unintern_sub(&attr);
d8151687 1949 return BGP_Stop;
d62a17ae 1950 }
1951 }
1952
1953 /* Logging the attribute. */
1954 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
1955 || BGP_DEBUG(update, UPDATE_IN)
1956 || BGP_DEBUG(update, UPDATE_PREFIX)) {
5022c833
DA
1957 ret = bgp_dump_attr(&attr, peer->rcvd_attr_str,
1958 sizeof(peer->rcvd_attr_str));
d62a17ae 1959
b4d46cc9
DL
1960 peer->stat_upd_7606++;
1961
d62a17ae 1962 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
af4c2728 1963 flog_err(
e50f7cfd 1964 EC_BGP_UPDATE_RCV,
f70c91dc
DA
1965 "%pBP rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
1966 peer);
d62a17ae 1967
1968 if (ret && bgp_debug_update(peer, NULL, NULL, 1)) {
f70c91dc 1969 zlog_debug("%pBP rcvd UPDATE w/ attr: %s", peer,
d62a17ae 1970 peer->rcvd_attr_str);
1971 peer->rcvd_attr_printed = 1;
1972 }
1973 }
1974
1975 /* Network Layer Reachability Information. */
1976 update_len = end - stream_pnt(s);
1977
1978 if (update_len) {
1979 /* Set NLRI portion to structure. */
1980 nlris[NLRI_UPDATE].afi = AFI_IP;
1981 nlris[NLRI_UPDATE].safi = SAFI_UNICAST;
1982 nlris[NLRI_UPDATE].nlri = stream_pnt(s);
1983 nlris[NLRI_UPDATE].length = update_len;
1984 stream_forward_getp(s, update_len);
9738e9aa 1985
1986 if (CHECK_FLAG(attr.flag, ATTR_FLAG_BIT(BGP_ATTR_MP_REACH_NLRI))) {
1987 /*
1988 * We skipped nexthop attribute validation earlier so
1989 * validate the nexthop now.
1990 */
1991 if (bgp_attr_nexthop_valid(peer, &attr) < 0) {
1992 bgp_attr_unintern_sub(&attr);
1993 return BGP_Stop;
1994 }
1995 }
d62a17ae 1996 }
1997
1998 if (BGP_DEBUG(update, UPDATE_IN))
f70c91dc
DA
1999 zlog_debug("%pBP rcvd UPDATE wlen %d attrlen %d alen %d", peer,
2000 withdraw_len, attribute_len, update_len);
d62a17ae 2001
2002 /* Parse any given NLRIs */
2003 for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) {
2004 if (!nlris[i].nlri)
2005 continue;
2006
2007 /* NLRI is processed iff the peer is configured for the specific
2008 * afi/safi */
2009 if (!peer->afc[nlris[i].afi][nlris[i].safi]) {
2010 zlog_info(
2011 "%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u",
2012 peer->host, nlris[i].afi, nlris[i].safi);
2013 continue;
2014 }
2015
2016 /* EoR handled later */
2017 if (nlris[i].length == 0)
2018 continue;
2019
2020 switch (i) {
2021 case NLRI_UPDATE:
2022 case NLRI_MP_UPDATE:
2023 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
2024 &nlris[i], 0);
2025 break;
2026 case NLRI_WITHDRAW:
2027 case NLRI_MP_WITHDRAW:
b1d7888f
DA
2028 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
2029 &nlris[i], 1);
d62a17ae 2030 break;
2031 default:
513386b5 2032 nlri_ret = BGP_NLRI_PARSE_ERROR;
d62a17ae 2033 }
2034
513386b5
DA
2035 if (nlri_ret < BGP_NLRI_PARSE_OK
2036 && nlri_ret != BGP_NLRI_PARSE_ERROR_PREFIX_OVERFLOW) {
e50f7cfd 2037 flog_err(EC_BGP_UPDATE_RCV,
1c50c1c0 2038 "%s [Error] Error parsing NLRI", peer->host);
feb17238 2039 if (peer_established(peer))
d62a17ae 2040 bgp_notify_send(
2041 peer, BGP_NOTIFY_UPDATE_ERR,
2042 i <= NLRI_WITHDRAW
2043 ? BGP_NOTIFY_UPDATE_INVAL_NETWORK
2044 : BGP_NOTIFY_UPDATE_OPT_ATTR_ERR);
2045 bgp_attr_unintern_sub(&attr);
d8151687 2046 return BGP_Stop;
d62a17ae 2047 }
2048 }
2049
2050 /* EoR checks
2051 *
2052 * Non-MP IPv4/Unicast EoR is a completely empty UPDATE
2053 * and MP EoR should have only an empty MP_UNREACH
2054 */
996c9314
LB
2055 if ((!update_len && !withdraw_len && nlris[NLRI_MP_UPDATE].length == 0)
2056 || (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
d62a17ae 2057 afi_t afi = 0;
2058 safi_t safi;
f009ff26 2059 struct graceful_restart_info *gr_info;
2060
2061 /* Restarting router */
36235319
QY
2062 if (BGP_PEER_GRACEFUL_RESTART_CAPABLE(peer)
2063 && BGP_PEER_RESTARTING_MODE(peer))
f009ff26 2064 restart = true;
d62a17ae 2065
2066 /* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already
2067 * checked
2068 * update and withdraw NLRI lengths are 0.
2069 */
2070 if (!attribute_len) {
2071 afi = AFI_IP;
2072 safi = SAFI_UNICAST;
2073 } else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI)
2074 && nlris[NLRI_MP_WITHDRAW].length == 0) {
2075 afi = nlris[NLRI_MP_WITHDRAW].afi;
2076 safi = nlris[NLRI_MP_WITHDRAW].safi;
9b9df989
DS
2077 } else if (attr_parse_ret == BGP_ATTR_PARSE_EOR) {
2078 afi = nlris[NLRI_MP_UPDATE].afi;
2079 safi = nlris[NLRI_MP_UPDATE].safi;
d62a17ae 2080 }
2081
2082 if (afi && peer->afc[afi][safi]) {
e82d19a3
DS
2083 struct vrf *vrf = vrf_lookup_by_id(peer->bgp->vrf_id);
2084
d62a17ae 2085 /* End-of-RIB received */
2086 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2087 PEER_STATUS_EOR_RECEIVED)) {
2088 SET_FLAG(peer->af_sflags[afi][safi],
2089 PEER_STATUS_EOR_RECEIVED);
2090 bgp_update_explicit_eors(peer);
f009ff26 2091 /* Update graceful restart information */
2092 gr_info = &(peer->bgp->gr_info[afi][safi]);
2093 if (restart)
2094 gr_info->eor_received++;
2095 /* If EOR received from all peers and selection
2096 * deferral timer is running, cancel the timer
2097 * and invoke the best path calculation
2098 */
36235319
QY
2099 if (gr_info->eor_required
2100 == gr_info->eor_received) {
2101 if (bgp_debug_neighbor_events(peer))
2102 zlog_debug(
2103 "%s %d, %s %d",
f009ff26 2104 "EOR REQ",
2105 gr_info->eor_required,
2106 "EOR RCV",
2107 gr_info->eor_received);
b96b4f1c
SB
2108 if (gr_info->t_select_deferral) {
2109 void *info = THREAD_ARG(
2110 gr_info->t_select_deferral);
2111 XFREE(MTYPE_TMP, info);
2112 }
fa5806c3 2113 THREAD_OFF(gr_info->t_select_deferral);
f009ff26 2114 gr_info->eor_required = 0;
2115 gr_info->eor_received = 0;
2116 /* Best path selection */
42c93837
DA
2117 bgp_best_path_select_defer(peer->bgp,
2118 afi, safi);
f009ff26 2119 }
d62a17ae 2120 }
2121
2122 /* NSF delete stale route */
2123 if (peer->nsf[afi][safi])
2124 bgp_clear_stale_route(peer, afi, safi);
2125
1479ed2f
DA
2126 zlog_info(
2127 "%s: rcvd End-of-RIB for %s from %s in vrf %s",
2128 __func__, get_afi_safi_str(afi, safi, false),
2129 peer->host, vrf ? vrf->name : VRF_DEFAULT_NAME);
2130 }
f80f838b 2131 }
d62a17ae 2132
2133 /* Everything is done. We unintern temporary structures which
2134 interned in bgp_attr_parse(). */
2135 bgp_attr_unintern_sub(&attr);
2136
083ec940 2137 peer->update_time = monotime(NULL);
d62a17ae 2138
c385f82a
MK
2139 /* Notify BGP Conditional advertisement scanner process */
2140 peer->advmap_table_change = true;
2141
d8151687 2142 return Receive_UPDATE_message;
718e3744 2143}
2144
d8151687
QY
2145/**
2146 * Process BGP NOTIFY message for peer.
2147 *
2148 * @param peer
2149 * @param size size of the packet
2150 * @return as in summary
2151 */
2152static int bgp_notify_receive(struct peer *peer, bgp_size_t size)
718e3744 2153{
10d476d4
DA
2154 struct bgp_notify outer = {};
2155 struct bgp_notify inner = {};
eea685b6 2156 bool hard_reset = false;
d62a17ae 2157
2158 if (peer->notify.data) {
eea685b6 2159 XFREE(MTYPE_BGP_NOTIFICATION, peer->notify.data);
d62a17ae 2160 peer->notify.length = 0;
e7ce634f 2161 peer->notify.hard_reset = false;
d62a17ae 2162 }
2163
eea685b6
DA
2164 outer.code = stream_getc(peer->curr);
2165 outer.subcode = stream_getc(peer->curr);
2166 outer.length = size - 2;
2167 outer.data = NULL;
2168 outer.raw_data = NULL;
2169 if (outer.length) {
2170 outer.raw_data = XMALLOC(MTYPE_BGP_NOTIFICATION, outer.length);
2171 memcpy(outer.raw_data, stream_pnt(peer->curr), outer.length);
2172 }
2173
1ae314be
DA
2174 hard_reset =
2175 bgp_notify_received_hard_reset(peer, outer.code, outer.subcode);
eea685b6
DA
2176 if (hard_reset && outer.length) {
2177 inner = bgp_notify_decapsulate_hard_reset(&outer);
2178 peer->notify.hard_reset = true;
2179 } else {
2180 inner = outer;
2181 }
d62a17ae 2182
2183 /* Preserv notify code and sub code. */
eea685b6
DA
2184 peer->notify.code = inner.code;
2185 peer->notify.subcode = inner.subcode;
d62a17ae 2186 /* For further diagnostic record returned Data. */
eea685b6
DA
2187 if (inner.length) {
2188 peer->notify.length = inner.length;
2189 peer->notify.data =
2190 XMALLOC(MTYPE_BGP_NOTIFICATION, inner.length);
2191 memcpy(peer->notify.data, inner.raw_data, inner.length);
d62a17ae 2192 }
2193
2194 /* For debug */
2195 {
2196 int i;
2197 int first = 0;
2198 char c[4];
2199
eea685b6
DA
2200 if (inner.length) {
2201 inner.data = XMALLOC(MTYPE_BGP_NOTIFICATION,
2202 inner.length * 3);
2203 for (i = 0; i < inner.length; i++)
d62a17ae 2204 if (first) {
552d6491 2205 snprintf(c, sizeof(c), " %02x",
424ab01d 2206 stream_getc(peer->curr));
f009ff26 2207
eea685b6
DA
2208 strlcat(inner.data, c,
2209 inner.length * 3);
f009ff26 2210
d62a17ae 2211 } else {
2212 first = 1;
552d6491
QY
2213 snprintf(c, sizeof(c), "%02x",
2214 stream_getc(peer->curr));
f009ff26 2215
eea685b6
DA
2216 strlcpy(inner.data, c,
2217 inner.length * 3);
d62a17ae 2218 }
d62a17ae 2219 }
2220
eea685b6 2221 bgp_notify_print(peer, &inner, "received", hard_reset);
10d476d4 2222 if (inner.length) {
eea685b6
DA
2223 XFREE(MTYPE_BGP_NOTIFICATION, inner.data);
2224 inner.length = 0;
2225 }
2226 if (outer.length) {
2227 XFREE(MTYPE_BGP_NOTIFICATION, outer.data);
10d476d4 2228 XFREE(MTYPE_BGP_NOTIFICATION, outer.raw_data);
c73d2363
DA
2229
2230 /* If this is a Hard Reset notification, we MUST free
2231 * the inner (encapsulated) notification too.
2232 */
2233 if (hard_reset)
2234 XFREE(MTYPE_BGP_NOTIFICATION, inner.raw_data);
eea685b6 2235 outer.length = 0;
d62a17ae 2236 }
2237 }
2238
2239 /* peer count update */
0112e9e0 2240 atomic_fetch_add_explicit(&peer->notify_in, 1, memory_order_relaxed);
d62a17ae 2241
2242 peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;
2243
2244 /* We have to check for Notify with Unsupported Optional Parameter.
2245 in that case we fallback to open without the capability option.
2246 But this done in bgp_stop. We just mark it here to avoid changing
2247 the fsm tables. */
eea685b6
DA
2248 if (inner.code == BGP_NOTIFY_OPEN_ERR &&
2249 inner.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM)
d62a17ae 2250 UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);
2251
20170775
DA
2252 /* If Graceful-Restart N-bit (Notification) is exchanged,
2253 * and it's not a Hard Reset, let's retain the routes.
2254 */
2255 if (bgp_has_graceful_restart_notification(peer) && !hard_reset &&
2256 CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE))
2257 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
2258
5cce3f05 2259 bgp_peer_gr_flags_update(peer);
36235319
QY
2260 BGP_GR_ROUTER_DETECT_AND_SEND_CAPABILITY_TO_ZEBRA(peer->bgp,
2261 peer->bgp->peer);
5cce3f05 2262
d8151687 2263 return Receive_NOTIFICATION_message;
718e3744 2264}
2265
d8151687
QY
2266/**
2267 * Process BGP ROUTEREFRESH message for peer.
2268 *
2269 * @param peer
2270 * @param size size of the packet
2271 * @return as in summary
2272 */
2273static int bgp_route_refresh_receive(struct peer *peer, bgp_size_t size)
718e3744 2274{
d62a17ae 2275 iana_afi_t pkt_afi;
2276 afi_t afi;
5c525538
RW
2277 iana_safi_t pkt_safi;
2278 safi_t safi;
d62a17ae 2279 struct stream *s;
2280 struct peer_af *paf;
2281 struct update_group *updgrp;
2282 struct peer *updgrp_peer;
9af52ccf 2283 uint8_t subtype;
e1a32ec1 2284 bool force_update = false;
9af52ccf
DA
2285 bgp_size_t msg_length =
2286 size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE);
d62a17ae 2287
2288 /* If peer does not have the capability, send notification. */
2289 if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) {
e50f7cfd 2290 flog_err(EC_BGP_NO_CAP,
1c50c1c0
QY
2291 "%s [Error] BGP route refresh is not enabled",
2292 peer->host);
d62a17ae 2293 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2294 BGP_NOTIFY_HEADER_BAD_MESTYPE);
d8151687 2295 return BGP_Stop;
d62a17ae 2296 }
2297
2298 /* Status must be Established. */
feb17238 2299 if (!peer_established(peer)) {
af4c2728 2300 flog_err(
e50f7cfd 2301 EC_BGP_INVALID_STATUS,
d62a17ae 2302 "%s [Error] Route refresh packet received under status %s",
2303 peer->host,
2304 lookup_msg(bgp_status_msg, peer->status, NULL));
0e35025e 2305 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
3893aeee 2306 bgp_fsm_error_subcode(peer->status));
d8151687 2307 return BGP_Stop;
d62a17ae 2308 }
2309
424ab01d 2310 s = peer->curr;
d62a17ae 2311
2312 /* Parse packet. */
2313 pkt_afi = stream_getw(s);
9af52ccf 2314 subtype = stream_getc(s);
d62a17ae 2315 pkt_safi = stream_getc(s);
2316
d62a17ae 2317 /* Convert AFI, SAFI to internal values and check. */
2318 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) {
2319 zlog_info(
748a041f
DS
2320 "%s REFRESH_REQ for unrecognized afi/safi: %s/%s - ignored",
2321 peer->host, iana_afi2str(pkt_afi),
2322 iana_safi2str(pkt_safi));
d8151687 2323 return BGP_PACKET_NOOP;
d62a17ae 2324 }
2325
2326 if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) {
d7c0a89a
QY
2327 uint8_t *end;
2328 uint8_t when_to_refresh;
2329 uint8_t orf_type;
2330 uint16_t orf_len;
d62a17ae 2331
9af52ccf
DA
2332 if (subtype) {
2333 /* If the length, excluding the fixed-size message
2334 * header, of the received ROUTE-REFRESH message with
2335 * Message Subtype 1 and 2 is not 4, then the BGP
2336 * speaker MUST send a NOTIFICATION message with the
2337 * Error Code of "ROUTE-REFRESH Message Error" and the
2338 * subcode of "Invalid Message Length".
2339 */
2340 if (msg_length != 4) {
2341 zlog_err(
2342 "%s Enhanced Route Refresh message length error",
2343 peer->host);
2344 bgp_notify_send(
2345 peer, BGP_NOTIFY_ROUTE_REFRESH_ERR,
2346 BGP_NOTIFY_ROUTE_REFRESH_INVALID_MSG_LEN);
2347 }
2348
2349 /* When the BGP speaker receives a ROUTE-REFRESH message
2350 * with a "Message Subtype" field other than 0, 1, or 2,
2351 * it MUST ignore the received ROUTE-REFRESH message.
2352 */
2353 if (subtype > 2)
2354 zlog_err(
2355 "%s Enhanced Route Refresh invalid subtype",
2356 peer->host);
2357 }
2358
2359 if (msg_length < 5) {
d62a17ae 2360 zlog_info("%s ORF route refresh length error",
2361 peer->host);
0e35025e
DA
2362 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2363 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
d8151687 2364 return BGP_Stop;
718e3744 2365 }
2366
d62a17ae 2367 when_to_refresh = stream_getc(s);
2368 end = stream_pnt(s) + (size - 5);
2369
2370 while ((stream_pnt(s) + 2) < end) {
2371 orf_type = stream_getc(s);
2372 orf_len = stream_getw(s);
2373
2374 /* orf_len in bounds? */
2375 if ((stream_pnt(s) + orf_len) > end)
2376 break; /* XXX: Notify instead?? */
2377 if (orf_type == ORF_TYPE_PREFIX
2378 || orf_type == ORF_TYPE_PREFIX_OLD) {
2379 uint8_t *p_pnt = stream_pnt(s);
2380 uint8_t *p_end = stream_pnt(s) + orf_len;
2381 struct orf_prefix orfp;
d7c0a89a
QY
2382 uint8_t common = 0;
2383 uint32_t seq;
d62a17ae 2384 int psize;
2385 char name[BUFSIZ];
2386 int ret = CMD_SUCCESS;
2387
2388 if (bgp_debug_neighbor_events(peer)) {
2389 zlog_debug(
f70c91dc
DA
2390 "%pBP rcvd Prefixlist ORF(%d) length %d",
2391 peer, orf_type, orf_len);
d62a17ae 2392 }
2393
f1aa4929
DA
2394 /* ORF prefix-list name */
2395 snprintf(name, sizeof(name), "%s.%d.%d",
2396 peer->host, afi, safi);
2397
d62a17ae 2398 /* we're going to read at least 1 byte of common
2399 * ORF header,
2400 * and 7 bytes of ORF Address-filter entry from
2401 * the stream
2402 */
f1aa4929
DA
2403 if (*p_pnt & ORF_COMMON_PART_REMOVE_ALL) {
2404 if (bgp_debug_neighbor_events(peer))
2405 zlog_debug(
2406 "%pBP rcvd Remove-All pfxlist ORF request",
2407 peer);
2408 prefix_bgp_orf_remove_all(afi, name);
d62a17ae 2409 break;
f1aa4929 2410 }
d62a17ae 2411
f1aa4929
DA
2412 if (orf_len < 7)
2413 break;
d62a17ae 2414
2415 while (p_pnt < p_end) {
2416 /* If the ORF entry is malformed, want
2417 * to read as much of it
2418 * as possible without going beyond the
2419 * bounds of the entry,
2420 * to maximise debug information.
2421 */
2422 int ok;
6006b807 2423 memset(&orfp, 0, sizeof(orfp));
d62a17ae 2424 common = *p_pnt++;
2425 /* after ++: p_pnt <= p_end */
d7c0a89a
QY
2426 ok = ((uint32_t)(p_end - p_pnt)
2427 >= sizeof(uint32_t));
d62a17ae 2428 if (ok) {
2429 memcpy(&seq, p_pnt,
d7c0a89a
QY
2430 sizeof(uint32_t));
2431 p_pnt += sizeof(uint32_t);
d62a17ae 2432 orfp.seq = ntohl(seq);
2433 } else
2434 p_pnt = p_end;
2435
5ca840a3 2436 /* val checked in prefix_bgp_orf_set */
1bb379bf 2437 if (p_pnt < p_end)
5ca840a3
DS
2438 orfp.ge = *p_pnt++;
2439
2440 /* val checked in prefix_bgp_orf_set */
1bb379bf 2441 if (p_pnt < p_end)
5ca840a3
DS
2442 orfp.le = *p_pnt++;
2443
d62a17ae 2444 if ((ok = (p_pnt < p_end)))
2445 orfp.p.prefixlen = *p_pnt++;
5ca840a3
DS
2446
2447 /* afi checked already */
2448 orfp.p.family = afi2family(afi);
2449
2450 /* 0 if not ok */
2451 psize = PSIZE(orfp.p.prefixlen);
2452 /* valid for family ? */
2453 if (psize > prefix_blen(&orfp.p)) {
d62a17ae 2454 ok = 0;
2455 psize = prefix_blen(&orfp.p);
2456 }
5ca840a3
DS
2457 /* valid for packet ? */
2458 if (psize > (p_end - p_pnt)) {
d62a17ae 2459 ok = 0;
2460 psize = p_end - p_pnt;
2461 }
2462
2463 if (psize > 0)
2464 memcpy(&orfp.p.u.prefix, p_pnt,
2465 psize);
2466 p_pnt += psize;
2467
2468 if (bgp_debug_neighbor_events(peer)) {
2469 char buf[INET6_BUFSIZ];
2470
2471 zlog_debug(
f70c91dc
DA
2472 "%pBP rcvd %s %s seq %u %s/%d ge %d le %d%s",
2473 peer,
d62a17ae 2474 (common & ORF_COMMON_PART_REMOVE
2475 ? "Remove"
2476 : "Add"),
2477 (common & ORF_COMMON_PART_DENY
2478 ? "deny"
2479 : "permit"),
2480 orfp.seq,
2481 inet_ntop(
2482 orfp.p.family,
2483 &orfp.p.u.prefix,
2484 buf,
2485 INET6_BUFSIZ),
2486 orfp.p.prefixlen,
2487 orfp.ge, orfp.le,
2488 ok ? "" : " MALFORMED");
2489 }
2490
2491 if (ok)
2492 ret = prefix_bgp_orf_set(
2493 name, afi, &orfp,
2494 (common & ORF_COMMON_PART_DENY
2495 ? 0
2496 : 1),
2497 (common & ORF_COMMON_PART_REMOVE
2498 ? 0
2499 : 1));
2500
2501 if (!ok || (ok && ret != CMD_SUCCESS)) {
2502 zlog_info(
f70c91dc
DA
2503 "%pBP Received misformatted prefixlist ORF. Remove All pfxlist",
2504 peer);
d62a17ae 2505 prefix_bgp_orf_remove_all(afi,
2506 name);
2507 break;
2508 }
2509 }
2510
2511 peer->orf_plist[afi][safi] =
2512 prefix_bgp_orf_lookup(afi, name);
2513 }
2514 stream_forward_getp(s, orf_len);
718e3744 2515 }
d62a17ae 2516 if (bgp_debug_neighbor_events(peer))
f70c91dc 2517 zlog_debug("%pBP rcvd Refresh %s ORF request", peer,
d62a17ae 2518 when_to_refresh == REFRESH_DEFER
2519 ? "Defer"
2520 : "Immediate");
2521 if (when_to_refresh == REFRESH_DEFER)
d8151687 2522 return BGP_PACKET_NOOP;
d62a17ae 2523 }
40d2700d 2524
d62a17ae 2525 /* First update is deferred until ORF or ROUTE-REFRESH is received */
2526 if (CHECK_FLAG(peer->af_sflags[afi][safi],
2527 PEER_STATUS_ORF_WAIT_REFRESH))
2528 UNSET_FLAG(peer->af_sflags[afi][safi],
2529 PEER_STATUS_ORF_WAIT_REFRESH);
2530
2531 paf = peer_af_find(peer, afi, safi);
2532 if (paf && paf->subgroup) {
2533 if (peer->orf_plist[afi][safi]) {
2534 updgrp = PAF_UPDGRP(paf);
2535 updgrp_peer = UPDGRP_PEER(updgrp);
2536 updgrp_peer->orf_plist[afi][safi] =
2537 peer->orf_plist[afi][safi];
2538 }
2539
2adac256
DA
2540 /* Avoid supressing duplicate routes later
2541 * when processing in subgroup_announce_table().
2542 */
e1a32ec1 2543 force_update = true;
2adac256 2544
d62a17ae 2545 /* If the peer is configured for default-originate clear the
2546 * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will
2547 * re-advertise the
2548 * default
2549 */
2550 if (CHECK_FLAG(paf->subgroup->sflags,
2551 SUBGRP_STATUS_DEFAULT_ORIGINATE))
2552 UNSET_FLAG(paf->subgroup->sflags,
2553 SUBGRP_STATUS_DEFAULT_ORIGINATE);
718e3744 2554 }
d62a17ae 2555
9af52ccf
DA
2556 if (subtype == BGP_ROUTE_REFRESH_BORR) {
2557 /* A BGP speaker that has received the Graceful Restart
2558 * Capability from its neighbor MUST ignore any BoRRs for
2559 * an <AFI, SAFI> from the neighbor before the speaker
2560 * receives the EoR for the given <AFI, SAFI> from the
2561 * neighbor.
2562 */
2563 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)
2564 && !CHECK_FLAG(peer->af_sflags[afi][safi],
2565 PEER_STATUS_EOR_RECEIVED)) {
2566 if (bgp_debug_neighbor_events(peer))
2567 zlog_debug(
f70c91dc
DA
2568 "%pBP rcvd route-refresh (BoRR) for %s/%s before EoR",
2569 peer, afi2str(afi), safi2str(safi));
9af52ccf
DA
2570 return BGP_PACKET_NOOP;
2571 }
2572
2573 if (peer->t_refresh_stalepath) {
2574 if (bgp_debug_neighbor_events(peer))
2575 zlog_debug(
f70c91dc
DA
2576 "%pBP rcvd route-refresh (BoRR) for %s/%s, whereas BoRR already received",
2577 peer, afi2str(afi), safi2str(safi));
9af52ccf
DA
2578 return BGP_PACKET_NOOP;
2579 }
2580
2581 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_BORR_RECEIVED);
2582 UNSET_FLAG(peer->af_sflags[afi][safi],
2583 PEER_STATUS_EORR_RECEIVED);
2584
2585 /* When a BGP speaker receives a BoRR message from
2586 * a peer, it MUST mark all the routes with the given
2587 * Address Family Identifier and Subsequent Address
2588 * Family Identifier, <AFI, SAFI> [RFC2918], from
2589 * that peer as stale.
2590 */
2591 if (peer_active_nego(peer)) {
2592 SET_FLAG(peer->af_sflags[afi][safi],
2593 PEER_STATUS_ENHANCED_REFRESH);
2594 bgp_set_stale_route(peer, afi, safi);
2595 }
2596
feb17238 2597 if (peer_established(peer))
9af52ccf
DA
2598 thread_add_timer(bm->master,
2599 bgp_refresh_stalepath_timer_expire,
2600 paf, peer->bgp->stalepath_time,
2601 &peer->t_refresh_stalepath);
2602
2603 if (bgp_debug_neighbor_events(peer))
2604 zlog_debug(
f70c91dc
DA
2605 "%pBP rcvd route-refresh (BoRR) for %s/%s, triggering timer for %u seconds",
2606 peer, afi2str(afi), safi2str(safi),
9af52ccf
DA
2607 peer->bgp->stalepath_time);
2608 } else if (subtype == BGP_ROUTE_REFRESH_EORR) {
2609 if (!peer->t_refresh_stalepath) {
2610 zlog_err(
f70c91dc
DA
2611 "%pBP rcvd route-refresh (EoRR) for %s/%s, whereas no BoRR received",
2612 peer, afi2str(afi), safi2str(safi));
9af52ccf
DA
2613 return BGP_PACKET_NOOP;
2614 }
2615
fa5806c3 2616 THREAD_OFF(peer->t_refresh_stalepath);
9af52ccf
DA
2617
2618 SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_EORR_RECEIVED);
2619 UNSET_FLAG(peer->af_sflags[afi][safi],
2620 PEER_STATUS_BORR_RECEIVED);
2621
2622 if (bgp_debug_neighbor_events(peer))
2623 zlog_debug(
f70c91dc
DA
2624 "%pBP rcvd route-refresh (EoRR) for %s/%s, stopping BoRR timer",
2625 peer, afi2str(afi), safi2str(safi));
9af52ccf
DA
2626
2627 if (peer->nsf[afi][safi])
2628 bgp_clear_stale_route(peer, afi, safi);
2629 } else {
bcbeb3f9 2630 if (bgp_debug_neighbor_events(peer))
a7d91a8c 2631 zlog_debug(
f70c91dc
DA
2632 "%pBP rcvd route-refresh (REQUEST) for %s/%s",
2633 peer, afi2str(afi), safi2str(safi));
bcbeb3f9 2634
9af52ccf
DA
2635 /* In response to a "normal route refresh request" from the
2636 * peer, the speaker MUST send a BoRR message.
2637 */
2638 if (CHECK_FLAG(peer->cap, PEER_CAP_ENHANCED_RR_RCV)) {
2639 /* For a BGP speaker that supports the BGP Graceful
2640 * Restart, it MUST NOT send a BoRR for an <AFI, SAFI>
2641 * to a neighbor before it sends the EoR for the
2642 * <AFI, SAFI> to the neighbor.
2643 */
2644 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
2645 PEER_STATUS_EOR_SEND)) {
2646 if (bgp_debug_neighbor_events(peer))
2647 zlog_debug(
f70c91dc
DA
2648 "%pBP rcvd route-refresh (REQUEST) for %s/%s before EoR",
2649 peer, afi2str(afi),
2650 safi2str(safi));
a783cc05
XL
2651 /* Can't send BoRR now, postpone after EoR */
2652 SET_FLAG(peer->af_sflags[afi][safi],
2653 PEER_STATUS_REFRESH_PENDING);
9af52ccf
DA
2654 return BGP_PACKET_NOOP;
2655 }
2656
2657 bgp_route_refresh_send(peer, afi, safi, 0, 0, 0,
2658 BGP_ROUTE_REFRESH_BORR);
2659
2660 if (bgp_debug_neighbor_events(peer))
2661 zlog_debug(
f70c91dc
DA
2662 "%pBP sending route-refresh (BoRR) for %s/%s",
2663 peer, afi2str(afi), safi2str(safi));
9af52ccf
DA
2664
2665 /* Set flag Ready-To-Send to know when we can send EoRR
2666 * message.
2667 */
2668 SET_FLAG(peer->af_sflags[afi][safi],
2669 PEER_STATUS_BORR_SEND);
2670 UNSET_FLAG(peer->af_sflags[afi][safi],
2671 PEER_STATUS_EORR_SEND);
2672 }
2673 }
2674
d62a17ae 2675 /* Perform route refreshment to the peer */
e1a32ec1 2676 bgp_announce_route(peer, afi, safi, force_update);
d8151687
QY
2677
2678 /* No FSM action necessary */
2679 return BGP_PACKET_NOOP;
718e3744 2680}
2681
d8151687
QY
2682/**
2683 * Parse BGP CAPABILITY message for peer.
2684 *
2685 * @param peer
2686 * @param size size of the packet
2687 * @return as in summary
2688 */
d7c0a89a 2689static int bgp_capability_msg_parse(struct peer *peer, uint8_t *pnt,
d62a17ae 2690 bgp_size_t length)
718e3744 2691{
d7c0a89a 2692 uint8_t *end;
d62a17ae 2693 struct capability_mp_data mpc;
2694 struct capability_header *hdr;
d7c0a89a 2695 uint8_t action;
d62a17ae 2696 iana_afi_t pkt_afi;
2697 afi_t afi;
5c525538
RW
2698 iana_safi_t pkt_safi;
2699 safi_t safi;
d62a17ae 2700
2701 end = pnt + length;
2702
2703 while (pnt < end) {
2704 /* We need at least action, capability code and capability
2705 * length. */
2706 if (pnt + 3 > end) {
2707 zlog_info("%s Capability length error", peer->host);
0e35025e
DA
2708 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2709 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
d8151687 2710 return BGP_Stop;
d62a17ae 2711 }
2712 action = *pnt;
2713 hdr = (struct capability_header *)(pnt + 1);
2714
2715 /* Action value check. */
2716 if (action != CAPABILITY_ACTION_SET
2717 && action != CAPABILITY_ACTION_UNSET) {
2718 zlog_info("%s Capability Action Value error %d",
2719 peer->host, action);
0e35025e
DA
2720 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2721 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
d8151687 2722 return BGP_Stop;
d62a17ae 2723 }
2724
2725 if (bgp_debug_neighbor_events(peer))
2726 zlog_debug(
2727 "%s CAPABILITY has action: %d, code: %u, length %u",
2728 peer->host, action, hdr->code, hdr->length);
2729
ff6db102
DS
2730 if (hdr->length < sizeof(struct capability_mp_data)) {
2731 zlog_info(
2732 "%pBP Capability structure is not properly filled out, expected at least %zu bytes but header length specified is %d",
2733 peer, sizeof(struct capability_mp_data),
2734 hdr->length);
2735 return BGP_Stop;
2736 }
2737
d62a17ae 2738 /* Capability length check. */
2739 if ((pnt + hdr->length + 3) > end) {
2740 zlog_info("%s Capability length error", peer->host);
0e35025e
DA
2741 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
2742 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
d8151687 2743 return BGP_Stop;
d62a17ae 2744 }
2745
2746 /* Fetch structure to the byte stream. */
2747 memcpy(&mpc, pnt + 3, sizeof(struct capability_mp_data));
d2b6417b 2748 pnt += hdr->length + 3;
d62a17ae 2749
2750 /* We know MP Capability Code. */
2751 if (hdr->code == CAPABILITY_CODE_MP) {
2752 pkt_afi = ntohs(mpc.afi);
2753 pkt_safi = mpc.safi;
2754
2755 /* Ignore capability when override-capability is set. */
2756 if (CHECK_FLAG(peer->flags,
2757 PEER_FLAG_OVERRIDE_CAPABILITY))
2758 continue;
2759
2760 /* Convert AFI, SAFI to internal values. */
2761 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi,
2762 &safi)) {
2763 if (bgp_debug_neighbor_events(peer))
2764 zlog_debug(
3efd0893 2765 "%s Dynamic Capability MP_EXT afi/safi invalid (%s/%s)",
748a041f
DS
2766 peer->host,
2767 iana_afi2str(pkt_afi),
2768 iana_safi2str(pkt_safi));
d62a17ae 2769 continue;
2770 }
2771
2772 /* Address family check. */
2773 if (bgp_debug_neighbor_events(peer))
2774 zlog_debug(
c386cdd8 2775 "%s CAPABILITY has %s MP_EXT CAP for afi/safi: %s/%s",
d62a17ae 2776 peer->host,
2777 action == CAPABILITY_ACTION_SET
2778 ? "Advertising"
2779 : "Removing",
c386cdd8
DA
2780 iana_afi2str(pkt_afi),
2781 iana_safi2str(pkt_safi));
d62a17ae 2782
2783 if (action == CAPABILITY_ACTION_SET) {
2784 peer->afc_recv[afi][safi] = 1;
2785 if (peer->afc[afi][safi]) {
2786 peer->afc_nego[afi][safi] = 1;
e1a32ec1
DS
2787 bgp_announce_route(peer, afi, safi,
2788 false);
d62a17ae 2789 }
2790 } else {
2791 peer->afc_recv[afi][safi] = 0;
2792 peer->afc_nego[afi][safi] = 0;
2793
2794 if (peer_active_nego(peer))
2795 bgp_clear_route(peer, afi, safi);
2796 else
d8151687 2797 return BGP_Stop;
d62a17ae 2798 }
2799 } else {
ade6974d 2800 flog_warn(
e50f7cfd 2801 EC_BGP_UNRECOGNIZED_CAPABILITY,
ade6974d
QY
2802 "%s unrecognized capability code: %d - ignored",
2803 peer->host, hdr->code);
d62a17ae 2804 }
d62a17ae 2805 }
d8151687
QY
2806
2807 /* No FSM action necessary */
2808 return BGP_PACKET_NOOP;
718e3744 2809}
2810
d8151687
QY
2811/**
2812 * Parse BGP CAPABILITY message for peer.
01b7ce2d 2813 *
d8151687
QY
2814 * Exported for unit testing.
2815 *
2816 * @param peer
2817 * @param size size of the packet
2818 * @return as in summary
01b7ce2d 2819 */
d62a17ae 2820int bgp_capability_receive(struct peer *peer, bgp_size_t size)
718e3744 2821{
d7c0a89a 2822 uint8_t *pnt;
d62a17ae 2823
2824 /* Fetch pointer. */
424ab01d 2825 pnt = stream_pnt(peer->curr);
d62a17ae 2826
2827 if (bgp_debug_neighbor_events(peer))
2828 zlog_debug("%s rcv CAPABILITY", peer->host);
2829
2830 /* If peer does not have the capability, send notification. */
2831 if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) {
e50f7cfd 2832 flog_err(EC_BGP_NO_CAP,
1c50c1c0
QY
2833 "%s [Error] BGP dynamic capability is not enabled",
2834 peer->host);
d62a17ae 2835 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2836 BGP_NOTIFY_HEADER_BAD_MESTYPE);
d8151687 2837 return BGP_Stop;
d62a17ae 2838 }
2839
2840 /* Status must be Established. */
feb17238 2841 if (!peer_established(peer)) {
af4c2728 2842 flog_err(
e50f7cfd 2843 EC_BGP_NO_CAP,
d62a17ae 2844 "%s [Error] Dynamic capability packet received under status %s",
2845 peer->host,
2846 lookup_msg(bgp_status_msg, peer->status, NULL));
0e35025e 2847 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR,
3893aeee 2848 bgp_fsm_error_subcode(peer->status));
d8151687 2849 return BGP_Stop;
d62a17ae 2850 }
2851
2852 /* Parse packet. */
2853 return bgp_capability_msg_parse(peer, pnt, size);
718e3744 2854}
6b0655a2 2855
d8151687
QY
2856/**
2857 * Processes a peer's input buffer.
2858 *
2859 * This function sidesteps the event loop and directly calls bgp_event_update()
2860 * after processing each BGP message. This is necessary to ensure proper
2861 * ordering of FSM events and unifies the behavior that was present previously,
2862 * whereby some of the packet handling functions would update the FSM and some
2863 * would not, making event flow difficult to understand. Please think twice
2864 * before hacking this.
2865 *
2866 * Thread type: THREAD_EVENT
2867 * @param thread
2868 * @return nothing (the function is void)
2869 */
cc9f21da 2870void bgp_process_packet(struct thread *thread)
718e3744 2871{
424ab01d 2872 /* Yes first of all get peer pointer. */
d8151687
QY
2873 struct peer *peer; // peer
2874 uint32_t rpkt_quanta_old; // how many packets to read
2875 int fsm_update_result; // return code of bgp_event_update()
2876 int mprc; // message processing return code
555e09d4 2877
424ab01d 2878 peer = THREAD_ARG(thread);
555e09d4
QY
2879 rpkt_quanta_old = atomic_load_explicit(&peer->bgp->rpkt_quanta,
2880 memory_order_relaxed);
e3c7270d 2881 fsm_update_result = 0;
555e09d4 2882
424ab01d 2883 /* Guard against scheduled events that occur after peer deletion. */
9eb217ff 2884 if (peer->status == Deleted || peer->status == Clearing)
cc9f21da 2885 return;
718e3744 2886
555e09d4 2887 unsigned int processed = 0;
d62a17ae 2888
555e09d4 2889 while (processed < rpkt_quanta_old) {
d7c0a89a 2890 uint8_t type = 0;
9eb217ff
QY
2891 bgp_size_t size;
2892 char notify_data_length[2];
d62a17ae 2893
cb1991af 2894 frr_with_mutex (&peer->io_mtx) {
9eb217ff
QY
2895 peer->curr = stream_fifo_pop(peer->ibuf);
2896 }
d62a17ae 2897
9eb217ff 2898 if (peer->curr == NULL) // no packets to process, hmm...
cc9f21da 2899 return;
d62a17ae 2900
9eb217ff
QY
2901 /* skip the marker and copy the packet length */
2902 stream_forward_getp(peer->curr, BGP_MARKER_SIZE);
2903 memcpy(notify_data_length, stream_pnt(peer->curr), 2);
2904
2905 /* read in the packet length and type */
2906 size = stream_getw(peer->curr);
2907 type = stream_getc(peer->curr);
2908
584470fb 2909 hook_call(bgp_packet_dump, peer, type, size, peer->curr);
9eb217ff
QY
2910
2911 /* adjust size to exclude the marker + length + type */
2912 size -= BGP_HEADER_SIZE;
2913
2914 /* Read rest of the packet and call each sort of packet routine
2915 */
2916 switch (type) {
2917 case BGP_MSG_OPEN:
c7bb4f00 2918 frrtrace(2, frr_bgp, open_process, peer, size);
0112e9e0
QY
2919 atomic_fetch_add_explicit(&peer->open_in, 1,
2920 memory_order_relaxed);
d8151687
QY
2921 mprc = bgp_open_receive(peer, size);
2922 if (mprc == BGP_Stop)
af4c2728 2923 flog_err(
e50f7cfd 2924 EC_BGP_PKT_OPEN,
d8151687 2925 "%s: BGP OPEN receipt failed for peer: %s",
0767b4f3 2926 __func__, peer->host);
9eb217ff
QY
2927 break;
2928 case BGP_MSG_UPDATE:
c7bb4f00 2929 frrtrace(2, frr_bgp, update_process, peer, size);
0112e9e0
QY
2930 atomic_fetch_add_explicit(&peer->update_in, 1,
2931 memory_order_relaxed);
9eb217ff 2932 peer->readtime = monotime(NULL);
d8151687
QY
2933 mprc = bgp_update_receive(peer, size);
2934 if (mprc == BGP_Stop)
af4c2728 2935 flog_err(
e50f7cfd 2936 EC_BGP_UPDATE_RCV,
d8151687 2937 "%s: BGP UPDATE receipt failed for peer: %s",
0767b4f3 2938 __func__, peer->host);
9eb217ff
QY
2939 break;
2940 case BGP_MSG_NOTIFY:
c7bb4f00 2941 frrtrace(2, frr_bgp, notification_process, peer, size);
0112e9e0
QY
2942 atomic_fetch_add_explicit(&peer->notify_in, 1,
2943 memory_order_relaxed);
d8151687
QY
2944 mprc = bgp_notify_receive(peer, size);
2945 if (mprc == BGP_Stop)
af4c2728 2946 flog_err(
e50f7cfd 2947 EC_BGP_NOTIFY_RCV,
d8151687 2948 "%s: BGP NOTIFY receipt failed for peer: %s",
0767b4f3 2949 __func__, peer->host);
9eb217ff
QY
2950 break;
2951 case BGP_MSG_KEEPALIVE:
c7bb4f00 2952 frrtrace(2, frr_bgp, keepalive_process, peer, size);
9eb217ff 2953 peer->readtime = monotime(NULL);
0112e9e0
QY
2954 atomic_fetch_add_explicit(&peer->keepalive_in, 1,
2955 memory_order_relaxed);
d8151687
QY
2956 mprc = bgp_keepalive_receive(peer, size);
2957 if (mprc == BGP_Stop)
af4c2728 2958 flog_err(
e50f7cfd 2959 EC_BGP_KEEP_RCV,
d8151687 2960 "%s: BGP KEEPALIVE receipt failed for peer: %s",
0767b4f3 2961 __func__, peer->host);
9eb217ff
QY
2962 break;
2963 case BGP_MSG_ROUTE_REFRESH_NEW:
2964 case BGP_MSG_ROUTE_REFRESH_OLD:
c7bb4f00 2965 frrtrace(2, frr_bgp, refresh_process, peer, size);
0112e9e0
QY
2966 atomic_fetch_add_explicit(&peer->refresh_in, 1,
2967 memory_order_relaxed);
d8151687
QY
2968 mprc = bgp_route_refresh_receive(peer, size);
2969 if (mprc == BGP_Stop)
af4c2728 2970 flog_err(
e50f7cfd 2971 EC_BGP_RFSH_RCV,
d8151687 2972 "%s: BGP ROUTEREFRESH receipt failed for peer: %s",
0767b4f3 2973 __func__, peer->host);
9eb217ff
QY
2974 break;
2975 case BGP_MSG_CAPABILITY:
c7bb4f00 2976 frrtrace(2, frr_bgp, capability_process, peer, size);
0112e9e0
QY
2977 atomic_fetch_add_explicit(&peer->dynamic_cap_in, 1,
2978 memory_order_relaxed);
d8151687
QY
2979 mprc = bgp_capability_receive(peer, size);
2980 if (mprc == BGP_Stop)
af4c2728 2981 flog_err(
e50f7cfd 2982 EC_BGP_CAP_RCV,
d8151687 2983 "%s: BGP CAPABILITY receipt failed for peer: %s",
0767b4f3 2984 __func__, peer->host);
9eb217ff 2985 break;
e3c7270d 2986 default:
db878db0
QY
2987 /* Suppress uninitialized variable warning */
2988 mprc = 0;
5041dc4f 2989 (void)mprc;
becedef6
QY
2990 /*
2991 * The message type should have been sanitized before
2992 * we ever got here. Receipt of a message with an
2993 * invalid header at this point is indicative of a
2994 * security issue.
2995 */
e3c7270d 2996 assert (!"Message of invalid type received during input processing");
9eb217ff
QY
2997 }
2998
d8151687
QY
2999 /* delete processed packet */
3000 stream_free(peer->curr);
3001 peer->curr = NULL;
3002 processed++;
9eb217ff 3003
d8151687
QY
3004 /* Update FSM */
3005 if (mprc != BGP_PACKET_NOOP)
3006 fsm_update_result = bgp_event_update(peer, mprc);
e3c7270d
QY
3007 else
3008 continue;
d8151687 3009
becedef6
QY
3010 /*
3011 * If peer was deleted, do not process any more packets. This
3012 * is usually due to executing BGP_Stop or a stub deletion.
3013 */
d8151687
QY
3014 if (fsm_update_result == FSM_PEER_TRANSFERRED
3015 || fsm_update_result == FSM_PEER_STOPPED)
3016 break;
d62a17ae 3017 }
3018
d8151687
QY
3019 if (fsm_update_result != FSM_PEER_TRANSFERRED
3020 && fsm_update_result != FSM_PEER_STOPPED) {
cb1991af 3021 frr_with_mutex (&peer->io_mtx) {
becedef6
QY
3022 // more work to do, come back later
3023 if (peer->ibuf->count > 0)
e0d550df 3024 thread_add_event(
4af76660
QY
3025 bm->master, bgp_process_packet, peer, 0,
3026 &peer->t_process_packet);
d8151687 3027 }
718e3744 3028 }
718e3744 3029}
9e3b51a7 3030
3031/* Send EOR when routes are processed by selection deferral timer */
3032void bgp_send_delayed_eor(struct bgp *bgp)
3033{
3034 struct peer *peer;
3035 struct listnode *node, *nnode;
3036
3037 /* EOR message sent in bgp_write_proceed_actions */
3038 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer))
3039 bgp_write_proceed_actions(peer);
3040}
6af96fa3
MS
3041
3042/*
3043 * Task callback to handle socket error encountered in the io pthread. We avoid
3044 * having the io pthread try to enqueue fsm events or mess with the peer
3045 * struct.
3046 */
cc9f21da 3047void bgp_packet_process_error(struct thread *thread)
6af96fa3
MS
3048{
3049 struct peer *peer;
3050 int code;
3051
3052 peer = THREAD_ARG(thread);
3053 code = THREAD_VAL(thread);
3054
3055 if (bgp_debug_neighbor_events(peer))
3056 zlog_debug("%s [Event] BGP error %d on fd %d",
046bb347 3057 peer->host, code, peer->fd);
6af96fa3
MS
3058
3059 /* Closed connection or error on the socket */
feb17238 3060 if (peer_established(peer)) {
6af96fa3
MS
3061 if ((CHECK_FLAG(peer->flags, PEER_FLAG_GRACEFUL_RESTART)
3062 || CHECK_FLAG(peer->flags,
3063 PEER_FLAG_GRACEFUL_RESTART_HELPER))
3064 && CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
3065 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
3066 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
3067 } else
3068 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
3069 }
3070
3071 bgp_event_update(peer, code);
6af96fa3 3072}