]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_packet.c
bgpd: move packet writes into dedicated pthread
[mirror_frr.git] / bgpd / bgp_packet.c
CommitLineData
718e3744 1/* BGP packet management routine.
896014f4
DL
2 * Copyright (C) 1999 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
718e3744 20
21#include <zebra.h>
d3ecc69e 22#include <sys/time.h>
718e3744 23
24#include "thread.h"
25#include "stream.h"
26#include "network.h"
27#include "prefix.h"
28#include "command.h"
29#include "log.h"
30#include "memory.h"
d62a17ae 31#include "sockunion.h" /* for inet_ntop () */
baa376fc 32#include "sockopt.h"
718e3744 33#include "linklist.h"
34#include "plist.h"
3f9c7369 35#include "queue.h"
039f3a34 36#include "filter.h"
718e3744 37
38#include "bgpd/bgpd.h"
39#include "bgpd/bgp_table.h"
40#include "bgpd/bgp_dump.h"
41#include "bgpd/bgp_attr.h"
42#include "bgpd/bgp_debug.h"
43#include "bgpd/bgp_fsm.h"
44#include "bgpd/bgp_route.h"
45#include "bgpd/bgp_packet.h"
46#include "bgpd/bgp_open.h"
47#include "bgpd/bgp_aspath.h"
48#include "bgpd/bgp_community.h"
49#include "bgpd/bgp_ecommunity.h"
57d187bc 50#include "bgpd/bgp_lcommunity.h"
718e3744 51#include "bgpd/bgp_network.h"
52#include "bgpd/bgp_mplsvpn.h"
7ef5a232 53#include "bgpd/bgp_evpn.h"
718e3744 54#include "bgpd/bgp_advertise.h"
93406d87 55#include "bgpd/bgp_vty.h"
3f9c7369 56#include "bgpd/bgp_updgrp.h"
cd1964ff 57#include "bgpd/bgp_label.h"
718e3744 58
d3ecc69e
QY
/*
 * File-scope state for the packet-write pthread.
 * NOTE(review): the code that uses these objects is outside this excerpt;
 * presumably plist_mtx guards plist and write_cond is what the write thread
 * sleeps on -- confirm against the write-thread implementation.
 */
/* Linked list of active peers */
static pthread_mutex_t plist_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t write_cond = PTHREAD_COND_INITIALIZER;
static struct list *plist;

/* NOTE(review): presumably the run/stop flag of the write thread -- confirm. */
bool bgp_packet_writes_thread_run;
65
718e3744 66/* Set up BGP packet marker and packet type. */
d62a17ae 67int bgp_packet_set_marker(struct stream *s, u_char type)
718e3744 68{
d62a17ae 69 int i;
718e3744 70
d62a17ae 71 /* Fill in marker. */
72 for (i = 0; i < BGP_MARKER_SIZE; i++)
73 stream_putc(s, 0xff);
718e3744 74
d62a17ae 75 /* Dummy total length. This field is should be filled in later on. */
76 stream_putw(s, 0);
718e3744 77
d62a17ae 78 /* BGP packet type. */
79 stream_putc(s, type);
718e3744 80
d62a17ae 81 /* Return current stream size. */
82 return stream_get_endp(s);
718e3744 83}
84
85/* Set BGP packet header size entry. If size is zero then use current
86 stream size. */
d62a17ae 87int bgp_packet_set_size(struct stream *s)
718e3744 88{
d62a17ae 89 int cp;
718e3744 90
d62a17ae 91 /* Preserve current pointer. */
92 cp = stream_get_endp(s);
93 stream_putw_at(s, BGP_MARKER_SIZE, cp);
718e3744 94
d62a17ae 95 return cp;
718e3744 96}
97
d3ecc69e
QY
/**
 * Queue a packet on the peer's output queue (peer->obuf) and call
 * peer_writes_wake().
 *
 * The packet is pushed with stream_fifo_push(); per the note in the body it
 * goes to the end of the list, not the beginning.
 *
 * Must be externally synchronized around 'peer'; bgp_packet_add() is the
 * variant in this file that takes peer->obuf_mtx itself.
 */
static void bgp_packet_add_unsafe(struct peer *peer, struct stream *s)
{
	/* Add packet to the end of list. */
	stream_fifo_push(peer->obuf, s);
	peer_writes_wake();
}
108
/*
 * Queue a packet on the peer's output queue.
 *
 * Locking wrapper around bgp_packet_add_unsafe(): peer->obuf_mtx is held for
 * the duration of the push and released before returning.
 */
static void bgp_packet_add(struct peer *peer, struct stream *s)
{
	pthread_mutex_lock(&peer->obuf_mtx);
	bgp_packet_add_unsafe(peer, s);
	pthread_mutex_unlock(&peer->obuf_mtx);
}
119
d3ecc69e
QY
/**
 * Remove one packet from the peer's output queue and free it.
 *
 * The packet is whatever stream_fifo_pop() returns for peer->obuf; it is
 * passed straight to stream_free().
 * NOTE(review): the result of stream_fifo_pop() is not checked here --
 * confirm stream_free() tolerates an empty queue.
 *
 * Must be externally synchronized around 'peer'.
 */
static void bgp_packet_delete_unsafe(struct peer *peer)
{
	stream_free(stream_fifo_pop(peer->obuf));
}
128
d3ecc69e 129
718e3744 130/* Check file descriptor whether connect is established. */
d3ecc69e 131static int bgp_connect_check(struct peer *peer, int change_state)
718e3744 132{
d62a17ae 133 int status;
134 socklen_t slen;
135 int ret;
136
137 /* Anyway I have to reset read and write thread. */
138 BGP_READ_OFF(peer->t_read);
d62a17ae 139
140 /* Check file descriptor. */
141 slen = sizeof(status);
142 ret = getsockopt(peer->fd, SOL_SOCKET, SO_ERROR, (void *)&status,
143 &slen);
144
145 /* If getsockopt is fail, this is fatal error. */
146 if (ret < 0) {
147 zlog_info("can't get sockopt for nonblocking connect");
148 BGP_EVENT_ADD(peer, TCP_fatal_error);
149 return -1;
150 }
151
152 /* When status is 0 then TCP connection is established. */
153 if (status == 0) {
154 BGP_EVENT_ADD(peer, TCP_connection_open);
155 return 1;
156 } else {
157 if (bgp_debug_neighbor_events(peer))
158 zlog_debug("%s [Event] Connect failed (%s)", peer->host,
159 safe_strerror(errno));
160 if (change_state)
161 BGP_EVENT_ADD(peer, TCP_connection_open_failed);
162 return 0;
163 }
718e3744 164}
165
d62a17ae 166static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
167 safi_t safi)
93406d87 168{
d62a17ae 169 struct stream *s;
170 iana_afi_t pkt_afi;
5c525538 171 iana_safi_t pkt_safi;
d62a17ae 172
173 if (DISABLE_BGP_ANNOUNCE)
174 return NULL;
175
176 if (bgp_debug_neighbor_events(peer))
177 zlog_debug("send End-of-RIB for %s to %s",
178 afi_safi_print(afi, safi), peer->host);
179
180 s = stream_new(BGP_MAX_PACKET_SIZE);
181
182 /* Make BGP update packet. */
183 bgp_packet_set_marker(s, BGP_MSG_UPDATE);
184
185 /* Unfeasible Routes Length */
186 stream_putw(s, 0);
187
188 if (afi == AFI_IP && safi == SAFI_UNICAST) {
189 /* Total Path Attribute Length */
190 stream_putw(s, 0);
191 } else {
192 /* Convert AFI, SAFI to values for packet. */
193 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
194
195 /* Total Path Attribute Length */
196 stream_putw(s, 6);
197 stream_putc(s, BGP_ATTR_FLAG_OPTIONAL);
198 stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI);
199 stream_putc(s, 3);
200 stream_putw(s, pkt_afi);
201 stream_putc(s, pkt_safi);
202 }
203
204 bgp_packet_set_size(s);
d3ecc69e 205 bgp_packet_add_unsafe(peer, s);
d62a17ae 206 return s;
718e3744 207}
208
718e3744 209/* Get next packet to be written. */
d62a17ae 210static struct stream *bgp_write_packet(struct peer *peer)
718e3744 211{
d62a17ae 212 struct stream *s = NULL;
213 struct peer_af *paf;
214 struct bpacket *next_pkt;
215 afi_t afi;
216 safi_t safi;
217
218 s = stream_fifo_head(peer->obuf);
219 if (s)
220 return s;
221
222 /*
223 * The code beyond this part deals with update packets, proceed only
224 * if peer is Established and updates are not on hold (as part of
225 * update-delay post processing).
3f9c7369 226 */
d62a17ae 227 if (peer->status != Established)
228 return NULL;
229
230 if (peer->bgp && peer->bgp->main_peers_update_hold)
231 return NULL;
232
05c7a1cc
QY
233 FOREACH_AFI_SAFI (afi, safi) {
234 paf = peer_af_find(peer, afi, safi);
235 if (!paf || !PAF_SUBGRP(paf))
236 continue;
237 next_pkt = paf->next_pkt_to_send;
238
239 /* Try to generate a packet for the peer if we are at
240 * the end of
241 * the list. Always try to push out WITHDRAWs first. */
242 if (!next_pkt || !next_pkt->buffer) {
243 next_pkt = subgroup_withdraw_packet(PAF_SUBGRP(paf));
244 if (!next_pkt || !next_pkt->buffer)
245 subgroup_update_packet(PAF_SUBGRP(paf));
d62a17ae 246 next_pkt = paf->next_pkt_to_send;
05c7a1cc 247 }
d62a17ae 248
05c7a1cc
QY
249 /* If we still don't have a packet to send to the peer,
250 * then
251 * try to find out out if we have to send eor or if not,
252 * skip to
253 * the next AFI, SAFI.
254 * Don't send the EOR prematurely... if the subgroup's
255 * coalesce
256 * timer is running, the adjacency-out structure is not
257 * created
258 * yet.
259 */
260 if (!next_pkt || !next_pkt->buffer) {
261 if (CHECK_FLAG(peer->cap, PEER_CAP_RESTART_RCV)) {
262 if (!(PAF_SUBGRP(paf))->t_coalesce
263 && peer->afc_nego[afi][safi]
264 && peer->synctime
265 && !CHECK_FLAG(peer->af_sflags[afi][safi],
266 PEER_STATUS_EOR_SEND)) {
267 SET_FLAG(peer->af_sflags[afi][safi],
268 PEER_STATUS_EOR_SEND);
269 return bgp_update_packet_eor(peer, afi,
270 safi);
d62a17ae 271 }
d62a17ae 272 }
05c7a1cc
QY
273 continue;
274 }
d62a17ae 275
276
d3ecc69e
QY
277 /* Found a packet template to send, overwrite packet
278 * with appropriate
279 * attributes from peer and advance peer */
280 s = bpacket_reformat_for_peer(next_pkt, paf);
281 bgp_packet_add_unsafe(peer, s);
282 bpacket_queue_advance_peer(paf);
283 return s;
d62a17ae 284 }
cb1faec9 285
d3ecc69e 286 return NULL;
718e3744 287}
288
d3ecc69e
QY
289/*
290 * Creates a BGP Keepalive packet and appends it to the peer's output queue.
291 */
d62a17ae 292void bgp_keepalive_send(struct peer *peer)
718e3744 293{
d62a17ae 294 struct stream *s;
295
296 s = stream_new(BGP_MAX_PACKET_SIZE);
718e3744 297
d62a17ae 298 /* Make keepalive packet. */
299 bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
718e3744 300
d62a17ae 301 /* Set packet size. */
302 (void)bgp_packet_set_size(s);
718e3744 303
d62a17ae 304 /* Dump packet if debug option is set. */
305 /* bgp_packet_dump (s); */
718e3744 306
d62a17ae 307 if (bgp_debug_keepalive(peer))
308 zlog_debug("%s sending KEEPALIVE", peer->host);
718e3744 309
d62a17ae 310 /* Add packet to the peer. */
311 bgp_packet_add(peer, s);
718e3744 312}
313
d3ecc69e
QY
314/*
315 * Creates a BGP Open packet and appends it to the peer's output queue.
316 * Sets capabilities as necessary.
317 */
d62a17ae 318void bgp_open_send(struct peer *peer)
718e3744 319{
d62a17ae 320 struct stream *s;
321 u_int16_t send_holdtime;
322 as_t local_as;
718e3744 323
d25e4efc 324 if (PEER_OR_GROUP_TIMER_SET(peer))
d62a17ae 325 send_holdtime = peer->holdtime;
326 else
327 send_holdtime = peer->bgp->default_holdtime;
718e3744 328
d62a17ae 329 /* local-as Change */
330 if (peer->change_local_as)
331 local_as = peer->change_local_as;
332 else
333 local_as = peer->local_as;
718e3744 334
d62a17ae 335 s = stream_new(BGP_MAX_PACKET_SIZE);
718e3744 336
d62a17ae 337 /* Make open packet. */
338 bgp_packet_set_marker(s, BGP_MSG_OPEN);
718e3744 339
d62a17ae 340 /* Set open packet values. */
341 stream_putc(s, BGP_VERSION_4); /* BGP version */
9d303b37
DL
342 stream_putw(s,
343 (local_as <= BGP_AS_MAX) ? (u_int16_t)local_as
344 : BGP_AS_TRANS);
d62a17ae 345 stream_putw(s, send_holdtime); /* Hold Time */
346 stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */
718e3744 347
d62a17ae 348 /* Set capability code. */
349 bgp_open_capability(s, peer);
718e3744 350
d62a17ae 351 /* Set BGP packet length. */
352 (void)bgp_packet_set_size(s);
718e3744 353
d62a17ae 354 if (bgp_debug_neighbor_events(peer))
355 zlog_debug(
356 "%s sending OPEN, version %d, my as %u, holdtime %d, id %s",
357 peer->host, BGP_VERSION_4, local_as, send_holdtime,
358 inet_ntoa(peer->local_id));
718e3744 359
d62a17ae 360 /* Dump packet if debug option is set. */
361 /* bgp_packet_dump (s); */
718e3744 362
d62a17ae 363 /* Add packet to the peer. */
364 bgp_packet_add(peer, s);
718e3744 365}
366
d3ecc69e
QY
367/*
368 * Creates a BGP Notify and appends it to the peer's output queue.
369 *
370 * This function awakens the write thread to ensure the packet
371 * gets out ASAP.
372 *
373 * @param peer
374 * @param code BGP error code
375 * @param sub_code BGP error subcode
376 * @param data Data portion
377 * @param datalen length of data portion
378 */
d62a17ae 379void bgp_notify_send_with_data(struct peer *peer, u_char code, u_char sub_code,
380 u_char *data, size_t datalen)
718e3744 381{
d62a17ae 382 struct stream *s;
383 int length;
384
385 /* Allocate new stream. */
386 s = stream_new(BGP_MAX_PACKET_SIZE);
387
d3ecc69e 388 /* Make notify packet. */
d62a17ae 389 bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
390
391 /* Set notify packet values. */
392 stream_putc(s, code); /* BGP notify code */
393 stream_putc(s, sub_code); /* BGP notify sub_code */
394
395 /* If notify data is present. */
396 if (data)
397 stream_write(s, data, datalen);
398
399 /* Set BGP packet length. */
400 length = bgp_packet_set_size(s);
401
402 /* Add packet to the peer. */
d3ecc69e 403 pthread_mutex_lock(&peer->obuf_mtx);
d62a17ae 404 stream_fifo_clean(peer->obuf);
d3ecc69e 405 pthread_mutex_unlock(&peer->obuf_mtx);
d62a17ae 406
407 /* For debug */
408 {
409 struct bgp_notify bgp_notify;
410 int first = 0;
411 int i;
412 char c[4];
413
414 bgp_notify.code = code;
415 bgp_notify.subcode = sub_code;
416 bgp_notify.data = NULL;
417 bgp_notify.length = length - BGP_MSG_NOTIFY_MIN_SIZE;
418 bgp_notify.raw_data = data;
419
420 peer->notify.code = bgp_notify.code;
421 peer->notify.subcode = bgp_notify.subcode;
422
423 if (bgp_notify.length) {
424 bgp_notify.data =
425 XMALLOC(MTYPE_TMP, bgp_notify.length * 3);
426 for (i = 0; i < bgp_notify.length; i++)
427 if (first) {
428 sprintf(c, " %02x", data[i]);
429 strcat(bgp_notify.data, c);
430 } else {
431 first = 1;
432 sprintf(c, "%02x", data[i]);
433 strcpy(bgp_notify.data, c);
434 }
435 }
436 bgp_notify_print(peer, &bgp_notify, "sending");
437
438 if (bgp_notify.data) {
439 XFREE(MTYPE_TMP, bgp_notify.data);
440 bgp_notify.data = NULL;
441 bgp_notify.length = 0;
442 }
443 }
444
445 /* peer reset cause */
446 if (code == BGP_NOTIFY_CEASE) {
447 if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
448 peer->last_reset = PEER_DOWN_USER_RESET;
449 else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN)
450 peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
451 else
452 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
453 } else
454 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
455
d3ecc69e
QY
456 /* Add packet to peer's output queue */
457 bgp_packet_add(peer, s);
458 /* Wake up the write thread to get the notify out ASAP */
459 peer_writes_wake();
718e3744 460}
461
d3ecc69e
QY
/*
 * Creates a BGP Notify without a data portion and appends it to the peer's
 * output queue.
 *
 * Convenience wrapper: forwards to bgp_notify_send_with_data() with a NULL
 * data pointer and a data length of 0.
 *
 * @param peer
 * @param code BGP error code
 * @param sub_code BGP error subcode
 */
void bgp_notify_send(struct peer *peer, u_char code, u_char sub_code)
{
	bgp_notify_send_with_data(peer, code, sub_code, NULL, 0);
}
476
d3ecc69e
QY
477/*
478 * Creates BGP Route Refresh packet and appends it to the peer's output queue.
479 *
480 * @param peer
481 * @param afi Address Family Identifier
482 * @param safi Subsequent Address Family Identifier
483 * @param orf_type Outbound Route Filtering type
484 * @param when_to_refresh Whether to refresh immediately or defer
485 * @param remove Whether to remove ORF for specified AFI/SAFI
486 */
d62a17ae 487void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
488 u_char orf_type, u_char when_to_refresh, int remove)
718e3744 489{
d62a17ae 490 struct stream *s;
491 struct bgp_filter *filter;
492 int orf_refresh = 0;
493 iana_afi_t pkt_afi;
5c525538 494 iana_safi_t pkt_safi;
d62a17ae 495
496 if (DISABLE_BGP_ANNOUNCE)
497 return;
498
499 filter = &peer->filter[afi][safi];
500
501 /* Convert AFI, SAFI to values for packet. */
502 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
503
504 s = stream_new(BGP_MAX_PACKET_SIZE);
505
506 /* Make BGP update packet. */
507 if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
508 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_NEW);
718e3744 509 else
d62a17ae 510 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_OLD);
511
512 /* Encode Route Refresh message. */
513 stream_putw(s, pkt_afi);
514 stream_putc(s, 0);
515 stream_putc(s, pkt_safi);
516
517 if (orf_type == ORF_TYPE_PREFIX || orf_type == ORF_TYPE_PREFIX_OLD)
518 if (remove || filter->plist[FILTER_IN].plist) {
519 u_int16_t orf_len;
520 unsigned long orfp;
521
522 orf_refresh = 1;
523 stream_putc(s, when_to_refresh);
524 stream_putc(s, orf_type);
525 orfp = stream_get_endp(s);
526 stream_putw(s, 0);
527
528 if (remove) {
529 UNSET_FLAG(peer->af_sflags[afi][safi],
530 PEER_STATUS_ORF_PREFIX_SEND);
531 stream_putc(s, ORF_COMMON_PART_REMOVE_ALL);
532 if (bgp_debug_neighbor_events(peer))
533 zlog_debug(
534 "%s sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %d/%d",
535 peer->host, orf_type,
536 (when_to_refresh == REFRESH_DEFER
537 ? "defer"
538 : "immediate"),
539 pkt_afi, pkt_safi);
540 } else {
541 SET_FLAG(peer->af_sflags[afi][safi],
542 PEER_STATUS_ORF_PREFIX_SEND);
543 prefix_bgp_orf_entry(
544 s, filter->plist[FILTER_IN].plist,
545 ORF_COMMON_PART_ADD,
546 ORF_COMMON_PART_PERMIT,
547 ORF_COMMON_PART_DENY);
548 if (bgp_debug_neighbor_events(peer))
549 zlog_debug(
550 "%s sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %d/%d",
551 peer->host, orf_type,
552 (when_to_refresh == REFRESH_DEFER
553 ? "defer"
554 : "immediate"),
555 pkt_afi, pkt_safi);
556 }
557
558 /* Total ORF Entry Len. */
559 orf_len = stream_get_endp(s) - orfp - 2;
560 stream_putw_at(s, orfp, orf_len);
561 }
562
563 /* Set packet size. */
564 (void)bgp_packet_set_size(s);
565
566 if (bgp_debug_neighbor_events(peer)) {
567 if (!orf_refresh)
568 zlog_debug("%s sending REFRESH_REQ for afi/safi: %d/%d",
569 peer->host, pkt_afi, pkt_safi);
570 }
571
572 /* Add packet to the peer. */
573 bgp_packet_add(peer, s);
718e3744 574}
575
d3ecc69e
QY
576/*
577 * Create a BGP Capability packet and append it to the peer's output queue.
578 *
579 * @param peer
580 * @param afi Address Family Identifier
581 * @param safi Subsequent Address Family Identifier
582 * @param capability_code BGP Capability Code
583 * @param action Set or Remove capability
584 */
d62a17ae 585void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
586 int capability_code, int action)
718e3744 587{
d62a17ae 588 struct stream *s;
589 iana_afi_t pkt_afi;
5c525538 590 iana_safi_t pkt_safi;
d62a17ae 591
592 /* Convert AFI, SAFI to values for packet. */
593 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
594
595 s = stream_new(BGP_MAX_PACKET_SIZE);
596
597 /* Make BGP update packet. */
598 bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);
599
600 /* Encode MP_EXT capability. */
601 if (capability_code == CAPABILITY_CODE_MP) {
602 stream_putc(s, action);
603 stream_putc(s, CAPABILITY_CODE_MP);
604 stream_putc(s, CAPABILITY_CODE_MP_LEN);
605 stream_putw(s, pkt_afi);
606 stream_putc(s, 0);
607 stream_putc(s, pkt_safi);
608
609 if (bgp_debug_neighbor_events(peer))
610 zlog_debug(
611 "%s sending CAPABILITY has %s MP_EXT CAP for afi/safi: %d/%d",
612 peer->host,
613 action == CAPABILITY_ACTION_SET ? "Advertising"
614 : "Removing",
615 pkt_afi, pkt_safi);
616 }
617
618 /* Set packet size. */
619 (void)bgp_packet_set_size(s);
718e3744 620
d62a17ae 621 /* Add packet to the peer. */
622 bgp_packet_add(peer, s);
d62a17ae 623}
718e3744 624
d62a17ae 625/* RFC1771 6.8 Connection collision detection. */
626static int bgp_collision_detect(struct peer *new, struct in_addr remote_id)
627{
628 struct peer *peer;
629
630 /* Upon receipt of an OPEN message, the local system must examine
631 all of its connections that are in the OpenConfirm state. A BGP
632 speaker may also examine connections in an OpenSent state if it
633 knows the BGP Identifier of the peer by means outside of the
634 protocol. If among these connections there is a connection to a
635 remote BGP speaker whose BGP Identifier equals the one in the
636 OPEN message, then the local system performs the following
637 collision resolution procedure: */
638
639 if ((peer = new->doppelganger) != NULL) {
640 /* Do not accept the new connection in Established or Clearing
641 * states.
642 * Note that a peer GR is handled by closing the existing
643 * connection
644 * upon receipt of new one.
645 */
646 if (peer->status == Established || peer->status == Clearing) {
647 bgp_notify_send(new, BGP_NOTIFY_CEASE,
648 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
649 return (-1);
650 } else if ((peer->status == OpenConfirm)
651 || (peer->status == OpenSent)) {
652 /* 1. The BGP Identifier of the local system is compared
653 to
654 the BGP Identifier of the remote system (as specified
655 in
656 the OPEN message). */
657
658 if (ntohl(peer->local_id.s_addr)
659 < ntohl(remote_id.s_addr))
660 if (!CHECK_FLAG(peer->sflags,
661 PEER_STATUS_ACCEPT_PEER)) {
662 /* 2. If the value of the local BGP
663 Identifier is less
664 than the remote one, the local system
665 closes BGP
666 connection that already exists (the
667 one that is
668 already in the OpenConfirm state),
669 and accepts BGP
670 connection initiated by the remote
671 system. */
672 bgp_notify_send(
673 peer, BGP_NOTIFY_CEASE,
674 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
675 return 1;
676 } else {
677 bgp_notify_send(
678 new, BGP_NOTIFY_CEASE,
679 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
680 return -1;
681 }
682 else {
683 /* 3. Otherwise, the local system closes newly
684 created
685 BGP connection (the one associated with the
686 newly
687 received OPEN message), and continues to use
688 the
689 existing one (the one that is already in the
690 OpenConfirm state). */
691 if (CHECK_FLAG(peer->sflags,
692 PEER_STATUS_ACCEPT_PEER)) {
693 bgp_notify_send(
694 peer, BGP_NOTIFY_CEASE,
695 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
696 return 1;
697 } else {
698 bgp_notify_send(
699 new, BGP_NOTIFY_CEASE,
700 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
701 return -1;
702 }
703 }
704 }
705 }
706 return 0;
707}
718e3744 708
d62a17ae 709static int bgp_open_receive(struct peer *peer, bgp_size_t size)
710{
711 int ret;
712 u_char version;
713 u_char optlen;
714 u_int16_t holdtime;
715 u_int16_t send_holdtime;
716 as_t remote_as;
717 as_t as4 = 0;
718 struct in_addr remote_id;
719 int mp_capability;
720 u_int8_t notify_data_remote_as[2];
721 u_int8_t notify_data_remote_as4[4];
722 u_int8_t notify_data_remote_id[4];
723 u_int16_t *holdtime_ptr;
724
725 /* Parse open packet. */
726 version = stream_getc(peer->ibuf);
727 memcpy(notify_data_remote_as, stream_pnt(peer->ibuf), 2);
728 remote_as = stream_getw(peer->ibuf);
729 holdtime_ptr = (u_int16_t *)stream_pnt(peer->ibuf);
730 holdtime = stream_getw(peer->ibuf);
731 memcpy(notify_data_remote_id, stream_pnt(peer->ibuf), 4);
732 remote_id.s_addr = stream_get_ipv4(peer->ibuf);
733
734 /* Receive OPEN message log */
735 if (bgp_debug_neighbor_events(peer))
736 zlog_debug(
737 "%s rcv OPEN, version %d, remote-as (in open) %u,"
738 " holdtime %d, id %s",
739 peer->host, version, remote_as, holdtime,
740 inet_ntoa(remote_id));
741
742 /* BEGIN to read the capability here, but dont do it yet */
743 mp_capability = 0;
744 optlen = stream_getc(peer->ibuf);
745
746 if (optlen != 0) {
747 /* If not enough bytes, it is an error. */
748 if (STREAM_READABLE(peer->ibuf) < optlen) {
749 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
750 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
751 return -1;
752 }
718e3744 753
d62a17ae 754 /* We need the as4 capability value *right now* because
755 * if it is there, we have not got the remote_as yet, and
756 * without
757 * that we do not know which peer is connecting to us now.
758 */
759 as4 = peek_for_as4_capability(peer, optlen);
760 memcpy(notify_data_remote_as4, &as4, 4);
761 }
718e3744 762
d62a17ae 763 /* Just in case we have a silly peer who sends AS4 capability set to 0
764 */
765 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) {
766 zlog_err("%s bad OPEN, got AS4 capability, but AS4 set to 0",
767 peer->host);
768 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
769 BGP_NOTIFY_OPEN_BAD_PEER_AS,
770 notify_data_remote_as4, 4);
771 return -1;
772 }
718e3744 773
d62a17ae 774 if (remote_as == BGP_AS_TRANS) {
775 /* Take the AS4 from the capability. We must have received the
776 * capability now! Otherwise we have a asn16 peer who uses
777 * BGP_AS_TRANS, for some unknown reason.
778 */
779 if (as4 == BGP_AS_TRANS) {
780 zlog_err(
781 "%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed",
782 peer->host);
783 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
784 BGP_NOTIFY_OPEN_BAD_PEER_AS,
785 notify_data_remote_as4, 4);
786 return -1;
787 }
718e3744 788
d62a17ae 789 if (!as4 && BGP_DEBUG(as4, AS4))
790 zlog_debug(
791 "%s [AS4] OPEN remote_as is AS_TRANS, but no AS4."
792 " Odd, but proceeding.",
793 peer->host);
794 else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4))
795 zlog_debug(
796 "%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits "
797 "in 2-bytes, very odd peer.",
798 peer->host, as4);
799 if (as4)
800 remote_as = as4;
801 } else {
802 /* We may have a partner with AS4 who has an asno < BGP_AS_MAX
803 */
804 /* If we have got the capability, peer->as4cap must match
805 * remote_as */
806 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV)
807 && as4 != remote_as) {
808 /* raise error, log this, close session */
809 zlog_err(
810 "%s bad OPEN, got AS4 capability, but remote_as %u"
811 " mismatch with 16bit 'myasn' %u in open",
812 peer->host, as4, remote_as);
813 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
814 BGP_NOTIFY_OPEN_BAD_PEER_AS,
815 notify_data_remote_as4, 4);
816 return -1;
817 }
818 }
6b0655a2 819
d62a17ae 820 /* remote router-id check. */
821 if (remote_id.s_addr == 0 || IPV4_CLASS_DE(ntohl(remote_id.s_addr))
822 || ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)) {
823 if (bgp_debug_neighbor_events(peer))
824 zlog_debug("%s bad OPEN, wrong router identifier %s",
825 peer->host, inet_ntoa(remote_id));
826 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
827 BGP_NOTIFY_OPEN_BAD_BGP_IDENT,
828 notify_data_remote_id, 4);
829 return -1;
830 }
831
832 /* Set remote router-id */
833 peer->remote_id = remote_id;
834
835 /* Peer BGP version check. */
836 if (version != BGP_VERSION_4) {
837 u_int16_t maxver = htons(BGP_VERSION_4);
838 /* XXX this reply may not be correct if version < 4 XXX */
839 if (bgp_debug_neighbor_events(peer))
840 zlog_debug(
841 "%s bad protocol version, remote requested %d, local request %d",
842 peer->host, version, BGP_VERSION_4);
843 /* Data must be in network byte order here */
844 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
845 BGP_NOTIFY_OPEN_UNSUP_VERSION,
846 (u_int8_t *)&maxver, 2);
847 return -1;
848 }
849
850 /* Check neighbor as number. */
851 if (peer->as_type == AS_UNSPECIFIED) {
852 if (bgp_debug_neighbor_events(peer))
853 zlog_debug(
854 "%s bad OPEN, remote AS is unspecified currently",
855 peer->host);
856 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
857 BGP_NOTIFY_OPEN_BAD_PEER_AS,
858 notify_data_remote_as, 2);
1ff9a340 859 return -1;
d62a17ae 860 } else if (peer->as_type == AS_INTERNAL) {
861 if (remote_as != peer->bgp->as) {
862 if (bgp_debug_neighbor_events(peer))
863 zlog_debug(
864 "%s bad OPEN, remote AS is %u, internal specified",
865 peer->host, remote_as);
866 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
867 BGP_NOTIFY_OPEN_BAD_PEER_AS,
868 notify_data_remote_as, 2);
869 return -1;
1ff9a340 870 }
d62a17ae 871 peer->as = peer->local_as;
872 } else if (peer->as_type == AS_EXTERNAL) {
873 if (remote_as == peer->bgp->as) {
874 if (bgp_debug_neighbor_events(peer))
875 zlog_debug(
876 "%s bad OPEN, remote AS is %u, external specified",
877 peer->host, remote_as);
878 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
879 BGP_NOTIFY_OPEN_BAD_PEER_AS,
880 notify_data_remote_as, 2);
881 return -1;
1ff9a340 882 }
d62a17ae 883 peer->as = remote_as;
884 } else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) {
885 if (bgp_debug_neighbor_events(peer))
886 zlog_debug("%s bad OPEN, remote AS is %u, expected %u",
887 peer->host, remote_as, peer->as);
888 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
889 BGP_NOTIFY_OPEN_BAD_PEER_AS,
890 notify_data_remote_as, 2);
891 return -1;
eb821189 892 }
718e3744 893
d62a17ae 894 /* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST
895 calculate the value of the Hold Timer by using the smaller of its
896 configured Hold Time and the Hold Time received in the OPEN message.
897 The Hold Time MUST be either zero or at least three seconds. An
898 implementation may reject connections on the basis of the Hold Time.
0b2aa3a0 899 */
d62a17ae 900
901 if (holdtime < 3 && holdtime != 0) {
902 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
903 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
904 (u_char *)holdtime_ptr, 2);
905 return -1;
0b2aa3a0 906 }
d62a17ae 907
908 /* From the rfc: A reasonable maximum time between KEEPALIVE messages
909 would be one third of the Hold Time interval. KEEPALIVE messages
910 MUST NOT be sent more frequently than one per second. An
911 implementation MAY adjust the rate at which it sends KEEPALIVE
912 messages as a function of the Hold Time interval. */
913
d25e4efc 914 if (PEER_OR_GROUP_TIMER_SET(peer))
d62a17ae 915 send_holdtime = peer->holdtime;
916 else
917 send_holdtime = peer->bgp->default_holdtime;
918
919 if (holdtime < send_holdtime)
920 peer->v_holdtime = holdtime;
921 else
922 peer->v_holdtime = send_holdtime;
923
d25e4efc
DS
924 if ((PEER_OR_GROUP_TIMER_SET(peer))
925 && (peer->keepalive < peer->v_holdtime / 3))
d62a17ae 926 peer->v_keepalive = peer->keepalive;
927 else
928 peer->v_keepalive = peer->v_holdtime / 3;
929
930 /* Open option part parse. */
931 if (optlen != 0) {
932 if ((ret = bgp_open_option_parse(peer, optlen, &mp_capability))
933 < 0)
934 return ret;
935 } else {
936 if (bgp_debug_neighbor_events(peer))
937 zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0",
938 peer->host);
0299c004 939 }
d62a17ae 940
941 /*
942 * Assume that the peer supports the locally configured set of
943 * AFI/SAFIs if the peer did not send us any Mulitiprotocol
944 * capabilities, or if 'override-capability' is configured.
945 */
946 if (!mp_capability
947 || CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) {
948 peer->afc_nego[AFI_IP][SAFI_UNICAST] =
949 peer->afc[AFI_IP][SAFI_UNICAST];
950 peer->afc_nego[AFI_IP][SAFI_MULTICAST] =
951 peer->afc[AFI_IP][SAFI_MULTICAST];
952 peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] =
953 peer->afc[AFI_IP][SAFI_LABELED_UNICAST];
954 peer->afc_nego[AFI_IP6][SAFI_UNICAST] =
955 peer->afc[AFI_IP6][SAFI_UNICAST];
956 peer->afc_nego[AFI_IP6][SAFI_MULTICAST] =
957 peer->afc[AFI_IP6][SAFI_MULTICAST];
958 peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] =
959 peer->afc[AFI_IP6][SAFI_LABELED_UNICAST];
960 peer->afc_nego[AFI_L2VPN][SAFI_EVPN] =
961 peer->afc[AFI_L2VPN][SAFI_EVPN];
0299c004 962 }
d62a17ae 963
964 /* When collision is detected and this peer is closed. Retrun
965 immidiately. */
966 ret = bgp_collision_detect(peer, remote_id);
967 if (ret < 0)
968 return ret;
969
970 /* Get sockname. */
971 if ((ret = bgp_getsockname(peer)) < 0) {
972 zlog_err("%s: bgp_getsockname() failed for peer: %s",
973 __FUNCTION__, peer->host);
974 return (ret);
975 }
976
977 /* Verify valid local address present based on negotiated
978 * address-families. */
979 if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
e5f22b30 980 || peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST]
d62a17ae 981 || peer->afc_nego[AFI_IP][SAFI_MULTICAST]
982 || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
983 || peer->afc_nego[AFI_IP][SAFI_ENCAP]) {
984 if (!peer->nexthop.v4.s_addr) {
985#if defined(HAVE_CUMULUS)
986 zlog_err(
987 "%s: No local IPv4 addr resetting connection, fd %d",
988 peer->host, peer->fd);
989 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
990 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
991 return -1;
1d808091 992#endif
d62a17ae 993 }
994 }
995 if (peer->afc_nego[AFI_IP6][SAFI_UNICAST]
e5f22b30 996 || peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST]
d62a17ae 997 || peer->afc_nego[AFI_IP6][SAFI_MULTICAST]
998 || peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN]
999 || peer->afc_nego[AFI_IP6][SAFI_ENCAP]) {
1000 if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global)) {
1001#if defined(HAVE_CUMULUS)
1002 zlog_err(
1003 "%s: No local IPv6 addr resetting connection, fd %d",
1004 peer->host, peer->fd);
1005 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1006 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
1007 return -1;
1d808091 1008#endif
d62a17ae 1009 }
1010 }
1011 peer->rtt = sockopt_tcp_rtt(peer->fd);
1012
1013 if ((ret = bgp_event_update(peer, Receive_OPEN_message)) < 0) {
1014 zlog_err("%s: BGP event update failed for peer: %s",
1015 __FUNCTION__, peer->host);
1016 /* DD: bgp send notify and reset state */
1017 return (ret);
1018 }
1019
1020 peer->packet_size = 0;
1021 if (peer->ibuf)
1022 stream_reset(peer->ibuf);
1023
1024 return 0;
718e3744 1025}
1026
d62a17ae 1027/* Called when there is a change in the EOR(implicit or explicit) status of a
1028 peer.
f188f2c4 1029 Ends the update-delay if all expected peers are done with EORs. */
d62a17ae 1030void bgp_check_update_delay(struct bgp *bgp)
f188f2c4 1031{
d62a17ae 1032 struct listnode *node, *nnode;
1033 struct peer *peer = NULL;
1034
1035 if (bgp_debug_neighbor_events(peer))
1036 zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d",
1037 bgp->established, bgp->restarted_peers,
1038 bgp->implicit_eors, bgp->explicit_eors);
1039
1040 if (bgp->established
1041 <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
1042 /* This is an extra sanity check to make sure we wait for all
1043 the
1044 eligible configured peers. This check is performed if
1045 establish wait
1046 timer is on, or establish wait option is not given with the
1047 update-delay command */
1048 if (bgp->t_establish_wait
1049 || (bgp->v_establish_wait == bgp->v_update_delay))
1050 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
1051 if (CHECK_FLAG(peer->flags,
1052 PEER_FLAG_CONFIG_NODE)
1053 && !CHECK_FLAG(peer->flags,
1054 PEER_FLAG_SHUTDOWN)
1055 && !peer->update_delay_over) {
1056 if (bgp_debug_neighbor_events(peer))
1057 zlog_debug(
1058 " Peer %s pending, continuing read-only mode",
1059 peer->host);
1060 return;
1061 }
1062 }
1063
1064 zlog_info(
1065 "Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d",
1066 bgp->restarted_peers, bgp->implicit_eors,
1067 bgp->explicit_eors);
1068 bgp_update_delay_end(bgp);
1069 }
f188f2c4
DS
1070}
1071
1072/* Called if peer is known to have restarted. The restart-state bit in
1073 Graceful-Restart capability is used for that */
d62a17ae 1074void bgp_update_restarted_peers(struct peer *peer)
f188f2c4 1075{
d62a17ae 1076 if (!bgp_update_delay_active(peer->bgp))
1077 return; /* BGP update delay has ended */
1078 if (peer->update_delay_over)
1079 return; /* This peer has already been considered */
1080
1081 if (bgp_debug_neighbor_events(peer))
1082 zlog_debug("Peer %s: Checking restarted", peer->host);
1083
1084 if (peer->status == Established) {
1085 peer->update_delay_over = 1;
1086 peer->bgp->restarted_peers++;
1087 bgp_check_update_delay(peer->bgp);
1088 }
f188f2c4
DS
1089}
1090
1091/* Called as peer receives a keep-alive. Determines if this occurence can be
1092 taken as an implicit EOR for this peer.
1093 NOTE: The very first keep-alive after the Established state of a peer is
d62a17ae 1094 considered implicit EOR for the update-delay purposes */
1095void bgp_update_implicit_eors(struct peer *peer)
f188f2c4 1096{
d62a17ae 1097 if (!bgp_update_delay_active(peer->bgp))
1098 return; /* BGP update delay has ended */
1099 if (peer->update_delay_over)
1100 return; /* This peer has already been considered */
1101
1102 if (bgp_debug_neighbor_events(peer))
1103 zlog_debug("Peer %s: Checking implicit EORs", peer->host);
1104
1105 if (peer->status == Established) {
1106 peer->update_delay_over = 1;
1107 peer->bgp->implicit_eors++;
1108 bgp_check_update_delay(peer->bgp);
1109 }
f188f2c4
DS
1110}
1111
1112/* Should be called only when there is a change in the EOR_RECEIVED status
1113 for any afi/safi on a peer */
d62a17ae 1114static void bgp_update_explicit_eors(struct peer *peer)
f188f2c4 1115{
d62a17ae 1116 afi_t afi;
1117 safi_t safi;
1118
1119 if (!bgp_update_delay_active(peer->bgp))
1120 return; /* BGP update delay has ended */
1121 if (peer->update_delay_over)
1122 return; /* This peer has already been considered */
1123
1124 if (bgp_debug_neighbor_events(peer))
1125 zlog_debug("Peer %s: Checking explicit EORs", peer->host);
1126
05c7a1cc
QY
1127 FOREACH_AFI_SAFI (afi, safi) {
1128 if (peer->afc_nego[afi][safi]
1129 && !CHECK_FLAG(peer->af_sflags[afi][safi],
1130 PEER_STATUS_EOR_RECEIVED)) {
1131 if (bgp_debug_neighbor_events(peer))
1132 zlog_debug(
1133 " afi %d safi %d didnt receive EOR",
1134 afi, safi);
1135 return;
d62a17ae 1136 }
05c7a1cc 1137 }
d62a17ae 1138
1139 peer->update_delay_over = 1;
1140 peer->bgp->explicit_eors++;
1141 bgp_check_update_delay(peer->bgp);
f188f2c4
DS
1142}
1143
d62a17ae 1144/* Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers
1145 * mp_withdraw, if set, is used to nullify attr structure on most of the calling
1146 * safi function
7ef5a232
PG
1147 * and for evpn, passed as parameter
1148 */
d62a17ae 1149int bgp_nlri_parse(struct peer *peer, struct attr *attr,
1150 struct bgp_nlri *packet, int mp_withdraw)
96e52474 1151{
d62a17ae 1152 switch (packet->safi) {
1153 case SAFI_UNICAST:
1154 case SAFI_MULTICAST:
1155 return bgp_nlri_parse_ip(peer, mp_withdraw ? NULL : attr,
1156 packet);
1157 case SAFI_LABELED_UNICAST:
1158 return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr,
1159 packet);
1160 case SAFI_MPLS_VPN:
1161 return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr,
1162 packet);
1163 case SAFI_EVPN:
1164 return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw);
5c525538
RW
1165 default:
1166 return -1;
d62a17ae 1167 }
96e52474
PJ
1168}
1169
718e3744 1170/* Parse BGP Update packet and make attribute object. */
d62a17ae 1171static int bgp_update_receive(struct peer *peer, bgp_size_t size)
718e3744 1172{
d62a17ae 1173 int ret, nlri_ret;
1174 u_char *end;
1175 struct stream *s;
1176 struct attr attr;
1177 bgp_size_t attribute_len;
1178 bgp_size_t update_len;
1179 bgp_size_t withdraw_len;
1180
1181 enum NLRI_TYPES {
1182 NLRI_UPDATE,
1183 NLRI_WITHDRAW,
1184 NLRI_MP_UPDATE,
1185 NLRI_MP_WITHDRAW,
1186 NLRI_TYPE_MAX
1187 };
1188 struct bgp_nlri nlris[NLRI_TYPE_MAX];
1189
1190 /* Status must be Established. */
1191 if (peer->status != Established) {
1192 zlog_err("%s [FSM] Update packet received under status %s",
1193 peer->host,
1194 lookup_msg(bgp_status_msg, peer->status, NULL));
1195 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
1196 return -1;
1197 }
1198
1199 /* Set initial values. */
1200 memset(&attr, 0, sizeof(struct attr));
1201 attr.label_index = BGP_INVALID_LABEL_INDEX;
1202 attr.label = MPLS_INVALID_LABEL;
1203 memset(&nlris, 0, sizeof(nlris));
1204 memset(peer->rcvd_attr_str, 0, BUFSIZ);
1205 peer->rcvd_attr_printed = 0;
1206
1207 s = peer->ibuf;
1208 end = stream_pnt(s) + size;
1209
1210 /* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute
1211 Length is too large (i.e., if Unfeasible Routes Length + Total
1212 Attribute Length + 23 exceeds the message Length), then the Error
1213 Subcode is set to Malformed Attribute List. */
1214 if (stream_pnt(s) + 2 > end) {
1215 zlog_err(
1216 "%s [Error] Update packet error"
1217 " (packet length is short for unfeasible length)",
1218 peer->host);
1219 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1220 BGP_NOTIFY_UPDATE_MAL_ATTR);
1221 return -1;
1222 }
1223
1224 /* Unfeasible Route Length. */
1225 withdraw_len = stream_getw(s);
1226
1227 /* Unfeasible Route Length check. */
1228 if (stream_pnt(s) + withdraw_len > end) {
1229 zlog_err(
1230 "%s [Error] Update packet error"
1231 " (packet unfeasible length overflow %d)",
1232 peer->host, withdraw_len);
1233 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1234 BGP_NOTIFY_UPDATE_MAL_ATTR);
1235 return -1;
1236 }
1237
1238 /* Unfeasible Route packet format check. */
1239 if (withdraw_len > 0) {
1240 nlris[NLRI_WITHDRAW].afi = AFI_IP;
1241 nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST;
1242 nlris[NLRI_WITHDRAW].nlri = stream_pnt(s);
1243 nlris[NLRI_WITHDRAW].length = withdraw_len;
1244 stream_forward_getp(s, withdraw_len);
1245 }
1246
1247 /* Attribute total length check. */
1248 if (stream_pnt(s) + 2 > end) {
1249 zlog_warn(
1250 "%s [Error] Packet Error"
1251 " (update packet is short for attribute length)",
1252 peer->host);
1253 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1254 BGP_NOTIFY_UPDATE_MAL_ATTR);
1255 return -1;
1256 }
1257
1258 /* Fetch attribute total length. */
1259 attribute_len = stream_getw(s);
1260
1261 /* Attribute length check. */
1262 if (stream_pnt(s) + attribute_len > end) {
1263 zlog_warn(
1264 "%s [Error] Packet Error"
1265 " (update packet attribute length overflow %d)",
1266 peer->host, attribute_len);
1267 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1268 BGP_NOTIFY_UPDATE_MAL_ATTR);
1269 return -1;
1270 }
1271
1272 /* Certain attribute parsing errors should not be considered bad enough
1273 * to reset the session for, most particularly any partial/optional
1274 * attributes that have 'tunneled' over speakers that don't understand
1275 * them. Instead we withdraw only the prefix concerned.
1276 *
1277 * Complicates the flow a little though..
1278 */
1279 bgp_attr_parse_ret_t attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
1280/* This define morphs the update case into a withdraw when lower levels
1281 * have signalled an error condition where this is best.
1282 */
b881c707 1283#define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
718e3744 1284
d62a17ae 1285 /* Parse attribute when it exists. */
1286 if (attribute_len) {
1287 attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len,
1288 &nlris[NLRI_MP_UPDATE],
1289 &nlris[NLRI_MP_WITHDRAW]);
1290 if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) {
1291 bgp_attr_unintern_sub(&attr);
1292 return -1;
1293 }
1294 }
1295
1296 /* Logging the attribute. */
1297 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
1298 || BGP_DEBUG(update, UPDATE_IN)
1299 || BGP_DEBUG(update, UPDATE_PREFIX)) {
1300 ret = bgp_dump_attr(&attr, peer->rcvd_attr_str, BUFSIZ);
1301
1302 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
1303 zlog_err(
1304 "%s rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
1305 peer->host);
1306
1307 if (ret && bgp_debug_update(peer, NULL, NULL, 1)) {
1308 zlog_debug("%s rcvd UPDATE w/ attr: %s", peer->host,
1309 peer->rcvd_attr_str);
1310 peer->rcvd_attr_printed = 1;
1311 }
1312 }
1313
1314 /* Network Layer Reachability Information. */
1315 update_len = end - stream_pnt(s);
1316
1317 if (update_len) {
1318 /* Set NLRI portion to structure. */
1319 nlris[NLRI_UPDATE].afi = AFI_IP;
1320 nlris[NLRI_UPDATE].safi = SAFI_UNICAST;
1321 nlris[NLRI_UPDATE].nlri = stream_pnt(s);
1322 nlris[NLRI_UPDATE].length = update_len;
1323 stream_forward_getp(s, update_len);
1324 }
1325
1326 if (BGP_DEBUG(update, UPDATE_IN))
1327 zlog_debug("%s rcvd UPDATE wlen %d attrlen %d alen %d",
1328 peer->host, withdraw_len, attribute_len, update_len);
1329
1330 /* Parse any given NLRIs */
1331 for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) {
1332 if (!nlris[i].nlri)
1333 continue;
1334
1335 /* NLRI is processed iff the peer if configured for the specific
1336 * afi/safi */
1337 if (!peer->afc[nlris[i].afi][nlris[i].safi]) {
1338 zlog_info(
1339 "%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u",
1340 peer->host, nlris[i].afi, nlris[i].safi);
1341 continue;
1342 }
1343
1344 /* EoR handled later */
1345 if (nlris[i].length == 0)
1346 continue;
1347
1348 switch (i) {
1349 case NLRI_UPDATE:
1350 case NLRI_MP_UPDATE:
1351 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
1352 &nlris[i], 0);
1353 break;
1354 case NLRI_WITHDRAW:
1355 case NLRI_MP_WITHDRAW:
1356 nlri_ret = bgp_nlri_parse(peer, &attr, &nlris[i], 1);
1357 break;
1358 default:
1359 nlri_ret = -1;
1360 }
1361
1362 if (nlri_ret < 0) {
1363 zlog_err("%s [Error] Error parsing NLRI", peer->host);
1364 if (peer->status == Established)
1365 bgp_notify_send(
1366 peer, BGP_NOTIFY_UPDATE_ERR,
1367 i <= NLRI_WITHDRAW
1368 ? BGP_NOTIFY_UPDATE_INVAL_NETWORK
1369 : BGP_NOTIFY_UPDATE_OPT_ATTR_ERR);
1370 bgp_attr_unintern_sub(&attr);
1371 return -1;
1372 }
1373 }
1374
1375 /* EoR checks
1376 *
1377 * Non-MP IPv4/Unicast EoR is a completely empty UPDATE
1378 * and MP EoR should have only an empty MP_UNREACH
1379 */
9b9df989
DS
1380 if ((!update_len && !withdraw_len &&
1381 nlris[NLRI_MP_UPDATE].length == 0) ||
1382 (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
d62a17ae 1383 afi_t afi = 0;
1384 safi_t safi;
1385
1386 /* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already
1387 * checked
1388 * update and withdraw NLRI lengths are 0.
1389 */
1390 if (!attribute_len) {
1391 afi = AFI_IP;
1392 safi = SAFI_UNICAST;
1393 } else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI)
1394 && nlris[NLRI_MP_WITHDRAW].length == 0) {
1395 afi = nlris[NLRI_MP_WITHDRAW].afi;
1396 safi = nlris[NLRI_MP_WITHDRAW].safi;
9b9df989
DS
1397 } else if (attr_parse_ret == BGP_ATTR_PARSE_EOR) {
1398 afi = nlris[NLRI_MP_UPDATE].afi;
1399 safi = nlris[NLRI_MP_UPDATE].safi;
d62a17ae 1400 }
1401
1402 if (afi && peer->afc[afi][safi]) {
1403 /* End-of-RIB received */
1404 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
1405 PEER_STATUS_EOR_RECEIVED)) {
1406 SET_FLAG(peer->af_sflags[afi][safi],
1407 PEER_STATUS_EOR_RECEIVED);
1408 bgp_update_explicit_eors(peer);
1409 }
1410
1411 /* NSF delete stale route */
1412 if (peer->nsf[afi][safi])
1413 bgp_clear_stale_route(peer, afi, safi);
1414
1415 if (bgp_debug_neighbor_events(peer)) {
1416 zlog_debug("rcvd End-of-RIB for %s from %s",
1417 afi_safi_print(afi, safi),
1418 peer->host);
1419 }
1420 }
f80f838b 1421 }
d62a17ae 1422
1423 /* Everything is done. We unintern temporary structures which
1424 interned in bgp_attr_parse(). */
1425 bgp_attr_unintern_sub(&attr);
1426
1427 /* If peering is stopped due to some reason, do not generate BGP
1428 event. */
1429 if (peer->status != Established)
1430 return 0;
1431
1432 /* Increment packet counter. */
1433 peer->update_in++;
1434 peer->update_time = bgp_clock();
1435
1436 /* Rearm holdtime timer */
1437 BGP_TIMER_OFF(peer->t_holdtime);
1438 bgp_timer_set(peer);
1439
1440 return 0;
718e3744 1441}
1442
1443/* Notify message treatment function. */
d62a17ae 1444static void bgp_notify_receive(struct peer *peer, bgp_size_t size)
718e3744 1445{
d62a17ae 1446 struct bgp_notify bgp_notify;
1447
1448 if (peer->notify.data) {
1449 XFREE(MTYPE_TMP, peer->notify.data);
1450 peer->notify.data = NULL;
1451 peer->notify.length = 0;
1452 }
1453
1454 bgp_notify.code = stream_getc(peer->ibuf);
1455 bgp_notify.subcode = stream_getc(peer->ibuf);
1456 bgp_notify.length = size - 2;
1457 bgp_notify.data = NULL;
1458
1459 /* Preserv notify code and sub code. */
1460 peer->notify.code = bgp_notify.code;
1461 peer->notify.subcode = bgp_notify.subcode;
1462 /* For further diagnostic record returned Data. */
1463 if (bgp_notify.length) {
1464 peer->notify.length = size - 2;
1465 peer->notify.data = XMALLOC(MTYPE_TMP, size - 2);
1466 memcpy(peer->notify.data, stream_pnt(peer->ibuf), size - 2);
1467 }
1468
1469 /* For debug */
1470 {
1471 int i;
1472 int first = 0;
1473 char c[4];
1474
1475 if (bgp_notify.length) {
1476 bgp_notify.data =
1477 XMALLOC(MTYPE_TMP, bgp_notify.length * 3);
1478 for (i = 0; i < bgp_notify.length; i++)
1479 if (first) {
1480 sprintf(c, " %02x",
1481 stream_getc(peer->ibuf));
1482 strcat(bgp_notify.data, c);
1483 } else {
1484 first = 1;
1485 sprintf(c, "%02x",
1486 stream_getc(peer->ibuf));
1487 strcpy(bgp_notify.data, c);
1488 }
1489 bgp_notify.raw_data = (u_char *)peer->notify.data;
1490 }
1491
1492 bgp_notify_print(peer, &bgp_notify, "received");
1493 if (bgp_notify.data) {
1494 XFREE(MTYPE_TMP, bgp_notify.data);
1495 bgp_notify.data = NULL;
1496 bgp_notify.length = 0;
1497 }
1498 }
1499
1500 /* peer count update */
1501 peer->notify_in++;
1502
1503 peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;
1504
1505 /* We have to check for Notify with Unsupported Optional Parameter.
1506 in that case we fallback to open without the capability option.
1507 But this done in bgp_stop. We just mark it here to avoid changing
1508 the fsm tables. */
1509 if (bgp_notify.code == BGP_NOTIFY_OPEN_ERR
1510 && bgp_notify.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM)
1511 UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);
1512
1513 BGP_EVENT_ADD(peer, Receive_NOTIFICATION_message);
718e3744 1514}
1515
1516/* Keepalive treatment function -- get keepalive send keepalive */
d62a17ae 1517static void bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
718e3744 1518{
d62a17ae 1519 if (bgp_debug_keepalive(peer))
1520 zlog_debug("%s KEEPALIVE rcvd", peer->host);
1521
1522 BGP_EVENT_ADD(peer, Receive_KEEPALIVE_message);
718e3744 1523}
1524
1525/* Route refresh message is received. */
d62a17ae 1526static void bgp_route_refresh_receive(struct peer *peer, bgp_size_t size)
718e3744 1527{
d62a17ae 1528 iana_afi_t pkt_afi;
1529 afi_t afi;
5c525538
RW
1530 iana_safi_t pkt_safi;
1531 safi_t safi;
d62a17ae 1532 struct stream *s;
1533 struct peer_af *paf;
1534 struct update_group *updgrp;
1535 struct peer *updgrp_peer;
1536
1537 /* If peer does not have the capability, send notification. */
1538 if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) {
1539 zlog_err("%s [Error] BGP route refresh is not enabled",
1540 peer->host);
1541 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
1542 BGP_NOTIFY_HEADER_BAD_MESTYPE);
1543 return;
1544 }
1545
1546 /* Status must be Established. */
1547 if (peer->status != Established) {
1548 zlog_err(
1549 "%s [Error] Route refresh packet received under status %s",
1550 peer->host,
1551 lookup_msg(bgp_status_msg, peer->status, NULL));
1552 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
1553 return;
1554 }
1555
1556 s = peer->ibuf;
1557
1558 /* Parse packet. */
1559 pkt_afi = stream_getw(s);
1560 (void)stream_getc(s);
1561 pkt_safi = stream_getc(s);
1562
1563 if (bgp_debug_update(peer, NULL, NULL, 0))
1564 zlog_debug("%s rcvd REFRESH_REQ for afi/safi: %d/%d",
1565 peer->host, pkt_afi, pkt_safi);
1566
1567 /* Convert AFI, SAFI to internal values and check. */
1568 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) {
1569 zlog_info(
1570 "%s REFRESH_REQ for unrecognized afi/safi: %d/%d - ignored",
1571 peer->host, pkt_afi, pkt_safi);
1572 return;
1573 }
1574
1575 if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) {
1576 u_char *end;
1577 u_char when_to_refresh;
1578 u_char orf_type;
1579 u_int16_t orf_len;
1580
1581 if (size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE)
1582 < 5) {
1583 zlog_info("%s ORF route refresh length error",
1584 peer->host);
1585 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1586 return;
718e3744 1587 }
1588
d62a17ae 1589 when_to_refresh = stream_getc(s);
1590 end = stream_pnt(s) + (size - 5);
1591
1592 while ((stream_pnt(s) + 2) < end) {
1593 orf_type = stream_getc(s);
1594 orf_len = stream_getw(s);
1595
1596 /* orf_len in bounds? */
1597 if ((stream_pnt(s) + orf_len) > end)
1598 break; /* XXX: Notify instead?? */
1599 if (orf_type == ORF_TYPE_PREFIX
1600 || orf_type == ORF_TYPE_PREFIX_OLD) {
1601 uint8_t *p_pnt = stream_pnt(s);
1602 uint8_t *p_end = stream_pnt(s) + orf_len;
1603 struct orf_prefix orfp;
1604 u_char common = 0;
1605 u_int32_t seq;
1606 int psize;
1607 char name[BUFSIZ];
1608 int ret = CMD_SUCCESS;
1609
1610 if (bgp_debug_neighbor_events(peer)) {
1611 zlog_debug(
1612 "%s rcvd Prefixlist ORF(%d) length %d",
1613 peer->host, orf_type, orf_len);
1614 }
1615
1616 /* we're going to read at least 1 byte of common
1617 * ORF header,
1618 * and 7 bytes of ORF Address-filter entry from
1619 * the stream
1620 */
1621 if (orf_len < 7)
1622 break;
1623
1624 /* ORF prefix-list name */
1625 sprintf(name, "%s.%d.%d", peer->host, afi,
1626 safi);
1627
1628 while (p_pnt < p_end) {
1629 /* If the ORF entry is malformed, want
1630 * to read as much of it
1631 * as possible without going beyond the
1632 * bounds of the entry,
1633 * to maximise debug information.
1634 */
1635 int ok;
1636 memset(&orfp, 0,
1637 sizeof(struct orf_prefix));
1638 common = *p_pnt++;
1639 /* after ++: p_pnt <= p_end */
1640 if (common
1641 & ORF_COMMON_PART_REMOVE_ALL) {
1642 if (bgp_debug_neighbor_events(
1643 peer))
1644 zlog_debug(
1645 "%s rcvd Remove-All pfxlist ORF request",
1646 peer->host);
1647 prefix_bgp_orf_remove_all(afi,
1648 name);
1649 break;
1650 }
1651 ok = ((u_int32_t)(p_end - p_pnt)
1652 >= sizeof(u_int32_t));
1653 if (ok) {
1654 memcpy(&seq, p_pnt,
1655 sizeof(u_int32_t));
1656 p_pnt += sizeof(u_int32_t);
1657 orfp.seq = ntohl(seq);
1658 } else
1659 p_pnt = p_end;
1660
1661 if ((ok = (p_pnt < p_end)))
1662 orfp.ge =
1663 *p_pnt++; /* value
1664 checked in
1665 prefix_bgp_orf_set()
1666 */
1667 if ((ok = (p_pnt < p_end)))
1668 orfp.le =
1669 *p_pnt++; /* value
1670 checked in
1671 prefix_bgp_orf_set()
1672 */
1673 if ((ok = (p_pnt < p_end)))
1674 orfp.p.prefixlen = *p_pnt++;
1675 orfp.p.family = afi2family(
1676 afi); /* afi checked already */
1677
1678 psize = PSIZE(
1679 orfp.p.prefixlen); /* 0 if not
1680 ok */
1681 if (psize
1682 > prefix_blen(
1683 &orfp.p)) /* valid for
1684 family ? */
1685 {
1686 ok = 0;
1687 psize = prefix_blen(&orfp.p);
1688 }
1689 if (psize
1690 > (p_end - p_pnt)) /* valid for
1691 packet ? */
1692 {
1693 ok = 0;
1694 psize = p_end - p_pnt;
1695 }
1696
1697 if (psize > 0)
1698 memcpy(&orfp.p.u.prefix, p_pnt,
1699 psize);
1700 p_pnt += psize;
1701
1702 if (bgp_debug_neighbor_events(peer)) {
1703 char buf[INET6_BUFSIZ];
1704
1705 zlog_debug(
1706 "%s rcvd %s %s seq %u %s/%d ge %d le %d%s",
1707 peer->host,
1708 (common & ORF_COMMON_PART_REMOVE
1709 ? "Remove"
1710 : "Add"),
1711 (common & ORF_COMMON_PART_DENY
1712 ? "deny"
1713 : "permit"),
1714 orfp.seq,
1715 inet_ntop(
1716 orfp.p.family,
1717 &orfp.p.u.prefix,
1718 buf,
1719 INET6_BUFSIZ),
1720 orfp.p.prefixlen,
1721 orfp.ge, orfp.le,
1722 ok ? "" : " MALFORMED");
1723 }
1724
1725 if (ok)
1726 ret = prefix_bgp_orf_set(
1727 name, afi, &orfp,
1728 (common & ORF_COMMON_PART_DENY
1729 ? 0
1730 : 1),
1731 (common & ORF_COMMON_PART_REMOVE
1732 ? 0
1733 : 1));
1734
1735 if (!ok || (ok && ret != CMD_SUCCESS)) {
1736 zlog_info(
1737 "%s Received misformatted prefixlist ORF."
1738 " Remove All pfxlist",
1739 peer->host);
1740 prefix_bgp_orf_remove_all(afi,
1741 name);
1742 break;
1743 }
1744 }
1745
1746 peer->orf_plist[afi][safi] =
1747 prefix_bgp_orf_lookup(afi, name);
1748 }
1749 stream_forward_getp(s, orf_len);
718e3744 1750 }
d62a17ae 1751 if (bgp_debug_neighbor_events(peer))
1752 zlog_debug("%s rcvd Refresh %s ORF request", peer->host,
1753 when_to_refresh == REFRESH_DEFER
1754 ? "Defer"
1755 : "Immediate");
1756 if (when_to_refresh == REFRESH_DEFER)
1757 return;
1758 }
40d2700d 1759
d62a17ae 1760 /* First update is deferred until ORF or ROUTE-REFRESH is received */
1761 if (CHECK_FLAG(peer->af_sflags[afi][safi],
1762 PEER_STATUS_ORF_WAIT_REFRESH))
1763 UNSET_FLAG(peer->af_sflags[afi][safi],
1764 PEER_STATUS_ORF_WAIT_REFRESH);
1765
1766 paf = peer_af_find(peer, afi, safi);
1767 if (paf && paf->subgroup) {
1768 if (peer->orf_plist[afi][safi]) {
1769 updgrp = PAF_UPDGRP(paf);
1770 updgrp_peer = UPDGRP_PEER(updgrp);
1771 updgrp_peer->orf_plist[afi][safi] =
1772 peer->orf_plist[afi][safi];
1773 }
1774
1775 /* If the peer is configured for default-originate clear the
1776 * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will
1777 * re-advertise the
1778 * default
1779 */
1780 if (CHECK_FLAG(paf->subgroup->sflags,
1781 SUBGRP_STATUS_DEFAULT_ORIGINATE))
1782 UNSET_FLAG(paf->subgroup->sflags,
1783 SUBGRP_STATUS_DEFAULT_ORIGINATE);
718e3744 1784 }
d62a17ae 1785
1786 /* Perform route refreshment to the peer */
1787 bgp_announce_route(peer, afi, safi);
718e3744 1788}
1789
d62a17ae 1790static int bgp_capability_msg_parse(struct peer *peer, u_char *pnt,
1791 bgp_size_t length)
718e3744 1792{
d62a17ae 1793 u_char *end;
1794 struct capability_mp_data mpc;
1795 struct capability_header *hdr;
1796 u_char action;
1797 iana_afi_t pkt_afi;
1798 afi_t afi;
5c525538
RW
1799 iana_safi_t pkt_safi;
1800 safi_t safi;
d62a17ae 1801
1802 end = pnt + length;
1803
1804 while (pnt < end) {
1805 /* We need at least action, capability code and capability
1806 * length. */
1807 if (pnt + 3 > end) {
1808 zlog_info("%s Capability length error", peer->host);
1809 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1810 return -1;
1811 }
1812 action = *pnt;
1813 hdr = (struct capability_header *)(pnt + 1);
1814
1815 /* Action value check. */
1816 if (action != CAPABILITY_ACTION_SET
1817 && action != CAPABILITY_ACTION_UNSET) {
1818 zlog_info("%s Capability Action Value error %d",
1819 peer->host, action);
1820 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1821 return -1;
1822 }
1823
1824 if (bgp_debug_neighbor_events(peer))
1825 zlog_debug(
1826 "%s CAPABILITY has action: %d, code: %u, length %u",
1827 peer->host, action, hdr->code, hdr->length);
1828
1829 /* Capability length check. */
1830 if ((pnt + hdr->length + 3) > end) {
1831 zlog_info("%s Capability length error", peer->host);
1832 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1833 return -1;
1834 }
1835
1836 /* Fetch structure to the byte stream. */
1837 memcpy(&mpc, pnt + 3, sizeof(struct capability_mp_data));
d2b6417b 1838 pnt += hdr->length + 3;
d62a17ae 1839
1840 /* We know MP Capability Code. */
1841 if (hdr->code == CAPABILITY_CODE_MP) {
1842 pkt_afi = ntohs(mpc.afi);
1843 pkt_safi = mpc.safi;
1844
1845 /* Ignore capability when override-capability is set. */
1846 if (CHECK_FLAG(peer->flags,
1847 PEER_FLAG_OVERRIDE_CAPABILITY))
1848 continue;
1849
1850 /* Convert AFI, SAFI to internal values. */
1851 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi,
1852 &safi)) {
1853 if (bgp_debug_neighbor_events(peer))
1854 zlog_debug(
1855 "%s Dynamic Capability MP_EXT afi/safi invalid "
1856 "(%u/%u)",
1857 peer->host, pkt_afi, pkt_safi);
1858 continue;
1859 }
1860
1861 /* Address family check. */
1862 if (bgp_debug_neighbor_events(peer))
1863 zlog_debug(
1864 "%s CAPABILITY has %s MP_EXT CAP for afi/safi: %u/%u",
1865 peer->host,
1866 action == CAPABILITY_ACTION_SET
1867 ? "Advertising"
1868 : "Removing",
1869 pkt_afi, pkt_safi);
1870
1871 if (action == CAPABILITY_ACTION_SET) {
1872 peer->afc_recv[afi][safi] = 1;
1873 if (peer->afc[afi][safi]) {
1874 peer->afc_nego[afi][safi] = 1;
1875 bgp_announce_route(peer, afi, safi);
1876 }
1877 } else {
1878 peer->afc_recv[afi][safi] = 0;
1879 peer->afc_nego[afi][safi] = 0;
1880
1881 if (peer_active_nego(peer))
1882 bgp_clear_route(peer, afi, safi);
1883 else
1884 BGP_EVENT_ADD(peer, BGP_Stop);
1885 }
1886 } else {
1887 zlog_warn(
1888 "%s unrecognized capability code: %d - ignored",
1889 peer->host, hdr->code);
1890 }
d62a17ae 1891 }
1892 return 0;
718e3744 1893}
1894
d62a17ae 1895/* Dynamic Capability is received.
01b7ce2d
PJ
1896 *
1897 * This is exported for unit-test purposes
1898 */
d62a17ae 1899int bgp_capability_receive(struct peer *peer, bgp_size_t size)
718e3744 1900{
d62a17ae 1901 u_char *pnt;
1902
1903 /* Fetch pointer. */
1904 pnt = stream_pnt(peer->ibuf);
1905
1906 if (bgp_debug_neighbor_events(peer))
1907 zlog_debug("%s rcv CAPABILITY", peer->host);
1908
1909 /* If peer does not have the capability, send notification. */
1910 if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) {
1911 zlog_err("%s [Error] BGP dynamic capability is not enabled",
1912 peer->host);
1913 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
1914 BGP_NOTIFY_HEADER_BAD_MESTYPE);
1915 return -1;
1916 }
1917
1918 /* Status must be Established. */
1919 if (peer->status != Established) {
1920 zlog_err(
1921 "%s [Error] Dynamic capability packet received under status %s",
1922 peer->host,
1923 lookup_msg(bgp_status_msg, peer->status, NULL));
1924 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
1925 return -1;
1926 }
1927
1928 /* Parse packet. */
1929 return bgp_capability_msg_parse(peer, pnt, size);
718e3744 1930}
6b0655a2 1931
718e3744 1932/* BGP read utility function. */
d62a17ae 1933static int bgp_read_packet(struct peer *peer)
718e3744 1934{
d62a17ae 1935 int nbytes;
1936 int readsize;
718e3744 1937
d62a17ae 1938 readsize = peer->packet_size - stream_get_endp(peer->ibuf);
718e3744 1939
d62a17ae 1940 /* If size is zero then return. */
1941 if (!readsize)
1942 return 0;
718e3744 1943
d62a17ae 1944 /* Read packet from fd. */
1945 nbytes = stream_read_try(peer->ibuf, peer->fd, readsize);
718e3744 1946
d62a17ae 1947 /* If read byte is smaller than zero then error occured. */
1948 if (nbytes < 0) {
1949 /* Transient error should retry */
1950 if (nbytes == -2)
1951 return -1;
718e3744 1952
d62a17ae 1953 zlog_err("%s [Error] bgp_read_packet error: %s", peer->host,
1954 safe_strerror(errno));
93406d87 1955
d62a17ae 1956 if (peer->status == Established) {
1957 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
1958 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
1959 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
1960 } else
1961 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
1962 }
93406d87 1963
d62a17ae 1964 BGP_EVENT_ADD(peer, TCP_fatal_error);
1965 return -1;
1966 }
718e3744 1967
d62a17ae 1968 /* When read byte is zero : clear bgp peer and return */
1969 if (nbytes == 0) {
1970 if (bgp_debug_neighbor_events(peer))
1971 zlog_debug("%s [Event] BGP connection closed fd %d",
1972 peer->host, peer->fd);
1973
1974 if (peer->status == Established) {
1975 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
1976 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
1977 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
1978 } else
1979 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
1980 }
e0701b79 1981
d62a17ae 1982 BGP_EVENT_ADD(peer, TCP_connection_closed);
1983 return -1;
93406d87 1984 }
e0701b79 1985
d62a17ae 1986 /* We read partial packet. */
1987 if (stream_get_endp(peer->ibuf) != peer->packet_size)
1988 return -1;
718e3744 1989
d62a17ae 1990 return 0;
718e3744 1991}
1992
1993/* Marker check. */
d62a17ae 1994static int bgp_marker_all_one(struct stream *s, int length)
718e3744 1995{
d62a17ae 1996 int i;
718e3744 1997
d62a17ae 1998 for (i = 0; i < length; i++)
1999 if (s->data[i] != 0xff)
2000 return 0;
718e3744 2001
d62a17ae 2002 return 1;
718e3744 2003}
2004
2005/* Starting point of packet process function. */
d62a17ae 2006int bgp_read(struct thread *thread)
718e3744 2007{
d62a17ae 2008 int ret;
2009 u_char type = 0;
2010 struct peer *peer;
2011 bgp_size_t size;
2012 char notify_data_length[2];
2013 u_int32_t notify_out;
2014
2015 /* Yes first of all get peer pointer. */
2016 peer = THREAD_ARG(thread);
2017 peer->t_read = NULL;
2018
2019 /* Note notify_out so we can check later to see if we sent another one
2020 */
2021 notify_out = peer->notify_out;
2022
2023 /* For non-blocking IO check. */
2024 if (peer->status == Connect) {
2025 bgp_connect_check(peer, 1);
2026 goto done;
2027 } else {
2028 if (peer->fd < 0) {
2029 zlog_err("bgp_read peer's fd is negative value %d",
2030 peer->fd);
2031 return -1;
2032 }
2033 BGP_READ_ON(peer->t_read, bgp_read, peer->fd);
718e3744 2034 }
d62a17ae 2035
2036 /* Read packet header to determine type of the packet */
2037 if (peer->packet_size == 0)
2038 peer->packet_size = BGP_HEADER_SIZE;
2039
2040 if (stream_get_endp(peer->ibuf) < BGP_HEADER_SIZE) {
2041 ret = bgp_read_packet(peer);
2042
2043 /* Header read error or partial read packet. */
2044 if (ret < 0)
2045 goto done;
2046
2047 /* Get size and type. */
2048 stream_forward_getp(peer->ibuf, BGP_MARKER_SIZE);
2049 memcpy(notify_data_length, stream_pnt(peer->ibuf), 2);
2050 size = stream_getw(peer->ibuf);
2051 type = stream_getc(peer->ibuf);
2052
2053 /* Marker check */
2054 if (((type == BGP_MSG_OPEN) || (type == BGP_MSG_KEEPALIVE))
2055 && !bgp_marker_all_one(peer->ibuf, BGP_MARKER_SIZE)) {
2056 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2057 BGP_NOTIFY_HEADER_NOT_SYNC);
2058 goto done;
2059 }
2060
2061 /* BGP type check. */
2062 if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
2063 && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE
2064 && type != BGP_MSG_ROUTE_REFRESH_NEW
2065 && type != BGP_MSG_ROUTE_REFRESH_OLD
2066 && type != BGP_MSG_CAPABILITY) {
2067 if (bgp_debug_neighbor_events(peer))
2068 zlog_debug("%s unknown message type 0x%02x",
2069 peer->host, type);
2070 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
2071 BGP_NOTIFY_HEADER_BAD_MESTYPE,
2072 &type, 1);
2073 goto done;
2074 }
2075 /* Mimimum packet length check. */
2076 if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
2077 || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
2078 || (type == BGP_MSG_UPDATE
2079 && size < BGP_MSG_UPDATE_MIN_SIZE)
2080 || (type == BGP_MSG_NOTIFY
2081 && size < BGP_MSG_NOTIFY_MIN_SIZE)
2082 || (type == BGP_MSG_KEEPALIVE
2083 && size != BGP_MSG_KEEPALIVE_MIN_SIZE)
2084 || (type == BGP_MSG_ROUTE_REFRESH_NEW
2085 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
2086 || (type == BGP_MSG_ROUTE_REFRESH_OLD
2087 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
2088 || (type == BGP_MSG_CAPABILITY
2089 && size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
2090 if (bgp_debug_neighbor_events(peer))
2091 zlog_debug("%s bad message length - %d for %s",
2092 peer->host, size,
2093 type == 128
2094 ? "ROUTE-REFRESH"
2095 : bgp_type_str[(int)type]);
2096 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
2097 BGP_NOTIFY_HEADER_BAD_MESLEN,
2098 (u_char *)notify_data_length,
2099 2);
2100 goto done;
2101 }
2102
2103 /* Adjust size to message length. */
2104 peer->packet_size = size;
718e3744 2105 }
2106
d62a17ae 2107 ret = bgp_read_packet(peer);
2108 if (ret < 0)
2109 goto done;
2110
2111 /* Get size and type again. */
2112 (void)stream_getw_from(peer->ibuf, BGP_MARKER_SIZE);
2113 type = stream_getc_from(peer->ibuf, BGP_MARKER_SIZE + 2);
2114
2115 /* BGP packet dump function. */
2116 bgp_dump_packet(peer, type, peer->ibuf);
2117
2118 size = (peer->packet_size - BGP_HEADER_SIZE);
2119
2120 /* Read rest of the packet and call each sort of packet routine */
2121 switch (type) {
2122 case BGP_MSG_OPEN:
2123 peer->open_in++;
2124 bgp_open_receive(peer, size); /* XXX return value ignored! */
2125 break;
2126 case BGP_MSG_UPDATE:
2127 peer->readtime = monotime(NULL);
2128 bgp_update_receive(peer, size);
2129 break;
2130 case BGP_MSG_NOTIFY:
2131 bgp_notify_receive(peer, size);
2132 break;
2133 case BGP_MSG_KEEPALIVE:
2134 peer->readtime = monotime(NULL);
2135 bgp_keepalive_receive(peer, size);
2136 break;
2137 case BGP_MSG_ROUTE_REFRESH_NEW:
2138 case BGP_MSG_ROUTE_REFRESH_OLD:
2139 peer->refresh_in++;
2140 bgp_route_refresh_receive(peer, size);
2141 break;
2142 case BGP_MSG_CAPABILITY:
2143 peer->dynamic_cap_in++;
2144 bgp_capability_receive(peer, size);
2145 break;
718e3744 2146 }
d62a17ae 2147
2148 /* If reading this packet caused us to send a NOTIFICATION then store a
2149 * copy
2150 * of the packet for troubleshooting purposes
2151 */
2152 if (notify_out < peer->notify_out) {
2153 memcpy(peer->last_reset_cause, peer->ibuf->data,
2154 peer->packet_size);
2155 peer->last_reset_cause_size = peer->packet_size;
2156 notify_out = peer->notify_out;
2157 }
2158
2159 /* Clear input buffer. */
2160 peer->packet_size = 0;
2161 if (peer->ibuf)
2162 stream_reset(peer->ibuf);
2163
2164done:
2165 /* If reading this packet caused us to send a NOTIFICATION then store a
2166 * copy
2167 * of the packet for troubleshooting purposes
2168 */
2169 if (notify_out < peer->notify_out) {
2170 memcpy(peer->last_reset_cause, peer->ibuf->data,
2171 peer->packet_size);
2172 peer->last_reset_cause_size = peer->packet_size;
718e3744 2173 }
2174
d62a17ae 2175 return 0;
718e3744 2176}
d3ecc69e
QY
2177
2178/* ------------- write thread ------------------ */
2179
2180/**
2181 * Flush peer output buffer.
2182 *
2183 * This function pops packets off of peer->obuf and writes them to peer->fd.
2184 * The amount of packets written is equal to the minimum of peer->wpkt_quanta
2185 * and the number of packets on the output buffer.
2186 *
2187 * If write() returns an error, the appropriate FSM event is generated.
2188 *
2189 * The return value is equal to the number of packets written
2190 * (which may be zero).
2191 */
2192static int bgp_write(struct peer *peer)
2193{
2194 u_char type;
2195 struct stream *s;
2196 int num;
2197 int update_last_write = 0;
2198 unsigned int count = 0;
2199 unsigned int oc = 0;
2200
2201 /* For non-blocking IO check. */
2202 if (peer->status == Connect) {
2203 bgp_connect_check(peer, 1);
2204 return 0;
2205 }
2206
2207 /* Write packets. The number of packets written is the value of
2208 * bgp->wpkt_quanta or the size of the output buffer, whichever is
2209 * smaller.*/
2210 while (count < peer->bgp->wpkt_quanta
2211 && (s = bgp_write_packet(peer)) != NULL) {
2212 int writenum;
2213 do { // write a full packet, or return on error
2214 writenum = stream_get_endp(s) - stream_get_getp(s);
2215 num = write(peer->fd, STREAM_PNT(s), writenum);
2216
2217 if (num < 0) {
2218 if (ERRNO_IO_RETRY(errno))
2219 continue;
2220
2221 BGP_EVENT_ADD(peer, TCP_fatal_error);
2222 goto done;
2223 } else if (num != writenum) // incomplete write
2224 stream_forward_getp(s, num);
2225
2226 } while (num != writenum);
2227
2228 /* Retrieve BGP packet type. */
2229 stream_set_getp(s, BGP_MARKER_SIZE + 2);
2230 type = stream_getc(s);
2231
2232 switch (type) {
2233 case BGP_MSG_OPEN:
2234 peer->open_out++;
2235 break;
2236 case BGP_MSG_UPDATE:
2237 peer->update_out++;
2238 break;
2239 case BGP_MSG_NOTIFY:
2240 peer->notify_out++;
2241 /* Double start timer. */
2242 peer->v_start *= 2;
2243
2244 /* Overflow check. */
2245 if (peer->v_start >= (60 * 2))
2246 peer->v_start = (60 * 2);
2247
2248 /* Handle Graceful Restart case where the state changes
2249 to
2250 Connect instead of Idle */
2251 /* Flush any existing events */
2252 BGP_EVENT_ADD(peer, BGP_Stop);
2253 goto done;
2254
2255 case BGP_MSG_KEEPALIVE:
2256 peer->keepalive_out++;
2257 break;
2258 case BGP_MSG_ROUTE_REFRESH_NEW:
2259 case BGP_MSG_ROUTE_REFRESH_OLD:
2260 peer->refresh_out++;
2261 break;
2262 case BGP_MSG_CAPABILITY:
2263 peer->dynamic_cap_out++;
2264 break;
2265 }
2266
2267 count++;
2268 /* OK we send packet so delete it. */
2269 bgp_packet_delete_unsafe(peer);
2270 update_last_write = 1;
2271 }
2272
2273done : {
2274 /* Update last_update if UPDATEs were written. */
2275 if (peer->update_out > oc)
2276 peer->last_update = bgp_clock();
2277
2278 /* If we TXed any flavor of packet update last_write */
2279 if (update_last_write)
2280 peer->last_write = bgp_clock();
2281}
2282
2283 return count;
2284}
2285
2286static void cleanup_handler(void *arg)
2287{
2288 if (plist)
2289 list_delete(plist);
2290
2291 plist = NULL;
2292
2293 pthread_mutex_unlock(&plist_mtx);
2294}
2295
2296/**
2297 * Entry function for peer packet flushing pthread.
2298 *
2299 * The plist must be initialized before calling this.
2300 */
2301void *peer_writes_start(void *arg)
2302{
2303 struct timeval currtime = {0, 0};
2304 struct timeval sleeptime = {0, 500};
2305 struct timespec next_update = {0, 0};
2306
2307 // initialize
2308 pthread_mutex_lock(&plist_mtx);
2309 plist = list_new();
2310
2311 struct listnode *ln;
2312 struct peer *peer;
2313
2314 pthread_cleanup_push(&cleanup_handler, NULL);
2315
2316 bgp_packet_writes_thread_run = true;
2317
2318 while (bgp_packet_writes_thread_run) { // wait around until next update
2319 // time
2320 if (plist->count > 0)
2321 pthread_cond_timedwait(&write_cond, &plist_mtx,
2322 &next_update);
2323 else // wait around until we have some peers
2324 while (plist->count == 0
2325 && bgp_packet_writes_thread_run)
2326 pthread_cond_wait(&write_cond, &plist_mtx);
2327
2328 for (ALL_LIST_ELEMENTS_RO(plist, ln, peer)) {
2329 pthread_mutex_lock(&peer->obuf_mtx);
2330 {
2331 bgp_write(peer);
2332 }
2333 pthread_mutex_unlock(&peer->obuf_mtx);
2334 }
2335
2336 gettimeofday(&currtime, NULL);
2337 timeradd(&currtime, &sleeptime, &currtime);
2338 TIMEVAL_TO_TIMESPEC(&currtime, &next_update);
2339 }
2340
2341 // clean up
2342 pthread_cleanup_pop(1);
2343
2344 return NULL;
2345}
2346
2347/**
2348 * Turns on packet writing for a peer.
2349 */
2350void peer_writes_on(struct peer *peer)
2351{
2352 if (peer->status == Deleted)
2353 return;
2354
2355 pthread_mutex_lock(&plist_mtx);
2356 {
2357 struct listnode *ln, *nn;
2358 struct peer *p;
2359
2360 // make sure this peer isn't already in the list
2361 for (ALL_LIST_ELEMENTS(plist, ln, nn, p))
2362 if (p == peer) {
2363 pthread_mutex_unlock(&plist_mtx);
2364 return;
2365 }
2366
2367 peer_lock(peer);
2368 listnode_add(plist, peer);
2369 }
2370 pthread_mutex_unlock(&plist_mtx);
2371 peer_writes_wake();
2372}
2373
2374/**
2375 * Turns off packet writing for a peer.
2376 */
2377void peer_writes_off(struct peer *peer)
2378{
2379 struct listnode *ln, *nn;
2380 struct peer *p;
2381 pthread_mutex_lock(&plist_mtx);
2382 {
2383 for (ALL_LIST_ELEMENTS(plist, ln, nn, p))
2384 if (p == peer) {
2385 list_delete_node(plist, ln);
2386 peer_unlock(peer);
2387 break;
2388 }
2389 }
2390 pthread_mutex_unlock(&plist_mtx);
2391}
2392
2393/**
2394 * Wakes up the write thread to do work.
2395 */
2396void peer_writes_wake()
2397{
2398 pthread_cond_signal(&write_cond);
2399}