]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_packet.c
bgpd: move update group processing to main thread
[mirror_frr.git] / bgpd / bgp_packet.c
1 /* BGP packet management routine.
2 * Copyright (C) 1999 Kunihiro Ishiguro
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22 #include <sys/time.h>
23
24 #include "thread.h"
25 #include "stream.h"
26 #include "network.h"
27 #include "prefix.h"
28 #include "command.h"
29 #include "log.h"
30 #include "memory.h"
31 #include "sockunion.h" /* for inet_ntop () */
32 #include "sockopt.h"
33 #include "linklist.h"
34 #include "plist.h"
35 #include "queue.h"
36 #include "filter.h"
37
38 #include "bgpd/bgpd.h"
39 #include "bgpd/bgp_table.h"
40 #include "bgpd/bgp_dump.h"
41 #include "bgpd/bgp_attr.h"
42 #include "bgpd/bgp_debug.h"
43 #include "bgpd/bgp_fsm.h"
44 #include "bgpd/bgp_route.h"
45 #include "bgpd/bgp_packet.h"
46 #include "bgpd/bgp_open.h"
47 #include "bgpd/bgp_aspath.h"
48 #include "bgpd/bgp_community.h"
49 #include "bgpd/bgp_ecommunity.h"
50 #include "bgpd/bgp_lcommunity.h"
51 #include "bgpd/bgp_network.h"
52 #include "bgpd/bgp_mplsvpn.h"
53 #include "bgpd/bgp_evpn.h"
54 #include "bgpd/bgp_advertise.h"
55 #include "bgpd/bgp_vty.h"
56 #include "bgpd/bgp_updgrp.h"
57 #include "bgpd/bgp_label.h"
58
/*
 * Linked list of active peers, and the mutex that is held while the
 * list is walked (see bgp_generate_updgrp_packets()).
 */
static struct list *plist;
static pthread_mutex_t plist_mtx = PTHREAD_MUTEX_INITIALIZER;

/*
 * NOTE(review): presumably the condition variable the packet write
 * thread sleeps on; its users are outside this part of the file --
 * confirm.
 */
static pthread_cond_t write_cond = PTHREAD_COND_INITIALIZER;

/* Periodically scheduled thread that generates update-group updates. */
static struct thread *t_generate_updgrp_packets;

/*
 * NOTE(review): presumably the run/stop flag of the packet write
 * thread; it is not referenced in this part of the file -- confirm.
 */
bool bgp_packet_writes_thread_run;
68
69 /* Set up BGP packet marker and packet type. */
70 int bgp_packet_set_marker(struct stream *s, u_char type)
71 {
72 int i;
73
74 /* Fill in marker. */
75 for (i = 0; i < BGP_MARKER_SIZE; i++)
76 stream_putc(s, 0xff);
77
78 /* Dummy total length. This field is should be filled in later on. */
79 stream_putw(s, 0);
80
81 /* BGP packet type. */
82 stream_putc(s, type);
83
84 /* Return current stream size. */
85 return stream_get_endp(s);
86 }
87
88 /* Set BGP packet header size entry. If size is zero then use current
89 stream size. */
90 int bgp_packet_set_size(struct stream *s)
91 {
92 int cp;
93
94 /* Preserve current pointer. */
95 cp = stream_get_endp(s);
96 stream_putw_at(s, BGP_MARKER_SIZE, cp);
97
98 return cp;
99 }
100
101 /**
102 * Push a packet onto the beginning of the peer's output queue.
103 * Must be externally synchronized around 'peer'.
104 */
105 static void bgp_packet_add_unsafe(struct peer *peer, struct stream *s)
106 {
107 /* Add packet to the end of list. */
108 stream_fifo_push(peer->obuf, s);
109 peer_writes_wake();
110 }
111
112 /*
113 * Push a packet onto the beginning of the peer's output queue.
114 * This function acquires the peer's write mutex before proceeding.
115 */
116 static void bgp_packet_add(struct peer *peer, struct stream *s)
117 {
118 pthread_mutex_lock(&peer->obuf_mtx);
119 bgp_packet_add_unsafe(peer, s);
120 pthread_mutex_unlock(&peer->obuf_mtx);
121 }
122
123 /**
124 * Pop a packet off the end of the peer's output queue.
125 * Must be externally synchronized around 'peer'.
126 */
127 static void bgp_packet_delete_unsafe(struct peer *peer)
128 {
129 stream_free(stream_fifo_pop(peer->obuf));
130 }
131
132
133 /* Check file descriptor whether connect is established. */
134 static int bgp_connect_check(struct peer *peer, int change_state)
135 {
136 int status;
137 socklen_t slen;
138 int ret;
139
140 /* Anyway I have to reset read and write thread. */
141 BGP_READ_OFF(peer->t_read);
142
143 /* Check file descriptor. */
144 slen = sizeof(status);
145 ret = getsockopt(peer->fd, SOL_SOCKET, SO_ERROR, (void *)&status,
146 &slen);
147
148 /* If getsockopt is fail, this is fatal error. */
149 if (ret < 0) {
150 zlog_info("can't get sockopt for nonblocking connect");
151 BGP_EVENT_ADD(peer, TCP_fatal_error);
152 return -1;
153 }
154
155 /* When status is 0 then TCP connection is established. */
156 if (status == 0) {
157 BGP_EVENT_ADD(peer, TCP_connection_open);
158 return 1;
159 } else {
160 if (bgp_debug_neighbor_events(peer))
161 zlog_debug("%s [Event] Connect failed (%s)", peer->host,
162 safe_strerror(errno));
163 if (change_state)
164 BGP_EVENT_ADD(peer, TCP_connection_open_failed);
165 return 0;
166 }
167 }
168
169 static struct stream *bgp_update_packet_eor(struct peer *peer, afi_t afi,
170 safi_t safi)
171 {
172 struct stream *s;
173 iana_afi_t pkt_afi;
174 iana_safi_t pkt_safi;
175
176 if (DISABLE_BGP_ANNOUNCE)
177 return NULL;
178
179 if (bgp_debug_neighbor_events(peer))
180 zlog_debug("send End-of-RIB for %s to %s",
181 afi_safi_print(afi, safi), peer->host);
182
183 s = stream_new(BGP_MAX_PACKET_SIZE);
184
185 /* Make BGP update packet. */
186 bgp_packet_set_marker(s, BGP_MSG_UPDATE);
187
188 /* Unfeasible Routes Length */
189 stream_putw(s, 0);
190
191 if (afi == AFI_IP && safi == SAFI_UNICAST) {
192 /* Total Path Attribute Length */
193 stream_putw(s, 0);
194 } else {
195 /* Convert AFI, SAFI to values for packet. */
196 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
197
198 /* Total Path Attribute Length */
199 stream_putw(s, 6);
200 stream_putc(s, BGP_ATTR_FLAG_OPTIONAL);
201 stream_putc(s, BGP_ATTR_MP_UNREACH_NLRI);
202 stream_putc(s, 3);
203 stream_putw(s, pkt_afi);
204 stream_putc(s, pkt_safi);
205 }
206
207 bgp_packet_set_size(s);
208 return s;
209 }
210
211 /* Get next packet to be written. */
212 static struct stream *bgp_write_packet(struct peer *peer)
213 {
214 struct stream *s = NULL;
215 struct peer_af *paf;
216 struct bpacket *next_pkt;
217 afi_t afi;
218 safi_t safi;
219
220 /*
221 * The code beyond this part deals with update packets, proceed only
222 * if peer is Established and updates are not on hold (as part of
223 * update-delay post processing).
224 */
225 if (peer->status != Established)
226 return NULL;
227
228 if (peer->bgp && peer->bgp->main_peers_update_hold)
229 return NULL;
230
231 FOREACH_AFI_SAFI (afi, safi) {
232 paf = peer_af_find(peer, afi, safi);
233 if (!paf || !PAF_SUBGRP(paf))
234 continue;
235 next_pkt = paf->next_pkt_to_send;
236
237 /* Try to generate a packet for the peer if we are at
238 * the end of
239 * the list. Always try to push out WITHDRAWs first. */
240 if (!next_pkt || !next_pkt->buffer) {
241 next_pkt = subgroup_withdraw_packet(PAF_SUBGRP(paf));
242 if (!next_pkt || !next_pkt->buffer)
243 subgroup_update_packet(PAF_SUBGRP(paf));
244 next_pkt = paf->next_pkt_to_send;
245 }
246
247 /* Try to generate a packet for the peer if we are at
248 * the end of
249 * the list. Always try to push out WITHDRAWs first. */
250 if (!next_pkt || !next_pkt->buffer) {
251 next_pkt = subgroup_withdraw_packet(
252 PAF_SUBGRP(paf));
253 if (!next_pkt || !next_pkt->buffer)
254 subgroup_update_packet(PAF_SUBGRP(paf));
255 next_pkt = paf->next_pkt_to_send;
256 }
257
258 /* If we still don't have a packet to send to the peer,
259 * then
260 * try to find out out if we have to send eor or if not,
261 * skip to
262 * the next AFI, SAFI.
263 * Don't send the EOR prematurely... if the subgroup's
264 * coalesce
265 * timer is running, the adjacency-out structure is not
266 * created
267 * yet.
268 */
269 if (!next_pkt || !next_pkt->buffer) {
270 if (CHECK_FLAG(peer->cap,
271 PEER_CAP_RESTART_RCV)) {
272 if (!(PAF_SUBGRP(paf))->t_coalesce
273 && peer->afc_nego[afi][safi]
274 && peer->synctime
275 && !CHECK_FLAG(
276 peer->af_sflags[afi]
277 [safi],
278 PEER_STATUS_EOR_SEND)) {
279 SET_FLAG(peer->af_sflags[afi]
280 [safi],
281 PEER_STATUS_EOR_SEND);
282
283 if ((s = bgp_update_packet_eor(
284 peer, afi, safi)))
285 bgp_packet_add(peer, s);
286
287 return s;
288 }
289 }
290 }
291 continue;
292 }
293
294
295 /* Found a packet template to send, overwrite packet
296 * with appropriate
297 * attributes from peer and advance peer */
298 s = bpacket_reformat_for_peer(next_pkt, paf);
299 bgp_packet_add(peer, s);
300 bpacket_queue_advance_peer(paf);
301 return s;
302 }
303
304 return NULL;
305 }
306
307 static int bgp_generate_updgrp_packets(struct thread *thread)
308 {
309 struct listnode *ln;
310 struct peer *peer;
311 pthread_mutex_lock(&plist_mtx);
312 {
313 for (ALL_LIST_ELEMENTS_RO(plist, ln, peer))
314 while (bgp_write_packet(peer))
315 ;
316
317 t_generate_updgrp_packets = NULL;
318 }
319 pthread_mutex_unlock(&plist_mtx);
320 return 0;
321 }
322
323
324 /*
325 * Creates a BGP Keepalive packet and appends it to the peer's output queue.
326 */
327 void bgp_keepalive_send(struct peer *peer)
328 {
329 struct stream *s;
330
331 s = stream_new(BGP_MAX_PACKET_SIZE);
332
333 /* Make keepalive packet. */
334 bgp_packet_set_marker(s, BGP_MSG_KEEPALIVE);
335
336 /* Set packet size. */
337 (void)bgp_packet_set_size(s);
338
339 /* Dump packet if debug option is set. */
340 /* bgp_packet_dump (s); */
341
342 if (bgp_debug_keepalive(peer))
343 zlog_debug("%s sending KEEPALIVE", peer->host);
344
345 /* Add packet to the peer. */
346 bgp_packet_add(peer, s);
347 }
348
349 /*
350 * Creates a BGP Open packet and appends it to the peer's output queue.
351 * Sets capabilities as necessary.
352 */
353 void bgp_open_send(struct peer *peer)
354 {
355 struct stream *s;
356 u_int16_t send_holdtime;
357 as_t local_as;
358
359 if (PEER_OR_GROUP_TIMER_SET(peer))
360 send_holdtime = peer->holdtime;
361 else
362 send_holdtime = peer->bgp->default_holdtime;
363
364 /* local-as Change */
365 if (peer->change_local_as)
366 local_as = peer->change_local_as;
367 else
368 local_as = peer->local_as;
369
370 s = stream_new(BGP_MAX_PACKET_SIZE);
371
372 /* Make open packet. */
373 bgp_packet_set_marker(s, BGP_MSG_OPEN);
374
375 /* Set open packet values. */
376 stream_putc(s, BGP_VERSION_4); /* BGP version */
377 stream_putw(s,
378 (local_as <= BGP_AS_MAX) ? (u_int16_t)local_as
379 : BGP_AS_TRANS);
380 stream_putw(s, send_holdtime); /* Hold Time */
381 stream_put_in_addr(s, &peer->local_id); /* BGP Identifier */
382
383 /* Set capability code. */
384 bgp_open_capability(s, peer);
385
386 /* Set BGP packet length. */
387 (void)bgp_packet_set_size(s);
388
389 if (bgp_debug_neighbor_events(peer))
390 zlog_debug(
391 "%s sending OPEN, version %d, my as %u, holdtime %d, id %s",
392 peer->host, BGP_VERSION_4, local_as, send_holdtime,
393 inet_ntoa(peer->local_id));
394
395 /* Dump packet if debug option is set. */
396 /* bgp_packet_dump (s); */
397
398 /* Add packet to the peer. */
399 bgp_packet_add(peer, s);
400 }
401
402 /*
403 * Creates a BGP Notify and appends it to the peer's output queue.
404 *
405 * This function awakens the write thread to ensure the packet
406 * gets out ASAP.
407 *
408 * @param peer
409 * @param code BGP error code
410 * @param sub_code BGP error subcode
411 * @param data Data portion
412 * @param datalen length of data portion
413 */
414 void bgp_notify_send_with_data(struct peer *peer, u_char code, u_char sub_code,
415 u_char *data, size_t datalen)
416 {
417 struct stream *s;
418 int length;
419
420 /* Allocate new stream. */
421 s = stream_new(BGP_MAX_PACKET_SIZE);
422
423 /* Make notify packet. */
424 bgp_packet_set_marker(s, BGP_MSG_NOTIFY);
425
426 /* Set notify packet values. */
427 stream_putc(s, code); /* BGP notify code */
428 stream_putc(s, sub_code); /* BGP notify sub_code */
429
430 /* If notify data is present. */
431 if (data)
432 stream_write(s, data, datalen);
433
434 /* Set BGP packet length. */
435 length = bgp_packet_set_size(s);
436
437 /* Add packet to the peer. */
438 pthread_mutex_lock(&peer->obuf_mtx);
439 stream_fifo_clean(peer->obuf);
440 pthread_mutex_unlock(&peer->obuf_mtx);
441
442 /* For debug */
443 {
444 struct bgp_notify bgp_notify;
445 int first = 0;
446 int i;
447 char c[4];
448
449 bgp_notify.code = code;
450 bgp_notify.subcode = sub_code;
451 bgp_notify.data = NULL;
452 bgp_notify.length = length - BGP_MSG_NOTIFY_MIN_SIZE;
453 bgp_notify.raw_data = data;
454
455 peer->notify.code = bgp_notify.code;
456 peer->notify.subcode = bgp_notify.subcode;
457
458 if (bgp_notify.length) {
459 bgp_notify.data =
460 XMALLOC(MTYPE_TMP, bgp_notify.length * 3);
461 for (i = 0; i < bgp_notify.length; i++)
462 if (first) {
463 sprintf(c, " %02x", data[i]);
464 strcat(bgp_notify.data, c);
465 } else {
466 first = 1;
467 sprintf(c, "%02x", data[i]);
468 strcpy(bgp_notify.data, c);
469 }
470 }
471 bgp_notify_print(peer, &bgp_notify, "sending");
472
473 if (bgp_notify.data) {
474 XFREE(MTYPE_TMP, bgp_notify.data);
475 bgp_notify.data = NULL;
476 bgp_notify.length = 0;
477 }
478 }
479
480 /* peer reset cause */
481 if (code == BGP_NOTIFY_CEASE) {
482 if (sub_code == BGP_NOTIFY_CEASE_ADMIN_RESET)
483 peer->last_reset = PEER_DOWN_USER_RESET;
484 else if (sub_code == BGP_NOTIFY_CEASE_ADMIN_SHUTDOWN)
485 peer->last_reset = PEER_DOWN_USER_SHUTDOWN;
486 else
487 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
488 } else
489 peer->last_reset = PEER_DOWN_NOTIFY_SEND;
490
491 /* Add packet to peer's output queue */
492 bgp_packet_add(peer, s);
493 /* Wake up the write thread to get the notify out ASAP */
494 peer_writes_wake();
495 }
496
497 /*
498 * Creates a BGP Notify and appends it to the peer's output queue.
499 *
500 * This function awakens the write thread to ensure the packet
501 * gets out ASAP.
502 *
503 * @param peer
504 * @param code BGP error code
505 * @param sub_code BGP error subcode
506 */
507 void bgp_notify_send(struct peer *peer, u_char code, u_char sub_code)
508 {
509 bgp_notify_send_with_data(peer, code, sub_code, NULL, 0);
510 }
511
512 /*
513 * Creates BGP Route Refresh packet and appends it to the peer's output queue.
514 *
515 * @param peer
516 * @param afi Address Family Identifier
517 * @param safi Subsequent Address Family Identifier
518 * @param orf_type Outbound Route Filtering type
519 * @param when_to_refresh Whether to refresh immediately or defer
520 * @param remove Whether to remove ORF for specified AFI/SAFI
521 */
522 void bgp_route_refresh_send(struct peer *peer, afi_t afi, safi_t safi,
523 u_char orf_type, u_char when_to_refresh, int remove)
524 {
525 struct stream *s;
526 struct bgp_filter *filter;
527 int orf_refresh = 0;
528 iana_afi_t pkt_afi;
529 iana_safi_t pkt_safi;
530
531 if (DISABLE_BGP_ANNOUNCE)
532 return;
533
534 filter = &peer->filter[afi][safi];
535
536 /* Convert AFI, SAFI to values for packet. */
537 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
538
539 s = stream_new(BGP_MAX_PACKET_SIZE);
540
541 /* Make BGP update packet. */
542 if (CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_NEW_RCV))
543 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_NEW);
544 else
545 bgp_packet_set_marker(s, BGP_MSG_ROUTE_REFRESH_OLD);
546
547 /* Encode Route Refresh message. */
548 stream_putw(s, pkt_afi);
549 stream_putc(s, 0);
550 stream_putc(s, pkt_safi);
551
552 if (orf_type == ORF_TYPE_PREFIX || orf_type == ORF_TYPE_PREFIX_OLD)
553 if (remove || filter->plist[FILTER_IN].plist) {
554 u_int16_t orf_len;
555 unsigned long orfp;
556
557 orf_refresh = 1;
558 stream_putc(s, when_to_refresh);
559 stream_putc(s, orf_type);
560 orfp = stream_get_endp(s);
561 stream_putw(s, 0);
562
563 if (remove) {
564 UNSET_FLAG(peer->af_sflags[afi][safi],
565 PEER_STATUS_ORF_PREFIX_SEND);
566 stream_putc(s, ORF_COMMON_PART_REMOVE_ALL);
567 if (bgp_debug_neighbor_events(peer))
568 zlog_debug(
569 "%s sending REFRESH_REQ to remove ORF(%d) (%s) for afi/safi: %d/%d",
570 peer->host, orf_type,
571 (when_to_refresh == REFRESH_DEFER
572 ? "defer"
573 : "immediate"),
574 pkt_afi, pkt_safi);
575 } else {
576 SET_FLAG(peer->af_sflags[afi][safi],
577 PEER_STATUS_ORF_PREFIX_SEND);
578 prefix_bgp_orf_entry(
579 s, filter->plist[FILTER_IN].plist,
580 ORF_COMMON_PART_ADD,
581 ORF_COMMON_PART_PERMIT,
582 ORF_COMMON_PART_DENY);
583 if (bgp_debug_neighbor_events(peer))
584 zlog_debug(
585 "%s sending REFRESH_REQ with pfxlist ORF(%d) (%s) for afi/safi: %d/%d",
586 peer->host, orf_type,
587 (when_to_refresh == REFRESH_DEFER
588 ? "defer"
589 : "immediate"),
590 pkt_afi, pkt_safi);
591 }
592
593 /* Total ORF Entry Len. */
594 orf_len = stream_get_endp(s) - orfp - 2;
595 stream_putw_at(s, orfp, orf_len);
596 }
597
598 /* Set packet size. */
599 (void)bgp_packet_set_size(s);
600
601 if (bgp_debug_neighbor_events(peer)) {
602 if (!orf_refresh)
603 zlog_debug("%s sending REFRESH_REQ for afi/safi: %d/%d",
604 peer->host, pkt_afi, pkt_safi);
605 }
606
607 /* Add packet to the peer. */
608 bgp_packet_add(peer, s);
609 }
610
611 /*
612 * Create a BGP Capability packet and append it to the peer's output queue.
613 *
614 * @param peer
615 * @param afi Address Family Identifier
616 * @param safi Subsequent Address Family Identifier
617 * @param capability_code BGP Capability Code
618 * @param action Set or Remove capability
619 */
620 void bgp_capability_send(struct peer *peer, afi_t afi, safi_t safi,
621 int capability_code, int action)
622 {
623 struct stream *s;
624 iana_afi_t pkt_afi;
625 iana_safi_t pkt_safi;
626
627 /* Convert AFI, SAFI to values for packet. */
628 bgp_map_afi_safi_int2iana(afi, safi, &pkt_afi, &pkt_safi);
629
630 s = stream_new(BGP_MAX_PACKET_SIZE);
631
632 /* Make BGP update packet. */
633 bgp_packet_set_marker(s, BGP_MSG_CAPABILITY);
634
635 /* Encode MP_EXT capability. */
636 if (capability_code == CAPABILITY_CODE_MP) {
637 stream_putc(s, action);
638 stream_putc(s, CAPABILITY_CODE_MP);
639 stream_putc(s, CAPABILITY_CODE_MP_LEN);
640 stream_putw(s, pkt_afi);
641 stream_putc(s, 0);
642 stream_putc(s, pkt_safi);
643
644 if (bgp_debug_neighbor_events(peer))
645 zlog_debug(
646 "%s sending CAPABILITY has %s MP_EXT CAP for afi/safi: %d/%d",
647 peer->host,
648 action == CAPABILITY_ACTION_SET ? "Advertising"
649 : "Removing",
650 pkt_afi, pkt_safi);
651 }
652
653 /* Set packet size. */
654 (void)bgp_packet_set_size(s);
655
656 /* Add packet to the peer. */
657 bgp_packet_add(peer, s);
658 }
659
660 /* RFC1771 6.8 Connection collision detection. */
661 static int bgp_collision_detect(struct peer *new, struct in_addr remote_id)
662 {
663 struct peer *peer;
664
665 /* Upon receipt of an OPEN message, the local system must examine
666 all of its connections that are in the OpenConfirm state. A BGP
667 speaker may also examine connections in an OpenSent state if it
668 knows the BGP Identifier of the peer by means outside of the
669 protocol. If among these connections there is a connection to a
670 remote BGP speaker whose BGP Identifier equals the one in the
671 OPEN message, then the local system performs the following
672 collision resolution procedure: */
673
674 if ((peer = new->doppelganger) != NULL) {
675 /* Do not accept the new connection in Established or Clearing
676 * states.
677 * Note that a peer GR is handled by closing the existing
678 * connection
679 * upon receipt of new one.
680 */
681 if (peer->status == Established || peer->status == Clearing) {
682 bgp_notify_send(new, BGP_NOTIFY_CEASE,
683 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
684 return (-1);
685 } else if ((peer->status == OpenConfirm)
686 || (peer->status == OpenSent)) {
687 /* 1. The BGP Identifier of the local system is compared
688 to
689 the BGP Identifier of the remote system (as specified
690 in
691 the OPEN message). */
692
693 if (ntohl(peer->local_id.s_addr)
694 < ntohl(remote_id.s_addr))
695 if (!CHECK_FLAG(peer->sflags,
696 PEER_STATUS_ACCEPT_PEER)) {
697 /* 2. If the value of the local BGP
698 Identifier is less
699 than the remote one, the local system
700 closes BGP
701 connection that already exists (the
702 one that is
703 already in the OpenConfirm state),
704 and accepts BGP
705 connection initiated by the remote
706 system. */
707 bgp_notify_send(
708 peer, BGP_NOTIFY_CEASE,
709 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
710 return 1;
711 } else {
712 bgp_notify_send(
713 new, BGP_NOTIFY_CEASE,
714 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
715 return -1;
716 }
717 else {
718 /* 3. Otherwise, the local system closes newly
719 created
720 BGP connection (the one associated with the
721 newly
722 received OPEN message), and continues to use
723 the
724 existing one (the one that is already in the
725 OpenConfirm state). */
726 if (CHECK_FLAG(peer->sflags,
727 PEER_STATUS_ACCEPT_PEER)) {
728 bgp_notify_send(
729 peer, BGP_NOTIFY_CEASE,
730 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
731 return 1;
732 } else {
733 bgp_notify_send(
734 new, BGP_NOTIFY_CEASE,
735 BGP_NOTIFY_CEASE_COLLISION_RESOLUTION);
736 return -1;
737 }
738 }
739 }
740 }
741 return 0;
742 }
743
744 static int bgp_open_receive(struct peer *peer, bgp_size_t size)
745 {
746 int ret;
747 u_char version;
748 u_char optlen;
749 u_int16_t holdtime;
750 u_int16_t send_holdtime;
751 as_t remote_as;
752 as_t as4 = 0;
753 struct in_addr remote_id;
754 int mp_capability;
755 u_int8_t notify_data_remote_as[2];
756 u_int8_t notify_data_remote_as4[4];
757 u_int8_t notify_data_remote_id[4];
758 u_int16_t *holdtime_ptr;
759
760 /* Parse open packet. */
761 version = stream_getc(peer->ibuf);
762 memcpy(notify_data_remote_as, stream_pnt(peer->ibuf), 2);
763 remote_as = stream_getw(peer->ibuf);
764 holdtime_ptr = (u_int16_t *)stream_pnt(peer->ibuf);
765 holdtime = stream_getw(peer->ibuf);
766 memcpy(notify_data_remote_id, stream_pnt(peer->ibuf), 4);
767 remote_id.s_addr = stream_get_ipv4(peer->ibuf);
768
769 /* Receive OPEN message log */
770 if (bgp_debug_neighbor_events(peer))
771 zlog_debug(
772 "%s rcv OPEN, version %d, remote-as (in open) %u,"
773 " holdtime %d, id %s",
774 peer->host, version, remote_as, holdtime,
775 inet_ntoa(remote_id));
776
777 /* BEGIN to read the capability here, but dont do it yet */
778 mp_capability = 0;
779 optlen = stream_getc(peer->ibuf);
780
781 if (optlen != 0) {
782 /* If not enough bytes, it is an error. */
783 if (STREAM_READABLE(peer->ibuf) < optlen) {
784 bgp_notify_send(peer, BGP_NOTIFY_OPEN_ERR,
785 BGP_NOTIFY_OPEN_MALFORMED_ATTR);
786 return -1;
787 }
788
789 /* We need the as4 capability value *right now* because
790 * if it is there, we have not got the remote_as yet, and
791 * without
792 * that we do not know which peer is connecting to us now.
793 */
794 as4 = peek_for_as4_capability(peer, optlen);
795 memcpy(notify_data_remote_as4, &as4, 4);
796 }
797
798 /* Just in case we have a silly peer who sends AS4 capability set to 0
799 */
800 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV) && !as4) {
801 zlog_err("%s bad OPEN, got AS4 capability, but AS4 set to 0",
802 peer->host);
803 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
804 BGP_NOTIFY_OPEN_BAD_PEER_AS,
805 notify_data_remote_as4, 4);
806 return -1;
807 }
808
809 if (remote_as == BGP_AS_TRANS) {
810 /* Take the AS4 from the capability. We must have received the
811 * capability now! Otherwise we have a asn16 peer who uses
812 * BGP_AS_TRANS, for some unknown reason.
813 */
814 if (as4 == BGP_AS_TRANS) {
815 zlog_err(
816 "%s [AS4] NEW speaker using AS_TRANS for AS4, not allowed",
817 peer->host);
818 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
819 BGP_NOTIFY_OPEN_BAD_PEER_AS,
820 notify_data_remote_as4, 4);
821 return -1;
822 }
823
824 if (!as4 && BGP_DEBUG(as4, AS4))
825 zlog_debug(
826 "%s [AS4] OPEN remote_as is AS_TRANS, but no AS4."
827 " Odd, but proceeding.",
828 peer->host);
829 else if (as4 < BGP_AS_MAX && BGP_DEBUG(as4, AS4))
830 zlog_debug(
831 "%s [AS4] OPEN remote_as is AS_TRANS, but AS4 (%u) fits "
832 "in 2-bytes, very odd peer.",
833 peer->host, as4);
834 if (as4)
835 remote_as = as4;
836 } else {
837 /* We may have a partner with AS4 who has an asno < BGP_AS_MAX
838 */
839 /* If we have got the capability, peer->as4cap must match
840 * remote_as */
841 if (CHECK_FLAG(peer->cap, PEER_CAP_AS4_RCV)
842 && as4 != remote_as) {
843 /* raise error, log this, close session */
844 zlog_err(
845 "%s bad OPEN, got AS4 capability, but remote_as %u"
846 " mismatch with 16bit 'myasn' %u in open",
847 peer->host, as4, remote_as);
848 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
849 BGP_NOTIFY_OPEN_BAD_PEER_AS,
850 notify_data_remote_as4, 4);
851 return -1;
852 }
853 }
854
855 /* remote router-id check. */
856 if (remote_id.s_addr == 0 || IPV4_CLASS_DE(ntohl(remote_id.s_addr))
857 || ntohl(peer->local_id.s_addr) == ntohl(remote_id.s_addr)) {
858 if (bgp_debug_neighbor_events(peer))
859 zlog_debug("%s bad OPEN, wrong router identifier %s",
860 peer->host, inet_ntoa(remote_id));
861 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
862 BGP_NOTIFY_OPEN_BAD_BGP_IDENT,
863 notify_data_remote_id, 4);
864 return -1;
865 }
866
867 /* Set remote router-id */
868 peer->remote_id = remote_id;
869
870 /* Peer BGP version check. */
871 if (version != BGP_VERSION_4) {
872 u_int16_t maxver = htons(BGP_VERSION_4);
873 /* XXX this reply may not be correct if version < 4 XXX */
874 if (bgp_debug_neighbor_events(peer))
875 zlog_debug(
876 "%s bad protocol version, remote requested %d, local request %d",
877 peer->host, version, BGP_VERSION_4);
878 /* Data must be in network byte order here */
879 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
880 BGP_NOTIFY_OPEN_UNSUP_VERSION,
881 (u_int8_t *)&maxver, 2);
882 return -1;
883 }
884
885 /* Check neighbor as number. */
886 if (peer->as_type == AS_UNSPECIFIED) {
887 if (bgp_debug_neighbor_events(peer))
888 zlog_debug(
889 "%s bad OPEN, remote AS is unspecified currently",
890 peer->host);
891 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
892 BGP_NOTIFY_OPEN_BAD_PEER_AS,
893 notify_data_remote_as, 2);
894 return -1;
895 } else if (peer->as_type == AS_INTERNAL) {
896 if (remote_as != peer->bgp->as) {
897 if (bgp_debug_neighbor_events(peer))
898 zlog_debug(
899 "%s bad OPEN, remote AS is %u, internal specified",
900 peer->host, remote_as);
901 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
902 BGP_NOTIFY_OPEN_BAD_PEER_AS,
903 notify_data_remote_as, 2);
904 return -1;
905 }
906 peer->as = peer->local_as;
907 } else if (peer->as_type == AS_EXTERNAL) {
908 if (remote_as == peer->bgp->as) {
909 if (bgp_debug_neighbor_events(peer))
910 zlog_debug(
911 "%s bad OPEN, remote AS is %u, external specified",
912 peer->host, remote_as);
913 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
914 BGP_NOTIFY_OPEN_BAD_PEER_AS,
915 notify_data_remote_as, 2);
916 return -1;
917 }
918 peer->as = remote_as;
919 } else if ((peer->as_type == AS_SPECIFIED) && (remote_as != peer->as)) {
920 if (bgp_debug_neighbor_events(peer))
921 zlog_debug("%s bad OPEN, remote AS is %u, expected %u",
922 peer->host, remote_as, peer->as);
923 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
924 BGP_NOTIFY_OPEN_BAD_PEER_AS,
925 notify_data_remote_as, 2);
926 return -1;
927 }
928
929 /* From the rfc: Upon receipt of an OPEN message, a BGP speaker MUST
930 calculate the value of the Hold Timer by using the smaller of its
931 configured Hold Time and the Hold Time received in the OPEN message.
932 The Hold Time MUST be either zero or at least three seconds. An
933 implementation may reject connections on the basis of the Hold Time.
934 */
935
936 if (holdtime < 3 && holdtime != 0) {
937 bgp_notify_send_with_data(peer, BGP_NOTIFY_OPEN_ERR,
938 BGP_NOTIFY_OPEN_UNACEP_HOLDTIME,
939 (u_char *)holdtime_ptr, 2);
940 return -1;
941 }
942
943 /* From the rfc: A reasonable maximum time between KEEPALIVE messages
944 would be one third of the Hold Time interval. KEEPALIVE messages
945 MUST NOT be sent more frequently than one per second. An
946 implementation MAY adjust the rate at which it sends KEEPALIVE
947 messages as a function of the Hold Time interval. */
948
949 if (PEER_OR_GROUP_TIMER_SET(peer))
950 send_holdtime = peer->holdtime;
951 else
952 send_holdtime = peer->bgp->default_holdtime;
953
954 if (holdtime < send_holdtime)
955 peer->v_holdtime = holdtime;
956 else
957 peer->v_holdtime = send_holdtime;
958
959 if ((PEER_OR_GROUP_TIMER_SET(peer))
960 && (peer->keepalive < peer->v_holdtime / 3))
961 peer->v_keepalive = peer->keepalive;
962 else
963 peer->v_keepalive = peer->v_holdtime / 3;
964
965 /* Open option part parse. */
966 if (optlen != 0) {
967 if ((ret = bgp_open_option_parse(peer, optlen, &mp_capability))
968 < 0)
969 return ret;
970 } else {
971 if (bgp_debug_neighbor_events(peer))
972 zlog_debug("%s rcvd OPEN w/ OPTION parameter len: 0",
973 peer->host);
974 }
975
976 /*
977 * Assume that the peer supports the locally configured set of
978 * AFI/SAFIs if the peer did not send us any Mulitiprotocol
979 * capabilities, or if 'override-capability' is configured.
980 */
981 if (!mp_capability
982 || CHECK_FLAG(peer->flags, PEER_FLAG_OVERRIDE_CAPABILITY)) {
983 peer->afc_nego[AFI_IP][SAFI_UNICAST] =
984 peer->afc[AFI_IP][SAFI_UNICAST];
985 peer->afc_nego[AFI_IP][SAFI_MULTICAST] =
986 peer->afc[AFI_IP][SAFI_MULTICAST];
987 peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST] =
988 peer->afc[AFI_IP][SAFI_LABELED_UNICAST];
989 peer->afc_nego[AFI_IP6][SAFI_UNICAST] =
990 peer->afc[AFI_IP6][SAFI_UNICAST];
991 peer->afc_nego[AFI_IP6][SAFI_MULTICAST] =
992 peer->afc[AFI_IP6][SAFI_MULTICAST];
993 peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST] =
994 peer->afc[AFI_IP6][SAFI_LABELED_UNICAST];
995 peer->afc_nego[AFI_L2VPN][SAFI_EVPN] =
996 peer->afc[AFI_L2VPN][SAFI_EVPN];
997 }
998
999 /* When collision is detected and this peer is closed. Retrun
1000 immidiately. */
1001 ret = bgp_collision_detect(peer, remote_id);
1002 if (ret < 0)
1003 return ret;
1004
1005 /* Get sockname. */
1006 if ((ret = bgp_getsockname(peer)) < 0) {
1007 zlog_err("%s: bgp_getsockname() failed for peer: %s",
1008 __FUNCTION__, peer->host);
1009 return (ret);
1010 }
1011
1012 /* Verify valid local address present based on negotiated
1013 * address-families. */
1014 if (peer->afc_nego[AFI_IP][SAFI_UNICAST]
1015 || peer->afc_nego[AFI_IP][SAFI_LABELED_UNICAST]
1016 || peer->afc_nego[AFI_IP][SAFI_MULTICAST]
1017 || peer->afc_nego[AFI_IP][SAFI_MPLS_VPN]
1018 || peer->afc_nego[AFI_IP][SAFI_ENCAP]) {
1019 if (!peer->nexthop.v4.s_addr) {
1020 #if defined(HAVE_CUMULUS)
1021 zlog_err(
1022 "%s: No local IPv4 addr resetting connection, fd %d",
1023 peer->host, peer->fd);
1024 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1025 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
1026 return -1;
1027 #endif
1028 }
1029 }
1030 if (peer->afc_nego[AFI_IP6][SAFI_UNICAST]
1031 || peer->afc_nego[AFI_IP6][SAFI_LABELED_UNICAST]
1032 || peer->afc_nego[AFI_IP6][SAFI_MULTICAST]
1033 || peer->afc_nego[AFI_IP6][SAFI_MPLS_VPN]
1034 || peer->afc_nego[AFI_IP6][SAFI_ENCAP]) {
1035 if (IN6_IS_ADDR_UNSPECIFIED(&peer->nexthop.v6_global)) {
1036 #if defined(HAVE_CUMULUS)
1037 zlog_err(
1038 "%s: No local IPv6 addr resetting connection, fd %d",
1039 peer->host, peer->fd);
1040 bgp_notify_send(peer, BGP_NOTIFY_CEASE,
1041 BGP_NOTIFY_SUBCODE_UNSPECIFIC);
1042 return -1;
1043 #endif
1044 }
1045 }
1046 peer->rtt = sockopt_tcp_rtt(peer->fd);
1047
1048 if ((ret = bgp_event_update(peer, Receive_OPEN_message)) < 0) {
1049 zlog_err("%s: BGP event update failed for peer: %s",
1050 __FUNCTION__, peer->host);
1051 /* DD: bgp send notify and reset state */
1052 return (ret);
1053 }
1054
1055 peer->packet_size = 0;
1056 if (peer->ibuf)
1057 stream_reset(peer->ibuf);
1058
1059 return 0;
1060 }
1061
1062 /* Called when there is a change in the EOR(implicit or explicit) status of a
1063 peer.
1064 Ends the update-delay if all expected peers are done with EORs. */
1065 void bgp_check_update_delay(struct bgp *bgp)
1066 {
1067 struct listnode *node, *nnode;
1068 struct peer *peer = NULL;
1069
1070 if (bgp_debug_neighbor_events(peer))
1071 zlog_debug("Checking update delay, T: %d R: %d I:%d E: %d",
1072 bgp->established, bgp->restarted_peers,
1073 bgp->implicit_eors, bgp->explicit_eors);
1074
1075 if (bgp->established
1076 <= bgp->restarted_peers + bgp->implicit_eors + bgp->explicit_eors) {
1077 /* This is an extra sanity check to make sure we wait for all
1078 the
1079 eligible configured peers. This check is performed if
1080 establish wait
1081 timer is on, or establish wait option is not given with the
1082 update-delay command */
1083 if (bgp->t_establish_wait
1084 || (bgp->v_establish_wait == bgp->v_update_delay))
1085 for (ALL_LIST_ELEMENTS(bgp->peer, node, nnode, peer)) {
1086 if (CHECK_FLAG(peer->flags,
1087 PEER_FLAG_CONFIG_NODE)
1088 && !CHECK_FLAG(peer->flags,
1089 PEER_FLAG_SHUTDOWN)
1090 && !peer->update_delay_over) {
1091 if (bgp_debug_neighbor_events(peer))
1092 zlog_debug(
1093 " Peer %s pending, continuing read-only mode",
1094 peer->host);
1095 return;
1096 }
1097 }
1098
1099 zlog_info(
1100 "Update delay ended, restarted: %d, EORs implicit: %d, explicit: %d",
1101 bgp->restarted_peers, bgp->implicit_eors,
1102 bgp->explicit_eors);
1103 bgp_update_delay_end(bgp);
1104 }
1105 }
1106
1107 /* Called if peer is known to have restarted. The restart-state bit in
1108 Graceful-Restart capability is used for that */
1109 void bgp_update_restarted_peers(struct peer *peer)
1110 {
1111 if (!bgp_update_delay_active(peer->bgp))
1112 return; /* BGP update delay has ended */
1113 if (peer->update_delay_over)
1114 return; /* This peer has already been considered */
1115
1116 if (bgp_debug_neighbor_events(peer))
1117 zlog_debug("Peer %s: Checking restarted", peer->host);
1118
1119 if (peer->status == Established) {
1120 peer->update_delay_over = 1;
1121 peer->bgp->restarted_peers++;
1122 bgp_check_update_delay(peer->bgp);
1123 }
1124 }
1125
1126 /* Called as peer receives a keep-alive. Determines if this occurence can be
1127 taken as an implicit EOR for this peer.
1128 NOTE: The very first keep-alive after the Established state of a peer is
1129 considered implicit EOR for the update-delay purposes */
1130 void bgp_update_implicit_eors(struct peer *peer)
1131 {
1132 if (!bgp_update_delay_active(peer->bgp))
1133 return; /* BGP update delay has ended */
1134 if (peer->update_delay_over)
1135 return; /* This peer has already been considered */
1136
1137 if (bgp_debug_neighbor_events(peer))
1138 zlog_debug("Peer %s: Checking implicit EORs", peer->host);
1139
1140 if (peer->status == Established) {
1141 peer->update_delay_over = 1;
1142 peer->bgp->implicit_eors++;
1143 bgp_check_update_delay(peer->bgp);
1144 }
1145 }
1146
1147 /* Should be called only when there is a change in the EOR_RECEIVED status
1148 for any afi/safi on a peer */
1149 static void bgp_update_explicit_eors(struct peer *peer)
1150 {
1151 afi_t afi;
1152 safi_t safi;
1153
1154 if (!bgp_update_delay_active(peer->bgp))
1155 return; /* BGP update delay has ended */
1156 if (peer->update_delay_over)
1157 return; /* This peer has already been considered */
1158
1159 if (bgp_debug_neighbor_events(peer))
1160 zlog_debug("Peer %s: Checking explicit EORs", peer->host);
1161
1162 FOREACH_AFI_SAFI (afi, safi) {
1163 if (peer->afc_nego[afi][safi]
1164 && !CHECK_FLAG(peer->af_sflags[afi][safi],
1165 PEER_STATUS_EOR_RECEIVED)) {
1166 if (bgp_debug_neighbor_events(peer))
1167 zlog_debug(
1168 " afi %d safi %d didnt receive EOR",
1169 afi, safi);
1170 return;
1171 }
1172 }
1173
1174 peer->update_delay_over = 1;
1175 peer->bgp->explicit_eors++;
1176 bgp_check_update_delay(peer->bgp);
1177 }
1178
1179 /* Frontend for NLRI parsing, to fan-out to AFI/SAFI specific parsers
1180 * mp_withdraw, if set, is used to nullify attr structure on most of the calling
1181 * safi function
1182 * and for evpn, passed as parameter
1183 */
1184 int bgp_nlri_parse(struct peer *peer, struct attr *attr,
1185 struct bgp_nlri *packet, int mp_withdraw)
1186 {
1187 switch (packet->safi) {
1188 case SAFI_UNICAST:
1189 case SAFI_MULTICAST:
1190 return bgp_nlri_parse_ip(peer, mp_withdraw ? NULL : attr,
1191 packet);
1192 case SAFI_LABELED_UNICAST:
1193 return bgp_nlri_parse_label(peer, mp_withdraw ? NULL : attr,
1194 packet);
1195 case SAFI_MPLS_VPN:
1196 return bgp_nlri_parse_vpn(peer, mp_withdraw ? NULL : attr,
1197 packet);
1198 case SAFI_EVPN:
1199 return bgp_nlri_parse_evpn(peer, attr, packet, mp_withdraw);
1200 default:
1201 return -1;
1202 }
1203 }
1204
1205 /* Parse BGP Update packet and make attribute object. */
1206 static int bgp_update_receive(struct peer *peer, bgp_size_t size)
1207 {
1208 int ret, nlri_ret;
1209 u_char *end;
1210 struct stream *s;
1211 struct attr attr;
1212 bgp_size_t attribute_len;
1213 bgp_size_t update_len;
1214 bgp_size_t withdraw_len;
1215
1216 enum NLRI_TYPES {
1217 NLRI_UPDATE,
1218 NLRI_WITHDRAW,
1219 NLRI_MP_UPDATE,
1220 NLRI_MP_WITHDRAW,
1221 NLRI_TYPE_MAX
1222 };
1223 struct bgp_nlri nlris[NLRI_TYPE_MAX];
1224
1225 /* Status must be Established. */
1226 if (peer->status != Established) {
1227 zlog_err("%s [FSM] Update packet received under status %s",
1228 peer->host,
1229 lookup_msg(bgp_status_msg, peer->status, NULL));
1230 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
1231 return -1;
1232 }
1233
1234 /* Set initial values. */
1235 memset(&attr, 0, sizeof(struct attr));
1236 attr.label_index = BGP_INVALID_LABEL_INDEX;
1237 attr.label = MPLS_INVALID_LABEL;
1238 memset(&nlris, 0, sizeof(nlris));
1239 memset(peer->rcvd_attr_str, 0, BUFSIZ);
1240 peer->rcvd_attr_printed = 0;
1241
1242 s = peer->ibuf;
1243 end = stream_pnt(s) + size;
1244
1245 /* RFC1771 6.3 If the Unfeasible Routes Length or Total Attribute
1246 Length is too large (i.e., if Unfeasible Routes Length + Total
1247 Attribute Length + 23 exceeds the message Length), then the Error
1248 Subcode is set to Malformed Attribute List. */
1249 if (stream_pnt(s) + 2 > end) {
1250 zlog_err(
1251 "%s [Error] Update packet error"
1252 " (packet length is short for unfeasible length)",
1253 peer->host);
1254 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1255 BGP_NOTIFY_UPDATE_MAL_ATTR);
1256 return -1;
1257 }
1258
1259 /* Unfeasible Route Length. */
1260 withdraw_len = stream_getw(s);
1261
1262 /* Unfeasible Route Length check. */
1263 if (stream_pnt(s) + withdraw_len > end) {
1264 zlog_err(
1265 "%s [Error] Update packet error"
1266 " (packet unfeasible length overflow %d)",
1267 peer->host, withdraw_len);
1268 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1269 BGP_NOTIFY_UPDATE_MAL_ATTR);
1270 return -1;
1271 }
1272
1273 /* Unfeasible Route packet format check. */
1274 if (withdraw_len > 0) {
1275 nlris[NLRI_WITHDRAW].afi = AFI_IP;
1276 nlris[NLRI_WITHDRAW].safi = SAFI_UNICAST;
1277 nlris[NLRI_WITHDRAW].nlri = stream_pnt(s);
1278 nlris[NLRI_WITHDRAW].length = withdraw_len;
1279 stream_forward_getp(s, withdraw_len);
1280 }
1281
1282 /* Attribute total length check. */
1283 if (stream_pnt(s) + 2 > end) {
1284 zlog_warn(
1285 "%s [Error] Packet Error"
1286 " (update packet is short for attribute length)",
1287 peer->host);
1288 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1289 BGP_NOTIFY_UPDATE_MAL_ATTR);
1290 return -1;
1291 }
1292
1293 /* Fetch attribute total length. */
1294 attribute_len = stream_getw(s);
1295
1296 /* Attribute length check. */
1297 if (stream_pnt(s) + attribute_len > end) {
1298 zlog_warn(
1299 "%s [Error] Packet Error"
1300 " (update packet attribute length overflow %d)",
1301 peer->host, attribute_len);
1302 bgp_notify_send(peer, BGP_NOTIFY_UPDATE_ERR,
1303 BGP_NOTIFY_UPDATE_MAL_ATTR);
1304 return -1;
1305 }
1306
1307 /* Certain attribute parsing errors should not be considered bad enough
1308 * to reset the session for, most particularly any partial/optional
1309 * attributes that have 'tunneled' over speakers that don't understand
1310 * them. Instead we withdraw only the prefix concerned.
1311 *
1312 * Complicates the flow a little though..
1313 */
1314 bgp_attr_parse_ret_t attr_parse_ret = BGP_ATTR_PARSE_PROCEED;
1315 /* This define morphs the update case into a withdraw when lower levels
1316 * have signalled an error condition where this is best.
1317 */
1318 #define NLRI_ATTR_ARG (attr_parse_ret != BGP_ATTR_PARSE_WITHDRAW ? &attr : NULL)
1319
1320 /* Parse attribute when it exists. */
1321 if (attribute_len) {
1322 attr_parse_ret = bgp_attr_parse(peer, &attr, attribute_len,
1323 &nlris[NLRI_MP_UPDATE],
1324 &nlris[NLRI_MP_WITHDRAW]);
1325 if (attr_parse_ret == BGP_ATTR_PARSE_ERROR) {
1326 bgp_attr_unintern_sub(&attr);
1327 return -1;
1328 }
1329 }
1330
1331 /* Logging the attribute. */
1332 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW
1333 || BGP_DEBUG(update, UPDATE_IN)
1334 || BGP_DEBUG(update, UPDATE_PREFIX)) {
1335 ret = bgp_dump_attr(&attr, peer->rcvd_attr_str, BUFSIZ);
1336
1337 if (attr_parse_ret == BGP_ATTR_PARSE_WITHDRAW)
1338 zlog_err(
1339 "%s rcvd UPDATE with errors in attr(s)!! Withdrawing route.",
1340 peer->host);
1341
1342 if (ret && bgp_debug_update(peer, NULL, NULL, 1)) {
1343 zlog_debug("%s rcvd UPDATE w/ attr: %s", peer->host,
1344 peer->rcvd_attr_str);
1345 peer->rcvd_attr_printed = 1;
1346 }
1347 }
1348
1349 /* Network Layer Reachability Information. */
1350 update_len = end - stream_pnt(s);
1351
1352 if (update_len) {
1353 /* Set NLRI portion to structure. */
1354 nlris[NLRI_UPDATE].afi = AFI_IP;
1355 nlris[NLRI_UPDATE].safi = SAFI_UNICAST;
1356 nlris[NLRI_UPDATE].nlri = stream_pnt(s);
1357 nlris[NLRI_UPDATE].length = update_len;
1358 stream_forward_getp(s, update_len);
1359 }
1360
1361 if (BGP_DEBUG(update, UPDATE_IN))
1362 zlog_debug("%s rcvd UPDATE wlen %d attrlen %d alen %d",
1363 peer->host, withdraw_len, attribute_len, update_len);
1364
1365 /* Parse any given NLRIs */
1366 for (int i = NLRI_UPDATE; i < NLRI_TYPE_MAX; i++) {
1367 if (!nlris[i].nlri)
1368 continue;
1369
1370 /* NLRI is processed iff the peer if configured for the specific
1371 * afi/safi */
1372 if (!peer->afc[nlris[i].afi][nlris[i].safi]) {
1373 zlog_info(
1374 "%s [Info] UPDATE for non-enabled AFI/SAFI %u/%u",
1375 peer->host, nlris[i].afi, nlris[i].safi);
1376 continue;
1377 }
1378
1379 /* EoR handled later */
1380 if (nlris[i].length == 0)
1381 continue;
1382
1383 switch (i) {
1384 case NLRI_UPDATE:
1385 case NLRI_MP_UPDATE:
1386 nlri_ret = bgp_nlri_parse(peer, NLRI_ATTR_ARG,
1387 &nlris[i], 0);
1388 break;
1389 case NLRI_WITHDRAW:
1390 case NLRI_MP_WITHDRAW:
1391 nlri_ret = bgp_nlri_parse(peer, &attr, &nlris[i], 1);
1392 break;
1393 default:
1394 nlri_ret = -1;
1395 }
1396
1397 if (nlri_ret < 0) {
1398 zlog_err("%s [Error] Error parsing NLRI", peer->host);
1399 if (peer->status == Established)
1400 bgp_notify_send(
1401 peer, BGP_NOTIFY_UPDATE_ERR,
1402 i <= NLRI_WITHDRAW
1403 ? BGP_NOTIFY_UPDATE_INVAL_NETWORK
1404 : BGP_NOTIFY_UPDATE_OPT_ATTR_ERR);
1405 bgp_attr_unintern_sub(&attr);
1406 return -1;
1407 }
1408 }
1409
1410 /* EoR checks
1411 *
1412 * Non-MP IPv4/Unicast EoR is a completely empty UPDATE
1413 * and MP EoR should have only an empty MP_UNREACH
1414 */
1415 if ((!update_len && !withdraw_len &&
1416 nlris[NLRI_MP_UPDATE].length == 0) ||
1417 (attr_parse_ret == BGP_ATTR_PARSE_EOR)) {
1418 afi_t afi = 0;
1419 safi_t safi;
1420
1421 /* Non-MP IPv4/Unicast is a completely emtpy UPDATE - already
1422 * checked
1423 * update and withdraw NLRI lengths are 0.
1424 */
1425 if (!attribute_len) {
1426 afi = AFI_IP;
1427 safi = SAFI_UNICAST;
1428 } else if (attr.flag & ATTR_FLAG_BIT(BGP_ATTR_MP_UNREACH_NLRI)
1429 && nlris[NLRI_MP_WITHDRAW].length == 0) {
1430 afi = nlris[NLRI_MP_WITHDRAW].afi;
1431 safi = nlris[NLRI_MP_WITHDRAW].safi;
1432 } else if (attr_parse_ret == BGP_ATTR_PARSE_EOR) {
1433 afi = nlris[NLRI_MP_UPDATE].afi;
1434 safi = nlris[NLRI_MP_UPDATE].safi;
1435 }
1436
1437 if (afi && peer->afc[afi][safi]) {
1438 /* End-of-RIB received */
1439 if (!CHECK_FLAG(peer->af_sflags[afi][safi],
1440 PEER_STATUS_EOR_RECEIVED)) {
1441 SET_FLAG(peer->af_sflags[afi][safi],
1442 PEER_STATUS_EOR_RECEIVED);
1443 bgp_update_explicit_eors(peer);
1444 }
1445
1446 /* NSF delete stale route */
1447 if (peer->nsf[afi][safi])
1448 bgp_clear_stale_route(peer, afi, safi);
1449
1450 if (bgp_debug_neighbor_events(peer)) {
1451 zlog_debug("rcvd End-of-RIB for %s from %s",
1452 afi_safi_print(afi, safi),
1453 peer->host);
1454 }
1455 }
1456 }
1457
1458 /* Everything is done. We unintern temporary structures which
1459 interned in bgp_attr_parse(). */
1460 bgp_attr_unintern_sub(&attr);
1461
1462 /* If peering is stopped due to some reason, do not generate BGP
1463 event. */
1464 if (peer->status != Established)
1465 return 0;
1466
1467 /* Increment packet counter. */
1468 peer->update_in++;
1469 peer->update_time = bgp_clock();
1470
1471 /* Rearm holdtime timer */
1472 BGP_TIMER_OFF(peer->t_holdtime);
1473 bgp_timer_set(peer);
1474
1475 return 0;
1476 }
1477
1478 /* Notify message treatment function. */
1479 static void bgp_notify_receive(struct peer *peer, bgp_size_t size)
1480 {
1481 struct bgp_notify bgp_notify;
1482
1483 if (peer->notify.data) {
1484 XFREE(MTYPE_TMP, peer->notify.data);
1485 peer->notify.data = NULL;
1486 peer->notify.length = 0;
1487 }
1488
1489 bgp_notify.code = stream_getc(peer->ibuf);
1490 bgp_notify.subcode = stream_getc(peer->ibuf);
1491 bgp_notify.length = size - 2;
1492 bgp_notify.data = NULL;
1493
1494 /* Preserv notify code and sub code. */
1495 peer->notify.code = bgp_notify.code;
1496 peer->notify.subcode = bgp_notify.subcode;
1497 /* For further diagnostic record returned Data. */
1498 if (bgp_notify.length) {
1499 peer->notify.length = size - 2;
1500 peer->notify.data = XMALLOC(MTYPE_TMP, size - 2);
1501 memcpy(peer->notify.data, stream_pnt(peer->ibuf), size - 2);
1502 }
1503
1504 /* For debug */
1505 {
1506 int i;
1507 int first = 0;
1508 char c[4];
1509
1510 if (bgp_notify.length) {
1511 bgp_notify.data =
1512 XMALLOC(MTYPE_TMP, bgp_notify.length * 3);
1513 for (i = 0; i < bgp_notify.length; i++)
1514 if (first) {
1515 sprintf(c, " %02x",
1516 stream_getc(peer->ibuf));
1517 strcat(bgp_notify.data, c);
1518 } else {
1519 first = 1;
1520 sprintf(c, "%02x",
1521 stream_getc(peer->ibuf));
1522 strcpy(bgp_notify.data, c);
1523 }
1524 bgp_notify.raw_data = (u_char *)peer->notify.data;
1525 }
1526
1527 bgp_notify_print(peer, &bgp_notify, "received");
1528 if (bgp_notify.data) {
1529 XFREE(MTYPE_TMP, bgp_notify.data);
1530 bgp_notify.data = NULL;
1531 bgp_notify.length = 0;
1532 }
1533 }
1534
1535 /* peer count update */
1536 peer->notify_in++;
1537
1538 peer->last_reset = PEER_DOWN_NOTIFY_RECEIVED;
1539
1540 /* We have to check for Notify with Unsupported Optional Parameter.
1541 in that case we fallback to open without the capability option.
1542 But this done in bgp_stop. We just mark it here to avoid changing
1543 the fsm tables. */
1544 if (bgp_notify.code == BGP_NOTIFY_OPEN_ERR
1545 && bgp_notify.subcode == BGP_NOTIFY_OPEN_UNSUP_PARAM)
1546 UNSET_FLAG(peer->sflags, PEER_STATUS_CAPABILITY_OPEN);
1547
1548 BGP_EVENT_ADD(peer, Receive_NOTIFICATION_message);
1549 }
1550
1551 /* Keepalive treatment function -- get keepalive send keepalive */
1552 static void bgp_keepalive_receive(struct peer *peer, bgp_size_t size)
1553 {
1554 if (bgp_debug_keepalive(peer))
1555 zlog_debug("%s KEEPALIVE rcvd", peer->host);
1556
1557 BGP_EVENT_ADD(peer, Receive_KEEPALIVE_message);
1558 }
1559
1560 /* Route refresh message is received. */
1561 static void bgp_route_refresh_receive(struct peer *peer, bgp_size_t size)
1562 {
1563 iana_afi_t pkt_afi;
1564 afi_t afi;
1565 iana_safi_t pkt_safi;
1566 safi_t safi;
1567 struct stream *s;
1568 struct peer_af *paf;
1569 struct update_group *updgrp;
1570 struct peer *updgrp_peer;
1571
1572 /* If peer does not have the capability, send notification. */
1573 if (!CHECK_FLAG(peer->cap, PEER_CAP_REFRESH_ADV)) {
1574 zlog_err("%s [Error] BGP route refresh is not enabled",
1575 peer->host);
1576 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
1577 BGP_NOTIFY_HEADER_BAD_MESTYPE);
1578 return;
1579 }
1580
1581 /* Status must be Established. */
1582 if (peer->status != Established) {
1583 zlog_err(
1584 "%s [Error] Route refresh packet received under status %s",
1585 peer->host,
1586 lookup_msg(bgp_status_msg, peer->status, NULL));
1587 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
1588 return;
1589 }
1590
1591 s = peer->ibuf;
1592
1593 /* Parse packet. */
1594 pkt_afi = stream_getw(s);
1595 (void)stream_getc(s);
1596 pkt_safi = stream_getc(s);
1597
1598 if (bgp_debug_update(peer, NULL, NULL, 0))
1599 zlog_debug("%s rcvd REFRESH_REQ for afi/safi: %d/%d",
1600 peer->host, pkt_afi, pkt_safi);
1601
1602 /* Convert AFI, SAFI to internal values and check. */
1603 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi, &safi)) {
1604 zlog_info(
1605 "%s REFRESH_REQ for unrecognized afi/safi: %d/%d - ignored",
1606 peer->host, pkt_afi, pkt_safi);
1607 return;
1608 }
1609
1610 if (size != BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE) {
1611 u_char *end;
1612 u_char when_to_refresh;
1613 u_char orf_type;
1614 u_int16_t orf_len;
1615
1616 if (size - (BGP_MSG_ROUTE_REFRESH_MIN_SIZE - BGP_HEADER_SIZE)
1617 < 5) {
1618 zlog_info("%s ORF route refresh length error",
1619 peer->host);
1620 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1621 return;
1622 }
1623
1624 when_to_refresh = stream_getc(s);
1625 end = stream_pnt(s) + (size - 5);
1626
1627 while ((stream_pnt(s) + 2) < end) {
1628 orf_type = stream_getc(s);
1629 orf_len = stream_getw(s);
1630
1631 /* orf_len in bounds? */
1632 if ((stream_pnt(s) + orf_len) > end)
1633 break; /* XXX: Notify instead?? */
1634 if (orf_type == ORF_TYPE_PREFIX
1635 || orf_type == ORF_TYPE_PREFIX_OLD) {
1636 uint8_t *p_pnt = stream_pnt(s);
1637 uint8_t *p_end = stream_pnt(s) + orf_len;
1638 struct orf_prefix orfp;
1639 u_char common = 0;
1640 u_int32_t seq;
1641 int psize;
1642 char name[BUFSIZ];
1643 int ret = CMD_SUCCESS;
1644
1645 if (bgp_debug_neighbor_events(peer)) {
1646 zlog_debug(
1647 "%s rcvd Prefixlist ORF(%d) length %d",
1648 peer->host, orf_type, orf_len);
1649 }
1650
1651 /* we're going to read at least 1 byte of common
1652 * ORF header,
1653 * and 7 bytes of ORF Address-filter entry from
1654 * the stream
1655 */
1656 if (orf_len < 7)
1657 break;
1658
1659 /* ORF prefix-list name */
1660 sprintf(name, "%s.%d.%d", peer->host, afi,
1661 safi);
1662
1663 while (p_pnt < p_end) {
1664 /* If the ORF entry is malformed, want
1665 * to read as much of it
1666 * as possible without going beyond the
1667 * bounds of the entry,
1668 * to maximise debug information.
1669 */
1670 int ok;
1671 memset(&orfp, 0,
1672 sizeof(struct orf_prefix));
1673 common = *p_pnt++;
1674 /* after ++: p_pnt <= p_end */
1675 if (common
1676 & ORF_COMMON_PART_REMOVE_ALL) {
1677 if (bgp_debug_neighbor_events(
1678 peer))
1679 zlog_debug(
1680 "%s rcvd Remove-All pfxlist ORF request",
1681 peer->host);
1682 prefix_bgp_orf_remove_all(afi,
1683 name);
1684 break;
1685 }
1686 ok = ((u_int32_t)(p_end - p_pnt)
1687 >= sizeof(u_int32_t));
1688 if (ok) {
1689 memcpy(&seq, p_pnt,
1690 sizeof(u_int32_t));
1691 p_pnt += sizeof(u_int32_t);
1692 orfp.seq = ntohl(seq);
1693 } else
1694 p_pnt = p_end;
1695
1696 if ((ok = (p_pnt < p_end)))
1697 orfp.ge =
1698 *p_pnt++; /* value
1699 checked in
1700 prefix_bgp_orf_set()
1701 */
1702 if ((ok = (p_pnt < p_end)))
1703 orfp.le =
1704 *p_pnt++; /* value
1705 checked in
1706 prefix_bgp_orf_set()
1707 */
1708 if ((ok = (p_pnt < p_end)))
1709 orfp.p.prefixlen = *p_pnt++;
1710 orfp.p.family = afi2family(
1711 afi); /* afi checked already */
1712
1713 psize = PSIZE(
1714 orfp.p.prefixlen); /* 0 if not
1715 ok */
1716 if (psize
1717 > prefix_blen(
1718 &orfp.p)) /* valid for
1719 family ? */
1720 {
1721 ok = 0;
1722 psize = prefix_blen(&orfp.p);
1723 }
1724 if (psize
1725 > (p_end - p_pnt)) /* valid for
1726 packet ? */
1727 {
1728 ok = 0;
1729 psize = p_end - p_pnt;
1730 }
1731
1732 if (psize > 0)
1733 memcpy(&orfp.p.u.prefix, p_pnt,
1734 psize);
1735 p_pnt += psize;
1736
1737 if (bgp_debug_neighbor_events(peer)) {
1738 char buf[INET6_BUFSIZ];
1739
1740 zlog_debug(
1741 "%s rcvd %s %s seq %u %s/%d ge %d le %d%s",
1742 peer->host,
1743 (common & ORF_COMMON_PART_REMOVE
1744 ? "Remove"
1745 : "Add"),
1746 (common & ORF_COMMON_PART_DENY
1747 ? "deny"
1748 : "permit"),
1749 orfp.seq,
1750 inet_ntop(
1751 orfp.p.family,
1752 &orfp.p.u.prefix,
1753 buf,
1754 INET6_BUFSIZ),
1755 orfp.p.prefixlen,
1756 orfp.ge, orfp.le,
1757 ok ? "" : " MALFORMED");
1758 }
1759
1760 if (ok)
1761 ret = prefix_bgp_orf_set(
1762 name, afi, &orfp,
1763 (common & ORF_COMMON_PART_DENY
1764 ? 0
1765 : 1),
1766 (common & ORF_COMMON_PART_REMOVE
1767 ? 0
1768 : 1));
1769
1770 if (!ok || (ok && ret != CMD_SUCCESS)) {
1771 zlog_info(
1772 "%s Received misformatted prefixlist ORF."
1773 " Remove All pfxlist",
1774 peer->host);
1775 prefix_bgp_orf_remove_all(afi,
1776 name);
1777 break;
1778 }
1779 }
1780
1781 peer->orf_plist[afi][safi] =
1782 prefix_bgp_orf_lookup(afi, name);
1783 }
1784 stream_forward_getp(s, orf_len);
1785 }
1786 if (bgp_debug_neighbor_events(peer))
1787 zlog_debug("%s rcvd Refresh %s ORF request", peer->host,
1788 when_to_refresh == REFRESH_DEFER
1789 ? "Defer"
1790 : "Immediate");
1791 if (when_to_refresh == REFRESH_DEFER)
1792 return;
1793 }
1794
1795 /* First update is deferred until ORF or ROUTE-REFRESH is received */
1796 if (CHECK_FLAG(peer->af_sflags[afi][safi],
1797 PEER_STATUS_ORF_WAIT_REFRESH))
1798 UNSET_FLAG(peer->af_sflags[afi][safi],
1799 PEER_STATUS_ORF_WAIT_REFRESH);
1800
1801 paf = peer_af_find(peer, afi, safi);
1802 if (paf && paf->subgroup) {
1803 if (peer->orf_plist[afi][safi]) {
1804 updgrp = PAF_UPDGRP(paf);
1805 updgrp_peer = UPDGRP_PEER(updgrp);
1806 updgrp_peer->orf_plist[afi][safi] =
1807 peer->orf_plist[afi][safi];
1808 }
1809
1810 /* If the peer is configured for default-originate clear the
1811 * SUBGRP_STATUS_DEFAULT_ORIGINATE flag so that we will
1812 * re-advertise the
1813 * default
1814 */
1815 if (CHECK_FLAG(paf->subgroup->sflags,
1816 SUBGRP_STATUS_DEFAULT_ORIGINATE))
1817 UNSET_FLAG(paf->subgroup->sflags,
1818 SUBGRP_STATUS_DEFAULT_ORIGINATE);
1819 }
1820
1821 /* Perform route refreshment to the peer */
1822 bgp_announce_route(peer, afi, safi);
1823 }
1824
1825 static int bgp_capability_msg_parse(struct peer *peer, u_char *pnt,
1826 bgp_size_t length)
1827 {
1828 u_char *end;
1829 struct capability_mp_data mpc;
1830 struct capability_header *hdr;
1831 u_char action;
1832 iana_afi_t pkt_afi;
1833 afi_t afi;
1834 iana_safi_t pkt_safi;
1835 safi_t safi;
1836
1837 end = pnt + length;
1838
1839 while (pnt < end) {
1840 /* We need at least action, capability code and capability
1841 * length. */
1842 if (pnt + 3 > end) {
1843 zlog_info("%s Capability length error", peer->host);
1844 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1845 return -1;
1846 }
1847 action = *pnt;
1848 hdr = (struct capability_header *)(pnt + 1);
1849
1850 /* Action value check. */
1851 if (action != CAPABILITY_ACTION_SET
1852 && action != CAPABILITY_ACTION_UNSET) {
1853 zlog_info("%s Capability Action Value error %d",
1854 peer->host, action);
1855 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1856 return -1;
1857 }
1858
1859 if (bgp_debug_neighbor_events(peer))
1860 zlog_debug(
1861 "%s CAPABILITY has action: %d, code: %u, length %u",
1862 peer->host, action, hdr->code, hdr->length);
1863
1864 /* Capability length check. */
1865 if ((pnt + hdr->length + 3) > end) {
1866 zlog_info("%s Capability length error", peer->host);
1867 bgp_notify_send(peer, BGP_NOTIFY_CEASE, 0);
1868 return -1;
1869 }
1870
1871 /* Fetch structure to the byte stream. */
1872 memcpy(&mpc, pnt + 3, sizeof(struct capability_mp_data));
1873 pnt += hdr->length + 3;
1874
1875 /* We know MP Capability Code. */
1876 if (hdr->code == CAPABILITY_CODE_MP) {
1877 pkt_afi = ntohs(mpc.afi);
1878 pkt_safi = mpc.safi;
1879
1880 /* Ignore capability when override-capability is set. */
1881 if (CHECK_FLAG(peer->flags,
1882 PEER_FLAG_OVERRIDE_CAPABILITY))
1883 continue;
1884
1885 /* Convert AFI, SAFI to internal values. */
1886 if (bgp_map_afi_safi_iana2int(pkt_afi, pkt_safi, &afi,
1887 &safi)) {
1888 if (bgp_debug_neighbor_events(peer))
1889 zlog_debug(
1890 "%s Dynamic Capability MP_EXT afi/safi invalid "
1891 "(%u/%u)",
1892 peer->host, pkt_afi, pkt_safi);
1893 continue;
1894 }
1895
1896 /* Address family check. */
1897 if (bgp_debug_neighbor_events(peer))
1898 zlog_debug(
1899 "%s CAPABILITY has %s MP_EXT CAP for afi/safi: %u/%u",
1900 peer->host,
1901 action == CAPABILITY_ACTION_SET
1902 ? "Advertising"
1903 : "Removing",
1904 pkt_afi, pkt_safi);
1905
1906 if (action == CAPABILITY_ACTION_SET) {
1907 peer->afc_recv[afi][safi] = 1;
1908 if (peer->afc[afi][safi]) {
1909 peer->afc_nego[afi][safi] = 1;
1910 bgp_announce_route(peer, afi, safi);
1911 }
1912 } else {
1913 peer->afc_recv[afi][safi] = 0;
1914 peer->afc_nego[afi][safi] = 0;
1915
1916 if (peer_active_nego(peer))
1917 bgp_clear_route(peer, afi, safi);
1918 else
1919 BGP_EVENT_ADD(peer, BGP_Stop);
1920 }
1921 } else {
1922 zlog_warn(
1923 "%s unrecognized capability code: %d - ignored",
1924 peer->host, hdr->code);
1925 }
1926 }
1927 return 0;
1928 }
1929
1930 /* Dynamic Capability is received.
1931 *
1932 * This is exported for unit-test purposes
1933 */
1934 int bgp_capability_receive(struct peer *peer, bgp_size_t size)
1935 {
1936 u_char *pnt;
1937
1938 /* Fetch pointer. */
1939 pnt = stream_pnt(peer->ibuf);
1940
1941 if (bgp_debug_neighbor_events(peer))
1942 zlog_debug("%s rcv CAPABILITY", peer->host);
1943
1944 /* If peer does not have the capability, send notification. */
1945 if (!CHECK_FLAG(peer->cap, PEER_CAP_DYNAMIC_ADV)) {
1946 zlog_err("%s [Error] BGP dynamic capability is not enabled",
1947 peer->host);
1948 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
1949 BGP_NOTIFY_HEADER_BAD_MESTYPE);
1950 return -1;
1951 }
1952
1953 /* Status must be Established. */
1954 if (peer->status != Established) {
1955 zlog_err(
1956 "%s [Error] Dynamic capability packet received under status %s",
1957 peer->host,
1958 lookup_msg(bgp_status_msg, peer->status, NULL));
1959 bgp_notify_send(peer, BGP_NOTIFY_FSM_ERR, 0);
1960 return -1;
1961 }
1962
1963 /* Parse packet. */
1964 return bgp_capability_msg_parse(peer, pnt, size);
1965 }
1966
1967 /* BGP read utility function. */
1968 static int bgp_read_packet(struct peer *peer)
1969 {
1970 int nbytes;
1971 int readsize;
1972
1973 readsize = peer->packet_size - stream_get_endp(peer->ibuf);
1974
1975 /* If size is zero then return. */
1976 if (!readsize)
1977 return 0;
1978
1979 /* Read packet from fd. */
1980 nbytes = stream_read_try(peer->ibuf, peer->fd, readsize);
1981
1982 /* If read byte is smaller than zero then error occured. */
1983 if (nbytes < 0) {
1984 /* Transient error should retry */
1985 if (nbytes == -2)
1986 return -1;
1987
1988 zlog_err("%s [Error] bgp_read_packet error: %s", peer->host,
1989 safe_strerror(errno));
1990
1991 if (peer->status == Established) {
1992 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
1993 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
1994 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
1995 } else
1996 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
1997 }
1998
1999 BGP_EVENT_ADD(peer, TCP_fatal_error);
2000 return -1;
2001 }
2002
2003 /* When read byte is zero : clear bgp peer and return */
2004 if (nbytes == 0) {
2005 if (bgp_debug_neighbor_events(peer))
2006 zlog_debug("%s [Event] BGP connection closed fd %d",
2007 peer->host, peer->fd);
2008
2009 if (peer->status == Established) {
2010 if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_MODE)) {
2011 peer->last_reset = PEER_DOWN_NSF_CLOSE_SESSION;
2012 SET_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT);
2013 } else
2014 peer->last_reset = PEER_DOWN_CLOSE_SESSION;
2015 }
2016
2017 BGP_EVENT_ADD(peer, TCP_connection_closed);
2018 return -1;
2019 }
2020
2021 /* We read partial packet. */
2022 if (stream_get_endp(peer->ibuf) != peer->packet_size)
2023 return -1;
2024
2025 return 0;
2026 }
2027
2028 /* Marker check. */
2029 static int bgp_marker_all_one(struct stream *s, int length)
2030 {
2031 int i;
2032
2033 for (i = 0; i < length; i++)
2034 if (s->data[i] != 0xff)
2035 return 0;
2036
2037 return 1;
2038 }
2039
2040 /* Starting point of packet process function. */
2041 int bgp_read(struct thread *thread)
2042 {
2043 int ret;
2044 u_char type = 0;
2045 struct peer *peer;
2046 bgp_size_t size;
2047 char notify_data_length[2];
2048 u_int32_t notify_out;
2049
2050 /* Yes first of all get peer pointer. */
2051 peer = THREAD_ARG(thread);
2052 peer->t_read = NULL;
2053
2054 /* Note notify_out so we can check later to see if we sent another one
2055 */
2056 notify_out = peer->notify_out;
2057
2058 /* For non-blocking IO check. */
2059 if (peer->status == Connect) {
2060 bgp_connect_check(peer, 1);
2061 goto done;
2062 } else {
2063 if (peer->fd < 0) {
2064 zlog_err("bgp_read peer's fd is negative value %d",
2065 peer->fd);
2066 return -1;
2067 }
2068 BGP_READ_ON(peer->t_read, bgp_read, peer->fd);
2069 }
2070
2071 /* Read packet header to determine type of the packet */
2072 if (peer->packet_size == 0)
2073 peer->packet_size = BGP_HEADER_SIZE;
2074
2075 if (stream_get_endp(peer->ibuf) < BGP_HEADER_SIZE) {
2076 ret = bgp_read_packet(peer);
2077
2078 /* Header read error or partial read packet. */
2079 if (ret < 0)
2080 goto done;
2081
2082 /* Get size and type. */
2083 stream_forward_getp(peer->ibuf, BGP_MARKER_SIZE);
2084 memcpy(notify_data_length, stream_pnt(peer->ibuf), 2);
2085 size = stream_getw(peer->ibuf);
2086 type = stream_getc(peer->ibuf);
2087
2088 /* Marker check */
2089 if (((type == BGP_MSG_OPEN) || (type == BGP_MSG_KEEPALIVE))
2090 && !bgp_marker_all_one(peer->ibuf, BGP_MARKER_SIZE)) {
2091 bgp_notify_send(peer, BGP_NOTIFY_HEADER_ERR,
2092 BGP_NOTIFY_HEADER_NOT_SYNC);
2093 goto done;
2094 }
2095
2096 /* BGP type check. */
2097 if (type != BGP_MSG_OPEN && type != BGP_MSG_UPDATE
2098 && type != BGP_MSG_NOTIFY && type != BGP_MSG_KEEPALIVE
2099 && type != BGP_MSG_ROUTE_REFRESH_NEW
2100 && type != BGP_MSG_ROUTE_REFRESH_OLD
2101 && type != BGP_MSG_CAPABILITY) {
2102 if (bgp_debug_neighbor_events(peer))
2103 zlog_debug("%s unknown message type 0x%02x",
2104 peer->host, type);
2105 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
2106 BGP_NOTIFY_HEADER_BAD_MESTYPE,
2107 &type, 1);
2108 goto done;
2109 }
2110 /* Mimimum packet length check. */
2111 if ((size < BGP_HEADER_SIZE) || (size > BGP_MAX_PACKET_SIZE)
2112 || (type == BGP_MSG_OPEN && size < BGP_MSG_OPEN_MIN_SIZE)
2113 || (type == BGP_MSG_UPDATE
2114 && size < BGP_MSG_UPDATE_MIN_SIZE)
2115 || (type == BGP_MSG_NOTIFY
2116 && size < BGP_MSG_NOTIFY_MIN_SIZE)
2117 || (type == BGP_MSG_KEEPALIVE
2118 && size != BGP_MSG_KEEPALIVE_MIN_SIZE)
2119 || (type == BGP_MSG_ROUTE_REFRESH_NEW
2120 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
2121 || (type == BGP_MSG_ROUTE_REFRESH_OLD
2122 && size < BGP_MSG_ROUTE_REFRESH_MIN_SIZE)
2123 || (type == BGP_MSG_CAPABILITY
2124 && size < BGP_MSG_CAPABILITY_MIN_SIZE)) {
2125 if (bgp_debug_neighbor_events(peer))
2126 zlog_debug("%s bad message length - %d for %s",
2127 peer->host, size,
2128 type == 128
2129 ? "ROUTE-REFRESH"
2130 : bgp_type_str[(int)type]);
2131 bgp_notify_send_with_data(peer, BGP_NOTIFY_HEADER_ERR,
2132 BGP_NOTIFY_HEADER_BAD_MESLEN,
2133 (u_char *)notify_data_length,
2134 2);
2135 goto done;
2136 }
2137
2138 /* Adjust size to message length. */
2139 peer->packet_size = size;
2140 }
2141
2142 ret = bgp_read_packet(peer);
2143 if (ret < 0)
2144 goto done;
2145
2146 /* Get size and type again. */
2147 (void)stream_getw_from(peer->ibuf, BGP_MARKER_SIZE);
2148 type = stream_getc_from(peer->ibuf, BGP_MARKER_SIZE + 2);
2149
2150 /* BGP packet dump function. */
2151 bgp_dump_packet(peer, type, peer->ibuf);
2152
2153 size = (peer->packet_size - BGP_HEADER_SIZE);
2154
2155 /* Read rest of the packet and call each sort of packet routine */
2156 switch (type) {
2157 case BGP_MSG_OPEN:
2158 peer->open_in++;
2159 bgp_open_receive(peer, size); /* XXX return value ignored! */
2160 break;
2161 case BGP_MSG_UPDATE:
2162 peer->readtime = monotime(NULL);
2163 bgp_update_receive(peer, size);
2164 break;
2165 case BGP_MSG_NOTIFY:
2166 bgp_notify_receive(peer, size);
2167 break;
2168 case BGP_MSG_KEEPALIVE:
2169 peer->readtime = monotime(NULL);
2170 bgp_keepalive_receive(peer, size);
2171 break;
2172 case BGP_MSG_ROUTE_REFRESH_NEW:
2173 case BGP_MSG_ROUTE_REFRESH_OLD:
2174 peer->refresh_in++;
2175 bgp_route_refresh_receive(peer, size);
2176 break;
2177 case BGP_MSG_CAPABILITY:
2178 peer->dynamic_cap_in++;
2179 bgp_capability_receive(peer, size);
2180 break;
2181 }
2182
2183 /* If reading this packet caused us to send a NOTIFICATION then store a
2184 * copy
2185 * of the packet for troubleshooting purposes
2186 */
2187 if (notify_out < peer->notify_out) {
2188 memcpy(peer->last_reset_cause, peer->ibuf->data,
2189 peer->packet_size);
2190 peer->last_reset_cause_size = peer->packet_size;
2191 notify_out = peer->notify_out;
2192 }
2193
2194 /* Clear input buffer. */
2195 peer->packet_size = 0;
2196 if (peer->ibuf)
2197 stream_reset(peer->ibuf);
2198
2199 done:
2200 /* If reading this packet caused us to send a NOTIFICATION then store a
2201 * copy
2202 * of the packet for troubleshooting purposes
2203 */
2204 if (notify_out < peer->notify_out) {
2205 memcpy(peer->last_reset_cause, peer->ibuf->data,
2206 peer->packet_size);
2207 peer->last_reset_cause_size = peer->packet_size;
2208 }
2209
2210 return 0;
2211 }
2212
2213 /* ------------- write thread ------------------ */
2214
2215 /**
2216 * Flush peer output buffer.
2217 *
2218 * This function pops packets off of peer->obuf and writes them to peer->fd.
2219 * The amount of packets written is equal to the minimum of peer->wpkt_quanta
2220 * and the number of packets on the output buffer.
2221 *
2222 * If write() returns an error, the appropriate FSM event is generated.
2223 *
2224 * The return value is equal to the number of packets written
2225 * (which may be zero).
2226 */
2227 static int bgp_write(struct peer *peer)
2228 {
2229 u_char type;
2230 struct stream *s;
2231 int num;
2232 int update_last_write = 0;
2233 unsigned int count = 0;
2234 unsigned int oc = 0;
2235
2236 /* For non-blocking IO check. */
2237 if (peer->status == Connect) {
2238 bgp_connect_check(peer, 1);
2239 return 0;
2240 }
2241
2242 /* Write packets. The number of packets written is the value of
2243 * bgp->wpkt_quanta or the size of the output buffer, whichever is
2244 * smaller.*/
2245 while (count < peer->bgp->wpkt_quanta
2246 && (s = stream_fifo_head(peer->obuf))) {
2247 int writenum;
2248 do { // write a full packet, or return on error
2249 writenum = stream_get_endp(s) - stream_get_getp(s);
2250 num = write(peer->fd, STREAM_PNT(s), writenum);
2251
2252 if (num < 0) {
2253 if (ERRNO_IO_RETRY(errno))
2254 continue;
2255
2256 BGP_EVENT_ADD(peer, TCP_fatal_error);
2257 goto done;
2258 } else if (num != writenum) // incomplete write
2259 stream_forward_getp(s, num);
2260
2261 } while (num != writenum);
2262
2263 /* Retrieve BGP packet type. */
2264 stream_set_getp(s, BGP_MARKER_SIZE + 2);
2265 type = stream_getc(s);
2266
2267 switch (type) {
2268 case BGP_MSG_OPEN:
2269 peer->open_out++;
2270 break;
2271 case BGP_MSG_UPDATE:
2272 peer->update_out++;
2273 break;
2274 case BGP_MSG_NOTIFY:
2275 peer->notify_out++;
2276 /* Double start timer. */
2277 peer->v_start *= 2;
2278
2279 /* Overflow check. */
2280 if (peer->v_start >= (60 * 2))
2281 peer->v_start = (60 * 2);
2282
2283 /* Handle Graceful Restart case where the state changes
2284 to
2285 Connect instead of Idle */
2286 /* Flush any existing events */
2287 BGP_EVENT_ADD(peer, BGP_Stop);
2288 goto done;
2289
2290 case BGP_MSG_KEEPALIVE:
2291 peer->keepalive_out++;
2292 break;
2293 case BGP_MSG_ROUTE_REFRESH_NEW:
2294 case BGP_MSG_ROUTE_REFRESH_OLD:
2295 peer->refresh_out++;
2296 break;
2297 case BGP_MSG_CAPABILITY:
2298 peer->dynamic_cap_out++;
2299 break;
2300 }
2301
2302 count++;
2303 /* OK we send packet so delete it. */
2304 bgp_packet_delete_unsafe(peer);
2305 update_last_write = 1;
2306 }
2307
2308 done : {
2309 /* Update last_update if UPDATEs were written. */
2310 if (peer->update_out > oc)
2311 peer->last_update = bgp_clock();
2312
2313 /* If we TXed any flavor of packet update last_write */
2314 if (update_last_write)
2315 peer->last_write = bgp_clock();
2316 }
2317
2318 return count;
2319 }
2320
2321 static void cleanup_handler(void *arg)
2322 {
2323 if (plist)
2324 list_delete(plist);
2325
2326 plist = NULL;
2327
2328 pthread_mutex_unlock(&plist_mtx);
2329 }
2330
2331 /**
2332 * Entry function for peer packet flushing pthread.
2333 *
2334 * The plist must be initialized before calling this.
2335 */
2336 void *peer_writes_start(void *arg)
2337 {
2338 struct timeval currtime = {0, 0};
2339 struct timeval sleeptime = {0, 500};
2340 struct timespec next_update = {0, 0};
2341
2342 // initialize
2343 pthread_mutex_lock(&plist_mtx);
2344 plist = list_new();
2345
2346 struct listnode *ln;
2347 struct peer *peer;
2348
2349 pthread_cleanup_push(&cleanup_handler, NULL);
2350
2351 bgp_packet_writes_thread_run = true;
2352
2353 while (bgp_packet_writes_thread_run) { // wait around until next update
2354 // time
2355 if (plist->count > 0)
2356 pthread_cond_timedwait(&write_cond, &plist_mtx,
2357 &next_update);
2358 else // wait around until we have some peers
2359 while (plist->count == 0
2360 && bgp_packet_writes_thread_run)
2361 pthread_cond_wait(&write_cond, &plist_mtx);
2362
2363 for (ALL_LIST_ELEMENTS_RO(plist, ln, peer)) {
2364 pthread_mutex_lock(&peer->obuf_mtx);
2365 {
2366 bgp_write(peer);
2367 }
2368 pthread_mutex_unlock(&peer->obuf_mtx);
2369 }
2370
2371 // schedule update packet generation on main thread
2372 if (!t_generate_updgrp_packets)
2373 t_generate_updgrp_packets = thread_add_event(
2374 bm->master, bgp_generate_updgrp_packets, NULL,
2375 0);
2376
2377 gettimeofday(&currtime, NULL);
2378 timeradd(&currtime, &sleeptime, &currtime);
2379 TIMEVAL_TO_TIMESPEC(&currtime, &next_update);
2380 }
2381
2382 // clean up
2383 pthread_cleanup_pop(1);
2384
2385 return NULL;
2386 }
2387
2388 /**
2389 * Turns on packet writing for a peer.
2390 */
2391 void peer_writes_on(struct peer *peer)
2392 {
2393 if (peer->status == Deleted)
2394 return;
2395
2396 pthread_mutex_lock(&plist_mtx);
2397 {
2398 struct listnode *ln, *nn;
2399 struct peer *p;
2400
2401 // make sure this peer isn't already in the list
2402 for (ALL_LIST_ELEMENTS(plist, ln, nn, p))
2403 if (p == peer) {
2404 pthread_mutex_unlock(&plist_mtx);
2405 return;
2406 }
2407
2408 peer_lock(peer);
2409 listnode_add(plist, peer);
2410 }
2411 pthread_mutex_unlock(&plist_mtx);
2412 peer_writes_wake();
2413 }
2414
2415 /**
2416 * Turns off packet writing for a peer.
2417 */
2418 void peer_writes_off(struct peer *peer)
2419 {
2420 struct listnode *ln, *nn;
2421 struct peer *p;
2422 pthread_mutex_lock(&plist_mtx);
2423 {
2424 for (ALL_LIST_ELEMENTS(plist, ln, nn, p))
2425 if (p == peer) {
2426 list_delete_node(plist, ln);
2427 peer_unlock(peer);
2428 break;
2429 }
2430 }
2431 pthread_mutex_unlock(&plist_mtx);
2432 }
2433
2434 /**
2435 * Wakes up the write thread to do work.
2436 */
2437 void peer_writes_wake()
2438 {
2439 pthread_cond_signal(&write_cond);
2440 }