]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_evpn.c
bgpd: l3vni/rmac association with bgp vrf
[mirror_frr.git] / bgpd / bgp_evpn.c
1 /* Ethernet-VPN Packet and vty Processing File
2 * Copyright (C) 2016 6WIND
3 * Copyright (C) 2017 Cumulus Networks, Inc.
4 *
5 * This file is part of FRR.
6 *
7 * FRRouting is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
10 * later version.
11 *
12 * FRRouting is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; see the file COPYING; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #include <zebra.h>
23
24 #include "command.h"
25 #include "filter.h"
26 #include "prefix.h"
27 #include "log.h"
28 #include "memory.h"
29 #include "stream.h"
30 #include "hash.h"
31 #include "jhash.h"
32 #include "bitfield.h"
33 #include "zclient.h"
34
35 #include "bgpd/bgp_attr_evpn.h"
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_mplsvpn.h"
41 #include "bgpd/bgp_label.h"
42 #include "bgpd/bgp_evpn.h"
43 #include "bgpd/bgp_evpn_private.h"
44 #include "bgpd/bgp_ecommunity.h"
45 #include "bgpd/bgp_encap_types.h"
46 #include "bgpd/bgp_debug.h"
47 #include "bgpd/bgp_aspath.h"
48 #include "bgpd/bgp_zebra.h"
49 #include "bgpd/bgp_nexthop.h"
50
51 /*
52 * Definitions and external declarations.
53 */
54 extern struct zclient *zclient;
55
56 DEFINE_QOBJ_TYPE(bgpevpn)
57
58
59 /*
60 * Static function declarations
61 */
62 static void delete_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
63 afi_t afi, safi_t safi, struct bgp_node *rn,
64 struct bgp_info **ri);
65 static int delete_all_vni_routes(struct bgp *bgp, struct bgpevpn *vpn);
66
67 /*
68 * Private functions.
69 */
70
71 /*
72 * Make vni hash key.
73 */
74 static unsigned int vni_hash_key_make(void *p)
75 {
76 struct bgpevpn *vpn = p;
77 return (jhash_1word(vpn->vni, 0));
78 }
79
80 /*
81 * Comparison function for vni hash
82 */
83 static int vni_hash_cmp(const void *p1, const void *p2)
84 {
85 const struct bgpevpn *vpn1 = p1;
86 const struct bgpevpn *vpn2 = p2;
87
88 if (!vpn1 && !vpn2)
89 return 1;
90 if (!vpn1 || !vpn2)
91 return 0;
92 return (vpn1->vni == vpn2->vni);
93 }
94
95 /*
96 * Make import route target hash key.
97 */
98 static unsigned int import_rt_hash_key_make(void *p)
99 {
100 struct irt_node *irt = p;
101 char *pnt = irt->rt.val;
102 unsigned int key = 0;
103 int c = 0;
104
105 key += pnt[c];
106 key += pnt[c + 1];
107 key += pnt[c + 2];
108 key += pnt[c + 3];
109 key += pnt[c + 4];
110 key += pnt[c + 5];
111 key += pnt[c + 6];
112 key += pnt[c + 7];
113
114 return (key);
115 }
116
117 /*
118 * Comparison function for import rt hash
119 */
120 static int import_rt_hash_cmp(const void *p1, const void *p2)
121 {
122 const struct irt_node *irt1 = p1;
123 const struct irt_node *irt2 = p2;
124
125 if (irt1 == NULL && irt2 == NULL)
126 return 1;
127
128 if (irt1 == NULL || irt2 == NULL)
129 return 0;
130
131 return (memcmp(irt1->rt.val, irt2->rt.val, ECOMMUNITY_SIZE) == 0);
132 }
133
134 /*
135 * Create a new import_rt
136 */
137 static struct irt_node *import_rt_new(struct bgp *bgp,
138 struct ecommunity_val *rt)
139 {
140 struct irt_node *irt;
141
142 if (!bgp)
143 return NULL;
144
145 irt = XCALLOC(MTYPE_BGP_EVPN_IMPORT_RT, sizeof(struct irt_node));
146 if (!irt)
147 return NULL;
148
149 irt->rt = *rt;
150 irt->vnis = list_new();
151
152 /* Add to hash */
153 if (!hash_get(bgp->import_rt_hash, irt, hash_alloc_intern)) {
154 XFREE(MTYPE_BGP_EVPN_IMPORT_RT, irt);
155 return NULL;
156 }
157
158 return irt;
159 }
160
161 /*
162 * Free the import rt node
163 */
164 static void import_rt_free(struct bgp *bgp, struct irt_node *irt)
165 {
166 hash_release(bgp->import_rt_hash, irt);
167 XFREE(MTYPE_BGP_EVPN_IMPORT_RT, irt);
168 }
169
170 /*
171 * Function to lookup Import RT node - used to map a RT to set of
172 * VNIs importing routes with that RT.
173 */
174 static struct irt_node *lookup_import_rt(struct bgp *bgp,
175 struct ecommunity_val *rt)
176 {
177 struct irt_node *irt;
178 struct irt_node tmp;
179
180 memset(&tmp, 0, sizeof(struct irt_node));
181 memcpy(&tmp.rt, rt, ECOMMUNITY_SIZE);
182 irt = hash_lookup(bgp->import_rt_hash, &tmp);
183 return irt;
184 }
185
186 /*
187 * Is specified VNI present on the RT's list of "importing" VNIs?
188 */
189 static int is_vni_present_in_irt_vnis(struct list *vnis, struct bgpevpn *vpn)
190 {
191 struct listnode *node, *nnode;
192 struct bgpevpn *tmp_vpn;
193
194 for (ALL_LIST_ELEMENTS(vnis, node, nnode, tmp_vpn)) {
195 if (tmp_vpn == vpn)
196 return 1;
197 }
198
199 return 0;
200 }
201
202 /*
203 * Compare Route Targets.
204 */
205 static int evpn_route_target_cmp(struct ecommunity *ecom1,
206 struct ecommunity *ecom2)
207 {
208 if (ecom1 && !ecom2)
209 return -1;
210
211 if (!ecom1 && ecom2)
212 return 1;
213
214 if (!ecom1 && !ecom2)
215 return 0;
216
217 if (ecom1->str && !ecom2->str)
218 return -1;
219
220 if (!ecom1->str && ecom2->str)
221 return 1;
222
223 if (!ecom1->str && !ecom2->str)
224 return 0;
225
226 return strcmp(ecom1->str, ecom2->str);
227 }
228
229 /*
230 * Mask off global-admin field of specified extended community (RT),
231 * just retain the local-admin field.
232 */
233 static inline void mask_ecom_global_admin(struct ecommunity_val *dst,
234 struct ecommunity_val *src)
235 {
236 u_char type;
237
238 type = src->val[0];
239 dst->val[0] = 0;
240 if (type == ECOMMUNITY_ENCODE_AS) {
241 dst->val[2] = dst->val[3] = 0;
242 } else if (type == ECOMMUNITY_ENCODE_AS4
243 || type == ECOMMUNITY_ENCODE_IP) {
244 dst->val[2] = dst->val[3] = 0;
245 dst->val[4] = dst->val[5] = 0;
246 }
247 }
248
249 /*
250 * Map one RT to specified VNI.
251 */
252 static void map_vni_to_rt(struct bgp *bgp, struct bgpevpn *vpn,
253 struct ecommunity_val *eval)
254 {
255 struct irt_node *irt;
256 struct ecommunity_val eval_tmp;
257
258 /* If using "automatic" RT, we only care about the local-admin
259 * sub-field.
260 * This is to facilitate using VNI as the RT for EBGP peering too.
261 */
262 memcpy(&eval_tmp, eval, ECOMMUNITY_SIZE);
263 if (!is_import_rt_configured(vpn))
264 mask_ecom_global_admin(&eval_tmp, eval);
265
266 irt = lookup_import_rt(bgp, &eval_tmp);
267 if (irt && irt->vnis)
268 if (is_vni_present_in_irt_vnis(irt->vnis, vpn))
269 /* Already mapped. */
270 return;
271
272 if (!irt) {
273 irt = import_rt_new(bgp, &eval_tmp);
274 assert(irt);
275 }
276
277 /* Add VNI to the hash list for this RT. */
278 listnode_add(irt->vnis, vpn);
279 }
280
281 /*
282 * Unmap specified VNI from specified RT. If there are no other
283 * VNIs for this RT, then the RT hash is deleted.
284 */
285 static void unmap_vni_from_rt(struct bgp *bgp, struct bgpevpn *vpn,
286 struct irt_node *irt)
287 {
288 /* Delete VNI from hash list for this RT. */
289 listnode_delete(irt->vnis, vpn);
290 if (!listnode_head(irt->vnis)) {
291 list_delete_and_null(&irt->vnis);
292 import_rt_free(bgp, irt);
293 }
294 }
295
296 /*
297 * Create RT extended community automatically from passed information:
298 * of the form AS:VNI.
299 * NOTE: We use only the lower 16 bits of the AS. This is sufficient as
300 * the need is to get a RT value that will be unique across different
301 * VNIs but the same across routers (in the same AS) for a particular
302 * VNI.
303 */
304 static void form_auto_rt(struct bgp *bgp, struct bgpevpn *vpn, struct list *rtl)
305 {
306 struct ecommunity_val eval;
307 struct ecommunity *ecomadd;
308
309 encode_route_target_as((bgp->as & 0xFFFF), vpn->vni, &eval);
310
311 ecomadd = ecommunity_new();
312 ecommunity_add_val(ecomadd, &eval);
313 listnode_add_sort(rtl, ecomadd);
314 }
315
/*
 * Automatically derive the RD, the import RT and the export RT of a VNI,
 * in that order. Invoked at the time of creation of a VNI.
 */
static void derive_rd_rt_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
{
	/* Route distinguisher. */
	bgp_evpn_derive_auto_rd(bgp, vpn);

	/* Route targets, import side then export side. */
	bgp_evpn_derive_auto_rt_import(bgp, vpn);
	bgp_evpn_derive_auto_rt_export(bgp, vpn);
}
326
327 /*
328 * Add (update) or delete MACIP from zebra.
329 */
330 static int bgp_zebra_send_remote_macip(struct bgp *bgp, struct bgpevpn *vpn,
331 struct prefix_evpn *p,
332 struct in_addr remote_vtep_ip, int add,
333 u_char sticky)
334 {
335 struct stream *s;
336 int ipa_len;
337 char buf1[ETHER_ADDR_STRLEN];
338 char buf2[INET6_ADDRSTRLEN];
339 char buf3[INET6_ADDRSTRLEN];
340
341 /* Check socket. */
342 if (!zclient || zclient->sock < 0)
343 return 0;
344
345 /* Don't try to register if Zebra doesn't know of this instance. */
346 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp))
347 return 0;
348
349 s = zclient->obuf;
350 stream_reset(s);
351
352 zclient_create_header(s, add ? ZEBRA_REMOTE_MACIP_ADD
353 : ZEBRA_REMOTE_MACIP_DEL,
354 bgp->vrf_id);
355 stream_putl(s, vpn->vni);
356 stream_put(s, &p->prefix.mac.octet, ETH_ALEN); /* Mac Addr */
357 /* IP address length and IP address, if any. */
358 if (IS_EVPN_PREFIX_IPADDR_NONE(p))
359 stream_putl(s, 0);
360 else {
361 ipa_len = IS_EVPN_PREFIX_IPADDR_V4(p) ? IPV4_MAX_BYTELEN
362 : IPV6_MAX_BYTELEN;
363 stream_putl(s, ipa_len);
364 stream_put(s, &p->prefix.ip.ip.addr, ipa_len);
365 }
366 stream_put_in_addr(s, &remote_vtep_ip);
367
368 /* TX MAC sticky status */
369 if (add)
370 stream_putc(s, sticky);
371
372 stream_putw_at(s, 0, stream_get_endp(s));
373
374 if (bgp_debug_zebra(NULL))
375 zlog_debug("Tx %s MACIP, VNI %u %sMAC %s IP %s remote VTEP %s",
376 add ? "ADD" : "DEL", vpn->vni,
377 sticky ? "sticky " : "",
378 prefix_mac2str(&p->prefix.mac, buf1, sizeof(buf1)),
379 ipaddr2str(&p->prefix.ip, buf3, sizeof(buf3)),
380 inet_ntop(AF_INET, &remote_vtep_ip, buf2,
381 sizeof(buf2)));
382
383 return zclient_send_message(zclient);
384 }
385
386 /*
387 * Add (update) or delete remote VTEP from zebra.
388 */
389 static int bgp_zebra_send_remote_vtep(struct bgp *bgp, struct bgpevpn *vpn,
390 struct prefix_evpn *p, int add)
391 {
392 struct stream *s;
393
394 /* Check socket. */
395 if (!zclient || zclient->sock < 0)
396 return 0;
397
398 /* Don't try to register if Zebra doesn't know of this instance. */
399 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bgp))
400 return 0;
401
402 s = zclient->obuf;
403 stream_reset(s);
404
405 zclient_create_header(s, add ? ZEBRA_REMOTE_VTEP_ADD
406 : ZEBRA_REMOTE_VTEP_DEL,
407 bgp->vrf_id);
408 stream_putl(s, vpn->vni);
409 if (IS_EVPN_PREFIX_IPADDR_V4(p))
410 stream_put_in_addr(s, &p->prefix.ip.ipaddr_v4);
411 else if (IS_EVPN_PREFIX_IPADDR_V6(p)) {
412 zlog_err(
413 "Bad remote IP when trying to %s remote VTEP for VNI %u",
414 add ? "ADD" : "DEL", vpn->vni);
415 return -1;
416 }
417
418 stream_putw_at(s, 0, stream_get_endp(s));
419
420 if (bgp_debug_zebra(NULL))
421 zlog_debug("Tx %s Remote VTEP, VNI %u remote VTEP %s",
422 add ? "ADD" : "DEL", vpn->vni,
423 inet_ntoa(p->prefix.ip.ipaddr_v4));
424
425 return zclient_send_message(zclient);
426 }
427
428 /*
429 * Build extended communities for EVPN route. RT and ENCAP are
430 * applicable to all routes.
431 */
432 static void build_evpn_route_extcomm(struct bgpevpn *vpn, struct attr *attr)
433 {
434 struct ecommunity ecom_encap;
435 struct ecommunity ecom_sticky;
436 struct ecommunity_val eval;
437 struct ecommunity_val eval_sticky;
438 bgp_encap_types tnl_type;
439 struct listnode *node, *nnode;
440 struct ecommunity *ecom;
441 u_int32_t seqnum;
442
443 /* Encap */
444 tnl_type = BGP_ENCAP_TYPE_VXLAN;
445 memset(&ecom_encap, 0, sizeof(ecom_encap));
446 encode_encap_extcomm(tnl_type, &eval);
447 ecom_encap.size = 1;
448 ecom_encap.val = (u_int8_t *)eval.val;
449
450 /* Add Encap */
451 attr->ecommunity = ecommunity_dup(&ecom_encap);
452
453 /* Add the export RTs */
454 for (ALL_LIST_ELEMENTS(vpn->export_rtl, node, nnode, ecom))
455 attr->ecommunity = ecommunity_merge(attr->ecommunity, ecom);
456
457 if (attr->sticky) {
458 seqnum = 0;
459 memset(&ecom_sticky, 0, sizeof(ecom_sticky));
460 encode_mac_mobility_extcomm(1, seqnum, &eval_sticky);
461 ecom_sticky.size = 1;
462 ecom_sticky.val = (u_int8_t *)eval_sticky.val;
463 attr->ecommunity =
464 ecommunity_merge(attr->ecommunity, &ecom_sticky);
465 }
466
467 attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES);
468 }
469
470 /*
471 * Add MAC mobility extended community to attribute.
472 */
473 static void add_mac_mobility_to_attr(u_int32_t seq_num, struct attr *attr)
474 {
475 struct ecommunity ecom_tmp;
476 struct ecommunity_val eval;
477 u_int8_t *ecom_val_ptr;
478 int i;
479 u_int8_t *pnt;
480 int type = 0;
481 int sub_type = 0;
482
483 /* Build MM */
484 encode_mac_mobility_extcomm(0, seq_num, &eval);
485
486 /* Find current MM ecommunity */
487 ecom_val_ptr = NULL;
488
489 if (attr->ecommunity) {
490 for (i = 0; i < attr->ecommunity->size; i++) {
491 pnt = attr->ecommunity->val + (i * 8);
492 type = *pnt++;
493 sub_type = *pnt++;
494
495 if (type == ECOMMUNITY_ENCODE_EVPN
496 && sub_type
497 == ECOMMUNITY_EVPN_SUBTYPE_MACMOBILITY) {
498 ecom_val_ptr =
499 (u_int8_t *)(attr->ecommunity->val
500 + (i * 8));
501 break;
502 }
503 }
504 }
505
506 /* Update the existing MM ecommunity */
507 if (ecom_val_ptr) {
508 memcpy(ecom_val_ptr, eval.val, sizeof(char) * ECOMMUNITY_SIZE);
509 }
510 /* Add MM to existing */
511 else {
512 memset(&ecom_tmp, 0, sizeof(ecom_tmp));
513 ecom_tmp.size = 1;
514 ecom_tmp.val = (u_int8_t *)eval.val;
515
516 attr->ecommunity =
517 ecommunity_merge(attr->ecommunity, &ecom_tmp);
518 }
519 }
520
521 /* Install EVPN route into zebra. */
522 static int evpn_zebra_install(struct bgp *bgp, struct bgpevpn *vpn,
523 struct prefix_evpn *p,
524 struct in_addr remote_vtep_ip, u_char sticky)
525 {
526 int ret;
527
528 if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)
529 ret = bgp_zebra_send_remote_macip(bgp, vpn, p, remote_vtep_ip,
530 1, sticky);
531 else
532 ret = bgp_zebra_send_remote_vtep(bgp, vpn, p, 1);
533
534 return ret;
535 }
536
537 /* Uninstall EVPN route from zebra. */
538 static int evpn_zebra_uninstall(struct bgp *bgp, struct bgpevpn *vpn,
539 struct prefix_evpn *p,
540 struct in_addr remote_vtep_ip)
541 {
542 int ret;
543
544 if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE)
545 ret = bgp_zebra_send_remote_macip(bgp, vpn, p, remote_vtep_ip,
546 0, 0);
547 else
548 ret = bgp_zebra_send_remote_vtep(bgp, vpn, p, 0);
549
550 return ret;
551 }
552
553 /*
554 * Due to MAC mobility, the prior "local" best route has been supplanted
555 * by a "remote" best route. The prior route has to be deleted and withdrawn
556 * from peers.
557 */
558 static void evpn_delete_old_local_route(struct bgp *bgp, struct bgpevpn *vpn,
559 struct bgp_node *rn,
560 struct bgp_info *old_local)
561 {
562 struct bgp_node *global_rn;
563 struct bgp_info *ri;
564 afi_t afi = AFI_L2VPN;
565 safi_t safi = SAFI_EVPN;
566
567 /* Locate route node in the global EVPN routing table. Note that
568 * this table is a 2-level tree (RD-level + Prefix-level) similar to
569 * L3VPN routes.
570 */
571 global_rn = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi,
572 (struct prefix *)&rn->p, &vpn->prd);
573 if (global_rn) {
574 /* Delete route entry in the global EVPN table. */
575 delete_evpn_route_entry(bgp, vpn, afi, safi, global_rn, &ri);
576
577 /* Schedule for processing - withdraws to peers happen from
578 * this table.
579 */
580 if (ri)
581 bgp_process(bgp, global_rn, afi, safi);
582 bgp_unlock_node(global_rn);
583 }
584
585 /* Delete route entry in the VNI route table, caller to remove. */
586 bgp_info_delete(rn, old_local);
587 }
588
589 /*
590 * Calculate the best path for an EVPN route. Install/update best path in zebra,
591 * if appropriate.
592 */
593 static int evpn_route_select_install(struct bgp *bgp, struct bgpevpn *vpn,
594 struct bgp_node *rn)
595 {
596 struct bgp_info *old_select, *new_select;
597 struct bgp_info_pair old_and_new;
598 afi_t afi = AFI_L2VPN;
599 safi_t safi = SAFI_EVPN;
600 int ret = 0;
601
602 /* Compute the best path. */
603 bgp_best_selection(bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new,
604 afi, safi);
605 old_select = old_and_new.old;
606 new_select = old_and_new.new;
607
608 /* If the best path hasn't changed - see if there is still something to
609 * update
610 * to zebra RIB.
611 */
612 if (old_select && old_select == new_select
613 && old_select->type == ZEBRA_ROUTE_BGP
614 && old_select->sub_type == BGP_ROUTE_NORMAL
615 && !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR)
616 && !CHECK_FLAG(old_select->flags, BGP_INFO_ATTR_CHANGED)
617 && !bgp->addpath_tx_used[afi][safi]) {
618 if (bgp_zebra_has_route_changed(rn, old_select))
619 ret = evpn_zebra_install(bgp, vpn,
620 (struct prefix_evpn *)&rn->p,
621 old_select->attr->nexthop,
622 old_select->attr->sticky);
623 UNSET_FLAG(old_select->flags, BGP_INFO_MULTIPATH_CHG);
624 bgp_zebra_clear_route_change_flags(rn);
625 return ret;
626 }
627
628 /* If the user did a "clear" this flag will be set */
629 UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR);
630
631 /* bestpath has changed; update relevant fields and install or uninstall
632 * into the zebra RIB.
633 */
634 if (old_select || new_select)
635 bgp_bump_version(rn);
636
637 if (old_select)
638 bgp_info_unset_flag(rn, old_select, BGP_INFO_SELECTED);
639 if (new_select) {
640 bgp_info_set_flag(rn, new_select, BGP_INFO_SELECTED);
641 bgp_info_unset_flag(rn, new_select, BGP_INFO_ATTR_CHANGED);
642 UNSET_FLAG(new_select->flags, BGP_INFO_MULTIPATH_CHG);
643 }
644
645 if (new_select && new_select->type == ZEBRA_ROUTE_BGP
646 && new_select->sub_type == BGP_ROUTE_NORMAL) {
647 ret = evpn_zebra_install(bgp, vpn, (struct prefix_evpn *)&rn->p,
648 new_select->attr->nexthop,
649 new_select->attr->sticky);
650 /* If an old best existed and it was a "local" route, the only
651 * reason
652 * it would be supplanted is due to MAC mobility procedures. So,
653 * we
654 * need to do an implicit delete and withdraw that route from
655 * peers.
656 */
657 if (old_select && old_select->peer == bgp->peer_self
658 && old_select->type == ZEBRA_ROUTE_BGP
659 && old_select->sub_type == BGP_ROUTE_STATIC)
660 evpn_delete_old_local_route(bgp, vpn, rn, old_select);
661 } else {
662 if (old_select && old_select->type == ZEBRA_ROUTE_BGP
663 && old_select->sub_type == BGP_ROUTE_NORMAL)
664 ret = evpn_zebra_uninstall(bgp, vpn,
665 (struct prefix_evpn *)&rn->p,
666 old_select->attr->nexthop);
667 }
668
669 /* Clear any route change flags. */
670 bgp_zebra_clear_route_change_flags(rn);
671
672 /* Reap old select bgp_info, if it has been removed */
673 if (old_select && CHECK_FLAG(old_select->flags, BGP_INFO_REMOVED))
674 bgp_info_reap(rn, old_select);
675
676 return ret;
677 }
678
679
680 /*
681 * Return true if the local ri for this rn has sticky set
682 */
683 static int evpn_route_is_sticky(struct bgp *bgp, struct bgp_node *rn)
684 {
685 struct bgp_info *tmp_ri;
686 struct bgp_info *local_ri;
687
688 local_ri = NULL;
689 for (tmp_ri = rn->info; tmp_ri; tmp_ri = tmp_ri->next) {
690 if (tmp_ri->peer == bgp->peer_self
691 && tmp_ri->type == ZEBRA_ROUTE_BGP
692 && tmp_ri->sub_type == BGP_ROUTE_STATIC)
693 local_ri = tmp_ri;
694 }
695
696 if (!local_ri)
697 return 0;
698
699 return local_ri->attr->sticky;
700 }
701
702 /*
703 * Create or update EVPN route entry. This could be in the VNI route table
704 * or the global route table.
705 */
706 static int update_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
707 afi_t afi, safi_t safi, struct bgp_node *rn,
708 struct attr *attr, int add, int vni_table,
709 struct bgp_info **ri, u_char flags)
710 {
711 struct bgp_info *tmp_ri;
712 struct bgp_info *local_ri, *remote_ri;
713 struct attr *attr_new;
714 mpls_label_t label = MPLS_INVALID_LABEL;
715 int route_change = 1;
716 u_char sticky = 0;
717
718 *ri = NULL;
719
720 /* See if this is an update of an existing route, or a new add. Also,
721 * identify if already known from remote, and if so, the one with the
722 * highest sequence number; this is only when adding to the VNI routing
723 * table.
724 */
725 local_ri = remote_ri = NULL;
726 for (tmp_ri = rn->info; tmp_ri; tmp_ri = tmp_ri->next) {
727 if (tmp_ri->peer == bgp->peer_self
728 && tmp_ri->type == ZEBRA_ROUTE_BGP
729 && tmp_ri->sub_type == BGP_ROUTE_STATIC)
730 local_ri = tmp_ri;
731 if (vni_table) {
732 if (tmp_ri->type == ZEBRA_ROUTE_BGP
733 && tmp_ri->sub_type == BGP_ROUTE_NORMAL
734 && CHECK_FLAG(tmp_ri->flags, BGP_INFO_VALID)) {
735 if (!remote_ri)
736 remote_ri = tmp_ri;
737 else if (mac_mobility_seqnum(tmp_ri->attr)
738 > mac_mobility_seqnum(remote_ri->attr))
739 remote_ri = tmp_ri;
740 }
741 }
742 }
743
744 /* If route doesn't exist already, create a new one, if told to.
745 * Otherwise act based on whether the attributes of the route have
746 * changed or not.
747 */
748 if (!local_ri && !add)
749 return 0;
750
751 if (!local_ri) {
752 /* When learnt locally for the first time but already known from
753 * remote, we have to initiate appropriate MAC mobility steps.
754 * This
755 * is applicable when updating the VNI routing table.
756 * We need to skip mobility steps for g/w macs (local mac on g/w
757 * SVI) advertised in EVPN.
758 * This will ensure that local routes are preferred for g/w macs
759 */
760 if (remote_ri && !CHECK_FLAG(flags, ZEBRA_MAC_TYPE_GW)) {
761 u_int32_t cur_seqnum;
762
763 /* Add MM extended community to route. */
764 cur_seqnum = mac_mobility_seqnum(remote_ri->attr);
765 add_mac_mobility_to_attr(cur_seqnum + 1, attr);
766 }
767
768 /* Add (or update) attribute to hash. */
769 attr_new = bgp_attr_intern(attr);
770
771 /* Extract MAC mobility sequence number, if any. */
772 attr_new->mm_seqnum =
773 bgp_attr_mac_mobility_seqnum(attr_new, &sticky);
774 attr_new->sticky = sticky;
775
776 /* Create new route with its attribute. */
777 tmp_ri = info_make(ZEBRA_ROUTE_BGP, BGP_ROUTE_STATIC, 0,
778 bgp->peer_self, attr_new, rn);
779 SET_FLAG(tmp_ri->flags, BGP_INFO_VALID);
780 bgp_info_extra_get(tmp_ri);
781
782 /* The VNI goes into the 'label' field of the route */
783 vni2label(vpn->vni, &label);
784
785 memcpy(&tmp_ri->extra->label, &label, BGP_LABEL_BYTES);
786 bgp_info_add(rn, tmp_ri);
787 } else {
788 tmp_ri = local_ri;
789 if (attrhash_cmp(tmp_ri->attr, attr)
790 && !CHECK_FLAG(tmp_ri->flags, BGP_INFO_REMOVED))
791 route_change = 0;
792 else {
793 /* The attribute has changed. */
794 /* Add (or update) attribute to hash. */
795 attr_new = bgp_attr_intern(attr);
796 bgp_info_set_flag(rn, tmp_ri, BGP_INFO_ATTR_CHANGED);
797
798 /* Restore route, if needed. */
799 if (CHECK_FLAG(tmp_ri->flags, BGP_INFO_REMOVED))
800 bgp_info_restore(rn, tmp_ri);
801
802 /* Unintern existing, set to new. */
803 bgp_attr_unintern(&tmp_ri->attr);
804 tmp_ri->attr = attr_new;
805 tmp_ri->uptime = bgp_clock();
806 }
807 }
808
809 /* Return back the route entry. */
810 *ri = tmp_ri;
811 return route_change;
812 }
813
814 /*
815 * Create or update EVPN route (of type based on prefix) for specified VNI
816 * and schedule for processing.
817 */
818 static int update_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
819 struct prefix_evpn *p, u_char flags)
820 {
821 struct bgp_node *rn;
822 struct attr attr;
823 struct attr *attr_new;
824 struct bgp_info *ri;
825 afi_t afi = AFI_L2VPN;
826 safi_t safi = SAFI_EVPN;
827 int route_change;
828
829 memset(&attr, 0, sizeof(struct attr));
830
831 /* Build path-attribute for this route. */
832 bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
833 attr.nexthop = vpn->originator_ip;
834 attr.mp_nexthop_global_in = vpn->originator_ip;
835 attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
836 attr.sticky = CHECK_FLAG(flags, ZEBRA_MAC_TYPE_STICKY) ? 1 : 0;
837
838 /* Set up RT and ENCAP extended community. */
839 build_evpn_route_extcomm(vpn, &attr);
840
841 /* First, create (or fetch) route node within the VNI. */
842 /* NOTE: There is no RD here. */
843 rn = bgp_node_get(vpn->route_table, (struct prefix *)p);
844
845 /* Create or update route entry. */
846 route_change = update_evpn_route_entry(bgp, vpn, afi, safi, rn, &attr,
847 1, 1, &ri, flags);
848 assert(ri);
849 attr_new = ri->attr;
850
851 /* Perform route selection; this is just to set the flags correctly
852 * as local route in the VNI always wins.
853 */
854 evpn_route_select_install(bgp, vpn, rn);
855 bgp_unlock_node(rn);
856
857 /* If this is a new route or some attribute has changed, export the
858 * route to the global table. The route will be advertised to peers
859 * from there. Note that this table is a 2-level tree (RD-level +
860 * Prefix-level) similar to L3VPN routes.
861 */
862 if (route_change) {
863 struct bgp_info *global_ri;
864
865 rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
866 (struct prefix *)p, &vpn->prd);
867 update_evpn_route_entry(bgp, vpn, afi, safi, rn, attr_new, 1, 0,
868 &global_ri, flags);
869
870 /* Schedule for processing and unlock node. */
871 bgp_process(bgp, rn, afi, safi);
872 bgp_unlock_node(rn);
873 }
874
875 /* Unintern temporary. */
876 aspath_unintern(&attr.aspath);
877
878 return 0;
879 }
880
881 /*
882 * Delete EVPN route entry. This could be in the VNI route table
883 * or the global route table.
884 */
885 static void delete_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
886 afi_t afi, safi_t safi, struct bgp_node *rn,
887 struct bgp_info **ri)
888 {
889 struct bgp_info *tmp_ri;
890
891 *ri = NULL;
892
893 /* Now, find matching route. */
894 for (tmp_ri = rn->info; tmp_ri; tmp_ri = tmp_ri->next)
895 if (tmp_ri->peer == bgp->peer_self
896 && tmp_ri->type == ZEBRA_ROUTE_BGP
897 && tmp_ri->sub_type == BGP_ROUTE_STATIC)
898 break;
899
900 *ri = tmp_ri;
901
902 /* Mark route for delete. */
903 if (tmp_ri)
904 bgp_info_delete(rn, tmp_ri);
905 }
906
907 /*
908 * Delete EVPN route (of type based on prefix) for specified VNI and
909 * schedule for processing.
910 */
911 static int delete_evpn_route(struct bgp *bgp, struct bgpevpn *vpn,
912 struct prefix_evpn *p)
913 {
914 struct bgp_node *rn, *global_rn;
915 struct bgp_info *ri;
916 afi_t afi = AFI_L2VPN;
917 safi_t safi = SAFI_EVPN;
918
919 /* First, locate the route node within the VNI. If it doesn't exist,
920 * there
921 * is nothing further to do.
922 */
923 /* NOTE: There is no RD here. */
924 rn = bgp_node_lookup(vpn->route_table, (struct prefix *)p);
925 if (!rn)
926 return 0;
927
928 /* Next, locate route node in the global EVPN routing table. Note that
929 * this table is a 2-level tree (RD-level + Prefix-level) similar to
930 * L3VPN routes.
931 */
932 global_rn = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi,
933 (struct prefix *)p, &vpn->prd);
934 if (global_rn) {
935 /* Delete route entry in the global EVPN table. */
936 delete_evpn_route_entry(bgp, vpn, afi, safi, global_rn, &ri);
937
938 /* Schedule for processing - withdraws to peers happen from
939 * this table.
940 */
941 if (ri)
942 bgp_process(bgp, global_rn, afi, safi);
943 bgp_unlock_node(global_rn);
944 }
945
946 /* Delete route entry in the VNI route table. This can just be removed.
947 */
948 delete_evpn_route_entry(bgp, vpn, afi, safi, rn, &ri);
949 if (ri)
950 bgp_info_reap(rn, ri);
951 bgp_unlock_node(rn);
952
953 return 0;
954 }
955
956 /*
957 * Update all type-2 (MACIP) local routes for this VNI - these should also
958 * be scheduled for advertise to peers.
959 */
960 static int update_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn)
961 {
962 afi_t afi;
963 safi_t safi;
964 struct bgp_node *rn;
965 struct bgp_info *ri;
966 struct attr attr;
967 struct attr attr_sticky;
968 struct attr *attr_new;
969
970 afi = AFI_L2VPN;
971 safi = SAFI_EVPN;
972 memset(&attr, 0, sizeof(struct attr));
973 memset(&attr_sticky, 0, sizeof(struct attr));
974
975 /* Build path-attribute - all type-2 routes for this VNI will share the
976 * same path attribute.
977 */
978 bgp_attr_default_set(&attr, BGP_ORIGIN_IGP);
979 bgp_attr_default_set(&attr_sticky, BGP_ORIGIN_IGP);
980 attr.nexthop = vpn->originator_ip;
981 attr.mp_nexthop_global_in = vpn->originator_ip;
982 attr.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
983 attr_sticky.nexthop = vpn->originator_ip;
984 attr_sticky.mp_nexthop_global_in = vpn->originator_ip;
985 attr_sticky.mp_nexthop_len = BGP_ATTR_NHLEN_IPV4;
986 attr_sticky.sticky = 1;
987
988 /* Set up RT, ENCAP and sticky MAC extended community. */
989 build_evpn_route_extcomm(vpn, &attr);
990 build_evpn_route_extcomm(vpn, &attr_sticky);
991
992 /* Walk this VNI's route table and update local type-2 routes. For any
993 * routes updated, update corresponding entry in the global table too.
994 */
995 for (rn = bgp_table_top(vpn->route_table); rn;
996 rn = bgp_route_next(rn)) {
997 struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
998 struct bgp_node *rd_rn;
999 struct bgp_info *global_ri;
1000
1001 if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
1002 continue;
1003
1004 if (evpn_route_is_sticky(bgp, rn))
1005 update_evpn_route_entry(bgp, vpn, afi, safi, rn,
1006 &attr_sticky, 0, 1, &ri, 0);
1007 else
1008 update_evpn_route_entry(bgp, vpn, afi, safi, rn, &attr,
1009 0, 1, &ri, 0);
1010
1011 /* If a local route exists for this prefix, we need to update
1012 * the global routing table too.
1013 */
1014 if (!ri)
1015 continue;
1016
1017 /* Perform route selection; this is just to set the flags
1018 * correctly
1019 * as local route in the VNI always wins.
1020 */
1021 evpn_route_select_install(bgp, vpn, rn);
1022
1023 attr_new = ri->attr;
1024
1025 /* Update route in global routing table. */
1026 rd_rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
1027 (struct prefix *)evp, &vpn->prd);
1028 assert(rd_rn);
1029 update_evpn_route_entry(bgp, vpn, afi, safi, rd_rn, attr_new, 0,
1030 0, &global_ri, 0);
1031
1032 /* Schedule for processing and unlock node. */
1033 bgp_process(bgp, rd_rn, afi, safi);
1034 bgp_unlock_node(rd_rn);
1035 }
1036
1037 /* Unintern temporary. */
1038 aspath_unintern(&attr.aspath);
1039 aspath_unintern(&attr_sticky.aspath);
1040
1041 return 0;
1042 }
1043
1044 /*
1045 * Delete all type-2 (MACIP) local routes for this VNI - only from the
1046 * global routing table. These are also scheduled for withdraw from peers.
1047 */
1048 static int delete_global_type2_routes(struct bgp *bgp, struct bgpevpn *vpn)
1049 {
1050 afi_t afi;
1051 safi_t safi;
1052 struct bgp_node *rdrn, *rn;
1053 struct bgp_table *table;
1054 struct bgp_info *ri;
1055
1056 afi = AFI_L2VPN;
1057 safi = SAFI_EVPN;
1058
1059 rdrn = bgp_node_lookup(bgp->rib[afi][safi], (struct prefix *)&vpn->prd);
1060 if (rdrn && rdrn->info) {
1061 table = (struct bgp_table *)rdrn->info;
1062 for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
1063 struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
1064
1065 if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
1066 continue;
1067
1068 delete_evpn_route_entry(bgp, vpn, afi, safi, rn, &ri);
1069 if (ri)
1070 bgp_process(bgp, rn, afi, safi);
1071 }
1072 }
1073
1074 /* Unlock RD node. */
1075 if (rdrn)
1076 bgp_unlock_node(rdrn);
1077
1078 return 0;
1079 }
1080
1081 /*
1082 * Delete all type-2 (MACIP) local routes for this VNI - from the global
1083 * table as well as the per-VNI route table.
1084 */
1085 static int delete_all_type2_routes(struct bgp *bgp, struct bgpevpn *vpn)
1086 {
1087 afi_t afi;
1088 safi_t safi;
1089 struct bgp_node *rn;
1090 struct bgp_info *ri;
1091
1092 afi = AFI_L2VPN;
1093 safi = SAFI_EVPN;
1094
1095 /* First, walk the global route table for this VNI's type-2 local
1096 * routes.
1097 * EVPN routes are a 2-level table, first get the RD table.
1098 */
1099 delete_global_type2_routes(bgp, vpn);
1100
1101 /* Next, walk this VNI's route table and delete local type-2 routes. */
1102 for (rn = bgp_table_top(vpn->route_table); rn;
1103 rn = bgp_route_next(rn)) {
1104 struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
1105
1106 if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
1107 continue;
1108
1109 delete_evpn_route_entry(bgp, vpn, afi, safi, rn, &ri);
1110
1111 /* Route entry in local table gets deleted immediately. */
1112 if (ri)
1113 bgp_info_reap(rn, ri);
1114 }
1115
1116 return 0;
1117 }
1118
1119 /*
1120 * Delete all routes in the per-VNI route table.
1121 */
1122 static int delete_all_vni_routes(struct bgp *bgp, struct bgpevpn *vpn)
1123 {
1124 struct bgp_node *rn;
1125 struct bgp_info *ri, *nextri;
1126
1127 /* Walk this VNI's route table and delete all routes. */
1128 for (rn = bgp_table_top(vpn->route_table); rn;
1129 rn = bgp_route_next(rn)) {
1130 for (ri = rn->info; (ri != NULL) && (nextri = ri->next, 1);
1131 ri = nextri) {
1132 bgp_info_delete(rn, ri);
1133 bgp_info_reap(rn, ri);
1134 }
1135 }
1136
1137 return 0;
1138 }
1139
1140 /*
1141 * Update (and advertise) local routes for a VNI. Invoked upon the VNI
1142 * export RT getting modified or change to tunnel IP. Note that these
1143 * situations need the route in the per-VNI table as well as the global
1144 * table to be updated (as attributes change).
1145 */
1146 static int update_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
1147 {
1148 int ret;
1149 struct prefix_evpn p;
1150
1151 /* Update and advertise the type-3 route (only one) followed by the
1152 * locally learnt type-2 routes (MACIP) - for this VNI.
1153 */
1154 build_evpn_type3_prefix(&p, vpn->originator_ip);
1155 ret = update_evpn_route(bgp, vpn, &p, 0);
1156 if (ret)
1157 return ret;
1158
1159 return update_all_type2_routes(bgp, vpn);
1160 }
1161
1162 /*
1163 * Delete (and withdraw) local routes for specified VNI from the global
1164 * table and per-VNI table. After this, remove all other routes from
1165 * the per-VNI table. Invoked upon the VNI being deleted or EVPN
1166 * (advertise-all-vni) being disabled.
1167 */
1168 static int delete_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
1169 {
1170 int ret;
1171 struct prefix_evpn p;
1172
1173 /* Delete and withdraw locally learnt type-2 routes (MACIP)
1174 * followed by type-3 routes (only one) - for this VNI.
1175 */
1176 ret = delete_all_type2_routes(bgp, vpn);
1177 if (ret)
1178 return ret;
1179
1180 build_evpn_type3_prefix(&p, vpn->originator_ip);
1181 ret = delete_evpn_route(bgp, vpn, &p);
1182 if (ret)
1183 return ret;
1184
1185 /* Delete all routes from the per-VNI table. */
1186 return delete_all_vni_routes(bgp, vpn);
1187 }
1188
1189 /*
1190 * There is a tunnel endpoint IP address change for this VNI,
1191 * need to re-advertise routes with the new nexthop.
1192 */
1193 static int handle_tunnel_ip_change(struct bgp *bgp, struct bgpevpn *vpn,
1194 struct in_addr originator_ip)
1195 {
1196 struct prefix_evpn p;
1197
1198 /* If VNI is not live, we only need to update the originator ip */
1199 if (!is_vni_live(vpn)) {
1200 vpn->originator_ip = originator_ip;
1201 return 0;
1202 }
1203
1204 /* Update the tunnel-ip hash */
1205 bgp_tip_del(bgp, &vpn->originator_ip);
1206 bgp_tip_add(bgp, &originator_ip);
1207
1208 /* filter routes as martian nexthop db has changed */
1209 bgp_filter_evpn_routes_upon_martian_nh_change(bgp);
1210
1211 /* Need to withdraw type-3 route as the originator IP is part
1212 * of the key.
1213 */
1214 build_evpn_type3_prefix(&p, vpn->originator_ip);
1215 delete_evpn_route(bgp, vpn, &p);
1216
1217 /* Update the tunnel IP and re-advertise all routes for this VNI. */
1218 vpn->originator_ip = originator_ip;
1219 return update_routes_for_vni(bgp, vpn);
1220 }
1221
1222 /*
1223 * Install route entry into the VNI routing table and invoke route selection.
1224 */
1225 static int install_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
1226 struct prefix_evpn *p,
1227 struct bgp_info *parent_ri)
1228 {
1229 struct bgp_node *rn;
1230 struct bgp_info *ri;
1231 struct attr *attr_new;
1232 int ret;
1233
1234 /* Create (or fetch) route within the VNI. */
1235 /* NOTE: There is no RD here. */
1236 rn = bgp_node_get(vpn->route_table, (struct prefix *)p);
1237
1238 /* Check if route entry is already present. */
1239 for (ri = rn->info; ri; ri = ri->next)
1240 if (ri->extra
1241 && (struct bgp_info *)ri->extra->parent == parent_ri)
1242 break;
1243
1244 if (!ri) {
1245 /* Add (or update) attribute to hash. */
1246 attr_new = bgp_attr_intern(parent_ri->attr);
1247
1248 /* Create new route with its attribute. */
1249 ri = info_make(parent_ri->type, parent_ri->sub_type, 0,
1250 parent_ri->peer, attr_new, rn);
1251 SET_FLAG(ri->flags, BGP_INFO_VALID);
1252 bgp_info_extra_get(ri);
1253 ri->extra->parent = parent_ri;
1254 if (parent_ri->extra)
1255 memcpy(&ri->extra->label, &parent_ri->extra->label,
1256 BGP_LABEL_BYTES);
1257 bgp_info_add(rn, ri);
1258 } else {
1259 if (attrhash_cmp(ri->attr, parent_ri->attr)
1260 && !CHECK_FLAG(ri->flags, BGP_INFO_REMOVED)) {
1261 bgp_unlock_node(rn);
1262 return 0;
1263 }
1264 /* The attribute has changed. */
1265 /* Add (or update) attribute to hash. */
1266 attr_new = bgp_attr_intern(parent_ri->attr);
1267
1268 /* Restore route, if needed. */
1269 if (CHECK_FLAG(ri->flags, BGP_INFO_REMOVED))
1270 bgp_info_restore(rn, ri);
1271
1272 /* Mark if nexthop has changed. */
1273 if (!IPV4_ADDR_SAME(&ri->attr->nexthop, &attr_new->nexthop))
1274 SET_FLAG(ri->flags, BGP_INFO_IGP_CHANGED);
1275
1276 /* Unintern existing, set to new. */
1277 bgp_attr_unintern(&ri->attr);
1278 ri->attr = attr_new;
1279 ri->uptime = bgp_clock();
1280 }
1281
1282 /* Perform route selection and update zebra, if required. */
1283 ret = evpn_route_select_install(bgp, vpn, rn);
1284
1285 return ret;
1286 }
1287
1288 /*
1289 * Uninstall route entry from the VNI routing table and send message
1290 * to zebra, if appropriate.
1291 */
1292 static int uninstall_evpn_route_entry(struct bgp *bgp, struct bgpevpn *vpn,
1293 struct prefix_evpn *p,
1294 struct bgp_info *parent_ri)
1295 {
1296 struct bgp_node *rn;
1297 struct bgp_info *ri;
1298 int ret;
1299
1300 /* Locate route within the VNI. */
1301 /* NOTE: There is no RD here. */
1302 rn = bgp_node_lookup(vpn->route_table, (struct prefix *)p);
1303 if (!rn)
1304 return 0;
1305
1306 /* Find matching route entry. */
1307 for (ri = rn->info; ri; ri = ri->next)
1308 if (ri->extra
1309 && (struct bgp_info *)ri->extra->parent == parent_ri)
1310 break;
1311
1312 if (!ri)
1313 return 0;
1314
1315 /* Mark entry for deletion */
1316 bgp_info_delete(rn, ri);
1317
1318 /* Perform route selection and update zebra, if required. */
1319 ret = evpn_route_select_install(bgp, vpn, rn);
1320
1321 /* Unlock route node. */
1322 bgp_unlock_node(rn);
1323
1324 return ret;
1325 }
1326
1327 /*
1328 * Given a route entry and a VNI, see if this route entry should be
1329 * imported into the VNI i.e., RTs match.
1330 */
1331 static int is_route_matching_for_vni(struct bgp *bgp, struct bgpevpn *vpn,
1332 struct bgp_info *ri)
1333 {
1334 struct attr *attr = ri->attr;
1335 struct ecommunity *ecom;
1336 int i;
1337
1338 assert(attr);
1339 /* Route should have valid RT to be even considered. */
1340 if (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)))
1341 return 0;
1342
1343 ecom = attr->ecommunity;
1344 if (!ecom || !ecom->size)
1345 return 0;
1346
1347 /* For each extended community RT, see if it matches this VNI. If any RT
1348 * matches, we're done.
1349 */
1350 for (i = 0; i < ecom->size; i++) {
1351 u_char *pnt;
1352 u_char type, sub_type;
1353 struct ecommunity_val *eval;
1354 struct ecommunity_val eval_tmp;
1355 struct irt_node *irt;
1356
1357 /* Only deal with RTs */
1358 pnt = (ecom->val + (i * ECOMMUNITY_SIZE));
1359 eval = (struct ecommunity_val *)(ecom->val
1360 + (i * ECOMMUNITY_SIZE));
1361 type = *pnt++;
1362 sub_type = *pnt++;
1363 if (sub_type != ECOMMUNITY_ROUTE_TARGET)
1364 continue;
1365
1366 /* See if this RT matches specified VNIs import RTs */
1367 irt = lookup_import_rt(bgp, eval);
1368 if (irt && irt->vnis)
1369 if (is_vni_present_in_irt_vnis(irt->vnis, vpn))
1370 return 1;
1371
1372 /* Also check for non-exact match. In this, we mask out the AS
1373 * and
1374 * only check on the local-admin sub-field. This is to
1375 * facilitate using
1376 * VNI as the RT for EBGP peering too.
1377 */
1378 irt = NULL;
1379 if (type == ECOMMUNITY_ENCODE_AS
1380 || type == ECOMMUNITY_ENCODE_AS4
1381 || type == ECOMMUNITY_ENCODE_IP) {
1382 memcpy(&eval_tmp, eval, ECOMMUNITY_SIZE);
1383 mask_ecom_global_admin(&eval_tmp, eval);
1384 irt = lookup_import_rt(bgp, &eval_tmp);
1385 }
1386 if (irt && irt->vnis)
1387 if (is_vni_present_in_irt_vnis(irt->vnis, vpn))
1388 return 1;
1389 }
1390
1391 return 0;
1392 }
1393
1394 /*
1395 * Install or uninstall routes of specified type that are appropriate for this
1396 * particular VNI.
1397 */
1398 static int install_uninstall_routes_for_vni(struct bgp *bgp,
1399 struct bgpevpn *vpn,
1400 bgp_evpn_route_type rtype,
1401 int install)
1402 {
1403 afi_t afi;
1404 safi_t safi;
1405 struct bgp_node *rd_rn, *rn;
1406 struct bgp_table *table;
1407 struct bgp_info *ri;
1408 int ret;
1409
1410 afi = AFI_L2VPN;
1411 safi = SAFI_EVPN;
1412
1413 /* Walk entire global routing table and evaluate routes which could be
1414 * imported into this VPN. Note that we cannot just look at the routes
1415 * for
1416 * the VNI's RD - remote routes applicable for this VNI could have any
1417 * RD.
1418 */
1419 /* EVPN routes are a 2-level table. */
1420 for (rd_rn = bgp_table_top(bgp->rib[afi][safi]); rd_rn;
1421 rd_rn = bgp_route_next(rd_rn)) {
1422 table = (struct bgp_table *)(rd_rn->info);
1423 if (!table)
1424 continue;
1425
1426 for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
1427 struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
1428
1429 if (evp->prefix.route_type != rtype)
1430 continue;
1431
1432 for (ri = rn->info; ri; ri = ri->next) {
1433 /* Consider "valid" remote routes applicable for
1434 * this VNI. */
1435 if (!(CHECK_FLAG(ri->flags, BGP_INFO_VALID)
1436 && ri->type == ZEBRA_ROUTE_BGP
1437 && ri->sub_type == BGP_ROUTE_NORMAL))
1438 continue;
1439
1440 if (is_route_matching_for_vni(bgp, vpn, ri)) {
1441 if (install)
1442 ret = install_evpn_route_entry(
1443 bgp, vpn, evp, ri);
1444 else
1445 ret = uninstall_evpn_route_entry(
1446 bgp, vpn, evp, ri);
1447
1448 if (ret) {
1449 zlog_err(
1450 "%u: Failed to %s EVPN %s route in VNI %u",
1451 bgp->vrf_id,
1452 install ? "install"
1453 : "uninstall",
1454 rtype == BGP_EVPN_MAC_IP_ROUTE
1455 ? "MACIP"
1456 : "IMET",
1457 vpn->vni);
1458 return ret;
1459 }
1460 }
1461 }
1462 }
1463 }
1464
1465 return 0;
1466 }
1467
1468 /*
1469 * Install any existing remote routes applicable for this VNI into its
1470 * routing table. This is invoked when a VNI becomes "live" or its Import
1471 * RT is changed.
1472 */
1473 static int install_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
1474 {
1475 int ret;
1476
1477 /* Install type-3 routes followed by type-2 routes - the ones applicable
1478 * for this VNI.
1479 */
1480 ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE,
1481 1);
1482 if (ret)
1483 return ret;
1484
1485 return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_MAC_IP_ROUTE,
1486 1);
1487 }
1488
1489 /*
1490 * Uninstall any existing remote routes for this VNI. One scenario in which
1491 * this is invoked is upon an import RT change.
1492 */
1493 static int uninstall_routes_for_vni(struct bgp *bgp, struct bgpevpn *vpn)
1494 {
1495 int ret;
1496
1497 /* Uninstall type-2 routes followed by type-3 routes - the ones
1498 * applicable
1499 * for this VNI.
1500 */
1501 ret = install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_MAC_IP_ROUTE,
1502 0);
1503 if (ret)
1504 return ret;
1505
1506 return install_uninstall_routes_for_vni(bgp, vpn, BGP_EVPN_IMET_ROUTE,
1507 0);
1508 }
1509
1510 /*
1511 * Install or uninstall route in matching VNIs (list).
1512 */
1513 static int install_uninstall_route_in_vnis(struct bgp *bgp, afi_t afi,
1514 safi_t safi, struct prefix_evpn *evp,
1515 struct bgp_info *ri,
1516 struct list *vnis, int install)
1517 {
1518 struct bgpevpn *vpn;
1519 struct listnode *node, *nnode;
1520
1521 for (ALL_LIST_ELEMENTS(vnis, node, nnode, vpn)) {
1522 int ret;
1523
1524 if (!is_vni_live(vpn))
1525 continue;
1526
1527 if (install)
1528 ret = install_evpn_route_entry(bgp, vpn, evp, ri);
1529 else
1530 ret = uninstall_evpn_route_entry(bgp, vpn, evp, ri);
1531
1532 if (ret) {
1533 zlog_err("%u: Failed to %s EVPN %s route in VNI %u",
1534 bgp->vrf_id, install ? "install" : "uninstall",
1535 evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE
1536 ? "MACIP"
1537 : "IMET",
1538 vpn->vni);
1539 return ret;
1540 }
1541 }
1542
1543 return 0;
1544 }
1545
1546 /*
1547 * Install or uninstall route for appropriate VNIs.
1548 */
1549 static int install_uninstall_evpn_route(struct bgp *bgp, afi_t afi, safi_t safi,
1550 struct prefix *p, struct bgp_info *ri,
1551 int import)
1552 {
1553 struct prefix_evpn *evp = (struct prefix_evpn *)p;
1554 struct attr *attr = ri->attr;
1555 struct ecommunity *ecom;
1556 int i;
1557
1558 assert(attr);
1559
1560 /* Only type-2 and type-3 routes go into a L2 VNI. */
1561 if (!(evp->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE
1562 || evp->prefix.route_type == BGP_EVPN_IMET_ROUTE))
1563 return 0;
1564
1565 /* If we don't have Route Target, nothing much to do. */
1566 if (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES)))
1567 return 0;
1568
1569 ecom = attr->ecommunity;
1570 if (!ecom || !ecom->size)
1571 return -1;
1572
1573 /* For each extended community RT, see which VNIs match and import
1574 * the route into matching VNIs.
1575 */
1576 for (i = 0; i < ecom->size; i++) {
1577 u_char *pnt;
1578 u_char type, sub_type;
1579 struct ecommunity_val *eval;
1580 struct ecommunity_val eval_tmp;
1581 struct irt_node *irt;
1582
1583 /* Only deal with RTs */
1584 pnt = (ecom->val + (i * ECOMMUNITY_SIZE));
1585 eval = (struct ecommunity_val *)(ecom->val
1586 + (i * ECOMMUNITY_SIZE));
1587 type = *pnt++;
1588 sub_type = *pnt++;
1589 if (sub_type != ECOMMUNITY_ROUTE_TARGET)
1590 continue;
1591
1592 /* Are we interested in this RT? */
1593 irt = lookup_import_rt(bgp, eval);
1594 if (irt && irt->vnis)
1595 install_uninstall_route_in_vnis(bgp, afi, safi, evp, ri,
1596 irt->vnis, import);
1597
1598 /* Also check for non-exact match. In this, we mask out the AS
1599 * and
1600 * only check on the local-admin sub-field. This is to
1601 * facilitate using
1602 * VNI as the RT for EBGP peering too.
1603 */
1604 irt = NULL;
1605 if (type == ECOMMUNITY_ENCODE_AS
1606 || type == ECOMMUNITY_ENCODE_AS4
1607 || type == ECOMMUNITY_ENCODE_IP) {
1608 memcpy(&eval_tmp, eval, ECOMMUNITY_SIZE);
1609 mask_ecom_global_admin(&eval_tmp, eval);
1610 irt = lookup_import_rt(bgp, &eval_tmp);
1611 }
1612 if (irt && irt->vnis)
1613 install_uninstall_route_in_vnis(bgp, afi, safi, evp, ri,
1614 irt->vnis, import);
1615 }
1616
1617 return 0;
1618 }
1619
1620 /*
1621 * Update and advertise local routes for a VNI. Invoked upon router-id
1622 * change. Note that the processing is done only on the global route table
1623 * using routes that already exist in the per-VNI table.
1624 */
1625 static int update_advertise_vni_routes(struct bgp *bgp, struct bgpevpn *vpn)
1626 {
1627 struct prefix_evpn p;
1628 struct bgp_node *rn, *global_rn;
1629 struct bgp_info *ri, *global_ri;
1630 struct attr *attr;
1631 afi_t afi = AFI_L2VPN;
1632 safi_t safi = SAFI_EVPN;
1633
1634 /* Locate type-3 route for VNI in the per-VNI table and use its
1635 * attributes to create and advertise the type-3 route for this VNI
1636 * in the global table.
1637 */
1638 build_evpn_type3_prefix(&p, vpn->originator_ip);
1639 rn = bgp_node_lookup(vpn->route_table, (struct prefix *)&p);
1640 if (!rn) /* unexpected */
1641 return 0;
1642 for (ri = rn->info; ri; ri = ri->next)
1643 if (ri->peer == bgp->peer_self && ri->type == ZEBRA_ROUTE_BGP
1644 && ri->sub_type == BGP_ROUTE_STATIC)
1645 break;
1646 if (!ri) /* unexpected */
1647 return 0;
1648 attr = ri->attr;
1649
1650 global_rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
1651 (struct prefix *)&p, &vpn->prd);
1652 update_evpn_route_entry(bgp, vpn, afi, safi, global_rn, attr, 1, 0, &ri,
1653 0);
1654
1655 /* Schedule for processing and unlock node. */
1656 bgp_process(bgp, global_rn, afi, safi);
1657 bgp_unlock_node(global_rn);
1658
1659 /* Now, walk this VNI's route table and use the route and its attribute
1660 * to create and schedule route in global table.
1661 */
1662 for (rn = bgp_table_top(vpn->route_table); rn;
1663 rn = bgp_route_next(rn)) {
1664 struct prefix_evpn *evp = (struct prefix_evpn *)&rn->p;
1665
1666 /* Identify MAC-IP local routes. */
1667 if (evp->prefix.route_type != BGP_EVPN_MAC_IP_ROUTE)
1668 continue;
1669
1670 for (ri = rn->info; ri; ri = ri->next)
1671 if (ri->peer == bgp->peer_self
1672 && ri->type == ZEBRA_ROUTE_BGP
1673 && ri->sub_type == BGP_ROUTE_STATIC)
1674 break;
1675 if (!ri)
1676 continue;
1677
1678 /* Create route in global routing table using this route entry's
1679 * attribute.
1680 */
1681 attr = ri->attr;
1682 global_rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
1683 (struct prefix *)evp, &vpn->prd);
1684 assert(global_rn);
1685 update_evpn_route_entry(bgp, vpn, afi, safi, global_rn, attr, 1,
1686 0, &global_ri, 0);
1687
1688 /* Schedule for processing and unlock node. */
1689 bgp_process(bgp, global_rn, afi, safi);
1690 bgp_unlock_node(global_rn);
1691 }
1692
1693 return 0;
1694 }
1695
1696 /*
1697 * Delete (and withdraw) local routes for a VNI - only from the global
1698 * table. Invoked upon router-id change.
1699 */
1700 static int delete_withdraw_vni_routes(struct bgp *bgp, struct bgpevpn *vpn)
1701 {
1702 int ret;
1703 struct prefix_evpn p;
1704 struct bgp_node *global_rn;
1705 struct bgp_info *ri;
1706 afi_t afi = AFI_L2VPN;
1707 safi_t safi = SAFI_EVPN;
1708
1709 /* Delete and withdraw locally learnt type-2 routes (MACIP)
1710 * for this VNI - from the global table.
1711 */
1712 ret = delete_global_type2_routes(bgp, vpn);
1713 if (ret)
1714 return ret;
1715
1716 /* Remove type-3 route for this VNI from global table. */
1717 build_evpn_type3_prefix(&p, vpn->originator_ip);
1718 global_rn = bgp_afi_node_lookup(bgp->rib[afi][safi], afi, safi,
1719 (struct prefix *)&p, &vpn->prd);
1720 if (global_rn) {
1721 /* Delete route entry in the global EVPN table. */
1722 delete_evpn_route_entry(bgp, vpn, afi, safi, global_rn, &ri);
1723
1724 /* Schedule for processing - withdraws to peers happen from
1725 * this table.
1726 */
1727 if (ri)
1728 bgp_process(bgp, global_rn, afi, safi);
1729 bgp_unlock_node(global_rn);
1730 }
1731
1732 return 0;
1733 }
1734
1735 /*
1736 * Handle router-id change. Update and advertise local routes corresponding
1737 * to this VNI from peers. Note that this is invoked after updating the
1738 * router-id. The routes in the per-VNI table are used to create routes in
1739 * the global table and schedule them.
1740 */
1741 static void update_router_id_vni(struct hash_backet *backet, struct bgp *bgp)
1742 {
1743 struct bgpevpn *vpn;
1744
1745 vpn = (struct bgpevpn *)backet->data;
1746
1747 if (!vpn) {
1748 zlog_warn("%s: VNI hash entry for VNI not found", __FUNCTION__);
1749 return;
1750 }
1751
1752 /* Skip VNIs with configured RD. */
1753 if (is_rd_configured(vpn))
1754 return;
1755
1756 bgp_evpn_derive_auto_rd(bgp, vpn);
1757 update_advertise_vni_routes(bgp, vpn);
1758 }
1759
1760 /*
1761 * Handle router-id change. Delete and withdraw local routes corresponding
1762 * to this VNI from peers. Note that this is invoked prior to updating
1763 * the router-id and is done only on the global route table, the routes
1764 * are needed in the per-VNI table to re-advertise with new router id.
1765 */
1766 static void withdraw_router_id_vni(struct hash_backet *backet, struct bgp *bgp)
1767 {
1768 struct bgpevpn *vpn;
1769
1770 vpn = (struct bgpevpn *)backet->data;
1771
1772 if (!vpn) {
1773 zlog_warn("%s: VNI hash entry for VNI not found", __FUNCTION__);
1774 return;
1775 }
1776
1777 /* Skip VNIs with configured RD. */
1778 if (is_rd_configured(vpn))
1779 return;
1780
1781 delete_withdraw_vni_routes(bgp, vpn);
1782 }
1783
1784 /*
1785 * Process received EVPN type-2 route (advertise or withdraw).
1786 */
1787 static int process_type2_route(struct peer *peer, afi_t afi, safi_t safi,
1788 struct attr *attr, u_char *pfx, int psize,
1789 u_int32_t addpath_id)
1790 {
1791 struct prefix_rd prd;
1792 struct prefix_evpn p;
1793 u_char ipaddr_len;
1794 u_char macaddr_len;
1795 mpls_label_t *label_pnt;
1796 int ret;
1797
1798 /* Type-2 route should be either 33, 37 or 49 bytes or an
1799 * additional 3 bytes if there is a second label (VNI):
1800 * RD (8), ESI (10), Eth Tag (4), MAC Addr Len (1),
1801 * MAC Addr (6), IP len (1), IP (0, 4 or 16),
1802 * MPLS Lbl1 (3), MPLS Lbl2 (0 or 3)
1803 */
1804 if (psize != 33 && psize != 37 && psize != 49 && psize != 36
1805 && psize != 40 && psize != 52) {
1806 zlog_err("%u:%s - Rx EVPN Type-2 NLRI with invalid length %d",
1807 peer->bgp->vrf_id, peer->host, psize);
1808 return -1;
1809 }
1810
1811 /* Make prefix_rd */
1812 prd.family = AF_UNSPEC;
1813 prd.prefixlen = 64;
1814 memcpy(&prd.val, pfx, 8);
1815 pfx += 8;
1816
1817 /* Make EVPN prefix. */
1818 memset(&p, 0, sizeof(struct prefix_evpn));
1819 p.family = AF_EVPN;
1820 p.prefixlen = EVPN_TYPE_2_ROUTE_PREFIXLEN;
1821 p.prefix.route_type = BGP_EVPN_MAC_IP_ROUTE;
1822
1823 /* Skip over Ethernet Seg Identifier for now. */
1824 pfx += 10;
1825
1826 /* Skip over Ethernet Tag for now. */
1827 pfx += 4;
1828
1829 /* Get the MAC Addr len */
1830 macaddr_len = *pfx++;
1831
1832 /* Get the MAC Addr */
1833 if (macaddr_len == (ETH_ALEN * 8)) {
1834 memcpy(&p.prefix.mac.octet, pfx, ETH_ALEN);
1835 pfx += ETH_ALEN;
1836 } else {
1837 zlog_err(
1838 "%u:%s - Rx EVPN Type-2 NLRI with unsupported MAC address length %d",
1839 peer->bgp->vrf_id, peer->host, macaddr_len);
1840 return -1;
1841 }
1842
1843
1844 /* Get the IP. */
1845 ipaddr_len = *pfx++;
1846 if (ipaddr_len != 0 && ipaddr_len != IPV4_MAX_BITLEN
1847 && ipaddr_len != IPV6_MAX_BITLEN) {
1848 zlog_err(
1849 "%u:%s - Rx EVPN Type-2 NLRI with unsupported IP address length %d",
1850 peer->bgp->vrf_id, peer->host, ipaddr_len);
1851 return -1;
1852 }
1853
1854 if (ipaddr_len) {
1855 ipaddr_len /= 8; /* Convert to bytes. */
1856 p.prefix.ip.ipa_type = (ipaddr_len == IPV4_MAX_BYTELEN)
1857 ? IPADDR_V4
1858 : IPADDR_V6;
1859 memcpy(&p.prefix.ip.ip.addr, pfx, ipaddr_len);
1860 }
1861 pfx += ipaddr_len;
1862
1863 /* Get the VNI (in MPLS label field). */
1864 /* Note: We ignore the second VNI, if any. */
1865 label_pnt = (mpls_label_t *)pfx;
1866
1867 /* Process the route. */
1868 if (attr)
1869 ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
1870 afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
1871 &prd, label_pnt, 0, NULL);
1872 else
1873 ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
1874 afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
1875 &prd, label_pnt, NULL);
1876 return ret;
1877 }
1878
1879 /*
1880 * Process received EVPN type-3 route (advertise or withdraw).
1881 */
1882 static int process_type3_route(struct peer *peer, afi_t afi, safi_t safi,
1883 struct attr *attr, u_char *pfx, int psize,
1884 u_int32_t addpath_id)
1885 {
1886 struct prefix_rd prd;
1887 struct prefix_evpn p;
1888 u_char ipaddr_len;
1889 int ret;
1890
1891 /* Type-3 route should be either 17 or 29 bytes: RD (8), Eth Tag (4),
1892 * IP len (1) and IP (4 or 16).
1893 */
1894 if (psize != 17 && psize != 29) {
1895 zlog_err("%u:%s - Rx EVPN Type-3 NLRI with invalid length %d",
1896 peer->bgp->vrf_id, peer->host, psize);
1897 return -1;
1898 }
1899
1900 /* Make prefix_rd */
1901 prd.family = AF_UNSPEC;
1902 prd.prefixlen = 64;
1903 memcpy(&prd.val, pfx, 8);
1904 pfx += 8;
1905
1906 /* Make EVPN prefix. */
1907 memset(&p, 0, sizeof(struct prefix_evpn));
1908 p.family = AF_EVPN;
1909 p.prefixlen = EVPN_TYPE_3_ROUTE_PREFIXLEN;
1910 p.prefix.route_type = BGP_EVPN_IMET_ROUTE;
1911
1912 /* Skip over Ethernet Tag for now. */
1913 pfx += 4;
1914
1915 /* Get the IP. */
1916 ipaddr_len = *pfx++;
1917 if (ipaddr_len == IPV4_MAX_BITLEN) {
1918 p.prefix.ip.ipa_type = IPADDR_V4;
1919 memcpy(&p.prefix.ip.ip.addr, pfx, IPV4_MAX_BYTELEN);
1920 } else {
1921 zlog_err(
1922 "%u:%s - Rx EVPN Type-3 NLRI with unsupported IP address length %d",
1923 peer->bgp->vrf_id, peer->host, ipaddr_len);
1924 return -1;
1925 }
1926
1927 /* Process the route. */
1928 if (attr)
1929 ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
1930 afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
1931 &prd, NULL, 0, NULL);
1932 else
1933 ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
1934 afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
1935 &prd, NULL, NULL);
1936 return ret;
1937 }
1938
1939 /*
1940 * Process received EVPN type-5 route (advertise or withdraw).
1941 */
1942 static int process_type5_route(struct peer *peer, afi_t afi, safi_t safi,
1943 struct attr *attr, u_char *pfx, int psize,
1944 u_int32_t addpath_id, int withdraw)
1945 {
1946 struct prefix_rd prd;
1947 struct prefix_evpn p;
1948 struct bgp_route_evpn evpn;
1949 u_char ippfx_len;
1950 u_int32_t eth_tag;
1951 mpls_label_t *label_pnt;
1952 int ret;
1953
1954 /* Type-5 route should be 34 or 58 bytes:
1955 * RD (8), ESI (10), Eth Tag (4), IP len (1), IP (4 or 16),
1956 * GW (4 or 16) and VNI (3).
1957 * Note that the IP and GW should both be IPv4 or both IPv6.
1958 */
1959 if (psize != 34 && psize != 58) {
1960 zlog_err("%u:%s - Rx EVPN Type-5 NLRI with invalid length %d",
1961 peer->bgp->vrf_id, peer->host, psize);
1962 return -1;
1963 }
1964
1965 /* Make prefix_rd */
1966 prd.family = AF_UNSPEC;
1967 prd.prefixlen = 64;
1968 memcpy(&prd.val, pfx, 8);
1969 pfx += 8;
1970
1971 /* Make EVPN prefix. */
1972 memset(&p, 0, sizeof(struct prefix_evpn));
1973 p.family = AF_EVPN;
1974 p.prefix.route_type = BGP_EVPN_IP_PREFIX_ROUTE;
1975
1976 /* Additional information outside of prefix - ESI and GW IP */
1977 memset(&evpn, 0, sizeof(evpn));
1978
1979 /* Fetch ESI */
1980 memcpy(&evpn.eth_s_id.val, pfx, 10);
1981 pfx += 10;
1982
1983 /* Fetch Ethernet Tag. */
1984 memcpy(&eth_tag, pfx, 4);
1985 p.prefix.eth_tag = ntohl(eth_tag);
1986 pfx += 4;
1987
1988 /* Fetch IP prefix length. */
1989 ippfx_len = *pfx++;
1990 if (ippfx_len > IPV6_MAX_BITLEN) {
1991 zlog_err(
1992 "%u:%s - Rx EVPN Type-5 NLRI with invalid IP Prefix length %d",
1993 peer->bgp->vrf_id, peer->host, ippfx_len);
1994 return -1;
1995 }
1996 p.prefix.ip_prefix_length = ippfx_len;
1997
1998 /* Determine IPv4 or IPv6 prefix */
1999 /* Since the address and GW are from the same family, this just becomes
2000 * a simple check on the total size.
2001 */
2002 if (psize == 34) {
2003 SET_IPADDR_V4(&p.prefix.ip);
2004 memcpy(&p.prefix.ip.ipaddr_v4, pfx, 4);
2005 pfx += 4;
2006 memcpy(&evpn.gw_ip.ipv4, pfx, 4);
2007 pfx += 4;
2008 p.prefixlen = PREFIX_LEN_ROUTE_TYPE_5_IPV4;
2009 } else {
2010 SET_IPADDR_V6(&p.prefix.ip);
2011 memcpy(&p.prefix.ip.ipaddr_v6, pfx, 16);
2012 pfx += 16;
2013 memcpy(&evpn.gw_ip.ipv6, pfx, 16);
2014 pfx += 16;
2015 p.prefixlen = PREFIX_LEN_ROUTE_TYPE_5_IPV6;
2016 }
2017
2018 label_pnt = (mpls_label_t *)pfx;
2019
2020 /* Process the route. */
2021 if (!withdraw)
2022 ret = bgp_update(peer, (struct prefix *)&p, addpath_id, attr,
2023 afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
2024 &prd, label_pnt, 0, &evpn);
2025 else
2026 ret = bgp_withdraw(peer, (struct prefix *)&p, addpath_id, attr,
2027 afi, safi, ZEBRA_ROUTE_BGP, BGP_ROUTE_NORMAL,
2028 &prd, label_pnt, &evpn);
2029
2030 return ret;
2031 }
2032
2033 static void evpn_mpattr_encode_type5(struct stream *s, struct prefix *p,
2034 struct prefix_rd *prd, mpls_label_t *label,
2035 struct attr *attr)
2036 {
2037 int len;
2038 char temp[16];
2039 struct evpn_addr *p_evpn_p;
2040
2041 memset(&temp, 0, 16);
2042 if (p->family != AF_EVPN)
2043 return;
2044 p_evpn_p = &(p->u.prefix_evpn);
2045
2046 if (IS_IPADDR_V4(&p_evpn_p->ip))
2047 len = 8; /* ipv4 */
2048 else
2049 len = 32; /* ipv6 */
2050 /* Prefix contains RD, ESI, EthTag, IP length, IP, GWIP and VNI */
2051 stream_putc(s, 8 + 10 + 4 + 1 + len + 3);
2052 stream_put(s, prd->val, 8);
2053 if (attr)
2054 stream_put(s, &(attr->evpn_overlay.eth_s_id), 10);
2055 else
2056 stream_put(s, &temp, 10);
2057 stream_putl(s, p_evpn_p->eth_tag);
2058 stream_putc(s, p_evpn_p->ip_prefix_length);
2059 if (IS_IPADDR_V4(&p_evpn_p->ip))
2060 stream_put_ipv4(s, p_evpn_p->ip.ipaddr_v4.s_addr);
2061 else
2062 stream_put(s, &p_evpn_p->ip.ipaddr_v6, 16);
2063 if (attr) {
2064 if (IS_IPADDR_V4(&p_evpn_p->ip))
2065 stream_put_ipv4(s,
2066 attr->evpn_overlay.gw_ip.ipv4.s_addr);
2067 else
2068 stream_put(s, &(attr->evpn_overlay.gw_ip.ipv6), 16);
2069 } else {
2070 if (IS_IPADDR_V4(&p_evpn_p->ip))
2071 stream_put_ipv4(s, 0);
2072 else
2073 stream_put(s, &temp, 16);
2074 }
2075
2076 if (label)
2077 stream_put(s, label, 3);
2078 else
2079 stream_put3(s, 0);
2080 }
2081
2082 /*
2083 * Cleanup specific VNI upon EVPN (advertise-all-vni) being disabled.
2084 */
2085 static void cleanup_vni_on_disable(struct hash_backet *backet, struct bgp *bgp)
2086 {
2087 struct bgpevpn *vpn = (struct bgpevpn *)backet->data;
2088
2089 /* Remove EVPN routes and schedule for processing. */
2090 delete_routes_for_vni(bgp, vpn);
2091
2092 /* Clear "live" flag and see if hash needs to be freed. */
2093 UNSET_FLAG(vpn->flags, VNI_FLAG_LIVE);
2094 if (!is_vni_configured(vpn))
2095 bgp_evpn_free(bgp, vpn);
2096 }
2097
2098 /*
2099 * Free a VNI entry; iterator function called during cleanup.
2100 */
2101 static void free_vni_entry(struct hash_backet *backet, struct bgp *bgp)
2102 {
2103 struct bgpevpn *vpn;
2104
2105 vpn = (struct bgpevpn *)backet->data;
2106 delete_all_vni_routes(bgp, vpn);
2107 bgp_evpn_free(bgp, vpn);
2108 }
2109
2110
2111 /*
2112 * Public functions.
2113 */
2114
2115 /*
2116 * Handle change to BGP router id. This is invoked twice by the change
2117 * handler, first before the router id has been changed and then after
2118 * the router id has been changed. The first invocation will result in
2119 * local routes for all VNIs being deleted and withdrawn and the next
2120 * will result in the routes being re-advertised.
2121 */
2122 void bgp_evpn_handle_router_id_update(struct bgp *bgp, int withdraw)
2123 {
2124 if (withdraw)
2125 hash_iterate(bgp->vnihash,
2126 (void (*)(struct hash_backet *,
2127 void *))withdraw_router_id_vni,
2128 bgp);
2129 else
2130 hash_iterate(bgp->vnihash,
2131 (void (*)(struct hash_backet *,
2132 void *))update_router_id_vni,
2133 bgp);
2134 }
2135
/*
 * The export RT of a VNI changed: update and advertise its local routes.
 * Returns the result of update_routes_for_vni().
 */
int bgp_evpn_handle_export_rt_change(struct bgp *bgp, struct bgpevpn *vpn)
{
	int rc;

	rc = update_routes_for_vni(bgp, vpn);
	return rc;
}
2143
/*
 * React to a change of a VNI's RD. The change handler calls this twice:
 * once before the RD is changed (withdraw != 0), which deletes and
 * withdraws the VNI's local routes, and once afterwards (withdraw == 0),
 * which re-advertises them.
 */
void bgp_evpn_handle_rd_change(struct bgp *bgp, struct bgpevpn *vpn,
			       int withdraw)
{
	if (!withdraw) {
		update_advertise_vni_routes(bgp, vpn);
		return;
	}

	delete_withdraw_vni_routes(bgp, vpn);
}
2159
/*
 * Install the routes applicable to this VNI; called when its import RT
 * changes. Returns the result of install_routes_for_vni().
 */
int bgp_evpn_install_routes(struct bgp *bgp, struct bgpevpn *vpn)
{
	int rc;

	rc = install_routes_for_vni(bgp, vpn);
	return rc;
}
2167
/*
 * Uninstall the routes installed for this VNI; called when its import RT
 * changes. Returns the result of uninstall_routes_for_vni().
 */
int bgp_evpn_uninstall_routes(struct bgp *bgp, struct bgpevpn *vpn)
{
	int rc;

	rc = uninstall_routes_for_vni(bgp, vpn);
	return rc;
}
2176
2177 /*
2178 * Function to display "tag" in route as a VNI.
2179 */
2180 char *bgp_evpn_label2str(mpls_label_t *label, char *buf, int len)
2181 {
2182 vni_t vni;
2183
2184 vni = label2vni(label);
2185 snprintf(buf, len, "%u", vni);
2186 return buf;
2187 }
2188
2189 /*
2190 * Function to convert evpn route to json format.
2191 * NOTE: We don't use prefix2str as the output here is a bit different.
2192 */
2193 void bgp_evpn_route2json(struct prefix_evpn *p, json_object *json)
2194 {
2195 char buf1[ETHER_ADDR_STRLEN];
2196 char buf2[PREFIX2STR_BUFFER];
2197
2198 if (!json)
2199 return;
2200
2201 if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) {
2202 json_object_int_add(json, "routeType", p->prefix.route_type);
2203 json_object_int_add(json, "ethTag", 0);
2204 json_object_int_add(json, "ipLen",
2205 IS_EVPN_PREFIX_IPADDR_V4(p)
2206 ? IPV4_MAX_BITLEN
2207 : IPV6_MAX_BITLEN);
2208 json_object_string_add(json, "ip",
2209 inet_ntoa(p->prefix.ip.ipaddr_v4));
2210 } else if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) {
2211 if (IS_EVPN_PREFIX_IPADDR_NONE(p)) {
2212 json_object_int_add(json, "routeType",
2213 p->prefix.route_type);
2214 json_object_int_add(
2215 json, "esi",
2216 0); /* TODO: we don't support esi yet */
2217 json_object_int_add(json, "ethTag", 0);
2218 json_object_int_add(json, "macLen", 8 * ETH_ALEN);
2219 json_object_string_add(json, "mac",
2220 prefix_mac2str(&p->prefix.mac,
2221 buf1,
2222 sizeof(buf1)));
2223 } else {
2224 u_char family;
2225
2226 family = IS_EVPN_PREFIX_IPADDR_V4(p) ? AF_INET
2227 : AF_INET6;
2228
2229 json_object_int_add(json, "routeType",
2230 p->prefix.route_type);
2231 json_object_int_add(
2232 json, "esi",
2233 0); /* TODO: we don't support esi yet */
2234 json_object_int_add(json, "ethTag", 0);
2235 json_object_int_add(json, "macLen", 8 * ETH_ALEN);
2236 json_object_string_add(json, "mac",
2237 prefix_mac2str(&p->prefix.mac,
2238 buf1,
2239 sizeof(buf1)));
2240 json_object_int_add(json, "ipLen",
2241 IS_EVPN_PREFIX_IPADDR_V4(p)
2242 ? IPV4_MAX_BITLEN
2243 : IPV6_MAX_BITLEN);
2244 json_object_string_add(
2245 json, "ip",
2246 inet_ntop(family, &p->prefix.ip.ip.addr, buf2,
2247 PREFIX2STR_BUFFER));
2248 }
2249 } else {
2250 /* Currently, this is to cater to other AF_ETHERNET code. */
2251 }
2252 }
2253
2254 /*
2255 * Function to convert evpn route to string.
2256 * NOTE: We don't use prefix2str as the output here is a bit different.
2257 */
2258 char *bgp_evpn_route2str(struct prefix_evpn *p, char *buf, int len)
2259 {
2260 char buf1[ETHER_ADDR_STRLEN];
2261 char buf2[PREFIX2STR_BUFFER];
2262
2263 if (p->prefix.route_type == BGP_EVPN_IMET_ROUTE) {
2264 snprintf(buf, len, "[%d]:[0]:[%d]:[%s]", p->prefix.route_type,
2265 IS_EVPN_PREFIX_IPADDR_V4(p) ? IPV4_MAX_BITLEN
2266 : IPV6_MAX_BITLEN,
2267 inet_ntoa(p->prefix.ip.ipaddr_v4));
2268 } else if (p->prefix.route_type == BGP_EVPN_MAC_IP_ROUTE) {
2269 if (IS_EVPN_PREFIX_IPADDR_NONE(p))
2270 snprintf(buf, len, "[%d]:[0]:[0]:[%d]:[%s]",
2271 p->prefix.route_type, 8 * ETH_ALEN,
2272 prefix_mac2str(&p->prefix.mac, buf1,
2273 sizeof(buf1)));
2274 else {
2275 u_char family;
2276
2277 family = IS_EVPN_PREFIX_IPADDR_V4(p) ? AF_INET
2278 : AF_INET6;
2279 snprintf(buf, len, "[%d]:[0]:[0]:[%d]:[%s]:[%d]:[%s]",
2280 p->prefix.route_type, 8 * ETH_ALEN,
2281 prefix_mac2str(&p->prefix.mac, buf1,
2282 sizeof(buf1)),
2283 family == AF_INET ? IPV4_MAX_BITLEN
2284 : IPV6_MAX_BITLEN,
2285 inet_ntop(family, &p->prefix.ip.ip.addr, buf2,
2286 PREFIX2STR_BUFFER));
2287 }
2288 } else {
2289 /* For EVPN route types not supported yet. */
2290 snprintf(buf, len, "(unsupported route type %d)",
2291 p->prefix.route_type);
2292 }
2293
2294 return (buf);
2295 }
2296
2297 /*
2298 * Encode EVPN prefix in Update (MP_REACH)
2299 */
2300 void bgp_evpn_encode_prefix(struct stream *s, struct prefix *p,
2301 struct prefix_rd *prd, mpls_label_t *label,
2302 struct attr *attr, int addpath_encode,
2303 u_int32_t addpath_tx_id)
2304 {
2305 struct prefix_evpn *evp = (struct prefix_evpn *)p;
2306 int ipa_len = 0;
2307
2308 if (addpath_encode)
2309 stream_putl(s, addpath_tx_id);
2310
2311 /* Route type */
2312 stream_putc(s, evp->prefix.route_type);
2313
2314 switch (evp->prefix.route_type) {
2315 case BGP_EVPN_MAC_IP_ROUTE:
2316 if (IS_EVPN_PREFIX_IPADDR_V4(evp))
2317 ipa_len = IPV4_MAX_BYTELEN;
2318 else if (IS_EVPN_PREFIX_IPADDR_V6(evp))
2319 ipa_len = IPV6_MAX_BYTELEN;
2320 stream_putc(s, 33 + ipa_len); // 1 VNI
2321 stream_put(s, prd->val, 8); /* RD */
2322 stream_put(s, 0, 10); /* ESI */
2323 stream_putl(s, 0); /* Ethernet Tag ID */
2324 stream_putc(s, 8 * ETH_ALEN); /* Mac Addr Len - bits */
2325 stream_put(s, evp->prefix.mac.octet, 6); /* Mac Addr */
2326 stream_putc(s, 8 * ipa_len); /* IP address Length */
2327 if (ipa_len)
2328 stream_put(s, &evp->prefix.ip.ip.addr,
2329 ipa_len); /* IP */
2330 stream_put(s, label,
2331 BGP_LABEL_BYTES); /* VNI is contained in 'tag' */
2332 break;
2333
2334 case BGP_EVPN_IMET_ROUTE:
2335 stream_putc(s, 17); // TODO: length - assumes IPv4 address
2336 stream_put(s, prd->val, 8); /* RD */
2337 stream_putl(s, 0); /* Ethernet Tag ID */
2338 stream_putc(s, IPV4_MAX_BITLEN); /* IP address Length - bits */
2339 /* Originating Router's IP Addr */
2340 stream_put_in_addr(s, &evp->prefix.ip.ipaddr_v4);
2341 break;
2342
2343 case BGP_EVPN_IP_PREFIX_ROUTE:
2344 /* TODO: AddPath support. */
2345 evpn_mpattr_encode_type5(s, p, prd, label, attr);
2346 break;
2347
2348 default:
2349 break;
2350 }
2351 }
2352
2353 int bgp_nlri_parse_evpn(struct peer *peer, struct attr *attr,
2354 struct bgp_nlri *packet, int withdraw)
2355 {
2356 u_char *pnt;
2357 u_char *lim;
2358 afi_t afi;
2359 safi_t safi;
2360 u_int32_t addpath_id;
2361 int addpath_encoded;
2362 int psize = 0;
2363 u_char rtype;
2364 u_char rlen;
2365 struct prefix p;
2366
2367 /* Check peer status. */
2368 if (peer->status != Established) {
2369 zlog_err("%u:%s - EVPN update received in state %d",
2370 peer->bgp->vrf_id, peer->host, peer->status);
2371 return -1;
2372 }
2373
2374 /* Start processing the NLRI - there may be multiple in the MP_REACH */
2375 pnt = packet->nlri;
2376 lim = pnt + packet->length;
2377 afi = packet->afi;
2378 safi = packet->safi;
2379 addpath_id = 0;
2380
2381 addpath_encoded =
2382 (CHECK_FLAG(peer->af_cap[afi][safi], PEER_CAP_ADDPATH_AF_RX_ADV)
2383 && CHECK_FLAG(peer->af_cap[afi][safi],
2384 PEER_CAP_ADDPATH_AF_TX_RCV));
2385
2386 for (; pnt < lim; pnt += psize) {
2387 /* Clear prefix structure. */
2388 memset(&p, 0, sizeof(struct prefix));
2389
2390 /* Deal with path-id if AddPath is supported. */
2391 if (addpath_encoded) {
2392 /* When packet overflow occurs return immediately. */
2393 if (pnt + BGP_ADDPATH_ID_LEN > lim)
2394 return -1;
2395
2396 addpath_id = ntohl(*((uint32_t *)pnt));
2397 pnt += BGP_ADDPATH_ID_LEN;
2398 }
2399
2400 /* All EVPN NLRI types start with type and length. */
2401 if (pnt + 2 > lim)
2402 return -1;
2403
2404 rtype = *pnt++;
2405 psize = rlen = *pnt++;
2406
2407 /* When packet overflow occur return immediately. */
2408 if (pnt + psize > lim)
2409 return -1;
2410
2411 switch (rtype) {
2412 case BGP_EVPN_MAC_IP_ROUTE:
2413 if (process_type2_route(peer, afi, safi,
2414 withdraw ? NULL : attr, pnt,
2415 psize, addpath_id)) {
2416 zlog_err(
2417 "%u:%s - Error in processing EVPN type-2 NLRI size %d",
2418 peer->bgp->vrf_id, peer->host, psize);
2419 return -1;
2420 }
2421 break;
2422
2423 case BGP_EVPN_IMET_ROUTE:
2424 if (process_type3_route(peer, afi, safi,
2425 withdraw ? NULL : attr, pnt,
2426 psize, addpath_id)) {
2427 zlog_err(
2428 "%u:%s - Error in processing EVPN type-3 NLRI size %d",
2429 peer->bgp->vrf_id, peer->host, psize);
2430 return -1;
2431 }
2432 break;
2433
2434 case BGP_EVPN_IP_PREFIX_ROUTE:
2435 if (process_type5_route(peer, afi, safi, attr, pnt,
2436 psize, addpath_id, withdraw)) {
2437 zlog_err(
2438 "%u:%s - Error in processing EVPN type-5 NLRI size %d",
2439 peer->bgp->vrf_id, peer->host, psize);
2440 return -1;
2441 }
2442 break;
2443
2444 default:
2445 break;
2446 }
2447 }
2448
2449 /* Packet length consistency check. */
2450 if (pnt != lim)
2451 return -1;
2452
2453 return 0;
2454 }
2455
2456
2457 /*
2458 * Map the RTs (configured or automatically derived) of a VNI to the VNI.
2459 * The mapping will be used during route processing.
2460 */
2461 void bgp_evpn_map_vni_to_its_rts(struct bgp *bgp, struct bgpevpn *vpn)
2462 {
2463 int i;
2464 struct ecommunity_val *eval;
2465 struct listnode *node, *nnode;
2466 struct ecommunity *ecom;
2467
2468 for (ALL_LIST_ELEMENTS(vpn->import_rtl, node, nnode, ecom)) {
2469 for (i = 0; i < ecom->size; i++) {
2470 eval = (struct ecommunity_val *)(ecom->val
2471 + (i
2472 * ECOMMUNITY_SIZE));
2473 map_vni_to_rt(bgp, vpn, eval);
2474 }
2475 }
2476 }
2477
2478 /*
2479 * Unmap the RTs (configured or automatically derived) of a VNI from the VNI.
2480 */
2481 void bgp_evpn_unmap_vni_from_its_rts(struct bgp *bgp, struct bgpevpn *vpn)
2482 {
2483 int i;
2484 struct ecommunity_val *eval;
2485 struct listnode *node, *nnode;
2486 struct ecommunity *ecom;
2487
2488 for (ALL_LIST_ELEMENTS(vpn->import_rtl, node, nnode, ecom)) {
2489 for (i = 0; i < ecom->size; i++) {
2490 struct irt_node *irt;
2491 struct ecommunity_val eval_tmp;
2492
2493 eval = (struct ecommunity_val *)(ecom->val
2494 + (i
2495 * ECOMMUNITY_SIZE));
2496 /* If using "automatic" RT, we only care about the
2497 * local-admin sub-field.
2498 * This is to facilitate using VNI as the RT for EBGP
2499 * peering too.
2500 */
2501 memcpy(&eval_tmp, eval, ECOMMUNITY_SIZE);
2502 if (!is_import_rt_configured(vpn))
2503 mask_ecom_global_admin(&eval_tmp, eval);
2504
2505 irt = lookup_import_rt(bgp, &eval_tmp);
2506 if (irt)
2507 unmap_vni_from_rt(bgp, vpn, irt);
2508 }
2509 }
2510 }
2511
2512 /*
2513 * Derive Import RT automatically for VNI and map VNI to RT.
2514 * The mapping will be used during route processing.
2515 */
2516 void bgp_evpn_derive_auto_rt_import(struct bgp *bgp, struct bgpevpn *vpn)
2517 {
2518 form_auto_rt(bgp, vpn, vpn->import_rtl);
2519 UNSET_FLAG(vpn->flags, VNI_FLAG_IMPRT_CFGD);
2520
2521 /* Map RT to VNI */
2522 bgp_evpn_map_vni_to_its_rts(bgp, vpn);
2523 }
2524
2525 /*
2526 * Derive Export RT automatically for VNI.
2527 */
2528 void bgp_evpn_derive_auto_rt_export(struct bgp *bgp, struct bgpevpn *vpn)
2529 {
2530 form_auto_rt(bgp, vpn, vpn->export_rtl);
2531 UNSET_FLAG(vpn->flags, VNI_FLAG_EXPRT_CFGD);
2532 }
2533
2534 /*
2535 * Derive RD automatically for VNI using passed information - it
2536 * is of the form RouterId:unique-id-for-vni.
2537 */
2538 void bgp_evpn_derive_auto_rd(struct bgp *bgp, struct bgpevpn *vpn)
2539 {
2540 char buf[100];
2541
2542 vpn->prd.family = AF_UNSPEC;
2543 vpn->prd.prefixlen = 64;
2544 sprintf(buf, "%s:%hu", inet_ntoa(bgp->router_id), vpn->rd_id);
2545 (void)str2prefix_rd(buf, &vpn->prd);
2546 UNSET_FLAG(vpn->flags, VNI_FLAG_RD_CFGD);
2547 }
2548
2549 /*
2550 * Lookup VNI.
2551 */
2552 struct bgpevpn *bgp_evpn_lookup_vni(struct bgp *bgp, vni_t vni)
2553 {
2554 struct bgpevpn *vpn;
2555 struct bgpevpn tmp;
2556
2557 memset(&tmp, 0, sizeof(struct bgpevpn));
2558 tmp.vni = vni;
2559 vpn = hash_lookup(bgp->vnihash, &tmp);
2560 return vpn;
2561 }
2562
2563 /*
2564 * Create a new vpn - invoked upon configuration or zebra notification.
2565 */
2566 struct bgpevpn *bgp_evpn_new(struct bgp *bgp, vni_t vni,
2567 struct in_addr originator_ip,
2568 vrf_id_t tenant_vrf_id)
2569 {
2570 struct bgpevpn *vpn;
2571
2572 if (!bgp)
2573 return NULL;
2574
2575 vpn = XCALLOC(MTYPE_BGP_EVPN, sizeof(struct bgpevpn));
2576 if (!vpn)
2577 return NULL;
2578
2579 /* Set values - RD and RT set to defaults. */
2580 vpn->vni = vni;
2581 vpn->originator_ip = originator_ip;
2582 vpn->tenant_vrf_id = tenant_vrf_id;
2583
2584 /* Initialize route-target import and export lists */
2585 vpn->import_rtl = list_new();
2586 vpn->import_rtl->cmp = (int (*)(void *, void *))evpn_route_target_cmp;
2587 vpn->export_rtl = list_new();
2588 vpn->export_rtl->cmp = (int (*)(void *, void *))evpn_route_target_cmp;
2589 bf_assign_index(bgp->rd_idspace, vpn->rd_id);
2590 derive_rd_rt_for_vni(bgp, vpn);
2591
2592 /* Initialize EVPN route table. */
2593 vpn->route_table = bgp_table_init(AFI_L2VPN, SAFI_EVPN);
2594
2595 /* Add to hash */
2596 if (!hash_get(bgp->vnihash, vpn, hash_alloc_intern)) {
2597 XFREE(MTYPE_BGP_EVPN, vpn);
2598 return NULL;
2599 }
2600 QOBJ_REG(vpn, bgpevpn);
2601 return vpn;
2602 }
2603
2604 /*
2605 * Free a given VPN - called in multiple scenarios such as zebra
2606 * notification, configuration being deleted, advertise-all-vni disabled etc.
2607 * This just frees appropriate memory, caller should have taken other
2608 * needed actions.
2609 */
2610 void bgp_evpn_free(struct bgp *bgp, struct bgpevpn *vpn)
2611 {
2612 bgp_table_unlock(vpn->route_table);
2613 bgp_evpn_unmap_vni_from_its_rts(bgp, vpn);
2614 list_delete_and_null(&vpn->import_rtl);
2615 list_delete_and_null(&vpn->export_rtl);
2616 bf_release_index(bgp->rd_idspace, vpn->rd_id);
2617 hash_release(bgp->vnihash, vpn);
2618 QOBJ_UNREG(vpn);
2619 XFREE(MTYPE_BGP_EVPN, vpn);
2620 }
2621
2622 /*
2623 * Import route into matching VNI(s).
2624 */
2625 int bgp_evpn_import_route(struct bgp *bgp, afi_t afi, safi_t safi,
2626 struct prefix *p, struct bgp_info *ri)
2627 {
2628 return install_uninstall_evpn_route(bgp, afi, safi, p, ri, 1);
2629 }
2630
2631 /*
2632 * Unimport route from matching VNI(s).
2633 */
2634 int bgp_evpn_unimport_route(struct bgp *bgp, afi_t afi, safi_t safi,
2635 struct prefix *p, struct bgp_info *ri)
2636 {
2637 return install_uninstall_evpn_route(bgp, afi, safi, p, ri, 0);
2638 }
2639
2640 /* filter routes which have martian next hops */
2641 int bgp_filter_evpn_routes_upon_martian_nh_change(struct bgp *bgp)
2642 {
2643 afi_t afi;
2644 safi_t safi;
2645 struct bgp_node *rd_rn, *rn;
2646 struct bgp_table *table;
2647 struct bgp_info *ri;
2648
2649 afi = AFI_L2VPN;
2650 safi = SAFI_EVPN;
2651
2652 /* Walk entire global routing table and evaluate routes which could be
2653 * imported into this VPN. Note that we cannot just look at the routes
2654 * for the VNI's RD -
2655 * remote routes applicable for this VNI could have any RD.
2656 */
2657 /* EVPN routes are a 2-level table. */
2658 for (rd_rn = bgp_table_top(bgp->rib[afi][safi]); rd_rn;
2659 rd_rn = bgp_route_next(rd_rn)) {
2660 table = (struct bgp_table *)(rd_rn->info);
2661 if (!table)
2662 continue;
2663
2664 for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
2665
2666 for (ri = rn->info; ri; ri = ri->next) {
2667
2668 /* Consider "valid" remote routes applicable for
2669 * this VNI. */
2670 if (!(ri->type == ZEBRA_ROUTE_BGP
2671 && ri->sub_type == BGP_ROUTE_NORMAL))
2672 continue;
2673
2674 if (bgp_nexthop_self(bgp, ri->attr->nexthop)) {
2675
2676 char attr_str[BUFSIZ];
2677 char pbuf[PREFIX_STRLEN];
2678
2679 bgp_dump_attr(ri->attr, attr_str,
2680 BUFSIZ);
2681
2682 if (bgp_debug_update(ri->peer, &rn->p,
2683 NULL, 1))
2684 zlog_debug(
2685 "%u: prefix %s with attr %s - DENIED due to martian or self nexthop",
2686 bgp->vrf_id,
2687 prefix2str(
2688 &rn->p, pbuf,
2689 sizeof(pbuf)),
2690 attr_str);
2691
2692 bgp_evpn_unimport_route(bgp, afi, safi,
2693 &rn->p, ri);
2694
2695 bgp_rib_remove(rn, ri, ri->peer, afi,
2696 safi);
2697 }
2698 }
2699 }
2700 }
2701
2702 return 0;
2703 }
2704
2705 /*
2706 * Handle del of a local MACIP.
2707 */
2708 int bgp_evpn_local_macip_del(struct bgp *bgp, vni_t vni, struct ethaddr *mac,
2709 struct ipaddr *ip)
2710 {
2711 struct bgpevpn *vpn;
2712 struct prefix_evpn p;
2713
2714 if (!bgp->vnihash) {
2715 zlog_err("%u: VNI hash not created", bgp->vrf_id);
2716 return -1;
2717 }
2718
2719 /* Lookup VNI hash - should exist. */
2720 vpn = bgp_evpn_lookup_vni(bgp, vni);
2721 if (!vpn || !is_vni_live(vpn)) {
2722 zlog_warn("%u: VNI hash entry for VNI %u %s at MACIP DEL",
2723 bgp->vrf_id, vni, vpn ? "not live" : "not found");
2724 return -1;
2725 }
2726
2727 /* Remove EVPN type-2 route and schedule for processing. */
2728 build_evpn_type2_prefix(&p, mac, ip);
2729 delete_evpn_route(bgp, vpn, &p);
2730
2731 return 0;
2732 }
2733
2734 /*
2735 * Handle add of a local MACIP.
2736 */
2737 int bgp_evpn_local_macip_add(struct bgp *bgp, vni_t vni, struct ethaddr *mac,
2738 struct ipaddr *ip, u_char flags)
2739 {
2740 struct bgpevpn *vpn;
2741 struct prefix_evpn p;
2742
2743 if (!bgp->vnihash) {
2744 zlog_err("%u: VNI hash not created", bgp->vrf_id);
2745 return -1;
2746 }
2747
2748 /* Lookup VNI hash - should exist. */
2749 vpn = bgp_evpn_lookup_vni(bgp, vni);
2750 if (!vpn || !is_vni_live(vpn)) {
2751 zlog_warn("%u: VNI hash entry for VNI %u %s at MACIP ADD",
2752 bgp->vrf_id, vni, vpn ? "not live" : "not found");
2753 return -1;
2754 }
2755
2756 /* Create EVPN type-2 route and schedule for processing. */
2757 build_evpn_type2_prefix(&p, mac, ip);
2758 if (update_evpn_route(bgp, vpn, &p, flags)) {
2759 char buf[ETHER_ADDR_STRLEN];
2760 char buf2[INET6_ADDRSTRLEN];
2761
2762 zlog_err(
2763 "%u:Failed to create Type-2 route, VNI %u %s MAC %s IP %s",
2764 bgp->vrf_id, vpn->vni,
2765 CHECK_FLAG(flags, ZEBRA_MAC_TYPE_STICKY) ? "sticky gateway"
2766 : "",
2767 prefix_mac2str(mac, buf, sizeof(buf)),
2768 ipaddr2str(ip, buf2, sizeof(buf2)));
2769 return -1;
2770 }
2771
2772 return 0;
2773 }
2774
2775 int bgp_evpn_local_l3vni_add(vni_t l3vni,
2776 vrf_id_t vrf_id,
2777 struct ethaddr *rmac)
2778 {
2779 struct bgp *bgp_vrf = NULL; /* bgp VRF instance */
2780 struct bgp *bgp_def = NULL; /* default bgp instance */
2781 as_t as = 0;
2782
2783 /* get the default instamce - required to get the AS number for VRF
2784 * auto-creation*/
2785 bgp_def = bgp_get_default();
2786 if (!bgp_def) {
2787 zlog_err("Cannot process L3VNI %u ADD - default BGP instance not yet created",
2788 l3vni);
2789 return -1;
2790 }
2791 as = bgp_def->as;
2792
2793 /* if the BGP vrf instance doesnt exist - create one */
2794 bgp_vrf = bgp_lookup_by_vrf_id(vrf_id);
2795 if (!bgp_vrf) {
2796
2797 int ret = 0;
2798
2799 ret = bgp_get(&bgp_vrf, &as, vrf_id_to_name(vrf_id),
2800 BGP_INSTANCE_TYPE_VRF);
2801 switch (ret) {
2802 case BGP_ERR_MULTIPLE_INSTANCE_NOT_SET:
2803 zlog_err("'bgp multiple-instance' not present\n");
2804 return -1;
2805 case BGP_ERR_AS_MISMATCH:
2806 zlog_err("BGP is already running; AS is %u\n", as);
2807 return -1;
2808 case BGP_ERR_INSTANCE_MISMATCH:
2809 zlog_err("BGP instance name and AS number mismatch\n");
2810 return -1;
2811 }
2812
2813 /* mark as auto created */
2814 SET_FLAG(bgp_vrf->vrf_flags, BGP_VRF_AUTO);
2815 }
2816
2817 /* associate with l3vni */
2818 bgp_vrf->l3vni = l3vni;
2819
2820 /* set the router mac - to be used in mac-ip routes for this vrf */
2821 memcpy(&bgp_vrf->rmac, rmac, sizeof(struct ethaddr));
2822
2823 //TODO_MITESH: auto derive RD/RT
2824
2825 //TODO_MITESH: update all the local mac-ip routes with l3vni/rmac info
2826
2827 //TODO_MITESH: import all the remote routes to VRF
2828
2829 return 0;
2830 }
2831
2832 int bgp_evpn_local_l3vni_del(vni_t l3vni,
2833 vrf_id_t vrf_id)
2834 {
2835 struct bgp *bgp_vrf = NULL; /* bgp vrf instance */
2836
2837 bgp_vrf = bgp_lookup_by_vrf_id(vrf_id);
2838 if (!bgp_vrf) {
2839 zlog_err("Cannot process L3VNI %u Del - Could not find BGP instance",
2840 l3vni);
2841 return -1;
2842 }
2843
2844 /* remove the l3vni from vrf instance */
2845 bgp_vrf->l3vni = 0;
2846
2847 /* remove the Rmac from the BGP vrf */
2848 memset(&bgp_vrf->rmac, 0, sizeof(struct ethaddr));
2849
2850 /* TODO_MITESH: delete auto RD/RT */
2851
2852 /* TODO_MITESH: update all local mac-ip routes */
2853
2854 /* TODO_MITESH: unimport remote routes from VRF */
2855
2856 /* Delete the instance if it was autocreated */
2857 if (CHECK_FLAG(bgp_vrf->vrf_flags, BGP_VRF_AUTO))
2858 bgp_delete(bgp_vrf);
2859
2860 return 0;
2861 }
2862
2863 /*
2864 * Handle del of a local VNI.
2865 */
2866 int bgp_evpn_local_vni_del(struct bgp *bgp, vni_t vni)
2867 {
2868 struct bgpevpn *vpn;
2869
2870 if (!bgp->vnihash) {
2871 zlog_err("%u: VNI hash not created", bgp->vrf_id);
2872 return -1;
2873 }
2874
2875 /* Locate VNI hash */
2876 vpn = bgp_evpn_lookup_vni(bgp, vni);
2877 if (!vpn) {
2878 zlog_warn("%u: VNI hash entry for VNI %u not found at DEL",
2879 bgp->vrf_id, vni);
2880 return 0;
2881 }
2882
2883 /* Remove all local EVPN routes and schedule for processing (to
2884 * withdraw from peers).
2885 */
2886 delete_routes_for_vni(bgp, vpn);
2887
2888 /*
2889 * tunnel is no longer active, del tunnel ip address from tip_hash
2890 */
2891 bgp_tip_del(bgp, &vpn->originator_ip);
2892
2893 /* Clear "live" flag and see if hash needs to be freed. */
2894 UNSET_FLAG(vpn->flags, VNI_FLAG_LIVE);
2895 if (!is_vni_configured(vpn))
2896 bgp_evpn_free(bgp, vpn);
2897
2898 return 0;
2899 }
2900
2901 /*
2902 * Handle add (or update) of a local VNI. The only VNI change we care
2903 * about is change to local-tunnel-ip.
2904 */
2905 int bgp_evpn_local_vni_add(struct bgp *bgp, vni_t vni,
2906 struct in_addr originator_ip,
2907 vrf_id_t tenant_vrf_id)
2908 {
2909 struct bgpevpn *vpn;
2910 struct prefix_evpn p;
2911
2912 if (!bgp->vnihash) {
2913 zlog_err("%u: VNI hash not created", bgp->vrf_id);
2914 return -1;
2915 }
2916
2917 /* Lookup VNI. If present and no change, exit. */
2918 vpn = bgp_evpn_lookup_vni(bgp, vni);
2919 if (vpn) {
2920
2921 /* update tenant_vrf_id if required */
2922 if (vpn->tenant_vrf_id != tenant_vrf_id)
2923 vpn->tenant_vrf_id = tenant_vrf_id;
2924
2925 if (is_vni_live(vpn)
2926 && IPV4_ADDR_SAME(&vpn->originator_ip, &originator_ip))
2927 /* Probably some other param has changed that we don't
2928 * care about. */
2929 return 0;
2930
2931 /* Local tunnel endpoint IP address has changed */
2932 handle_tunnel_ip_change(bgp, vpn, originator_ip);
2933 }
2934
2935 /* Create or update as appropriate. */
2936 if (!vpn) {
2937 vpn = bgp_evpn_new(bgp, vni, originator_ip, tenant_vrf_id);
2938 if (!vpn) {
2939 zlog_err(
2940 "%u: Failed to allocate VNI entry for VNI %u - at Add",
2941 bgp->vrf_id, vni);
2942 return -1;
2943 }
2944 }
2945
2946 /* if the VNI is live already, there is nothing more to do */
2947 if (is_vni_live(vpn))
2948 return 0;
2949
2950 /* Mark as "live" */
2951 SET_FLAG(vpn->flags, VNI_FLAG_LIVE);
2952
2953 /* tunnel is now active, add tunnel-ip to db */
2954 bgp_tip_add(bgp, &originator_ip);
2955
2956 /* filter routes as nexthop database has changed */
2957 bgp_filter_evpn_routes_upon_martian_nh_change(bgp);
2958
2959 /* Create EVPN type-3 route and schedule for processing. */
2960 build_evpn_type3_prefix(&p, vpn->originator_ip);
2961 if (update_evpn_route(bgp, vpn, &p, 0)) {
2962 zlog_err("%u: Type3 route creation failure for VNI %u",
2963 bgp->vrf_id, vni);
2964 return -1;
2965 }
2966
2967 /* If we have learnt and retained remote routes (VTEPs, MACs) for this
2968 * VNI,
2969 * install them.
2970 */
2971 install_routes_for_vni(bgp, vpn);
2972
2973 /* If we are advertising gateway mac-ip
2974 It needs to be conveyed again to zebra */
2975 bgp_zebra_advertise_gw_macip(bgp, vpn->advertise_gw_macip, vpn->vni);
2976
2977 return 0;
2978 }
2979
2980 /*
2981 * Cleanup EVPN information on disable - Need to delete and withdraw
2982 * EVPN routes from peers.
2983 */
2984 void bgp_evpn_cleanup_on_disable(struct bgp *bgp)
2985 {
2986 hash_iterate(bgp->vnihash, (void (*)(struct hash_backet *,
2987 void *))cleanup_vni_on_disable,
2988 bgp);
2989 }
2990
2991 /*
2992 * Cleanup EVPN information - invoked at the time of bgpd exit or when the
2993 * BGP instance (default) is being freed.
2994 */
2995 void bgp_evpn_cleanup(struct bgp *bgp)
2996 {
2997 if (bgp->vnihash)
2998 hash_iterate(bgp->vnihash, (void (*)(struct hash_backet *,
2999 void *))free_vni_entry,
3000 bgp);
3001 if (bgp->import_rt_hash)
3002 hash_free(bgp->import_rt_hash);
3003 bgp->import_rt_hash = NULL;
3004 if (bgp->vnihash)
3005 hash_free(bgp->vnihash);
3006 bgp->vnihash = NULL;
3007 bf_free(bgp->rd_idspace);
3008 }
3009
3010 /*
3011 * Initialization for EVPN
3012 * Create
3013 * VNI hash table
3014 * hash for RT to VNI
3015 * unique rd id space for auto derivation of RD for VNIs
3016 */
3017 void bgp_evpn_init(struct bgp *bgp)
3018 {
3019 bgp->vnihash =
3020 hash_create(vni_hash_key_make, vni_hash_cmp, "BGP VNI Hash");
3021 bgp->import_rt_hash =
3022 hash_create(import_rt_hash_key_make, import_rt_hash_cmp,
3023 "BGP Import RT Hash");
3024 bf_init(bgp->rd_idspace, UINT16_MAX);
3025 /*assign 0th index in the bitfield, so that we start with id 1*/
3026 bf_assign_zero_index(bgp->rd_idspace);
3027 }