1 /* PIM support for VxLAN BUM flooding
3 * Copyright (C) 2019 Cumulus Networks, Inc.
5 * This file is part of FRR.
7 * FRR is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
12 * FRR is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
31 #include "pim_iface.h"
32 #include "pim_memory.h"
34 #include "pim_register.h"
36 #include "pim_upstream.h"
37 #include "pim_ifchannel.h"
39 #include "pim_zebra.h"
40 #include "pim_vxlan.h"
43 /* pim-vxlan global info */
/* Module-wide vxlan state object. pim_vxlan_p is a pointer that is
 * initialised to the address of vxlan_info.
 */
44 struct pim_vxlan vxlan_info
, *pim_vxlan_p
= &vxlan_info
;
46 static void pim_vxlan_work_timer_setup(bool start
);
47 static void pim_vxlan_set_peerlink_rif(struct pim_instance
*pim
,
48 struct interface
*ifp
);
50 /*************************** vxlan work list **********************************
51 * A work list is maintained for staggered generation of pim null register
52 * messages for vxlan SG entries that are in a reg_join state.
54 * A max of 500 NULL registers are generated in one shot. If paused, register
55 * generation continues on the next second and so on till all register
56 * messages have been sent out. And the process is restarted every 60s.
58 * The purpose of this null register generation is to set up the SPT and
59 * maintain it independent of the presence of overlay BUM traffic.
60 ****************************************************************************/
/* Walk the vxlan SG work list and send NULL registers.
 *
 * For each list node, pim_null_register_send() is called when the entry
 * has an upstream in PIM_REG_JOIN state and that upstream has an RPF
 * source-nexthop interface. In the branch guarded by
 * sec_count > PIM_VXLAN_NULL_REG_INTERVAL the walk starts at
 * vxlan_info.next_work when set, else at the head of vxlan_info.work_list;
 * a second visible assignment starts from next_work only. When work_cnt
 * exceeds vxlan_info.max_work_cnt the following node is stored in
 * next_work ("proc and pause"); next_work is set to NULL near the end.
 *
 * NOTE(review): the declarations and updates of sec_count and work_cnt,
 * and the exact branch structure, are not visible in this view - confirm
 * against the complete file.
 */
61 static void pim_vxlan_do_reg_work(void)
63 struct listnode
*listnode
;
65 struct pim_vxlan_sg
*vxlan_sg
;
70 if (sec_count
> PIM_VXLAN_NULL_REG_INTERVAL
) {
72 listnode
= vxlan_info
.next_work
?
73 vxlan_info
.next_work
:
74 vxlan_info
.work_list
->head
;
75 if (PIM_DEBUG_VXLAN
&& listnode
)
76 zlog_debug("vxlan SG work %s",
77 vxlan_info
.next_work
? "continues" : "starts");
79 listnode
= vxlan_info
.next_work
;
82 for (; listnode
; listnode
= listnode
->next
) {
83 vxlan_sg
= (struct pim_vxlan_sg
*)listnode
->data
;
84 if (vxlan_sg
->up
&& (vxlan_sg
->up
->reg_state
== PIM_REG_JOIN
)) {
86 zlog_debug("vxlan SG %s periodic NULL register",
90 * If we are on the work queue *and* the rpf
91 * has been lost on the vxlan_sg->up let's
92 * make sure that we don't send it.
94 if (vxlan_sg
->up
->rpf
.source_nexthop
.interface
) {
95 pim_null_register_send(vxlan_sg
->up
);
100 if (work_cnt
> vxlan_info
.max_work_cnt
) {
101 vxlan_info
.next_work
= listnode
->next
;
103 zlog_debug("vxlan SG %d work items proc and pause",
111 zlog_debug("vxlan SG %d work items proc", work_cnt
);
113 vxlan_info
.next_work
= NULL
;
116 /* Staggered work related info is initialized when the first work comes
/* Set up the staggered-work state: max_work_cnt is set to
 * PIM_VXLAN_WORK_MAX, PIM_VXLANF_WORK_INITED is set in vxlan_info.flags,
 * work_list is created with list_new() and the work timer is started.
 * The function first tests PIM_VXLANF_WORK_INITED.
 * NOTE(review): presumably it returns early when the flag is already
 * set - the return is not visible in this view.
 */
119 static void pim_vxlan_init_work(void)
121 if (vxlan_info
.flags
& PIM_VXLANF_WORK_INITED
)
124 vxlan_info
.max_work_cnt
= PIM_VXLAN_WORK_MAX
;
125 vxlan_info
.flags
|= PIM_VXLANF_WORK_INITED
;
126 vxlan_info
.work_list
= list_new();
127 pim_vxlan_work_timer_setup(true/* start */);
/* Put a vxlan SG entry on the work list.
 *
 * Visible steps: a PIM_VXLAN_SGF_DEL_IN_PROG check (logged as
 * "skip work list; del-in-prog"), a call to pim_vxlan_init_work(), a
 * check of vxlan_sg->work_node (entry already on the list), and finally
 * listnode_add() on vxlan_info.work_list with the returned node stored
 * in vxlan_sg->work_node.
 * NOTE(review): the early returns for the two checks are not visible in
 * this view.
 */
130 static void pim_vxlan_add_work(struct pim_vxlan_sg
*vxlan_sg
)
132 if (vxlan_sg
->flags
& PIM_VXLAN_SGF_DEL_IN_PROG
) {
134 zlog_debug("vxlan SG %s skip work list; del-in-prog",
139 pim_vxlan_init_work();
141 /* already a part of the work list */
142 if (vxlan_sg
->work_node
)
146 zlog_debug("vxlan SG %s work list add",
148 vxlan_sg
->work_node
= listnode_add(vxlan_info
.work_list
, vxlan_sg
);
149 /* XXX: adjust max_work_cnt if needed */
/* Take a vxlan SG entry off the work list.
 *
 * If the entry's node is the saved resume point (vxlan_info.next_work),
 * next_work is advanced to the following node before the node is removed
 * with list_delete_node(); vxlan_sg->work_node is then cleared to NULL.
 * The function starts with a test for a NULL work_node.
 * NOTE(review): presumably that test returns early - not visible here.
 */
152 static void pim_vxlan_del_work(struct pim_vxlan_sg
*vxlan_sg
)
154 if (!vxlan_sg
->work_node
)
158 zlog_debug("vxlan SG %s work list del",
161 if (vxlan_sg
->work_node
== vxlan_info
.next_work
)
162 vxlan_info
.next_work
= vxlan_sg
->work_node
->next
;
164 list_delete_node(vxlan_info
.work_list
, vxlan_sg
->work_node
);
165 vxlan_sg
->work_node
= NULL
;
/* Looks up the vxlan SG entry for up->sg via pim_vxlan_sg_find() and
 * either adds it to or removes it from the periodic-register work list
 * (pim_vxlan_add_work / pim_vxlan_del_work).
 * NOTE(review): the condition choosing between add and del is not
 * visible in this view; presumably it is the reg_join argument.
 */
168 void pim_vxlan_update_sg_reg_state(struct pim_instance
*pim
,
169 struct pim_upstream
*up
, bool reg_join
)
171 struct pim_vxlan_sg
*vxlan_sg
;
173 vxlan_sg
= pim_vxlan_sg_find(pim
, &up
->sg
);
177 /* add the vxlan sg entry to a work list for periodic reg joins.
178 * the entry will stay in the list as long as the register state is
182 pim_vxlan_add_work(vxlan_sg
);
184 pim_vxlan_del_work(vxlan_sg
);
/* Timer callback: runs one pass of pim_vxlan_do_reg_work() and then
 * re-arms the work timer through pim_vxlan_work_timer_setup(true).
 */
187 static int pim_vxlan_work_timer_cb(struct thread
*t
)
189 pim_vxlan_do_reg_work();
190 pim_vxlan_work_timer_setup(true /* start */);
194 /* global 1-second timer used for periodic processing */
/* Cancels any pending vxlan_info.work_timer with THREAD_OFF() and
 * schedules pim_vxlan_work_timer_cb on router->master after
 * PIM_VXLAN_WORK_TIME.
 * NOTE(review): how the start argument gates the re-arm is not visible
 * in this view - confirm against the complete file.
 */
195 static void pim_vxlan_work_timer_setup(bool start
)
197 THREAD_OFF(vxlan_info
.work_timer
);
199 thread_add_timer(router
->master
, pim_vxlan_work_timer_cb
, NULL
,
200 PIM_VXLAN_WORK_TIME
, &vxlan_info
.work_timer
);
203 /**************************** vxlan origination mroutes ***********************
204 * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination
205 * mroute is setup by pimd. The purpose of this mroute is to forward vxlan
206 * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast) packets
207 * over the underlay.
209 * Sample mroute (single VTEP):
210 * (27.0.0.7, 239.1.1.100) Iif: lo Oifs: uplink-1
212 * Sample mroute (anycast VTEP):
213 * (36.0.0.9, 239.1.1.100) Iif: peerlink-3.4094\
214 * Oifs: peerlink-3.4094 uplink-1
215 ***************************************************************************/
/* Release the upstream entry behind an origination mroute.
 *
 * Visible steps, starting from up = vxlan_sg->up: when the upstream
 * carries PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG, the vxlan-specific flag
 * bits (SRC_VXLAN_ORIG, STATIC_IIF, DISABLE_KAT_EXPIRY, FORCE_PIMREG,
 * NO_PIMREG_DATA, ALLOW_IIF_IN_OIL) are cleared. If SRC_STREAM is set the
 * keep-alive timer is stopped and pim_upstream_keep_alive_timer_proc()
 * is run; a pim_upstream_del() call is also visible. Both calls assign
 * their result back to up. pim_rpf_update() is then called and
 * PIM_RPF_FAILURE is logged as "rpf_update failure".
 *
 * NOTE(review): the else/guard lines linking these steps are not visible
 * in this view; confirm which path reaches pim_upstream_del() and
 * pim_rpf_update().
 */
216 static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg
*vxlan_sg
)
218 struct pim_upstream
*up
= vxlan_sg
->up
;
224 zlog_debug("vxlan SG %s orig mroute-up del",
229 if (up
->flags
& PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG
) {
230 /* clear out all the vxlan properties */
231 up
->flags
&= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG
|
232 PIM_UPSTREAM_FLAG_MASK_STATIC_IIF
|
233 PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY
|
234 PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG
|
235 PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA
|
236 PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL
);
238 /* We bring things to a grinding halt by force expirying
239 * the kat. Doing this will also remove the reference we
240 * created as a "vxlan" source and delete the upstream entry
241 * if there are no other references.
243 if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up
->flags
)) {
244 THREAD_OFF(up
->t_ka_timer
);
245 up
= pim_upstream_keep_alive_timer_proc(up
);
247 /* this is really unexpected as we force vxlan
248 * origination mroutes active sources but just in
251 up
= pim_upstream_del(vxlan_sg
->pim
, up
, __func__
);
253 /* if there are other references register the source
257 enum pim_rpf_result r
;
259 r
= pim_rpf_update(vxlan_sg
->pim
, up
, NULL
, __func__
);
260 if (r
== PIM_RPF_FAILURE
) {
263 "vxlan SG %s rpf_update failure",
/* Applies vxlan_sg->iif to the origination upstream: the static IIF is
 * filled in with pim_upstream_fill_static_iif() and the MFC entry is
 * refreshed via pim_upstream_mroute_iif_update() on the upstream's
 * channel_oil. The debug message prints the iif name, or "-" when
 * vxlan_sg->iif is NULL.
 */
270 static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg
*vxlan_sg
)
272 /* update MFC with the new IIF */
273 pim_upstream_fill_static_iif(vxlan_sg
->up
, vxlan_sg
->iif
);
274 pim_upstream_mroute_iif_update(vxlan_sg
->up
->channel_oil
, __func__
);
277 zlog_debug("vxlan SG %s orig mroute-up updated with iif %s",
279 vxlan_sg
->iif
?vxlan_sg
->iif
->name
:"-");
283 /* For every VxLAN BUM multicast group we setup a SG-up that has the following
284 * "forced properties" -
285 * 1. Directly connected on a DR interface i.e. we must act as an FHR
286 * 2. We prime the pump i.e. no multicast data is needed to register this
287 * source with the FHR. To do that we send periodic null registers if
288 * the SG entry is in a register-join state. We also prevent expiry of
290 * 3. As this SG is setup without data there is no need to register encapsulate
291 * data traffic. This encapsulation is explicitly skipped for the following
293 * a) Many levels of encapsulation are needed creating MTU disc challenges.
294 * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then
295 * encapsulated again in a pim-register header.
296 * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if
297 * they both reg encapsulated traffic the RP will accept the duplicates
298 * as there are no RPF checks for this encapsulated data.
299 * a), b) can be worked around if needed, but there is really no need because
300 * of (2) i.e. the pump is primed without data.
/* Create, or take a reference on, the upstream entry for an origination
 * mroute.
 *
 * A flags value is built with SRC_VXLAN_ORIG, STATIC_IIF, FHR,
 * SRC_STREAM, FORCE_PIMREG, DISABLE_KAT_EXPIRY, NO_PIMREG_DATA and
 * ALLOW_IIF_IN_OIL, and the entry is looked up with pim_upstream_find().
 *
 * Visible handling that operates on a found entry: if STATIC_IIF was not
 * already set, the tracked nexthop for up->upstream_addr is deleted;
 * use_rpt is cleared; a reference is taken with pim_upstream_ref(); the
 * termination device and pimreg are muted in the channel OIL; and the
 * iif is pushed with pim_vxlan_orig_mr_up_iif_update(). A separate
 * visible call creates the entry with pim_upstream_add() using
 * vxlan_sg->iif.
 *
 * Afterwards the keep-alive timer is started, the register state is
 * acted upon (pim_register_join(), pim_null_register_send(), work-list
 * registration through pim_vxlan_update_sg_reg_state()), the inherited
 * OIL is recomputed and the mroute is added when channel_oil->installed
 * is not set.
 *
 * NOTE(review): the declarations of flags and nht_p, the if/else lines
 * between the find and add paths, most case labels of the switch and the
 * failure return are not visible in this view.
 */
302 static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg
*vxlan_sg
)
304 struct pim_upstream
*up
;
305 struct pim_interface
*term_ifp
;
308 struct pim_instance
*pim
= vxlan_sg
->pim
;
316 zlog_debug("vxlan SG %s orig mroute-up add with iif %s",
318 vxlan_sg
->iif
?vxlan_sg
->iif
->name
:"-");
320 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags
);
321 /* pin the IIF to lo or peerlink-subinterface and disable NHT */
322 PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags
);
323 /* Fake traffic by setting SRC_STREAM and starting KAT */
324 /* We intentionally skip updating ref count for SRC_STREAM/FHR.
325 * Setting SRC_VXLAN should have already created a reference
326 * preventing the entry from being deleted
328 PIM_UPSTREAM_FLAG_SET_FHR(flags
);
329 PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags
);
330 /* Force pimreg even if non-DR. This is needed on a MLAG setup for
333 PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags
);
334 /* prevent KAT expiry. we want the MDT setup even if there is no BUM
337 PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags
);
338 /* SPT for vxlan BUM groups is primed and maintained via NULL
339 * registers so there is no need to reg-encapsulate
340 * vxlan-encapsulated overlay data traffic
342 PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags
);
343 /* On a MLAG setup we force a copy to the MLAG peer while also
344 * accepting traffic from the peer. To do this we set peerlink-rif as
345 * the IIF and also add it to the OIL
347 PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags
);
349 /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */
350 up
= pim_upstream_find(vxlan_sg
->pim
, &vxlan_sg
->sg
);
352 /* if the iif is set to something other than the vxlan_sg->iif
353 * we must dereg the old nexthop and force to new "static"
356 if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up
->flags
)) {
357 nht_p
.family
= AF_INET
;
358 nht_p
.prefixlen
= IPV4_MAX_BITLEN
;
359 nht_p
.u
.prefix4
= up
->upstream_addr
;
360 pim_delete_tracked_nexthop(vxlan_sg
->pim
, &nht_p
, up
,
363 /* We are acting FHR; clear out use_rpt setting if any */
364 pim_upstream_update_use_rpt(up
, false /*update_mroute*/);
365 pim_upstream_ref(up
, flags
, __func__
);
367 term_ifp
= pim_vxlan_get_term_ifp(pim
);
368 /* mute termination device on origination mroutes */
370 pim_channel_update_oif_mute(up
->channel_oil
,
372 pim_vxlan_orig_mr_up_iif_update(vxlan_sg
);
373 /* mute pimreg on origination mroutes */
375 pim_channel_update_oif_mute(up
->channel_oil
,
376 pim
->regiface
->info
);
378 up
= pim_upstream_add(vxlan_sg
->pim
, &vxlan_sg
->sg
,
379 vxlan_sg
->iif
, flags
, __func__
, NULL
);
385 zlog_debug("vxlan SG %s orig mroute-up add failed",
390 pim_upstream_keep_alive_timer_start(up
, vxlan_sg
->pim
->keep_alive_time
);
392 /* register the source with the RP */
393 switch (up
->reg_state
) {
396 pim_register_join(up
);
397 pim_null_register_send(up
);
401 /* if the pim upstream entry is already in reg-join state
402 * send null_register right away and add to the register
405 pim_null_register_send(up
);
406 pim_vxlan_update_sg_reg_state(pim
, up
, true);
409 case PIM_REG_JOIN_PENDING
:
414 /* update the inherited OIL */
415 pim_upstream_inherited_olist(vxlan_sg
->pim
, up
);
416 if (!up
->channel_oil
->installed
)
417 pim_upstream_mroute_add(up
->channel_oil
, __func__
);
/* Installs vxlan_sg->orig_oif in the origination mroute: sets
 * PIM_VXLAN_SGF_OIF_INSTALLED and calls pim_channel_add_oif() on the
 * upstream's channel_oil with PIM_OIF_FLAG_PROTO_VXLAN. The function
 * first tests that both vxlan_sg->up and vxlan_sg->orig_oif are set.
 * NOTE(review): presumably it returns when either is NULL - the return
 * and the trailing pim_channel_add_oif() arguments are not visible here.
 */
420 static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg
*vxlan_sg
)
422 if (!vxlan_sg
->up
|| !vxlan_sg
->orig_oif
)
426 zlog_debug("vxlan SG %s oif %s add",
427 vxlan_sg
->sg_str
, vxlan_sg
->orig_oif
->name
);
429 vxlan_sg
->flags
|= PIM_VXLAN_SGF_OIF_INSTALLED
;
430 pim_channel_add_oif(vxlan_sg
->up
->channel_oil
,
431 vxlan_sg
->orig_oif
, PIM_OIF_FLAG_PROTO_VXLAN
,
/* Removes the origination OIF. The current vxlan_sg->orig_oif is saved
 * in a local and the field is cleared to NULL unconditionally. After a
 * test of PIM_VXLAN_SGF_OIF_INSTALLED, that flag is cleared and the saved
 * interface is removed from the upstream's channel_oil with
 * pim_channel_del_oif() (PIM_OIF_FLAG_PROTO_VXLAN).
 * NOTE(review): presumably the flag test returns early when the OIF was
 * never installed - the return is not visible in this view.
 */
435 static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg
*vxlan_sg
)
437 struct interface
*orig_oif
;
439 orig_oif
= vxlan_sg
->orig_oif
;
440 vxlan_sg
->orig_oif
= NULL
;
442 if (!(vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
))
446 zlog_debug("vxlan SG %s oif %s del",
447 vxlan_sg
->sg_str
, orig_oif
->name
);
449 vxlan_sg
->flags
&= ~PIM_VXLAN_SGF_OIF_INSTALLED
;
450 pim_channel_del_oif(vxlan_sg
->up
->channel_oil
,
451 orig_oif
, PIM_OIF_FLAG_PROTO_VXLAN
, __func__
);
/* Returns the OIF to use for origination mroutes: pim->vxlan.peerlink_rif
 * when PIM_VXLAN_MLAGF_ENABLED is set in vxlan_mlag.flags, otherwise NULL.
 */
454 static inline struct interface
*pim_vxlan_orig_mr_oif_get(
455 struct pim_instance
*pim
)
457 return (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) ?
458 pim
->vxlan
.peerlink_rif
: NULL
;
461 /* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if
462 * the mroute is in a non-default vrf).
463 * Anycast VTEPs: IIF is the MLAG ISL/peerlink.
/* Returns the IIF to use for origination mroutes: pim->vxlan.peerlink_rif
 * when PIM_VXLAN_MLAGF_ENABLED is set and peerlink_rif is non-NULL,
 * otherwise pim->vxlan.default_iif.
 */
465 static inline struct interface
*pim_vxlan_orig_mr_iif_get(
466 struct pim_instance
*pim
)
468 return ((vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) &&
469 pim
->vxlan
.peerlink_rif
) ?
470 pim
->vxlan
.peerlink_rif
: pim
->vxlan
.default_iif
;
/* Refreshes vxlan_sg->iif from pim_vxlan_orig_mr_iif_get() (a side
 * effect on the entry) and then examines the iif's pim_interface
 * (iif->info): a NULL pim_ifp or a negative mroute_vif_index is tested.
 * NOTE(review): the return statements and any NULL check on
 * vxlan_sg->iif are not visible in this view; presumably the function
 * returns false when the iif is unusable and true otherwise.
 */
473 static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg
*vxlan_sg
)
475 struct pim_interface
*pim_ifp
;
477 vxlan_sg
->iif
= pim_vxlan_orig_mr_iif_get(vxlan_sg
->pim
);
481 pim_ifp
= (struct pim_interface
*)vxlan_sg
->iif
->info
;
482 if (!pim_ifp
|| (pim_ifp
->mroute_vif_index
< 0))
/* Installs an origination mroute in two steps: the upstream entry is set
 * up with pim_vxlan_orig_mr_up_add(), then orig_oif is taken from
 * pim_vxlan_orig_mr_oif_get() and installed with
 * pim_vxlan_orig_mr_oif_add().
 */
488 static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg
*vxlan_sg
)
490 pim_vxlan_orig_mr_up_add(vxlan_sg
);
492 vxlan_sg
->orig_oif
= pim_vxlan_orig_mr_oif_get(vxlan_sg
->pim
);
493 pim_vxlan_orig_mr_oif_add(vxlan_sg
);
/* Adds an origination mroute: pim_vxlan_orig_mr_add_is_ok() is consulted
 * first, then pim_vxlan_orig_mr_install() is called.
 * NOTE(review): presumably the function returns without installing when
 * the check fails - the return is not visible in this view.
 */
496 static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg
*vxlan_sg
)
498 if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg
))
502 zlog_debug("vxlan SG %s orig-mr add", vxlan_sg
->sg_str
);
504 pim_vxlan_orig_mr_install(vxlan_sg
);
/* Deletes an origination mroute: the OIF is removed first
 * (pim_vxlan_orig_mr_oif_del) and the upstream entry afterwards
 * (pim_vxlan_orig_mr_up_del).
 */
507 static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg
*vxlan_sg
)
510 zlog_debug("vxlan SG %s orig-mr del", vxlan_sg
->sg_str
);
512 pim_vxlan_orig_mr_oif_del(vxlan_sg
);
513 pim_vxlan_orig_mr_up_del(vxlan_sg
);
/* Per-entry callback (used with hash_iterate elsewhere in this file;
 * bucket->data is a struct pim_vxlan_sg) that re-evaluates the IIF of
 * an origination mroute. arg is not referenced in the visible lines.
 *
 * The entry type is tested with pim_vxlan_is_orig_mroute(), the new
 * iif is computed with pim_vxlan_orig_mr_iif_get() and logged
 * against the previous vxlan_sg->iif. When pim_vxlan_orig_mr_add_is_ok()
 * passes, the visible calls are pim_vxlan_orig_mr_up_iif_update() (for
 * an existing upstream) and pim_vxlan_orig_mr_install(); a call to
 * pim_vxlan_orig_mr_del() is also visible.
 *
 * NOTE(review): the early returns and the if/else lines choosing
 * between update, install and delete are not visible in this view.
 */
516 static void pim_vxlan_orig_mr_iif_update(struct hash_bucket
*bucket
, void *arg
)
518 struct interface
*ifp
;
519 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
520 struct interface
*old_iif
= vxlan_sg
->iif
;
522 if (!pim_vxlan_is_orig_mroute(vxlan_sg
))
525 ifp
= pim_vxlan_orig_mr_iif_get(vxlan_sg
->pim
);
527 zlog_debug("vxlan SG %s iif changed from %s to %s",
529 old_iif
? old_iif
->name
: "-",
530 ifp
? ifp
->name
: "-");
532 if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg
)) {
534 /* upstream exists but iif changed */
535 pim_vxlan_orig_mr_up_iif_update(vxlan_sg
);
538 pim_vxlan_orig_mr_install(vxlan_sg
);
541 pim_vxlan_orig_mr_del(vxlan_sg
);
545 /**************************** vxlan termination mroutes ***********************
546 * For every bum-mcast-grp registered by evpn a *G termination
547 * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan
548 * packets with the bum-mcast-grp dip from the underlay and terminate the
549 * tunnel. This is done by including the vxlan termination device (ipmr-lo) in
550 * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay
554 * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1
555 *****************************************************************************/
/* Returns the pim_interface stored in the info field of
 * pim->vxlan.term_if, or NULL when no term_if is set.
 */
556 struct pim_interface
*pim_vxlan_get_term_ifp(struct pim_instance
*pim
)
558 return pim
->vxlan
.term_if
?
559 (struct pim_interface
*)pim
->vxlan
.term_if
->info
: NULL
;
/* Adds local membership for the termination device on a termination
 * mroute. pim_ifchannel_local_membership_add() is called for
 * vxlan_sg->term_oif and vxlan_sg->sg; when it returns non-zero,
 * PIM_VXLAN_SGF_OIF_INSTALLED is set and the inherited OIL is
 * force-refreshed with pim_upstream_inherited_olist(). A warning
 * "term-oif ... add failed" is logged on the other visible path. The
 * function begins with a test of PIM_VXLAN_SGF_OIF_INSTALLED.
 * NOTE(review): presumably that test returns early when already
 * installed - the return and the else line are not visible here.
 */
562 static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg
*vxlan_sg
)
564 if (vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
)
568 zlog_debug("vxlan SG %s term-oif %s add",
569 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
571 if (pim_ifchannel_local_membership_add(vxlan_sg
->term_oif
,
572 &vxlan_sg
->sg
, true /*is_vxlan */)) {
573 vxlan_sg
->flags
|= PIM_VXLAN_SGF_OIF_INSTALLED
;
574 /* update the inherited OIL */
575 /* XXX - I don't see the inherited OIL updated when a local
576 * member is added. And that probably needs to be fixed. Till
577 * that happens we do a force update on the inherited OIL
580 pim_upstream_inherited_olist(vxlan_sg
->pim
, vxlan_sg
->up
);
582 zlog_warn("vxlan SG %s term-oif %s add failed",
583 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
/* Removes local membership for the termination device. After a test of
 * PIM_VXLAN_SGF_OIF_INSTALLED, the flag is cleared,
 * pim_ifchannel_local_membership_del() is called for vxlan_sg->term_oif
 * and vxlan_sg->sg, and the inherited OIL is force-refreshed with
 * pim_upstream_inherited_olist().
 * NOTE(review): presumably the flag test returns early when nothing is
 * installed - the return is not visible in this view.
 */
587 static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg
*vxlan_sg
)
589 if (!(vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
))
593 zlog_debug("vxlan SG %s oif %s del",
594 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
596 vxlan_sg
->flags
&= ~PIM_VXLAN_SGF_OIF_INSTALLED
;
597 pim_ifchannel_local_membership_del(vxlan_sg
->term_oif
, &vxlan_sg
->sg
);
598 /* update the inherited OIL */
599 /* XXX - I don't see the inherited OIL updated when a local member
600 * is deleted. And that probably needs to be fixed. Till that happens
601 * we do a force update on the inherited OIL here.
603 pim_upstream_inherited_olist(vxlan_sg
->pim
, vxlan_sg
->up
);
/* Updates the MLAG DF role of one upstream entry by calling
 * pim_mlag_up_df_role_update(pim, up, is_df, "inherit_xg_df"). The
 * visible condition is true only when inherit is set, up->parent
 * exists, and the parent has both the MLAG_VXLAN and MLAG_NON_DF flags.
 * NOTE(review): the declaration of is_df and the statement controlled by
 * the condition are not visible in this view; presumably is_df is
 * cleared when the condition holds.
 */
606 static void pim_vxlan_update_sg_entry_mlag(struct pim_instance
*pim
,
607 struct pim_upstream
*up
, bool inherit
)
611 if (inherit
&& up
->parent
&&
612 PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up
->parent
->flags
) &&
613 PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up
->parent
->flags
))
616 pim_mlag_up_df_role_update(pim
, up
, is_df
, "inherit_xg_df");
619 /* We run MLAG DF election only on mroutes that have the termination
620 * device ipmr-lo in the immediate OIL. This is only (*, G) entries at the
621 * moment. For (S, G) entries (with ipmr-lo in the inherited OIL) we
622 * inherit the DF role from the (*, G) entry.
/* Iterates the up->sources list and calls
 * pim_vxlan_update_sg_entry_mlag() for each child entry.
 * NOTE(review): the visible call passes the literal true as the inherit
 * argument; the inherit parameter of this function is not referenced in
 * the visible lines. Confirm whether callers that pass false rely on it.
 */
624 void pim_vxlan_inherit_mlag_flags(struct pim_instance
*pim
,
625 struct pim_upstream
*up
, bool inherit
)
627 struct listnode
*listnode
;
628 struct pim_upstream
*child
;
630 for (ALL_LIST_ELEMENTS_RO(up
->sources
, listnode
,
632 pim_vxlan_update_sg_entry_mlag(pim
,
633 child
, true /* inherit */);
/* Creates the upstream entry for a termination mroute. The flags value
 * gets SRC_VXLAN_TERM and MLAG_VXLAN, and pim_upstream_add() is called
 * with a NULL iif. A warning "term mroute-up add failed" is visible for
 * the failure case. Finally pim_vxlan_inherit_mlag_flags() is run so
 * existing child entries pick up the MLAG state of this entry.
 * NOTE(review): the declaration of flags, the failure test and any
 * assignment of vxlan_sg->up are not visible in this view.
 */
637 static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg
*vxlan_sg
)
639 struct pim_upstream
*up
;
648 zlog_debug("vxlan SG %s term mroute-up add",
651 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags
);
652 /* enable MLAG designated-forwarder election on termination mroutes */
653 PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags
);
655 up
= pim_upstream_add(vxlan_sg
->pim
, &vxlan_sg
->sg
, NULL
/* iif */,
656 flags
, __func__
, NULL
);
660 zlog_warn("vxlan SG %s term mroute-up add failed",
665 /* update existing SG entries with the parent's MLAG flag */
666 pim_vxlan_inherit_mlag_flags(vxlan_sg
->pim
, up
, true /*enable*/);
/* Releases the upstream entry of a termination mroute. When the
 * upstream has PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM set:
 * pim_vxlan_inherit_mlag_flags() is run for the child entries, the
 * SRC_VXLAN_TERM and MLAG_VXLAN flag bits are cleared,
 * pim_mlag_up_local_del() is called and the entry is released with
 * pim_upstream_del().
 * NOTE(review): the last argument of the inherit call and any NULL
 * check on up are not visible in this view.
 */
669 static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg
*vxlan_sg
)
671 struct pim_upstream
*up
= vxlan_sg
->up
;
677 zlog_debug("vxlan SG %s term mroute-up del",
680 if (up
->flags
& PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM
) {
681 /* update SG entries that are inheriting from this XG entry */
682 pim_vxlan_inherit_mlag_flags(vxlan_sg
->pim
, up
,
684 /* clear out all the vxlan related flags */
685 up
->flags
&= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM
|
686 PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN
);
687 pim_mlag_up_local_del(vxlan_sg
->pim
, up
);
688 pim_upstream_del(vxlan_sg
->pim
, up
, __func__
);
/* Adds a termination mroute. term_oif is taken from
 * pim->vxlan.term_if; the upstream entry is then created with
 * pim_vxlan_term_mr_up_add() and local membership for the term-oif is
 * added with pim_vxlan_term_mr_oif_add(). A NULL term_oif is tested
 * first (the mroute is deferred until a termination device exists).
 * NOTE(review): the return for the deferred case is not visible here.
 */
692 static void pim_vxlan_term_mr_add(struct pim_vxlan_sg
*vxlan_sg
)
695 zlog_debug("vxlan SG %s term mroute add", vxlan_sg
->sg_str
);
697 vxlan_sg
->term_oif
= vxlan_sg
->pim
->vxlan
.term_if
;
698 if (!vxlan_sg
->term_oif
)
699 /* defer termination mroute till we have a termination device */
702 pim_vxlan_term_mr_up_add(vxlan_sg
);
703 /* set up local membership for the term-oif */
704 pim_vxlan_term_mr_oif_add(vxlan_sg
);
/* Deletes a termination mroute: local membership on the term oif is
 * removed first (pim_vxlan_term_mr_oif_del), then the upstream reference
 * is dropped (pim_vxlan_term_mr_up_del).
 */
707 static void pim_vxlan_term_mr_del(struct pim_vxlan_sg
*vxlan_sg
)
710 zlog_debug("vxlan SG %s term mroute del", vxlan_sg
->sg_str
);
712 /* remove local membership associated with the term oif */
713 pim_vxlan_term_mr_oif_del(vxlan_sg
);
714 /* remove references to the upstream entry */
715 pim_vxlan_term_mr_up_del(vxlan_sg
);
718 /************************** vxlan SG cache management ************************/
/* Hash-key callback for the vxlan SG hash: p is treated as a
 * struct pim_vxlan_sg and the key is jhash_2words() over the source and
 * group s_addr values with an initial value of 0.
 */
719 static unsigned int pim_vxlan_sg_hash_key_make(const void *p
)
721 const struct pim_vxlan_sg
*vxlan_sg
= p
;
723 return (jhash_2words(vxlan_sg
->sg
.src
.s_addr
,
724 vxlan_sg
->sg
.grp
.s_addr
, 0));
/* Equality callback for the vxlan SG hash: two entries are equal when
 * both the source and the group s_addr values match.
 */
727 static bool pim_vxlan_sg_hash_eq(const void *p1
, const void *p2
)
729 const struct pim_vxlan_sg
*sg1
= p1
;
730 const struct pim_vxlan_sg
*sg2
= p2
;
732 return ((sg1
->sg
.src
.s_addr
== sg2
->sg
.src
.s_addr
)
733 && (sg1
->sg
.grp
.s_addr
== sg2
->sg
.grp
.s_addr
));
/* Allocates a zeroed struct pim_vxlan_sg (XCALLOC, MTYPE_PIM_VXLAN_SG),
 * fills sg_str via pim_str_sg_set() and interns the entry in
 * pim->vxlan.sg_hash with hash_get(..., hash_alloc_intern). When the
 * hash count is 1 afterwards, PIM_VXLAN_MLAGF_DO_REG is set in
 * vxlan_mlag.flags.
 * NOTE(review): the assignments of the pim and sg members and the
 * return statement are not visible in this view.
 */
736 static struct pim_vxlan_sg
*pim_vxlan_sg_new(struct pim_instance
*pim
,
737 struct prefix_sg
*sg
)
739 struct pim_vxlan_sg
*vxlan_sg
;
741 vxlan_sg
= XCALLOC(MTYPE_PIM_VXLAN_SG
, sizeof(*vxlan_sg
));
745 pim_str_sg_set(sg
, vxlan_sg
->sg_str
);
748 zlog_debug("vxlan SG %s alloc", vxlan_sg
->sg_str
);
750 vxlan_sg
= hash_get(pim
->vxlan
.sg_hash
, vxlan_sg
, hash_alloc_intern
);
752 /* we register with the MLAG daemon in the first VxLAN SG and never
753 * de-register during that life of the pimd
755 if (pim
->vxlan
.sg_hash
->count
== 1) {
756 vxlan_mlag
.flags
|= PIM_VXLAN_MLAGF_DO_REG
;
/* Looks up a vxlan SG entry in pim->vxlan.sg_hash using a stack-local
 * struct pim_vxlan_sg as the lookup key; returns the hash_lookup()
 * result.
 * NOTE(review): the statement that fills the key from sg is not visible
 * in this view.
 */
763 struct pim_vxlan_sg
*pim_vxlan_sg_find(struct pim_instance
*pim
,
764 struct prefix_sg
*sg
)
766 struct pim_vxlan_sg lookup
;
769 return hash_lookup(pim
->vxlan
.sg_hash
, &lookup
);
/* Adds a vxlan SG entry. The entry is first searched with
 * pim_vxlan_sg_find(); a new entry is created with pim_vxlan_sg_new()
 * and then set up as an origination mroute (pim_vxlan_orig_mr_add) or a
 * termination mroute (pim_vxlan_term_mr_add) depending on
 * pim_vxlan_is_orig_mroute().
 * NOTE(review): the early return for an existing entry, the else line
 * and the final return are not visible in this view.
 */
772 struct pim_vxlan_sg
*pim_vxlan_sg_add(struct pim_instance
*pim
,
773 struct prefix_sg
*sg
)
775 struct pim_vxlan_sg
*vxlan_sg
;
777 vxlan_sg
= pim_vxlan_sg_find(pim
, sg
);
781 vxlan_sg
= pim_vxlan_sg_new(pim
, sg
);
783 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
784 pim_vxlan_orig_mr_add(vxlan_sg
);
786 pim_vxlan_term_mr_add(vxlan_sg
);
/* Tears down and frees one vxlan SG entry. PIM_VXLAN_SGF_DEL_IN_PROG is
 * set first, the entry is taken off the work list, its origination or
 * termination mroute is deleted (chosen by pim_vxlan_is_orig_mroute())
 * and the memory is released with XFREE(MTYPE_PIM_VXLAN_SG). Removal
 * from the hash is not done in the visible lines of this function.
 */
791 static void pim_vxlan_sg_del_item(struct pim_vxlan_sg
*vxlan_sg
)
793 vxlan_sg
->flags
|= PIM_VXLAN_SGF_DEL_IN_PROG
;
795 pim_vxlan_del_work(vxlan_sg
);
797 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
798 pim_vxlan_orig_mr_del(vxlan_sg
);
800 pim_vxlan_term_mr_del(vxlan_sg
);
803 zlog_debug("vxlan SG %s free", vxlan_sg
->sg_str
);
805 XFREE(MTYPE_PIM_VXLAN_SG
, vxlan_sg
);
/* Deletes the vxlan SG entry for sg: the entry is looked up with
 * pim_vxlan_sg_find(), released from pim->vxlan.sg_hash with
 * hash_release() and then torn down by pim_vxlan_sg_del_item().
 * NOTE(review): the handling of a failed lookup is not visible in this
 * view; presumably the function returns when no entry is found.
 */
808 void pim_vxlan_sg_del(struct pim_instance
*pim
, struct prefix_sg
*sg
)
810 struct pim_vxlan_sg
*vxlan_sg
;
812 vxlan_sg
= pim_vxlan_sg_find(pim
, sg
);
816 hash_release(pim
->vxlan
.sg_hash
, vxlan_sg
);
817 pim_vxlan_sg_del_item(vxlan_sg
);
820 /******************************* MLAG handling *******************************/
/* Returns whether PIM_VXLAN_MLAGF_DO_REG is set in vxlan_mlag.flags. */
821 bool pim_vxlan_do_mlag_reg(void)
823 return (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_DO_REG
);
826 /* The peerlink sub-interface is added as an OIF to the origination-mroute.
827 * This is done to send a copy of the multicast-vxlan encapsulated traffic
828 * to the MLAG peer which may mroute it over the underlay if there are any
829 * interested receivers.
/* Per-entry callback (used with hash_iterate elsewhere in this file)
 * that swaps the peerlink OIF on an origination mroute. arg is the
 * new OIF and bucket->data the vxlan SG entry. The existing OIF is
 * removed with pim_vxlan_orig_mr_oif_del(), orig_oif is set to the new
 * interface and pim_vxlan_orig_mr_oif_add() is called. Two tests precede
 * this: pim_vxlan_is_orig_mroute() and orig_oif == new_oif.
 * NOTE(review): presumably both tests return early (non-origination
 * entry, unchanged OIF) - the returns are not visible in this view.
 */
831 static void pim_vxlan_sg_peerlink_oif_update(struct hash_bucket
*bucket
,
834 struct interface
*new_oif
= (struct interface
*)arg
;
835 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
837 if (!pim_vxlan_is_orig_mroute(vxlan_sg
))
840 if (vxlan_sg
->orig_oif
== new_oif
)
843 pim_vxlan_orig_mr_oif_del(vxlan_sg
);
845 vxlan_sg
->orig_oif
= new_oif
;
846 pim_vxlan_orig_mr_oif_add(vxlan_sg
);
849 /* In the case of anycast VTEPs the VTEP-PIP must be used as the
/* Supplies the register source address for MLAG setups: *src_p is set
 * to vxlan_mlag.reg_addr. Two tests precede the assignment: MLAG not
 * enabled (PIM_VXLAN_MLAGF_ENABLED clear) and reg_addr equal to
 * INADDR_ANY (address not available, register suppressed per the
 * in-line comment).
 * NOTE(review): the boolean values returned on each path are not visible
 * in this view. pim and up are not referenced in the visible lines.
 */
852 bool pim_vxlan_get_register_src(struct pim_instance
*pim
,
853 struct pim_upstream
*up
, struct in_addr
*src_p
)
855 if (!(vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
))
858 /* if address is not available suppress the pim-register */
859 if (vxlan_mlag
.reg_addr
.s_addr
== INADDR_ANY
)
862 *src_p
= vxlan_mlag
.reg_addr
;
/* Records a new MLAG state in vxlan_mlag and applies it.
 *
 * The PIM_VXLAN_MLAGF_ENABLED flag is set or cleared, and peerlink_rif,
 * reg_addr (copied from *reg_addr), peer_state and role are stored.
 * The default-VRF pim instance is then used: when MLAG is enabled and
 * the stored peerlink_rif has a pim_interface with
 * mroute_vif_index > 0, pim_vxlan_set_peerlink_rif() is called with
 * peerlink_rif; a call with NULL is visible for the other case.
 *
 * NOTE(review): the if/else lines around the flag set/clear and around
 * the two pim_vxlan_set_peerlink_rif() calls are not visible in this
 * view; presumably the flag follows the enable argument.
 */
866 void pim_vxlan_mlag_update(bool enable
, bool peer_state
, uint32_t role
,
867 struct interface
*peerlink_rif
,
868 struct in_addr
*reg_addr
)
870 struct pim_instance
*pim
;
871 char addr_buf
[INET_ADDRSTRLEN
];
872 struct pim_interface
*pim_ifp
= NULL
;
874 if (PIM_DEBUG_VXLAN
) {
875 inet_ntop(AF_INET
, reg_addr
,
876 addr_buf
, INET_ADDRSTRLEN
);
877 zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s",
878 enable
? "enable" : "disable",
879 peer_state
? "up" : "down",
881 peerlink_rif
? peerlink_rif
->name
: "-",
885 /* XXX: for now vxlan termination is only possible in the default VRF
886 * when that changes this will need to change to iterate all VRFs
888 pim
= pim_get_pim_instance(VRF_DEFAULT
);
891 vxlan_mlag
.flags
|= PIM_VXLAN_MLAGF_ENABLED
;
893 vxlan_mlag
.flags
&= ~PIM_VXLAN_MLAGF_ENABLED
;
895 if (vxlan_mlag
.peerlink_rif
!= peerlink_rif
)
896 vxlan_mlag
.peerlink_rif
= peerlink_rif
;
898 vxlan_mlag
.reg_addr
= *reg_addr
;
899 vxlan_mlag
.peer_state
= peer_state
;
900 vxlan_mlag
.role
= role
;
902 /* process changes */
903 if (vxlan_mlag
.peerlink_rif
)
904 pim_ifp
= (struct pim_interface
*)vxlan_mlag
.peerlink_rif
->info
;
905 if ((vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) &&
906 pim_ifp
&& (pim_ifp
->mroute_vif_index
> 0))
907 pim_vxlan_set_peerlink_rif(pim
, peerlink_rif
);
909 pim_vxlan_set_peerlink_rif(pim
, NULL
);
912 /****************************** misc callbacks *******************************/
/* Changes pim->vxlan.default_iif to ifp. The effective origination IIF
 * is sampled with pim_vxlan_orig_mr_iif_get() before and after the
 * assignment; the change is logged and, when an SG hash exists, every
 * entry is revisited with pim_vxlan_orig_mr_iif_update(). The function
 * begins with a test for default_iif == ifp.
 * NOTE(review): presumably the function returns early when default_iif
 * is unchanged or when the effective IIF did not change - those lines
 * are not visible in this view.
 */
913 static void pim_vxlan_set_default_iif(struct pim_instance
*pim
,
914 struct interface
*ifp
)
916 struct interface
*old_iif
;
918 if (pim
->vxlan
.default_iif
== ifp
)
921 old_iif
= pim
->vxlan
.default_iif
;
923 zlog_debug("%s: vxlan default iif changed from %s to %s",
924 __func__
, old_iif
? old_iif
->name
: "-",
925 ifp
? ifp
->name
: "-");
927 old_iif
= pim_vxlan_orig_mr_iif_get(pim
);
928 pim
->vxlan
.default_iif
= ifp
;
929 ifp
= pim_vxlan_orig_mr_iif_get(pim
);
934 zlog_debug("%s: vxlan orig iif changed from %s to %s", __func__
,
935 old_iif
? old_iif
->name
: "-",
936 ifp
? ifp
->name
: "-");
938 /* add/del upstream entries for the existing vxlan SG when the
939 * interface becomes available
941 if (pim
->vxlan
.sg_hash
)
942 hash_iterate(pim
->vxlan
.sg_hash
,
943 pim_vxlan_orig_mr_iif_update
, NULL
);
/* Re-announces an upstream entry via pim_mlag_up_local_add() when its
 * RPF source-nexthop interface is set and equals either the current
 * pim->vxlan.peerlink_rif or old_peerlink_rif. The function begins with
 * a test of the MLAG_VXLAN flag on the upstream.
 * NOTE(review): presumably entries without MLAG_VXLAN are skipped by an
 * early return - not visible in this view.
 */
946 static void pim_vxlan_up_cost_update(struct pim_instance
*pim
,
947 struct pim_upstream
*up
,
948 struct interface
*old_peerlink_rif
)
950 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up
->flags
))
953 if (up
->rpf
.source_nexthop
.interface
&&
954 ((up
->rpf
.source_nexthop
.interface
==
955 pim
->vxlan
.peerlink_rif
) ||
956 (up
->rpf
.source_nexthop
.interface
==
957 old_peerlink_rif
))) {
959 zlog_debug("RPF cost adjust for %s on peerlink-rif (old: %s, new: %s) change",
962 old_peerlink_rif
->name
: "-",
963 pim
->vxlan
.peerlink_rif
?
964 pim
->vxlan
.peerlink_rif
->name
: "-");
965 pim_mlag_up_local_add(pim
, up
);
/* Per-entry callback: arg is the previous peerlink rif and bucket->data
 * the vxlan SG entry. pim_vxlan_up_cost_update() is called for an
 * upstream entry up and for each child in up->sources. A test of
 * pim_vxlan_is_orig_mroute() comes first.
 * NOTE(review): how up is obtained, the early return for origination
 * entries and the trailing call arguments are not visible in this view.
 */
969 static void pim_vxlan_term_mr_cost_update(struct hash_bucket
*bucket
, void *arg
)
971 struct interface
*old_peerlink_rif
= (struct interface
*)arg
;
972 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
973 struct pim_upstream
*up
;
974 struct listnode
*listnode
;
975 struct pim_upstream
*child
;
977 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
980 /* Lookup all XG and SG entries with RPF-interface peerlink_rif */
985 pim_vxlan_up_cost_update(vxlan_sg
->pim
, up
,
988 for (ALL_LIST_ELEMENTS_RO(up
->sources
, listnode
,
990 pim_vxlan_up_cost_update(vxlan_sg
->pim
, child
,
/* Per-entry callback run on a peerlink rif change: calls
 * pim_vxlan_orig_mr_iif_update(bucket, NULL) and then
 * pim_vxlan_term_mr_cost_update(bucket, arg).
 */
994 static void pim_vxlan_sg_peerlink_rif_update(struct hash_bucket
*bucket
,
997 pim_vxlan_orig_mr_iif_update(bucket
, NULL
);
998 pim_vxlan_term_mr_cost_update(bucket
, arg
);
/* Changes pim->vxlan.peerlink_rif to ifp and propagates the effects.
 *
 * The effective origination IIF and OIF are sampled (via
 * pim_vxlan_orig_mr_iif_get / pim_vxlan_orig_mr_oif_get) before the
 * assignment and again afterwards. If the IIF changed, every SG entry is
 * revisited with pim_vxlan_sg_peerlink_rif_update(); if the OIF changed,
 * every entry is revisited with pim_vxlan_sg_peerlink_oif_update(). Both
 * iterations are skipped when pim->vxlan.sg_hash is NULL. The function
 * begins with a test for peerlink_rif == ifp.
 *
 * NOTE(review): presumably that test returns early when nothing
 * changed; the return and the last argument of each hash_iterate()
 * call are not visible in this view.
 */
1001 static void pim_vxlan_set_peerlink_rif(struct pim_instance
*pim
,
1002 struct interface
*ifp
)
1004 struct interface
*old_iif
;
1005 struct interface
*new_iif
;
1006 struct interface
*old_oif
;
1007 struct interface
*new_oif
;
1009 if (pim
->vxlan
.peerlink_rif
== ifp
)
1012 old_iif
= pim
->vxlan
.peerlink_rif
;
1013 if (PIM_DEBUG_VXLAN
)
1014 zlog_debug("%s: vxlan peerlink_rif changed from %s to %s",
1015 __func__
, old_iif
? old_iif
->name
: "-",
1016 ifp
? ifp
->name
: "-");
1018 old_iif
= pim_vxlan_orig_mr_iif_get(pim
);
1019 old_oif
= pim_vxlan_orig_mr_oif_get(pim
);
1020 pim
->vxlan
.peerlink_rif
= ifp
;
1022 new_iif
= pim_vxlan_orig_mr_iif_get(pim
);
1023 if (old_iif
!= new_iif
) {
1024 if (PIM_DEBUG_VXLAN
)
1025 zlog_debug("%s: vxlan orig iif changed from %s to %s",
1026 __func__
, old_iif
? old_iif
->name
: "-",
1027 new_iif
? new_iif
->name
: "-");
1029 /* add/del upstream entries for the existing vxlan SG when the
1030 * interface becomes available
1032 if (pim
->vxlan
.sg_hash
)
1033 hash_iterate(pim
->vxlan
.sg_hash
,
1034 pim_vxlan_sg_peerlink_rif_update
,
1038 new_oif
= pim_vxlan_orig_mr_oif_get(pim
);
1039 if (old_oif
!= new_oif
) {
1040 if (PIM_DEBUG_VXLAN
)
1041 zlog_debug("%s: vxlan orig oif changed from %s to %s",
1042 __func__
, old_oif
? old_oif
->name
: "-",
1043 new_oif
? new_oif
->name
: "-");
1044 if (pim
->vxlan
.sg_hash
)
1045 hash_iterate(pim
->vxlan
.sg_hash
,
1046 pim_vxlan_sg_peerlink_oif_update
,
/* Per-entry callback that moves a termination mroute to a new
 * termination device (arg). The mroute is deleted with
 * pim_vxlan_term_mr_del(), term_oif is set to the new interface and the
 * mroute is re-added with pim_vxlan_term_mr_add(). Two tests precede
 * this: pim_vxlan_is_orig_mroute() and term_oif == ifp.
 * NOTE(review): presumably both tests return early (origination entry,
 * unchanged term_oif) - the returns are not visible in this view.
 */
1051 static void pim_vxlan_term_mr_oif_update(struct hash_bucket
*bucket
, void *arg
)
1053 struct interface
*ifp
= (struct interface
*)arg
;
1054 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
1056 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
1059 if (vxlan_sg
->term_oif
== ifp
)
1062 if (PIM_DEBUG_VXLAN
)
1063 zlog_debug("vxlan SG %s term oif changed from %s to %s",
1065 vxlan_sg
->term_oif
? vxlan_sg
->term_oif
->name
: "-",
1066 ifp
? ifp
->name
: "-");
1068 pim_vxlan_term_mr_del(vxlan_sg
);
1069 vxlan_sg
->term_oif
= ifp
;
1070 pim_vxlan_term_mr_add(vxlan_sg
);
/* Sets pim->vxlan.term_if to ifp and, when an SG hash exists, runs
 * pim_vxlan_term_mr_oif_update() over every entry with ifp as the
 * argument. The function begins with a test for term_if == ifp.
 * NOTE(review): presumably that test returns early when unchanged - the
 * return is not visible in this view.
 */
1073 static void pim_vxlan_term_oif_update(struct pim_instance
*pim
,
1074 struct interface
*ifp
)
1076 if (pim
->vxlan
.term_if
== ifp
)
1079 if (PIM_DEBUG_VXLAN
)
1080 zlog_debug("vxlan term oif changed from %s to %s",
1081 pim
->vxlan
.term_if
? pim
->vxlan
.term_if
->name
: "-",
1082 ifp
? ifp
->name
: "-");
1084 pim
->vxlan
.term_if
= ifp
;
1085 if (pim
->vxlan
.sg_hash
)
1086 hash_iterate(pim
->vxlan
.sg_hash
,
1087 pim_vxlan_term_mr_oif_update
, ifp
);
/* Called for an interface gaining a vif. Depending on which role ifp
 * matches, one or more of the following is triggered: a loopback becomes
 * the default IIF; the MLAG peerlink rif (when MLAG is enabled) is set
 * as peerlink_rif; the configured termination device (term_if_cfg) is
 * activated through pim_vxlan_term_oif_update(). ifp->info is
 * dereferenced without a NULL check to reach the pim instance. The
 * function begins with a test for a VRF other than VRF_DEFAULT.
 * NOTE(review): presumably that test returns early - the return is not
 * visible in this view.
 */
1090 void pim_vxlan_add_vif(struct interface
*ifp
)
1092 struct pim_interface
*pim_ifp
= ifp
->info
;
1093 struct pim_instance
*pim
= pim_ifp
->pim
;
1095 if (pim
->vrf
->vrf_id
!= VRF_DEFAULT
)
1098 if (if_is_loopback(ifp
))
1099 pim_vxlan_set_default_iif(pim
, ifp
);
1101 if (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
&&
1102 (ifp
== vxlan_mlag
.peerlink_rif
))
1103 pim_vxlan_set_peerlink_rif(pim
, ifp
);
1105 if (pim
->vxlan
.term_if_cfg
== ifp
)
1106 pim_vxlan_term_oif_update(pim
, ifp
);
/* Called for an interface losing its vif. Whichever of default_iif,
 * peerlink_rif and term_if currently equals ifp is reset to NULL through
 * the corresponding setter. ifp->info is dereferenced without a NULL
 * check to reach the pim instance. The function begins with a test for
 * a VRF other than VRF_DEFAULT.
 * NOTE(review): presumably that test returns early - the return is not
 * visible in this view.
 */
1109 void pim_vxlan_del_vif(struct interface
*ifp
)
1111 struct pim_interface
*pim_ifp
= ifp
->info
;
1112 struct pim_instance
*pim
= pim_ifp
->pim
;
1114 if (pim
->vrf
->vrf_id
!= VRF_DEFAULT
)
1117 if (pim
->vxlan
.default_iif
== ifp
)
1118 pim_vxlan_set_default_iif(pim
, NULL
);
1120 if (pim
->vxlan
.peerlink_rif
== ifp
)
1121 pim_vxlan_set_peerlink_rif(pim
, NULL
);
1123 if (pim
->vxlan
.term_if
== ifp
)
1124 pim_vxlan_term_oif_update(pim
, NULL
);
1127 /* enable pim implicitly on the termination device add */
/* Records ifp as the configured termination device
 * (pim->vxlan.term_if_cfg) and enables pim on it.
 *
 * Visible handling: with an existing pim_interface (ifp->info),
 * PIM_IF_DO_PIM is applied to its options and, if mroute_vif_index is
 * non-negative, the device is activated via pim_vxlan_term_oif_update().
 * A separate visible path ensures pimreg exists
 * (pim_if_create_pimreg) and creates the pim interface with pim_if_new()
 * using pim=true and vxlan_term=true. The function begins with a test
 * for term_if_cfg == ifp.
 *
 * NOTE(review): presumably that test returns early when unchanged; the
 * return and the if/else between the existing-interface and
 * new-interface paths are not visible in this view.
 */
1128 void pim_vxlan_add_term_dev(struct pim_instance
*pim
,
1129 struct interface
*ifp
)
1131 struct pim_interface
*pim_ifp
;
1133 if (pim
->vxlan
.term_if_cfg
== ifp
)
1136 if (PIM_DEBUG_VXLAN
)
1137 zlog_debug("vxlan term oif cfg changed from %s to %s",
1138 pim
->vxlan
.term_if_cfg
?
1139 pim
->vxlan
.term_if_cfg
->name
: "-",
1142 pim
->vxlan
.term_if_cfg
= ifp
;
1144 /* enable pim on the term ifp */
1145 pim_ifp
= (struct pim_interface
*)ifp
->info
;
1147 PIM_IF_DO_PIM(pim_ifp
->options
);
1148 /* ifp is already oper up; activate it as a term dev */
1149 if (pim_ifp
->mroute_vif_index
>= 0)
1150 pim_vxlan_term_oif_update(pim
, ifp
);
1152 /* ensure that pimreg exists before using the newly created
1153 * vxlan termination device
1155 pim_if_create_pimreg(pim
);
1156 (void)pim_if_new(ifp
, false /*igmp*/, true /*pim*/,
1157 false /*pimreg*/, true /*vxlan_term*/);
1161 /* disable pim implicitly, if needed, on the termination device deletion */
/* Clears the configured termination device: the current term_if_cfg is
 * captured, the field is set to NULL, PIM_IF_DONT_PIM is applied to the
 * interface's pim options and PIM_IF_TEST_IGMP is then checked.
 * NOTE(review): the NULL checks on ifp and pim_ifp and the action taken
 * when IGMP is not enabled are not visible in this view.
 */
1162 void pim_vxlan_del_term_dev(struct pim_instance
*pim
)
1164 struct interface
*ifp
= pim
->vxlan
.term_if_cfg
;
1165 struct pim_interface
*pim_ifp
;
1167 if (PIM_DEBUG_VXLAN
)
1168 zlog_debug("vxlan term oif cfg changed from %s to -",
1171 pim
->vxlan
.term_if_cfg
= NULL
;
1173 pim_ifp
= (struct pim_interface
*)ifp
->info
;
1175 PIM_IF_DONT_PIM(pim_ifp
->options
);
1176 if (!PIM_IF_TEST_IGMP(pim_ifp
->options
))
/* Creates the per-instance vxlan SG hash: a name of the form
 * "PIM <vrf-name> vxlan SG hash" is formatted into hash_name and
 * pim->vxlan.sg_hash is created with hash_create() using the key and
 * equality callbacks defined in this file.
 * NOTE(review): the declaration of hash_name is not visible in this view.
 */
1181 void pim_vxlan_init(struct pim_instance
*pim
)
1185 snprintf(hash_name
, sizeof(hash_name
),
1186 "PIM %s vxlan SG hash", pim
->vrf
->name
);
1187 pim
->vxlan
.sg_hash
= hash_create(pim_vxlan_sg_hash_key_make
,
1188 pim_vxlan_sg_hash_eq
, hash_name
);
/* Destroys the per-instance vxlan SG hash when it exists: every entry is
 * torn down through pim_vxlan_sg_del_item() via hash_clean(), the hash is
 * freed with hash_free() and the pointer is reset to NULL.
 */
1191 void pim_vxlan_exit(struct pim_instance
*pim
)
1193 if (pim
->vxlan
.sg_hash
) {
1194 hash_clean(pim
->vxlan
.sg_hash
,
1195 (void (*)(void *))pim_vxlan_sg_del_item
);
1196 hash_free(pim
->vxlan
.sg_hash
);
1197 pim
->vxlan
.sg_hash
= NULL
;
/* Module shutdown hook: calls pim_vxlan_work_timer_setup(false).
 * NOTE(review): presumably this stops the periodic work timer without
 * re-arming it; the handling of the start argument is not visible in
 * this view.
 */
1201 void pim_vxlan_terminate(void)
1203 pim_vxlan_work_timer_setup(false);