1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* PIM support for VxLAN BUM flooding
4 * Copyright (C) 2019 Cumulus Networks, Inc.
16 #include "pim_iface.h"
17 #include "pim_memory.h"
19 #include "pim_register.h"
21 #include "pim_upstream.h"
22 #include "pim_ifchannel.h"
24 #include "pim_zebra.h"
25 #include "pim_vxlan.h"
28 /* pim-vxlan global info */
29 struct pim_vxlan vxlan_info
, *pim_vxlan_p
= &vxlan_info
;
31 static void pim_vxlan_work_timer_setup(bool start
);
32 static void pim_vxlan_set_peerlink_rif(struct pim_instance
*pim
,
33 struct interface
*ifp
);
/*************************** vxlan work list **********************************
 * A work list is maintained for staggered generation of pim null register
 * messages for vxlan SG entries that are in a reg_join state.
 *
 * A max of 500 NULL registers are generated at one shot. If paused, reg
 * generation continues on the next second and so on till all register
 * messages have been sent out. And the process is restarted every 60s.
 *
 * The purpose of this null register generation is to set up the SPT and
 * maintain it independent of the presence of overlay BUM traffic.
 ****************************************************************************/
46 static void pim_vxlan_do_reg_work(void)
48 struct listnode
*listnode
;
50 struct pim_vxlan_sg
*vxlan_sg
;
55 if (sec_count
> PIM_VXLAN_NULL_REG_INTERVAL
) {
57 listnode
= vxlan_info
.next_work
?
58 vxlan_info
.next_work
:
59 vxlan_info
.work_list
->head
;
60 if (PIM_DEBUG_VXLAN
&& listnode
)
61 zlog_debug("vxlan SG work %s",
62 vxlan_info
.next_work
? "continues" : "starts");
64 listnode
= vxlan_info
.next_work
;
67 for (; listnode
; listnode
= listnode
->next
) {
68 vxlan_sg
= (struct pim_vxlan_sg
*)listnode
->data
;
69 if (vxlan_sg
->up
&& (vxlan_sg
->up
->reg_state
== PIM_REG_JOIN
)) {
71 zlog_debug("vxlan SG %s periodic NULL register",
75 * If we are on the work queue *and* the rpf
76 * has been lost on the vxlan_sg->up let's
77 * make sure that we don't send it.
79 if (vxlan_sg
->up
->rpf
.source_nexthop
.interface
) {
80 pim_null_register_send(vxlan_sg
->up
);
85 if (work_cnt
> vxlan_info
.max_work_cnt
) {
86 vxlan_info
.next_work
= listnode
->next
;
88 zlog_debug("vxlan SG %d work items proc and pause",
96 zlog_debug("vxlan SG %d work items proc", work_cnt
);
98 vxlan_info
.next_work
= NULL
;
101 /* Staggered work related info is initialized when the first work comes
104 static void pim_vxlan_init_work(void)
106 if (vxlan_info
.flags
& PIM_VXLANF_WORK_INITED
)
109 vxlan_info
.max_work_cnt
= PIM_VXLAN_WORK_MAX
;
110 vxlan_info
.flags
|= PIM_VXLANF_WORK_INITED
;
111 vxlan_info
.work_list
= list_new();
112 pim_vxlan_work_timer_setup(true/* start */);
115 static void pim_vxlan_add_work(struct pim_vxlan_sg
*vxlan_sg
)
117 if (vxlan_sg
->flags
& PIM_VXLAN_SGF_DEL_IN_PROG
) {
119 zlog_debug("vxlan SG %s skip work list; del-in-prog",
124 pim_vxlan_init_work();
126 /* already a part of the work list */
127 if (vxlan_sg
->work_node
)
131 zlog_debug("vxlan SG %s work list add",
133 vxlan_sg
->work_node
= listnode_add(vxlan_info
.work_list
, vxlan_sg
);
134 /* XXX: adjust max_work_cnt if needed */
137 static void pim_vxlan_del_work(struct pim_vxlan_sg
*vxlan_sg
)
139 if (!vxlan_sg
->work_node
)
143 zlog_debug("vxlan SG %s work list del",
146 if (vxlan_sg
->work_node
== vxlan_info
.next_work
)
147 vxlan_info
.next_work
= vxlan_sg
->work_node
->next
;
149 list_delete_node(vxlan_info
.work_list
, vxlan_sg
->work_node
);
150 vxlan_sg
->work_node
= NULL
;
153 void pim_vxlan_update_sg_reg_state(struct pim_instance
*pim
,
154 struct pim_upstream
*up
, bool reg_join
)
156 struct pim_vxlan_sg
*vxlan_sg
;
158 vxlan_sg
= pim_vxlan_sg_find(pim
, &up
->sg
);
162 /* add the vxlan sg entry to a work list for periodic reg joins.
163 * the entry will stay in the list as long as the register state is
167 pim_vxlan_add_work(vxlan_sg
);
169 pim_vxlan_del_work(vxlan_sg
);
/* Work timer callback: run one round of register work, then ask the timer
 * setup helper to start the timer again. The event argument is not used.
 */
static void pim_vxlan_work_timer_cb(struct event *t)
{
	const bool restart = true;

	pim_vxlan_do_reg_work();
	pim_vxlan_work_timer_setup(restart);
}
178 /* global 1second timer used for periodic processing */
179 static void pim_vxlan_work_timer_setup(bool start
)
181 EVENT_OFF(vxlan_info
.work_timer
);
183 event_add_timer(router
->master
, pim_vxlan_work_timer_cb
, NULL
,
184 PIM_VXLAN_WORK_TIME
, &vxlan_info
.work_timer
);
/**************************** vxlan origination mroutes ***********************
 * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination
 * mroute is set up by pimd. The purpose of this mroute is to forward vxlan
 * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast) packets
 * over the underlay.
 *
 * Sample mroute (single VTEP):
 * (27.0.0.7, 239.1.1.100) Iif: lo Oifs: uplink-1
 *
 * Sample mroute (anycast VTEP):
 * (36.0.0.9, 239.1.1.100) Iif: peerlink-3.4094\
 *                         Oifs: peerlink-3.4094 uplink-1
 ***************************************************************************/
200 static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg
*vxlan_sg
)
202 struct pim_upstream
*up
= vxlan_sg
->up
;
208 zlog_debug("vxlan SG %s orig mroute-up del",
213 if (up
->flags
& PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG
) {
214 /* clear out all the vxlan properties */
215 up
->flags
&= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG
|
216 PIM_UPSTREAM_FLAG_MASK_STATIC_IIF
|
217 PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY
|
218 PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG
|
219 PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA
|
220 PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL
);
222 /* We bring things to a grinding halt by force expirying
223 * the kat. Doing this will also remove the reference we
224 * created as a "vxlan" source and delete the upstream entry
225 * if there are no other references.
227 if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up
->flags
)) {
228 EVENT_OFF(up
->t_ka_timer
);
229 up
= pim_upstream_keep_alive_timer_proc(up
);
231 /* this is really unexpected as we force vxlan
232 * origination mroutes active sources but just in
235 up
= pim_upstream_del(vxlan_sg
->pim
, up
, __func__
);
237 /* if there are other references register the source
241 enum pim_rpf_result r
;
243 r
= pim_rpf_update(vxlan_sg
->pim
, up
, NULL
, __func__
);
244 if (r
== PIM_RPF_FAILURE
) {
247 "vxlan SG %s rpf_update failure",
254 static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg
*vxlan_sg
)
256 /* update MFC with the new IIF */
257 pim_upstream_fill_static_iif(vxlan_sg
->up
, vxlan_sg
->iif
);
258 pim_upstream_mroute_iif_update(vxlan_sg
->up
->channel_oil
, __func__
);
261 zlog_debug("vxlan SG %s orig mroute-up updated with iif %s",
263 vxlan_sg
->iif
?vxlan_sg
->iif
->name
:"-");
267 /* For every VxLAN BUM multicast group we setup a SG-up that has the following
268 * "forced properties" -
269 * 1. Directly connected on a DR interface i.e. we must act as an FHR
270 * 2. We prime the pump i.e. no multicast data is needed to register this
271 * source with the FHR. To do that we send periodic null registers if
272 * the SG entry is in a register-join state. We also prevent expiry of
274 * 3. As this SG is setup without data there is no need to register encapsulate
275 * data traffic. This encapsulation is explicitly skipped for the following
277 * a) Many levels of encapsulation are needed creating MTU disc challenges.
278 * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then
279 * encapsulated again in a pim-register header.
280 * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if
281 * they both reg encapsulated traffic the RP will accept the duplicates
282 * as there are no RPF checks for this encapsulated data.
283 * a), b) can be workarounded if needed, but there is really no need because
284 * of (2) i.e. the pump is primed without data.
286 static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg
*vxlan_sg
)
288 struct pim_upstream
*up
;
289 struct pim_interface
*term_ifp
;
291 struct pim_instance
*pim
= vxlan_sg
->pim
;
299 zlog_debug("vxlan SG %s orig mroute-up add with iif %s",
301 vxlan_sg
->iif
?vxlan_sg
->iif
->name
:"-");
303 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags
);
304 /* pin the IIF to lo or peerlink-subinterface and disable NHT */
305 PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags
);
306 /* Fake traffic by setting SRC_STREAM and starting KAT */
307 /* We intentionally skip updating ref count for SRC_STREAM/FHR.
308 * Setting SRC_VXLAN should have already created a reference
309 * preventing the entry from being deleted
311 PIM_UPSTREAM_FLAG_SET_FHR(flags
);
312 PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags
);
313 /* Force pimreg even if non-DR. This is needed on a MLAG setup for
316 PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags
);
317 /* prevent KAT expiry. we want the MDT setup even if there is no BUM
320 PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags
);
321 /* SPT for vxlan BUM groups is primed and maintained via NULL
322 * registers so there is no need to reg-encapsulate
323 * vxlan-encapsulated overlay data traffic
325 PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags
);
326 /* On a MLAG setup we force a copy to the MLAG peer while also
327 * accepting traffic from the peer. To do this we set peerlink-rif as
328 * the IIF and also add it to the OIL
330 PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags
);
332 /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */
333 up
= pim_upstream_find(vxlan_sg
->pim
, &vxlan_sg
->sg
);
335 /* if the iif is set to something other than the vxlan_sg->iif
336 * we must dereg the old nexthop and force to new "static"
339 if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up
->flags
)) {
340 pim_delete_tracked_nexthop(vxlan_sg
->pim
,
341 up
->upstream_addr
, up
, NULL
);
343 /* We are acting FHR; clear out use_rpt setting if any */
344 pim_upstream_update_use_rpt(up
, false /*update_mroute*/);
345 pim_upstream_ref(up
, flags
, __func__
);
347 term_ifp
= pim_vxlan_get_term_ifp(pim
);
348 /* mute termination device on origination mroutes */
350 pim_channel_update_oif_mute(up
->channel_oil
,
352 pim_vxlan_orig_mr_up_iif_update(vxlan_sg
);
353 /* mute pimreg on origination mroutes */
355 pim_channel_update_oif_mute(up
->channel_oil
,
356 pim
->regiface
->info
);
358 up
= pim_upstream_add(vxlan_sg
->pim
, &vxlan_sg
->sg
,
359 vxlan_sg
->iif
, flags
, __func__
, NULL
);
365 zlog_debug("vxlan SG %s orig mroute-up add failed",
370 pim_upstream_keep_alive_timer_start(up
, vxlan_sg
->pim
->keep_alive_time
);
372 /* register the source with the RP */
373 switch (up
->reg_state
) {
376 pim_register_join(up
);
377 pim_null_register_send(up
);
381 /* if the pim upstream entry is already in reg-join state
382 * send null_register right away and add to the register
385 pim_null_register_send(up
);
386 pim_vxlan_update_sg_reg_state(pim
, up
, true);
389 case PIM_REG_JOIN_PENDING
:
394 /* update the inherited OIL */
395 pim_upstream_inherited_olist(vxlan_sg
->pim
, up
);
396 if (!up
->channel_oil
->installed
)
397 pim_upstream_mroute_add(up
->channel_oil
, __func__
);
400 static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg
*vxlan_sg
)
402 if (!vxlan_sg
->up
|| !vxlan_sg
->orig_oif
)
406 zlog_debug("vxlan SG %s oif %s add",
407 vxlan_sg
->sg_str
, vxlan_sg
->orig_oif
->name
);
409 vxlan_sg
->flags
|= PIM_VXLAN_SGF_OIF_INSTALLED
;
410 pim_channel_add_oif(vxlan_sg
->up
->channel_oil
,
411 vxlan_sg
->orig_oif
, PIM_OIF_FLAG_PROTO_VXLAN
,
415 static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg
*vxlan_sg
)
417 struct interface
*orig_oif
;
419 orig_oif
= vxlan_sg
->orig_oif
;
420 vxlan_sg
->orig_oif
= NULL
;
422 if (!(vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
))
426 zlog_debug("vxlan SG %s oif %s del",
427 vxlan_sg
->sg_str
, orig_oif
->name
);
429 vxlan_sg
->flags
&= ~PIM_VXLAN_SGF_OIF_INSTALLED
;
430 pim_channel_del_oif(vxlan_sg
->up
->channel_oil
,
431 orig_oif
, PIM_OIF_FLAG_PROTO_VXLAN
, __func__
);
434 static inline struct interface
*pim_vxlan_orig_mr_oif_get(
435 struct pim_instance
*pim
)
437 return (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) ?
438 pim
->vxlan
.peerlink_rif
: NULL
;
441 /* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if
442 * the mroute is in a non-default vrf).
443 * Anycast VTEPs: IIF is the MLAG ISL/peerlink.
445 static inline struct interface
*pim_vxlan_orig_mr_iif_get(
446 struct pim_instance
*pim
)
448 return ((vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) &&
449 pim
->vxlan
.peerlink_rif
) ?
450 pim
->vxlan
.peerlink_rif
: pim
->vxlan
.default_iif
;
453 static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg
*vxlan_sg
)
455 struct pim_interface
*pim_ifp
;
457 vxlan_sg
->iif
= pim_vxlan_orig_mr_iif_get(vxlan_sg
->pim
);
461 pim_ifp
= (struct pim_interface
*)vxlan_sg
->iif
->info
;
462 if (!pim_ifp
|| (pim_ifp
->mroute_vif_index
< 0))
468 static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg
*vxlan_sg
)
470 pim_vxlan_orig_mr_up_add(vxlan_sg
);
472 vxlan_sg
->orig_oif
= pim_vxlan_orig_mr_oif_get(vxlan_sg
->pim
);
473 pim_vxlan_orig_mr_oif_add(vxlan_sg
);
476 static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg
*vxlan_sg
)
478 if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg
))
482 zlog_debug("vxlan SG %s orig-mr add", vxlan_sg
->sg_str
);
484 pim_vxlan_orig_mr_install(vxlan_sg
);
487 static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg
*vxlan_sg
)
490 zlog_debug("vxlan SG %s orig-mr del", vxlan_sg
->sg_str
);
492 pim_vxlan_orig_mr_oif_del(vxlan_sg
);
493 pim_vxlan_orig_mr_up_del(vxlan_sg
);
496 static void pim_vxlan_orig_mr_iif_update(struct hash_bucket
*bucket
, void *arg
)
498 struct interface
*ifp
;
499 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
500 struct interface
*old_iif
= vxlan_sg
->iif
;
502 if (!pim_vxlan_is_orig_mroute(vxlan_sg
))
505 ifp
= pim_vxlan_orig_mr_iif_get(vxlan_sg
->pim
);
507 zlog_debug("vxlan SG %s iif changed from %s to %s",
509 old_iif
? old_iif
->name
: "-",
510 ifp
? ifp
->name
: "-");
512 if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg
)) {
514 /* upstream exists but iif changed */
515 pim_vxlan_orig_mr_up_iif_update(vxlan_sg
);
518 pim_vxlan_orig_mr_install(vxlan_sg
);
521 pim_vxlan_orig_mr_del(vxlan_sg
);
525 /**************************** vxlan termination mroutes ***********************
526 * For every bum-mcast-grp registered by evpn a *G termination
527 * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan
528 * packets with the bum-mcast-grp dip from the underlay and terminate the
529 * tunnel. This is done by including the vxlan termination device (ipmr-lo) in
530 * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay
534 * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1
535 *****************************************************************************/
536 struct pim_interface
*pim_vxlan_get_term_ifp(struct pim_instance
*pim
)
538 return pim
->vxlan
.term_if
?
539 (struct pim_interface
*)pim
->vxlan
.term_if
->info
: NULL
;
542 static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg
*vxlan_sg
)
544 if (vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
)
548 zlog_debug("vxlan SG %s term-oif %s add",
549 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
551 if (pim_ifchannel_local_membership_add(vxlan_sg
->term_oif
,
552 &vxlan_sg
->sg
, true /*is_vxlan */)) {
553 vxlan_sg
->flags
|= PIM_VXLAN_SGF_OIF_INSTALLED
;
554 /* update the inherited OIL */
555 /* XXX - I don't see the inherited OIL updated when a local
556 * member is added. And that probably needs to be fixed. Till
557 * that happens we do a force update on the inherited OIL
560 pim_upstream_inherited_olist(vxlan_sg
->pim
, vxlan_sg
->up
);
562 zlog_warn("vxlan SG %s term-oif %s add failed",
563 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
567 static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg
*vxlan_sg
)
569 if (!(vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
))
573 zlog_debug("vxlan SG %s oif %s del",
574 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
576 vxlan_sg
->flags
&= ~PIM_VXLAN_SGF_OIF_INSTALLED
;
577 pim_ifchannel_local_membership_del(vxlan_sg
->term_oif
, &vxlan_sg
->sg
);
578 /* update the inherited OIL */
579 /* XXX - I don't see the inherited OIL updated when a local member
580 * is deleted. And that probably needs to be fixed. Till that happens
581 * we do a force update on the inherited OIL here.
583 pim_upstream_inherited_olist(vxlan_sg
->pim
, vxlan_sg
->up
);
586 static void pim_vxlan_update_sg_entry_mlag(struct pim_instance
*pim
,
587 struct pim_upstream
*up
, bool inherit
)
591 if (inherit
&& up
->parent
&&
592 PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up
->parent
->flags
) &&
593 PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up
->parent
->flags
))
596 pim_mlag_up_df_role_update(pim
, up
, is_df
, "inherit_xg_df");
599 /* We run MLAG DF election only on mroutes that have the termination
600 * device ipmr-lo in the immediate OIL. This is only (*, G) entries at the
601 * moment. For (S, G) entries that (with ipmr-lo in the inherited OIL) we
602 * inherit the DF role from the (*, G) entry.
604 void pim_vxlan_inherit_mlag_flags(struct pim_instance
*pim
,
605 struct pim_upstream
*up
, bool inherit
)
607 struct listnode
*listnode
;
608 struct pim_upstream
*child
;
610 for (ALL_LIST_ELEMENTS_RO(up
->sources
, listnode
,
612 pim_vxlan_update_sg_entry_mlag(pim
,
613 child
, true /* inherit */);
617 static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg
*vxlan_sg
)
619 struct pim_upstream
*up
;
628 zlog_debug("vxlan SG %s term mroute-up add",
631 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags
);
632 /* enable MLAG designated-forwarder election on termination mroutes */
633 PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags
);
635 up
= pim_upstream_add(vxlan_sg
->pim
, &vxlan_sg
->sg
, NULL
/* iif */,
636 flags
, __func__
, NULL
);
640 zlog_warn("vxlan SG %s term mroute-up add failed",
645 /* update existing SG entries with the parent's MLAG flag */
646 pim_vxlan_inherit_mlag_flags(vxlan_sg
->pim
, up
, true /*enable*/);
649 static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg
*vxlan_sg
)
651 struct pim_upstream
*up
= vxlan_sg
->up
;
657 zlog_debug("vxlan SG %s term mroute-up del",
660 if (up
->flags
& PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM
) {
661 /* update SG entries that are inheriting from this XG entry */
662 pim_vxlan_inherit_mlag_flags(vxlan_sg
->pim
, up
,
664 /* clear out all the vxlan related flags */
665 up
->flags
&= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM
|
666 PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN
);
667 pim_mlag_up_local_del(vxlan_sg
->pim
, up
);
668 pim_upstream_del(vxlan_sg
->pim
, up
, __func__
);
672 static void pim_vxlan_term_mr_add(struct pim_vxlan_sg
*vxlan_sg
)
675 zlog_debug("vxlan SG %s term mroute add", vxlan_sg
->sg_str
);
677 vxlan_sg
->term_oif
= vxlan_sg
->pim
->vxlan
.term_if
;
678 if (!vxlan_sg
->term_oif
)
679 /* defer termination mroute till we have a termination device */
682 pim_vxlan_term_mr_up_add(vxlan_sg
);
683 /* set up local membership for the term-oif */
684 pim_vxlan_term_mr_oif_add(vxlan_sg
);
687 static void pim_vxlan_term_mr_del(struct pim_vxlan_sg
*vxlan_sg
)
690 zlog_debug("vxlan SG %s term mroute del", vxlan_sg
->sg_str
);
692 /* remove local membership associated with the term oif */
693 pim_vxlan_term_mr_oif_del(vxlan_sg
);
694 /* remove references to the upstream entry */
695 pim_vxlan_term_mr_up_del(vxlan_sg
);
698 /************************** vxlan SG cache management ************************/
699 static unsigned int pim_vxlan_sg_hash_key_make(const void *p
)
701 const struct pim_vxlan_sg
*vxlan_sg
= p
;
703 return pim_sgaddr_hash(vxlan_sg
->sg
, 0);
706 static bool pim_vxlan_sg_hash_eq(const void *p1
, const void *p2
)
708 const struct pim_vxlan_sg
*sg1
= p1
;
709 const struct pim_vxlan_sg
*sg2
= p2
;
711 return !pim_sgaddr_cmp(sg1
->sg
, sg2
->sg
);
714 static struct pim_vxlan_sg
*pim_vxlan_sg_new(struct pim_instance
*pim
,
717 struct pim_vxlan_sg
*vxlan_sg
;
719 vxlan_sg
= XCALLOC(MTYPE_PIM_VXLAN_SG
, sizeof(*vxlan_sg
));
723 snprintfrr(vxlan_sg
->sg_str
, sizeof(vxlan_sg
->sg_str
), "%pSG", sg
);
726 zlog_debug("vxlan SG %s alloc", vxlan_sg
->sg_str
);
728 vxlan_sg
= hash_get(pim
->vxlan
.sg_hash
, vxlan_sg
, hash_alloc_intern
);
730 /* we register with the MLAG daemon in the first VxLAN SG and never
731 * de-register during that life of the pimd
733 if (pim
->vxlan
.sg_hash
->count
== 1) {
734 vxlan_mlag
.flags
|= PIM_VXLAN_MLAGF_DO_REG
;
741 struct pim_vxlan_sg
*pim_vxlan_sg_find(struct pim_instance
*pim
, pim_sgaddr
*sg
)
743 struct pim_vxlan_sg lookup
;
746 return hash_lookup(pim
->vxlan
.sg_hash
, &lookup
);
749 struct pim_vxlan_sg
*pim_vxlan_sg_add(struct pim_instance
*pim
, pim_sgaddr
*sg
)
751 struct pim_vxlan_sg
*vxlan_sg
;
753 vxlan_sg
= pim_vxlan_sg_find(pim
, sg
);
757 vxlan_sg
= pim_vxlan_sg_new(pim
, sg
);
759 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
760 pim_vxlan_orig_mr_add(vxlan_sg
);
762 pim_vxlan_term_mr_add(vxlan_sg
);
767 static void pim_vxlan_sg_del_item(struct pim_vxlan_sg
*vxlan_sg
)
769 vxlan_sg
->flags
|= PIM_VXLAN_SGF_DEL_IN_PROG
;
771 pim_vxlan_del_work(vxlan_sg
);
773 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
774 pim_vxlan_orig_mr_del(vxlan_sg
);
776 pim_vxlan_term_mr_del(vxlan_sg
);
779 zlog_debug("vxlan SG %s free", vxlan_sg
->sg_str
);
781 XFREE(MTYPE_PIM_VXLAN_SG
, vxlan_sg
);
784 void pim_vxlan_sg_del(struct pim_instance
*pim
, pim_sgaddr
*sg
)
786 struct pim_vxlan_sg
*vxlan_sg
;
788 vxlan_sg
= pim_vxlan_sg_find(pim
, sg
);
792 hash_release(pim
->vxlan
.sg_hash
, vxlan_sg
);
793 pim_vxlan_sg_del_item(vxlan_sg
);
796 /******************************* MLAG handling *******************************/
797 bool pim_vxlan_do_mlag_reg(void)
799 return (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_DO_REG
);
802 /* The peerlink sub-interface is added as an OIF to the origination-mroute.
803 * This is done to send a copy of the multicast-vxlan encapsulated traffic
804 * to the MLAG peer which may mroute it over the underlay if there are any
805 * interested receivers.
807 static void pim_vxlan_sg_peerlink_oif_update(struct hash_bucket
*bucket
,
810 struct interface
*new_oif
= (struct interface
*)arg
;
811 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
813 if (!pim_vxlan_is_orig_mroute(vxlan_sg
))
816 if (vxlan_sg
->orig_oif
== new_oif
)
819 pim_vxlan_orig_mr_oif_del(vxlan_sg
);
821 vxlan_sg
->orig_oif
= new_oif
;
822 pim_vxlan_orig_mr_oif_add(vxlan_sg
);
825 /* In the case of anycast VTEPs the VTEP-PIP must be used as the
828 bool pim_vxlan_get_register_src(struct pim_instance
*pim
,
829 struct pim_upstream
*up
, struct in_addr
*src_p
)
831 if (!(vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
))
834 /* if address is not available suppress the pim-register */
835 if (vxlan_mlag
.reg_addr
.s_addr
== INADDR_ANY
)
838 *src_p
= vxlan_mlag
.reg_addr
;
842 void pim_vxlan_mlag_update(bool enable
, bool peer_state
, uint32_t role
,
843 struct interface
*peerlink_rif
,
844 struct in_addr
*reg_addr
)
846 struct pim_instance
*pim
;
847 char addr_buf
[INET_ADDRSTRLEN
];
848 struct pim_interface
*pim_ifp
= NULL
;
850 if (PIM_DEBUG_VXLAN
) {
851 inet_ntop(AF_INET
, reg_addr
,
852 addr_buf
, INET_ADDRSTRLEN
);
853 zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s",
854 enable
? "enable" : "disable",
855 peer_state
? "up" : "down",
857 peerlink_rif
? peerlink_rif
->name
: "-",
861 /* XXX: for now vxlan termination is only possible in the default VRF
862 * when that changes this will need to change to iterate all VRFs
864 pim
= pim_get_pim_instance(VRF_DEFAULT
);
868 zlog_debug("%s: Unable to find pim instance", __func__
);
873 vxlan_mlag
.flags
|= PIM_VXLAN_MLAGF_ENABLED
;
875 vxlan_mlag
.flags
&= ~PIM_VXLAN_MLAGF_ENABLED
;
877 if (vxlan_mlag
.peerlink_rif
!= peerlink_rif
)
878 vxlan_mlag
.peerlink_rif
= peerlink_rif
;
880 vxlan_mlag
.reg_addr
= *reg_addr
;
881 vxlan_mlag
.peer_state
= peer_state
;
882 vxlan_mlag
.role
= role
;
884 /* process changes */
885 if (vxlan_mlag
.peerlink_rif
)
886 pim_ifp
= (struct pim_interface
*)vxlan_mlag
.peerlink_rif
->info
;
887 if ((vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) &&
888 pim_ifp
&& (pim_ifp
->mroute_vif_index
> 0))
889 pim_vxlan_set_peerlink_rif(pim
, peerlink_rif
);
891 pim_vxlan_set_peerlink_rif(pim
, NULL
);
894 /****************************** misc callbacks *******************************/
895 static void pim_vxlan_set_default_iif(struct pim_instance
*pim
,
896 struct interface
*ifp
)
898 struct interface
*old_iif
;
900 if (pim
->vxlan
.default_iif
== ifp
)
903 old_iif
= pim
->vxlan
.default_iif
;
905 zlog_debug("%s: vxlan default iif changed from %s to %s",
906 __func__
, old_iif
? old_iif
->name
: "-",
907 ifp
? ifp
->name
: "-");
909 old_iif
= pim_vxlan_orig_mr_iif_get(pim
);
910 pim
->vxlan
.default_iif
= ifp
;
911 ifp
= pim_vxlan_orig_mr_iif_get(pim
);
916 zlog_debug("%s: vxlan orig iif changed from %s to %s", __func__
,
917 old_iif
? old_iif
->name
: "-",
918 ifp
? ifp
->name
: "-");
920 /* add/del upstream entries for the existing vxlan SG when the
921 * interface becomes available
923 if (pim
->vxlan
.sg_hash
)
924 hash_iterate(pim
->vxlan
.sg_hash
,
925 pim_vxlan_orig_mr_iif_update
, NULL
);
928 static void pim_vxlan_up_cost_update(struct pim_instance
*pim
,
929 struct pim_upstream
*up
,
930 struct interface
*old_peerlink_rif
)
932 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up
->flags
))
935 if (up
->rpf
.source_nexthop
.interface
&&
936 ((up
->rpf
.source_nexthop
.interface
==
937 pim
->vxlan
.peerlink_rif
) ||
938 (up
->rpf
.source_nexthop
.interface
==
939 old_peerlink_rif
))) {
941 zlog_debug("RPF cost adjust for %s on peerlink-rif (old: %s, new: %s) change",
944 old_peerlink_rif
->name
: "-",
945 pim
->vxlan
.peerlink_rif
?
946 pim
->vxlan
.peerlink_rif
->name
: "-");
947 pim_mlag_up_local_add(pim
, up
);
951 static void pim_vxlan_term_mr_cost_update(struct hash_bucket
*bucket
, void *arg
)
953 struct interface
*old_peerlink_rif
= (struct interface
*)arg
;
954 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
955 struct pim_upstream
*up
;
956 struct listnode
*listnode
;
957 struct pim_upstream
*child
;
959 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
962 /* Lookup all XG and SG entries with RPF-interface peerlink_rif */
967 pim_vxlan_up_cost_update(vxlan_sg
->pim
, up
,
970 for (ALL_LIST_ELEMENTS_RO(up
->sources
, listnode
,
972 pim_vxlan_up_cost_update(vxlan_sg
->pim
, child
,
976 static void pim_vxlan_sg_peerlink_rif_update(struct hash_bucket
*bucket
,
979 pim_vxlan_orig_mr_iif_update(bucket
, NULL
);
980 pim_vxlan_term_mr_cost_update(bucket
, arg
);
983 static void pim_vxlan_set_peerlink_rif(struct pim_instance
*pim
,
984 struct interface
*ifp
)
986 struct interface
*old_iif
;
987 struct interface
*new_iif
;
988 struct interface
*old_oif
;
989 struct interface
*new_oif
;
991 if (pim
->vxlan
.peerlink_rif
== ifp
)
994 old_iif
= pim
->vxlan
.peerlink_rif
;
996 zlog_debug("%s: vxlan peerlink_rif changed from %s to %s",
997 __func__
, old_iif
? old_iif
->name
: "-",
998 ifp
? ifp
->name
: "-");
1000 old_iif
= pim_vxlan_orig_mr_iif_get(pim
);
1001 old_oif
= pim_vxlan_orig_mr_oif_get(pim
);
1002 pim
->vxlan
.peerlink_rif
= ifp
;
1004 new_iif
= pim_vxlan_orig_mr_iif_get(pim
);
1005 if (old_iif
!= new_iif
) {
1006 if (PIM_DEBUG_VXLAN
)
1007 zlog_debug("%s: vxlan orig iif changed from %s to %s",
1008 __func__
, old_iif
? old_iif
->name
: "-",
1009 new_iif
? new_iif
->name
: "-");
1011 /* add/del upstream entries for the existing vxlan SG when the
1012 * interface becomes available
1014 if (pim
->vxlan
.sg_hash
)
1015 hash_iterate(pim
->vxlan
.sg_hash
,
1016 pim_vxlan_sg_peerlink_rif_update
,
1020 new_oif
= pim_vxlan_orig_mr_oif_get(pim
);
1021 if (old_oif
!= new_oif
) {
1022 if (PIM_DEBUG_VXLAN
)
1023 zlog_debug("%s: vxlan orig oif changed from %s to %s",
1024 __func__
, old_oif
? old_oif
->name
: "-",
1025 new_oif
? new_oif
->name
: "-");
1026 if (pim
->vxlan
.sg_hash
)
1027 hash_iterate(pim
->vxlan
.sg_hash
,
1028 pim_vxlan_sg_peerlink_oif_update
,
1033 static void pim_vxlan_term_mr_oif_update(struct hash_bucket
*bucket
, void *arg
)
1035 struct interface
*ifp
= (struct interface
*)arg
;
1036 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
1038 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
1041 if (vxlan_sg
->term_oif
== ifp
)
1044 if (PIM_DEBUG_VXLAN
)
1045 zlog_debug("vxlan SG %s term oif changed from %s to %s",
1047 vxlan_sg
->term_oif
? vxlan_sg
->term_oif
->name
: "-",
1048 ifp
? ifp
->name
: "-");
1050 pim_vxlan_term_mr_del(vxlan_sg
);
1051 vxlan_sg
->term_oif
= ifp
;
1052 pim_vxlan_term_mr_add(vxlan_sg
);
1055 static void pim_vxlan_term_oif_update(struct pim_instance
*pim
,
1056 struct interface
*ifp
)
1058 if (pim
->vxlan
.term_if
== ifp
)
1061 if (PIM_DEBUG_VXLAN
)
1062 zlog_debug("vxlan term oif changed from %s to %s",
1063 pim
->vxlan
.term_if
? pim
->vxlan
.term_if
->name
: "-",
1064 ifp
? ifp
->name
: "-");
1066 pim
->vxlan
.term_if
= ifp
;
1067 if (pim
->vxlan
.sg_hash
)
1068 hash_iterate(pim
->vxlan
.sg_hash
,
1069 pim_vxlan_term_mr_oif_update
, ifp
);
1072 void pim_vxlan_add_vif(struct interface
*ifp
)
1074 struct pim_interface
*pim_ifp
= ifp
->info
;
1075 struct pim_instance
*pim
= pim_ifp
->pim
;
1077 if (pim
->vrf
->vrf_id
!= VRF_DEFAULT
)
1080 if (if_is_loopback(ifp
))
1081 pim_vxlan_set_default_iif(pim
, ifp
);
1083 if (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
&&
1084 (ifp
== vxlan_mlag
.peerlink_rif
))
1085 pim_vxlan_set_peerlink_rif(pim
, ifp
);
1087 if (pim
->vxlan
.term_if_cfg
== ifp
)
1088 pim_vxlan_term_oif_update(pim
, ifp
);
1091 void pim_vxlan_del_vif(struct interface
*ifp
)
1093 struct pim_interface
*pim_ifp
= ifp
->info
;
1094 struct pim_instance
*pim
= pim_ifp
->pim
;
1096 if (pim
->vrf
->vrf_id
!= VRF_DEFAULT
)
1099 if (pim
->vxlan
.default_iif
== ifp
)
1100 pim_vxlan_set_default_iif(pim
, NULL
);
1102 if (pim
->vxlan
.peerlink_rif
== ifp
)
1103 pim_vxlan_set_peerlink_rif(pim
, NULL
);
1105 if (pim
->vxlan
.term_if
== ifp
)
1106 pim_vxlan_term_oif_update(pim
, NULL
);
1109 /* enable pim implicitly on the termination device add */
1110 void pim_vxlan_add_term_dev(struct pim_instance
*pim
,
1111 struct interface
*ifp
)
1113 struct pim_interface
*pim_ifp
;
1115 if (pim
->vxlan
.term_if_cfg
== ifp
)
1118 if (PIM_DEBUG_VXLAN
)
1119 zlog_debug("vxlan term oif cfg changed from %s to %s",
1120 pim
->vxlan
.term_if_cfg
?
1121 pim
->vxlan
.term_if_cfg
->name
: "-",
1124 pim
->vxlan
.term_if_cfg
= ifp
;
1126 /* enable pim on the term ifp */
1127 pim_ifp
= (struct pim_interface
*)ifp
->info
;
1129 pim_ifp
->pim_enable
= true;
1130 /* ifp is already oper up; activate it as a term dev */
1131 if (pim_ifp
->mroute_vif_index
>= 0)
1132 pim_vxlan_term_oif_update(pim
, ifp
);
1134 /* ensure that pimreg exists before using the newly created
1135 * vxlan termination device
1137 pim_if_create_pimreg(pim
);
1138 (void)pim_if_new(ifp
, false /*igmp*/, true /*pim*/,
1139 false /*pimreg*/, true /*vxlan_term*/);
1143 /* disable pim implicitly, if needed, on the termination device deletion */
1144 void pim_vxlan_del_term_dev(struct pim_instance
*pim
)
1146 struct interface
*ifp
= pim
->vxlan
.term_if_cfg
;
1147 struct pim_interface
*pim_ifp
;
1149 if (PIM_DEBUG_VXLAN
)
1150 zlog_debug("vxlan term oif cfg changed from %s to -",
1153 pim
->vxlan
.term_if_cfg
= NULL
;
1155 pim_ifp
= (struct pim_interface
*)ifp
->info
;
1157 pim_ifp
->pim_enable
= false;
1158 if (!pim_ifp
->gm_enable
)
1163 void pim_vxlan_init(struct pim_instance
*pim
)
1167 snprintf(hash_name
, sizeof(hash_name
),
1168 "PIM %s vxlan SG hash", pim
->vrf
->name
);
1169 pim
->vxlan
.sg_hash
= hash_create(pim_vxlan_sg_hash_key_make
,
1170 pim_vxlan_sg_hash_eq
, hash_name
);
1173 void pim_vxlan_exit(struct pim_instance
*pim
)
1175 hash_clean_and_free(&pim
->vxlan
.sg_hash
,
1176 (void (*)(void *))pim_vxlan_sg_del_item
);
/* Calls the work-timer setup helper with start=false. */
void pim_vxlan_terminate(void)
{
	const bool start = false;

	pim_vxlan_work_timer_setup(start);
}