1 /* PIM support for VxLAN BUM flooding
3 * Copyright (C) 2019 Cumulus Networks, Inc.
5 * This file is part of FRR.
7 * FRR is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
12 * FRR is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
31 #include "pim_iface.h"
32 #include "pim_memory.h"
34 #include "pim_register.h"
36 #include "pim_upstream.h"
37 #include "pim_ifchannel.h"
39 #include "pim_zebra.h"
40 #include "pim_vxlan.h"
43 /* pim-vxlan global info */
44 struct pim_vxlan vxlan_info
, *pim_vxlan_p
= &vxlan_info
;
46 static void pim_vxlan_work_timer_setup(bool start
);
47 static void pim_vxlan_set_peerlink_rif(struct pim_instance
*pim
,
48 struct interface
*ifp
);
/*************************** vxlan work list **********************************
 * A work list is maintained for staggered generation of pim null register
 * messages for vxlan SG entries that are in a reg_join state.
 *
 * A max of 500 NULL registers are generated in one shot. If paused, reg
 * generation continues on the next second and so on till all register
 * messages have been sent out. The process is restarted every 60s.
 *
 * The purpose of this null register generation is to set up the SPT and
 * maintain it independent of the presence of overlay BUM traffic.
 ****************************************************************************/
61 static void pim_vxlan_do_reg_work(void)
63 struct listnode
*listnode
;
65 struct pim_vxlan_sg
*vxlan_sg
;
70 if (sec_count
> PIM_VXLAN_NULL_REG_INTERVAL
) {
72 listnode
= vxlan_info
.next_work
?
73 vxlan_info
.next_work
:
74 vxlan_info
.work_list
->head
;
75 if (PIM_DEBUG_VXLAN
&& listnode
)
76 zlog_debug("vxlan SG work %s",
77 vxlan_info
.next_work
? "continues" : "starts");
79 listnode
= vxlan_info
.next_work
;
82 for (; listnode
; listnode
= listnode
->next
) {
83 vxlan_sg
= (struct pim_vxlan_sg
*)listnode
->data
;
84 if (vxlan_sg
->up
&& (vxlan_sg
->up
->reg_state
== PIM_REG_JOIN
)) {
86 zlog_debug("vxlan SG %s periodic NULL register",
90 * If we are on the work queue *and* the rpf
91 * has been lost on the vxlan_sg->up let's
92 * make sure that we don't send it.
94 if (vxlan_sg
->up
->rpf
.source_nexthop
.interface
) {
95 pim_null_register_send(vxlan_sg
->up
);
100 if (work_cnt
> vxlan_info
.max_work_cnt
) {
101 vxlan_info
.next_work
= listnode
->next
;
103 zlog_debug("vxlan SG %d work items proc and pause",
111 zlog_debug("vxlan SG %d work items proc", work_cnt
);
113 vxlan_info
.next_work
= NULL
;
116 /* Staggered work related info is initialized when the first work comes
119 static void pim_vxlan_init_work(void)
121 if (vxlan_info
.flags
& PIM_VXLANF_WORK_INITED
)
124 vxlan_info
.max_work_cnt
= PIM_VXLAN_WORK_MAX
;
125 vxlan_info
.flags
|= PIM_VXLANF_WORK_INITED
;
126 vxlan_info
.work_list
= list_new();
127 pim_vxlan_work_timer_setup(true/* start */);
130 static void pim_vxlan_add_work(struct pim_vxlan_sg
*vxlan_sg
)
132 if (vxlan_sg
->flags
& PIM_VXLAN_SGF_DEL_IN_PROG
) {
134 zlog_debug("vxlan SG %s skip work list; del-in-prog",
139 pim_vxlan_init_work();
141 /* already a part of the work list */
142 if (vxlan_sg
->work_node
)
146 zlog_debug("vxlan SG %s work list add",
148 vxlan_sg
->work_node
= listnode_add(vxlan_info
.work_list
, vxlan_sg
);
149 /* XXX: adjust max_work_cnt if needed */
152 static void pim_vxlan_del_work(struct pim_vxlan_sg
*vxlan_sg
)
154 if (!vxlan_sg
->work_node
)
158 zlog_debug("vxlan SG %s work list del",
161 if (vxlan_sg
->work_node
== vxlan_info
.next_work
)
162 vxlan_info
.next_work
= vxlan_sg
->work_node
->next
;
164 list_delete_node(vxlan_info
.work_list
, vxlan_sg
->work_node
);
165 vxlan_sg
->work_node
= NULL
;
168 void pim_vxlan_update_sg_reg_state(struct pim_instance
*pim
,
169 struct pim_upstream
*up
, bool reg_join
)
171 struct pim_vxlan_sg
*vxlan_sg
;
173 vxlan_sg
= pim_vxlan_sg_find(pim
, &up
->sg
);
177 /* add the vxlan sg entry to a work list for periodic reg joins.
178 * the entry will stay in the list as long as the register state is
182 pim_vxlan_add_work(vxlan_sg
);
184 pim_vxlan_del_work(vxlan_sg
);
187 static int pim_vxlan_work_timer_cb(struct thread
*t
)
189 pim_vxlan_do_reg_work();
190 pim_vxlan_work_timer_setup(true /* start */);
194 /* global 1second timer used for periodic processing */
195 static void pim_vxlan_work_timer_setup(bool start
)
197 THREAD_OFF(vxlan_info
.work_timer
);
199 thread_add_timer(router
->master
, pim_vxlan_work_timer_cb
, NULL
,
200 PIM_VXLAN_WORK_TIME
, &vxlan_info
.work_timer
);
/**************************** vxlan origination mroutes ***********************
 * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination
 * mroute is set up by pimd. The purpose of this mroute is to forward vxlan
 * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast) packets
 * over the underlay.
 *
 * Sample mroute (single VTEP):
 * (27.0.0.7, 239.1.1.100)     Iif: lo      Oifs: uplink-1
 *
 * Sample mroute (anycast VTEP):
 * (36.0.0.9, 239.1.1.100)          Iif: peerlink-3.4094\
 *                                       Oifs: peerlink-3.4094 uplink-1
 ***************************************************************************/
216 static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg
*vxlan_sg
)
218 struct pim_upstream
*up
= vxlan_sg
->up
;
224 zlog_debug("vxlan SG %s orig mroute-up del",
229 if (up
->flags
& PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG
) {
230 /* clear out all the vxlan properties */
231 up
->flags
&= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG
|
232 PIM_UPSTREAM_FLAG_MASK_STATIC_IIF
|
233 PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY
|
234 PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG
|
235 PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA
|
236 PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL
);
238 /* We bring things to a grinding halt by force expirying
239 * the kat. Doing this will also remove the reference we
240 * created as a "vxlan" source and delete the upstream entry
241 * if there are no other references.
243 if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up
->flags
)) {
244 THREAD_OFF(up
->t_ka_timer
);
245 up
= pim_upstream_keep_alive_timer_proc(up
);
247 /* this is really unexpected as we force vxlan
248 * origination mroutes active sources but just in
251 up
= pim_upstream_del(vxlan_sg
->pim
, up
, __func__
);
253 /* if there are other references register the source
257 enum pim_rpf_result r
;
259 r
= pim_rpf_update(vxlan_sg
->pim
, up
, NULL
, __func__
);
260 if (r
== PIM_RPF_FAILURE
) {
263 "vxlan SG %s rpf_update failure",
270 static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg
*vxlan_sg
)
272 /* update MFC with the new IIF */
273 pim_upstream_fill_static_iif(vxlan_sg
->up
, vxlan_sg
->iif
);
274 pim_upstream_mroute_iif_update(vxlan_sg
->up
->channel_oil
, __func__
);
277 zlog_debug("vxlan SG %s orig mroute-up updated with iif %s",
279 vxlan_sg
->iif
?vxlan_sg
->iif
->name
:"-");
283 /* For every VxLAN BUM multicast group we setup a SG-up that has the following
284 * "forced properties" -
285 * 1. Directly connected on a DR interface i.e. we must act as an FHR
286 * 2. We prime the pump i.e. no multicast data is needed to register this
287 * source with the FHR. To do that we send periodic null registers if
288 * the SG entry is in a register-join state. We also prevent expiry of
290 * 3. As this SG is setup without data there is no need to register encapsulate
291 * data traffic. This encapsulation is explicitly skipped for the following
293 * a) Many levels of encapsulation are needed creating MTU disc challenges.
294 * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then
295 * encapsulated again in a pim-register header.
296 * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if
297 * they both reg encapsulated traffic the RP will accept the duplicates
298 * as there are no RPF checks for this encapsulated data.
299 * a), b) can be workarounded if needed, but there is really no need because
300 * of (2) i.e. the pump is primed without data.
302 static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg
*vxlan_sg
)
304 struct pim_upstream
*up
;
305 struct pim_interface
*term_ifp
;
308 struct pim_instance
*pim
= vxlan_sg
->pim
;
316 zlog_debug("vxlan SG %s orig mroute-up add with iif %s",
318 vxlan_sg
->iif
?vxlan_sg
->iif
->name
:"-");
320 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags
);
321 /* pin the IIF to lo or peerlink-subinterface and disable NHT */
322 PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags
);
323 /* Fake traffic by setting SRC_STREAM and starting KAT */
324 /* We intentionally skip updating ref count for SRC_STREAM/FHR.
325 * Setting SRC_VXLAN should have already created a reference
326 * preventing the entry from being deleted
328 PIM_UPSTREAM_FLAG_SET_FHR(flags
);
329 PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags
);
330 /* Force pimreg even if non-DR. This is needed on a MLAG setup for
333 PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags
);
334 /* prevent KAT expiry. we want the MDT setup even if there is no BUM
337 PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags
);
338 /* SPT for vxlan BUM groups is primed and maintained via NULL
339 * registers so there is no need to reg-encapsulate
340 * vxlan-encapsulated overlay data traffic
342 PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags
);
343 /* On a MLAG setup we force a copy to the MLAG peer while also
344 * accepting traffic from the peer. To do this we set peerlink-rif as
345 * the IIF and also add it to the OIL
347 PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags
);
349 /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */
350 up
= pim_upstream_find(vxlan_sg
->pim
, &vxlan_sg
->sg
);
352 /* if the iif is set to something other than the vxlan_sg->iif
353 * we must dereg the old nexthop and force to new "static"
356 if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up
->flags
)) {
357 pim_addr_to_prefix(&nht_p
, up
->upstream_addr
);
358 pim_delete_tracked_nexthop(vxlan_sg
->pim
, &nht_p
, up
,
361 /* We are acting FHR; clear out use_rpt setting if any */
362 pim_upstream_update_use_rpt(up
, false /*update_mroute*/);
363 pim_upstream_ref(up
, flags
, __func__
);
365 term_ifp
= pim_vxlan_get_term_ifp(pim
);
366 /* mute termination device on origination mroutes */
368 pim_channel_update_oif_mute(up
->channel_oil
,
370 pim_vxlan_orig_mr_up_iif_update(vxlan_sg
);
371 /* mute pimreg on origination mroutes */
373 pim_channel_update_oif_mute(up
->channel_oil
,
374 pim
->regiface
->info
);
376 up
= pim_upstream_add(vxlan_sg
->pim
, &vxlan_sg
->sg
,
377 vxlan_sg
->iif
, flags
, __func__
, NULL
);
383 zlog_debug("vxlan SG %s orig mroute-up add failed",
388 pim_upstream_keep_alive_timer_start(up
, vxlan_sg
->pim
->keep_alive_time
);
390 /* register the source with the RP */
391 switch (up
->reg_state
) {
394 pim_register_join(up
);
395 pim_null_register_send(up
);
399 /* if the pim upstream entry is already in reg-join state
400 * send null_register right away and add to the register
403 pim_null_register_send(up
);
404 pim_vxlan_update_sg_reg_state(pim
, up
, true);
407 case PIM_REG_JOIN_PENDING
:
412 /* update the inherited OIL */
413 pim_upstream_inherited_olist(vxlan_sg
->pim
, up
);
414 if (!up
->channel_oil
->installed
)
415 pim_upstream_mroute_add(up
->channel_oil
, __func__
);
418 static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg
*vxlan_sg
)
420 if (!vxlan_sg
->up
|| !vxlan_sg
->orig_oif
)
424 zlog_debug("vxlan SG %s oif %s add",
425 vxlan_sg
->sg_str
, vxlan_sg
->orig_oif
->name
);
427 vxlan_sg
->flags
|= PIM_VXLAN_SGF_OIF_INSTALLED
;
428 pim_channel_add_oif(vxlan_sg
->up
->channel_oil
,
429 vxlan_sg
->orig_oif
, PIM_OIF_FLAG_PROTO_VXLAN
,
433 static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg
*vxlan_sg
)
435 struct interface
*orig_oif
;
437 orig_oif
= vxlan_sg
->orig_oif
;
438 vxlan_sg
->orig_oif
= NULL
;
440 if (!(vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
))
444 zlog_debug("vxlan SG %s oif %s del",
445 vxlan_sg
->sg_str
, orig_oif
->name
);
447 vxlan_sg
->flags
&= ~PIM_VXLAN_SGF_OIF_INSTALLED
;
448 pim_channel_del_oif(vxlan_sg
->up
->channel_oil
,
449 orig_oif
, PIM_OIF_FLAG_PROTO_VXLAN
, __func__
);
452 static inline struct interface
*pim_vxlan_orig_mr_oif_get(
453 struct pim_instance
*pim
)
455 return (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) ?
456 pim
->vxlan
.peerlink_rif
: NULL
;
459 /* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if
460 * the mroute is in a non-default vrf).
461 * Anycast VTEPs: IIF is the MLAG ISL/peerlink.
463 static inline struct interface
*pim_vxlan_orig_mr_iif_get(
464 struct pim_instance
*pim
)
466 return ((vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) &&
467 pim
->vxlan
.peerlink_rif
) ?
468 pim
->vxlan
.peerlink_rif
: pim
->vxlan
.default_iif
;
471 static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg
*vxlan_sg
)
473 struct pim_interface
*pim_ifp
;
475 vxlan_sg
->iif
= pim_vxlan_orig_mr_iif_get(vxlan_sg
->pim
);
479 pim_ifp
= (struct pim_interface
*)vxlan_sg
->iif
->info
;
480 if (!pim_ifp
|| (pim_ifp
->mroute_vif_index
< 0))
486 static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg
*vxlan_sg
)
488 pim_vxlan_orig_mr_up_add(vxlan_sg
);
490 vxlan_sg
->orig_oif
= pim_vxlan_orig_mr_oif_get(vxlan_sg
->pim
);
491 pim_vxlan_orig_mr_oif_add(vxlan_sg
);
494 static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg
*vxlan_sg
)
496 if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg
))
500 zlog_debug("vxlan SG %s orig-mr add", vxlan_sg
->sg_str
);
502 pim_vxlan_orig_mr_install(vxlan_sg
);
505 static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg
*vxlan_sg
)
508 zlog_debug("vxlan SG %s orig-mr del", vxlan_sg
->sg_str
);
510 pim_vxlan_orig_mr_oif_del(vxlan_sg
);
511 pim_vxlan_orig_mr_up_del(vxlan_sg
);
514 static void pim_vxlan_orig_mr_iif_update(struct hash_bucket
*bucket
, void *arg
)
516 struct interface
*ifp
;
517 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
518 struct interface
*old_iif
= vxlan_sg
->iif
;
520 if (!pim_vxlan_is_orig_mroute(vxlan_sg
))
523 ifp
= pim_vxlan_orig_mr_iif_get(vxlan_sg
->pim
);
525 zlog_debug("vxlan SG %s iif changed from %s to %s",
527 old_iif
? old_iif
->name
: "-",
528 ifp
? ifp
->name
: "-");
530 if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg
)) {
532 /* upstream exists but iif changed */
533 pim_vxlan_orig_mr_up_iif_update(vxlan_sg
);
536 pim_vxlan_orig_mr_install(vxlan_sg
);
539 pim_vxlan_orig_mr_del(vxlan_sg
);
543 /**************************** vxlan termination mroutes ***********************
544 * For every bum-mcast-grp registered by evpn a *G termination
545 * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan
546 * packets with the bum-mcast-grp dip from the underlay and terminate the
547 * tunnel. This is done by including the vxlan termination device (ipmr-lo) in
548 * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay
552 * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1
553 *****************************************************************************/
554 struct pim_interface
*pim_vxlan_get_term_ifp(struct pim_instance
*pim
)
556 return pim
->vxlan
.term_if
?
557 (struct pim_interface
*)pim
->vxlan
.term_if
->info
: NULL
;
560 static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg
*vxlan_sg
)
562 if (vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
)
566 zlog_debug("vxlan SG %s term-oif %s add",
567 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
569 if (pim_ifchannel_local_membership_add(vxlan_sg
->term_oif
,
570 &vxlan_sg
->sg
, true /*is_vxlan */)) {
571 vxlan_sg
->flags
|= PIM_VXLAN_SGF_OIF_INSTALLED
;
572 /* update the inherited OIL */
573 /* XXX - I don't see the inherited OIL updated when a local
574 * member is added. And that probably needs to be fixed. Till
575 * that happens we do a force update on the inherited OIL
578 pim_upstream_inherited_olist(vxlan_sg
->pim
, vxlan_sg
->up
);
580 zlog_warn("vxlan SG %s term-oif %s add failed",
581 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
585 static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg
*vxlan_sg
)
587 if (!(vxlan_sg
->flags
& PIM_VXLAN_SGF_OIF_INSTALLED
))
591 zlog_debug("vxlan SG %s oif %s del",
592 vxlan_sg
->sg_str
, vxlan_sg
->term_oif
->name
);
594 vxlan_sg
->flags
&= ~PIM_VXLAN_SGF_OIF_INSTALLED
;
595 pim_ifchannel_local_membership_del(vxlan_sg
->term_oif
, &vxlan_sg
->sg
);
596 /* update the inherited OIL */
597 /* XXX - I don't see the inherited OIL updated when a local member
598 * is deleted. And that probably needs to be fixed. Till that happens
599 * we do a force update on the inherited OIL here.
601 pim_upstream_inherited_olist(vxlan_sg
->pim
, vxlan_sg
->up
);
604 static void pim_vxlan_update_sg_entry_mlag(struct pim_instance
*pim
,
605 struct pim_upstream
*up
, bool inherit
)
609 if (inherit
&& up
->parent
&&
610 PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up
->parent
->flags
) &&
611 PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up
->parent
->flags
))
614 pim_mlag_up_df_role_update(pim
, up
, is_df
, "inherit_xg_df");
617 /* We run MLAG DF election only on mroutes that have the termination
618 * device ipmr-lo in the immediate OIL. This is only (*, G) entries at the
619 * moment. For (S, G) entries that (with ipmr-lo in the inherited OIL) we
620 * inherit the DF role from the (*, G) entry.
622 void pim_vxlan_inherit_mlag_flags(struct pim_instance
*pim
,
623 struct pim_upstream
*up
, bool inherit
)
625 struct listnode
*listnode
;
626 struct pim_upstream
*child
;
628 for (ALL_LIST_ELEMENTS_RO(up
->sources
, listnode
,
630 pim_vxlan_update_sg_entry_mlag(pim
,
631 child
, true /* inherit */);
635 static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg
*vxlan_sg
)
637 struct pim_upstream
*up
;
646 zlog_debug("vxlan SG %s term mroute-up add",
649 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags
);
650 /* enable MLAG designated-forwarder election on termination mroutes */
651 PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags
);
653 up
= pim_upstream_add(vxlan_sg
->pim
, &vxlan_sg
->sg
, NULL
/* iif */,
654 flags
, __func__
, NULL
);
658 zlog_warn("vxlan SG %s term mroute-up add failed",
663 /* update existing SG entries with the parent's MLAG flag */
664 pim_vxlan_inherit_mlag_flags(vxlan_sg
->pim
, up
, true /*enable*/);
667 static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg
*vxlan_sg
)
669 struct pim_upstream
*up
= vxlan_sg
->up
;
675 zlog_debug("vxlan SG %s term mroute-up del",
678 if (up
->flags
& PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM
) {
679 /* update SG entries that are inheriting from this XG entry */
680 pim_vxlan_inherit_mlag_flags(vxlan_sg
->pim
, up
,
682 /* clear out all the vxlan related flags */
683 up
->flags
&= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM
|
684 PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN
);
685 pim_mlag_up_local_del(vxlan_sg
->pim
, up
);
686 pim_upstream_del(vxlan_sg
->pim
, up
, __func__
);
690 static void pim_vxlan_term_mr_add(struct pim_vxlan_sg
*vxlan_sg
)
693 zlog_debug("vxlan SG %s term mroute add", vxlan_sg
->sg_str
);
695 vxlan_sg
->term_oif
= vxlan_sg
->pim
->vxlan
.term_if
;
696 if (!vxlan_sg
->term_oif
)
697 /* defer termination mroute till we have a termination device */
700 pim_vxlan_term_mr_up_add(vxlan_sg
);
701 /* set up local membership for the term-oif */
702 pim_vxlan_term_mr_oif_add(vxlan_sg
);
705 static void pim_vxlan_term_mr_del(struct pim_vxlan_sg
*vxlan_sg
)
708 zlog_debug("vxlan SG %s term mroute del", vxlan_sg
->sg_str
);
710 /* remove local membership associated with the term oif */
711 pim_vxlan_term_mr_oif_del(vxlan_sg
);
712 /* remove references to the upstream entry */
713 pim_vxlan_term_mr_up_del(vxlan_sg
);
716 /************************** vxlan SG cache management ************************/
717 static unsigned int pim_vxlan_sg_hash_key_make(const void *p
)
719 const struct pim_vxlan_sg
*vxlan_sg
= p
;
721 return pim_sgaddr_hash(vxlan_sg
->sg
, 0);
724 static bool pim_vxlan_sg_hash_eq(const void *p1
, const void *p2
)
726 const struct pim_vxlan_sg
*sg1
= p1
;
727 const struct pim_vxlan_sg
*sg2
= p2
;
729 return !pim_sgaddr_cmp(sg1
->sg
, sg2
->sg
);
732 static struct pim_vxlan_sg
*pim_vxlan_sg_new(struct pim_instance
*pim
,
735 struct pim_vxlan_sg
*vxlan_sg
;
737 vxlan_sg
= XCALLOC(MTYPE_PIM_VXLAN_SG
, sizeof(*vxlan_sg
));
741 snprintfrr(vxlan_sg
->sg_str
, sizeof(vxlan_sg
->sg_str
), "%pSG", sg
);
744 zlog_debug("vxlan SG %s alloc", vxlan_sg
->sg_str
);
746 vxlan_sg
= hash_get(pim
->vxlan
.sg_hash
, vxlan_sg
, hash_alloc_intern
);
748 /* we register with the MLAG daemon in the first VxLAN SG and never
749 * de-register during that life of the pimd
751 if (pim
->vxlan
.sg_hash
->count
== 1) {
752 vxlan_mlag
.flags
|= PIM_VXLAN_MLAGF_DO_REG
;
759 struct pim_vxlan_sg
*pim_vxlan_sg_find(struct pim_instance
*pim
, pim_sgaddr
*sg
)
761 struct pim_vxlan_sg lookup
;
764 return hash_lookup(pim
->vxlan
.sg_hash
, &lookup
);
767 struct pim_vxlan_sg
*pim_vxlan_sg_add(struct pim_instance
*pim
, pim_sgaddr
*sg
)
769 struct pim_vxlan_sg
*vxlan_sg
;
771 vxlan_sg
= pim_vxlan_sg_find(pim
, sg
);
775 vxlan_sg
= pim_vxlan_sg_new(pim
, sg
);
777 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
778 pim_vxlan_orig_mr_add(vxlan_sg
);
780 pim_vxlan_term_mr_add(vxlan_sg
);
785 static void pim_vxlan_sg_del_item(struct pim_vxlan_sg
*vxlan_sg
)
787 vxlan_sg
->flags
|= PIM_VXLAN_SGF_DEL_IN_PROG
;
789 pim_vxlan_del_work(vxlan_sg
);
791 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
792 pim_vxlan_orig_mr_del(vxlan_sg
);
794 pim_vxlan_term_mr_del(vxlan_sg
);
797 zlog_debug("vxlan SG %s free", vxlan_sg
->sg_str
);
799 XFREE(MTYPE_PIM_VXLAN_SG
, vxlan_sg
);
802 void pim_vxlan_sg_del(struct pim_instance
*pim
, pim_sgaddr
*sg
)
804 struct pim_vxlan_sg
*vxlan_sg
;
806 vxlan_sg
= pim_vxlan_sg_find(pim
, sg
);
810 hash_release(pim
->vxlan
.sg_hash
, vxlan_sg
);
811 pim_vxlan_sg_del_item(vxlan_sg
);
814 /******************************* MLAG handling *******************************/
815 bool pim_vxlan_do_mlag_reg(void)
817 return (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_DO_REG
);
820 /* The peerlink sub-interface is added as an OIF to the origination-mroute.
821 * This is done to send a copy of the multicast-vxlan encapsulated traffic
822 * to the MLAG peer which may mroute it over the underlay if there are any
823 * interested receivers.
825 static void pim_vxlan_sg_peerlink_oif_update(struct hash_bucket
*bucket
,
828 struct interface
*new_oif
= (struct interface
*)arg
;
829 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
831 if (!pim_vxlan_is_orig_mroute(vxlan_sg
))
834 if (vxlan_sg
->orig_oif
== new_oif
)
837 pim_vxlan_orig_mr_oif_del(vxlan_sg
);
839 vxlan_sg
->orig_oif
= new_oif
;
840 pim_vxlan_orig_mr_oif_add(vxlan_sg
);
843 /* In the case of anycast VTEPs the VTEP-PIP must be used as the
846 bool pim_vxlan_get_register_src(struct pim_instance
*pim
,
847 struct pim_upstream
*up
, struct in_addr
*src_p
)
849 if (!(vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
))
852 /* if address is not available suppress the pim-register */
853 if (vxlan_mlag
.reg_addr
.s_addr
== INADDR_ANY
)
856 *src_p
= vxlan_mlag
.reg_addr
;
860 void pim_vxlan_mlag_update(bool enable
, bool peer_state
, uint32_t role
,
861 struct interface
*peerlink_rif
,
862 struct in_addr
*reg_addr
)
864 struct pim_instance
*pim
;
865 char addr_buf
[INET_ADDRSTRLEN
];
866 struct pim_interface
*pim_ifp
= NULL
;
868 if (PIM_DEBUG_VXLAN
) {
869 inet_ntop(AF_INET
, reg_addr
,
870 addr_buf
, INET_ADDRSTRLEN
);
871 zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s",
872 enable
? "enable" : "disable",
873 peer_state
? "up" : "down",
875 peerlink_rif
? peerlink_rif
->name
: "-",
879 /* XXX: for now vxlan termination is only possible in the default VRF
880 * when that changes this will need to change to iterate all VRFs
882 pim
= pim_get_pim_instance(VRF_DEFAULT
);
885 vxlan_mlag
.flags
|= PIM_VXLAN_MLAGF_ENABLED
;
887 vxlan_mlag
.flags
&= ~PIM_VXLAN_MLAGF_ENABLED
;
889 if (vxlan_mlag
.peerlink_rif
!= peerlink_rif
)
890 vxlan_mlag
.peerlink_rif
= peerlink_rif
;
892 vxlan_mlag
.reg_addr
= *reg_addr
;
893 vxlan_mlag
.peer_state
= peer_state
;
894 vxlan_mlag
.role
= role
;
896 /* process changes */
897 if (vxlan_mlag
.peerlink_rif
)
898 pim_ifp
= (struct pim_interface
*)vxlan_mlag
.peerlink_rif
->info
;
899 if ((vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
) &&
900 pim_ifp
&& (pim_ifp
->mroute_vif_index
> 0))
901 pim_vxlan_set_peerlink_rif(pim
, peerlink_rif
);
903 pim_vxlan_set_peerlink_rif(pim
, NULL
);
906 /****************************** misc callbacks *******************************/
907 static void pim_vxlan_set_default_iif(struct pim_instance
*pim
,
908 struct interface
*ifp
)
910 struct interface
*old_iif
;
912 if (pim
->vxlan
.default_iif
== ifp
)
915 old_iif
= pim
->vxlan
.default_iif
;
917 zlog_debug("%s: vxlan default iif changed from %s to %s",
918 __func__
, old_iif
? old_iif
->name
: "-",
919 ifp
? ifp
->name
: "-");
921 old_iif
= pim_vxlan_orig_mr_iif_get(pim
);
922 pim
->vxlan
.default_iif
= ifp
;
923 ifp
= pim_vxlan_orig_mr_iif_get(pim
);
928 zlog_debug("%s: vxlan orig iif changed from %s to %s", __func__
,
929 old_iif
? old_iif
->name
: "-",
930 ifp
? ifp
->name
: "-");
932 /* add/del upstream entries for the existing vxlan SG when the
933 * interface becomes available
935 if (pim
->vxlan
.sg_hash
)
936 hash_iterate(pim
->vxlan
.sg_hash
,
937 pim_vxlan_orig_mr_iif_update
, NULL
);
940 static void pim_vxlan_up_cost_update(struct pim_instance
*pim
,
941 struct pim_upstream
*up
,
942 struct interface
*old_peerlink_rif
)
944 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up
->flags
))
947 if (up
->rpf
.source_nexthop
.interface
&&
948 ((up
->rpf
.source_nexthop
.interface
==
949 pim
->vxlan
.peerlink_rif
) ||
950 (up
->rpf
.source_nexthop
.interface
==
951 old_peerlink_rif
))) {
953 zlog_debug("RPF cost adjust for %s on peerlink-rif (old: %s, new: %s) change",
956 old_peerlink_rif
->name
: "-",
957 pim
->vxlan
.peerlink_rif
?
958 pim
->vxlan
.peerlink_rif
->name
: "-");
959 pim_mlag_up_local_add(pim
, up
);
963 static void pim_vxlan_term_mr_cost_update(struct hash_bucket
*bucket
, void *arg
)
965 struct interface
*old_peerlink_rif
= (struct interface
*)arg
;
966 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
967 struct pim_upstream
*up
;
968 struct listnode
*listnode
;
969 struct pim_upstream
*child
;
971 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
974 /* Lookup all XG and SG entries with RPF-interface peerlink_rif */
979 pim_vxlan_up_cost_update(vxlan_sg
->pim
, up
,
982 for (ALL_LIST_ELEMENTS_RO(up
->sources
, listnode
,
984 pim_vxlan_up_cost_update(vxlan_sg
->pim
, child
,
988 static void pim_vxlan_sg_peerlink_rif_update(struct hash_bucket
*bucket
,
991 pim_vxlan_orig_mr_iif_update(bucket
, NULL
);
992 pim_vxlan_term_mr_cost_update(bucket
, arg
);
995 static void pim_vxlan_set_peerlink_rif(struct pim_instance
*pim
,
996 struct interface
*ifp
)
998 struct interface
*old_iif
;
999 struct interface
*new_iif
;
1000 struct interface
*old_oif
;
1001 struct interface
*new_oif
;
1003 if (pim
->vxlan
.peerlink_rif
== ifp
)
1006 old_iif
= pim
->vxlan
.peerlink_rif
;
1007 if (PIM_DEBUG_VXLAN
)
1008 zlog_debug("%s: vxlan peerlink_rif changed from %s to %s",
1009 __func__
, old_iif
? old_iif
->name
: "-",
1010 ifp
? ifp
->name
: "-");
1012 old_iif
= pim_vxlan_orig_mr_iif_get(pim
);
1013 old_oif
= pim_vxlan_orig_mr_oif_get(pim
);
1014 pim
->vxlan
.peerlink_rif
= ifp
;
1016 new_iif
= pim_vxlan_orig_mr_iif_get(pim
);
1017 if (old_iif
!= new_iif
) {
1018 if (PIM_DEBUG_VXLAN
)
1019 zlog_debug("%s: vxlan orig iif changed from %s to %s",
1020 __func__
, old_iif
? old_iif
->name
: "-",
1021 new_iif
? new_iif
->name
: "-");
1023 /* add/del upstream entries for the existing vxlan SG when the
1024 * interface becomes available
1026 if (pim
->vxlan
.sg_hash
)
1027 hash_iterate(pim
->vxlan
.sg_hash
,
1028 pim_vxlan_sg_peerlink_rif_update
,
1032 new_oif
= pim_vxlan_orig_mr_oif_get(pim
);
1033 if (old_oif
!= new_oif
) {
1034 if (PIM_DEBUG_VXLAN
)
1035 zlog_debug("%s: vxlan orig oif changed from %s to %s",
1036 __func__
, old_oif
? old_oif
->name
: "-",
1037 new_oif
? new_oif
->name
: "-");
1038 if (pim
->vxlan
.sg_hash
)
1039 hash_iterate(pim
->vxlan
.sg_hash
,
1040 pim_vxlan_sg_peerlink_oif_update
,
1045 static void pim_vxlan_term_mr_oif_update(struct hash_bucket
*bucket
, void *arg
)
1047 struct interface
*ifp
= (struct interface
*)arg
;
1048 struct pim_vxlan_sg
*vxlan_sg
= (struct pim_vxlan_sg
*)bucket
->data
;
1050 if (pim_vxlan_is_orig_mroute(vxlan_sg
))
1053 if (vxlan_sg
->term_oif
== ifp
)
1056 if (PIM_DEBUG_VXLAN
)
1057 zlog_debug("vxlan SG %s term oif changed from %s to %s",
1059 vxlan_sg
->term_oif
? vxlan_sg
->term_oif
->name
: "-",
1060 ifp
? ifp
->name
: "-");
1062 pim_vxlan_term_mr_del(vxlan_sg
);
1063 vxlan_sg
->term_oif
= ifp
;
1064 pim_vxlan_term_mr_add(vxlan_sg
);
1067 static void pim_vxlan_term_oif_update(struct pim_instance
*pim
,
1068 struct interface
*ifp
)
1070 if (pim
->vxlan
.term_if
== ifp
)
1073 if (PIM_DEBUG_VXLAN
)
1074 zlog_debug("vxlan term oif changed from %s to %s",
1075 pim
->vxlan
.term_if
? pim
->vxlan
.term_if
->name
: "-",
1076 ifp
? ifp
->name
: "-");
1078 pim
->vxlan
.term_if
= ifp
;
1079 if (pim
->vxlan
.sg_hash
)
1080 hash_iterate(pim
->vxlan
.sg_hash
,
1081 pim_vxlan_term_mr_oif_update
, ifp
);
1084 void pim_vxlan_add_vif(struct interface
*ifp
)
1086 struct pim_interface
*pim_ifp
= ifp
->info
;
1087 struct pim_instance
*pim
= pim_ifp
->pim
;
1089 if (pim
->vrf
->vrf_id
!= VRF_DEFAULT
)
1092 if (if_is_loopback(ifp
))
1093 pim_vxlan_set_default_iif(pim
, ifp
);
1095 if (vxlan_mlag
.flags
& PIM_VXLAN_MLAGF_ENABLED
&&
1096 (ifp
== vxlan_mlag
.peerlink_rif
))
1097 pim_vxlan_set_peerlink_rif(pim
, ifp
);
1099 if (pim
->vxlan
.term_if_cfg
== ifp
)
1100 pim_vxlan_term_oif_update(pim
, ifp
);
1103 void pim_vxlan_del_vif(struct interface
*ifp
)
1105 struct pim_interface
*pim_ifp
= ifp
->info
;
1106 struct pim_instance
*pim
= pim_ifp
->pim
;
1108 if (pim
->vrf
->vrf_id
!= VRF_DEFAULT
)
1111 if (pim
->vxlan
.default_iif
== ifp
)
1112 pim_vxlan_set_default_iif(pim
, NULL
);
1114 if (pim
->vxlan
.peerlink_rif
== ifp
)
1115 pim_vxlan_set_peerlink_rif(pim
, NULL
);
1117 if (pim
->vxlan
.term_if
== ifp
)
1118 pim_vxlan_term_oif_update(pim
, NULL
);
1121 /* enable pim implicitly on the termination device add */
1122 void pim_vxlan_add_term_dev(struct pim_instance
*pim
,
1123 struct interface
*ifp
)
1125 struct pim_interface
*pim_ifp
;
1127 if (pim
->vxlan
.term_if_cfg
== ifp
)
1130 if (PIM_DEBUG_VXLAN
)
1131 zlog_debug("vxlan term oif cfg changed from %s to %s",
1132 pim
->vxlan
.term_if_cfg
?
1133 pim
->vxlan
.term_if_cfg
->name
: "-",
1136 pim
->vxlan
.term_if_cfg
= ifp
;
1138 /* enable pim on the term ifp */
1139 pim_ifp
= (struct pim_interface
*)ifp
->info
;
1141 PIM_IF_DO_PIM(pim_ifp
->options
);
1142 /* ifp is already oper up; activate it as a term dev */
1143 if (pim_ifp
->mroute_vif_index
>= 0)
1144 pim_vxlan_term_oif_update(pim
, ifp
);
1146 /* ensure that pimreg exists before using the newly created
1147 * vxlan termination device
1149 pim_if_create_pimreg(pim
);
1150 (void)pim_if_new(ifp
, false /*igmp*/, true /*pim*/,
1151 false /*pimreg*/, true /*vxlan_term*/);
1155 /* disable pim implicitly, if needed, on the termination device deletion */
1156 void pim_vxlan_del_term_dev(struct pim_instance
*pim
)
1158 struct interface
*ifp
= pim
->vxlan
.term_if_cfg
;
1159 struct pim_interface
*pim_ifp
;
1161 if (PIM_DEBUG_VXLAN
)
1162 zlog_debug("vxlan term oif cfg changed from %s to -",
1165 pim
->vxlan
.term_if_cfg
= NULL
;
1167 pim_ifp
= (struct pim_interface
*)ifp
->info
;
1169 PIM_IF_DONT_PIM(pim_ifp
->options
);
1170 if (!PIM_IF_TEST_IGMP(pim_ifp
->options
))
1175 void pim_vxlan_init(struct pim_instance
*pim
)
1179 snprintf(hash_name
, sizeof(hash_name
),
1180 "PIM %s vxlan SG hash", pim
->vrf
->name
);
1181 pim
->vxlan
.sg_hash
= hash_create(pim_vxlan_sg_hash_key_make
,
1182 pim_vxlan_sg_hash_eq
, hash_name
);
1185 void pim_vxlan_exit(struct pim_instance
*pim
)
1187 if (pim
->vxlan
.sg_hash
) {
1188 hash_clean(pim
->vxlan
.sg_hash
,
1189 (void (*)(void *))pim_vxlan_sg_del_item
);
1190 hash_free(pim
->vxlan
.sg_hash
);
1191 pim
->vxlan
.sg_hash
= NULL
;
1195 void pim_vxlan_terminate(void)
1197 pim_vxlan_work_timer_setup(false);