]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_vxlan.c
Merge pull request #12791 from taspelund/loc_rib_json_fix
[mirror_frr.git] / pimd / pim_vxlan.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* PIM support for VxLAN BUM flooding
3 *
4 * Copyright (C) 2019 Cumulus Networks, Inc.
5 */
6
7 #include <zebra.h>
8
9 #include <hash.h>
10 #include <jhash.h>
11 #include <log.h>
12 #include <prefix.h>
13 #include <vrf.h>
14
15 #include "pimd.h"
16 #include "pim_iface.h"
17 #include "pim_memory.h"
18 #include "pim_oil.h"
19 #include "pim_register.h"
20 #include "pim_str.h"
21 #include "pim_upstream.h"
22 #include "pim_ifchannel.h"
23 #include "pim_nht.h"
24 #include "pim_zebra.h"
25 #include "pim_vxlan.h"
26 #include "pim_mlag.h"
27
28 /* pim-vxlan global info */
29 struct pim_vxlan vxlan_info, *pim_vxlan_p = &vxlan_info;
30
31 static void pim_vxlan_work_timer_setup(bool start);
32 static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
33 struct interface *ifp);
34
35 /*************************** vxlan work list **********************************
36 * A work list is maintained for staggered generation of pim null register
37 * messages for vxlan SG entries that are in a reg_join state.
38 *
39 * A max of 500 NULL registers are generated at one shot. If paused, reg
40 * generation continues on the next second and so on till all register
41 * messages have been sent out. And the process is restarted every 60s.
42 *
43 * purpose of this null register generation is to setup the SPT and maintain
44 * independent of the presence of overlay BUM traffic.
45 ****************************************************************************/
46 static void pim_vxlan_do_reg_work(void)
47 {
48 struct listnode *listnode;
49 int work_cnt = 0;
50 struct pim_vxlan_sg *vxlan_sg;
51 static int sec_count;
52
53 ++sec_count;
54
55 if (sec_count > PIM_VXLAN_NULL_REG_INTERVAL) {
56 sec_count = 0;
57 listnode = vxlan_info.next_work ?
58 vxlan_info.next_work :
59 vxlan_info.work_list->head;
60 if (PIM_DEBUG_VXLAN && listnode)
61 zlog_debug("vxlan SG work %s",
62 vxlan_info.next_work ? "continues" : "starts");
63 } else {
64 listnode = vxlan_info.next_work;
65 }
66
67 for (; listnode; listnode = listnode->next) {
68 vxlan_sg = (struct pim_vxlan_sg *)listnode->data;
69 if (vxlan_sg->up && (vxlan_sg->up->reg_state == PIM_REG_JOIN)) {
70 if (PIM_DEBUG_VXLAN)
71 zlog_debug("vxlan SG %s periodic NULL register",
72 vxlan_sg->sg_str);
73
74 /*
75 * If we are on the work queue *and* the rpf
76 * has been lost on the vxlan_sg->up let's
77 * make sure that we don't send it.
78 */
79 if (vxlan_sg->up->rpf.source_nexthop.interface) {
80 pim_null_register_send(vxlan_sg->up);
81 ++work_cnt;
82 }
83 }
84
85 if (work_cnt > vxlan_info.max_work_cnt) {
86 vxlan_info.next_work = listnode->next;
87 if (PIM_DEBUG_VXLAN)
88 zlog_debug("vxlan SG %d work items proc and pause",
89 work_cnt);
90 return;
91 }
92 }
93
94 if (work_cnt) {
95 if (PIM_DEBUG_VXLAN)
96 zlog_debug("vxlan SG %d work items proc", work_cnt);
97 }
98 vxlan_info.next_work = NULL;
99 }
100
101 /* Staggered work related info is initialized when the first work comes
102 * along
103 */
104 static void pim_vxlan_init_work(void)
105 {
106 if (vxlan_info.flags & PIM_VXLANF_WORK_INITED)
107 return;
108
109 vxlan_info.max_work_cnt = PIM_VXLAN_WORK_MAX;
110 vxlan_info.flags |= PIM_VXLANF_WORK_INITED;
111 vxlan_info.work_list = list_new();
112 pim_vxlan_work_timer_setup(true/* start */);
113 }
114
115 static void pim_vxlan_add_work(struct pim_vxlan_sg *vxlan_sg)
116 {
117 if (vxlan_sg->flags & PIM_VXLAN_SGF_DEL_IN_PROG) {
118 if (PIM_DEBUG_VXLAN)
119 zlog_debug("vxlan SG %s skip work list; del-in-prog",
120 vxlan_sg->sg_str);
121 return;
122 }
123
124 pim_vxlan_init_work();
125
126 /* already a part of the work list */
127 if (vxlan_sg->work_node)
128 return;
129
130 if (PIM_DEBUG_VXLAN)
131 zlog_debug("vxlan SG %s work list add",
132 vxlan_sg->sg_str);
133 vxlan_sg->work_node = listnode_add(vxlan_info.work_list, vxlan_sg);
134 /* XXX: adjust max_work_cnt if needed */
135 }
136
137 static void pim_vxlan_del_work(struct pim_vxlan_sg *vxlan_sg)
138 {
139 if (!vxlan_sg->work_node)
140 return;
141
142 if (PIM_DEBUG_VXLAN)
143 zlog_debug("vxlan SG %s work list del",
144 vxlan_sg->sg_str);
145
146 if (vxlan_sg->work_node == vxlan_info.next_work)
147 vxlan_info.next_work = vxlan_sg->work_node->next;
148
149 list_delete_node(vxlan_info.work_list, vxlan_sg->work_node);
150 vxlan_sg->work_node = NULL;
151 }
152
153 void pim_vxlan_update_sg_reg_state(struct pim_instance *pim,
154 struct pim_upstream *up, bool reg_join)
155 {
156 struct pim_vxlan_sg *vxlan_sg;
157
158 vxlan_sg = pim_vxlan_sg_find(pim, &up->sg);
159 if (!vxlan_sg)
160 return;
161
162 /* add the vxlan sg entry to a work list for periodic reg joins.
163 * the entry will stay in the list as long as the register state is
164 * PIM_REG_JOIN
165 */
166 if (reg_join)
167 pim_vxlan_add_work(vxlan_sg);
168 else
169 pim_vxlan_del_work(vxlan_sg);
170 }
171
172 static void pim_vxlan_work_timer_cb(struct thread *t)
173 {
174 pim_vxlan_do_reg_work();
175 pim_vxlan_work_timer_setup(true /* start */);
176 }
177
178 /* global 1second timer used for periodic processing */
179 static void pim_vxlan_work_timer_setup(bool start)
180 {
181 THREAD_OFF(vxlan_info.work_timer);
182 if (start)
183 thread_add_timer(router->master, pim_vxlan_work_timer_cb, NULL,
184 PIM_VXLAN_WORK_TIME, &vxlan_info.work_timer);
185 }
186
187 /**************************** vxlan origination mroutes ***********************
188 * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination
189 * mroute is setup by pimd. The purpose of this mroute is to forward vxlan
190 * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast packets
191 * over the underlay.)
192 *
193 * Sample mroute (single VTEP):
194 * (27.0.0.7, 239.1.1.100) Iif: lo Oifs: uplink-1
195 *
196 * Sample mroute (anycast VTEP):
197 * (36.0.0.9, 239.1.1.100) Iif: peerlink-3.4094\
198 * Oifs: peerlink-3.4094 uplink-1
199 ***************************************************************************/
200 static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
201 {
202 struct pim_upstream *up = vxlan_sg->up;
203
204 if (!up)
205 return;
206
207 if (PIM_DEBUG_VXLAN)
208 zlog_debug("vxlan SG %s orig mroute-up del",
209 vxlan_sg->sg_str);
210
211 vxlan_sg->up = NULL;
212
213 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) {
214 /* clear out all the vxlan properties */
215 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG |
216 PIM_UPSTREAM_FLAG_MASK_STATIC_IIF |
217 PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY |
218 PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG |
219 PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA |
220 PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL);
221
222 /* We bring things to a grinding halt by force expirying
223 * the kat. Doing this will also remove the reference we
224 * created as a "vxlan" source and delete the upstream entry
225 * if there are no other references.
226 */
227 if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up->flags)) {
228 THREAD_OFF(up->t_ka_timer);
229 up = pim_upstream_keep_alive_timer_proc(up);
230 } else {
231 /* this is really unexpected as we force vxlan
232 * origination mroutes active sources but just in
233 * case
234 */
235 up = pim_upstream_del(vxlan_sg->pim, up, __func__);
236 }
237 /* if there are other references register the source
238 * for nht
239 */
240 if (up) {
241 enum pim_rpf_result r;
242
243 r = pim_rpf_update(vxlan_sg->pim, up, NULL, __func__);
244 if (r == PIM_RPF_FAILURE) {
245 if (PIM_DEBUG_VXLAN)
246 zlog_debug(
247 "vxlan SG %s rpf_update failure",
248 vxlan_sg->sg_str);
249 }
250 }
251 }
252 }
253
254 static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg *vxlan_sg)
255 {
256 /* update MFC with the new IIF */
257 pim_upstream_fill_static_iif(vxlan_sg->up, vxlan_sg->iif);
258 pim_upstream_mroute_iif_update(vxlan_sg->up->channel_oil, __func__);
259
260 if (PIM_DEBUG_VXLAN)
261 zlog_debug("vxlan SG %s orig mroute-up updated with iif %s",
262 vxlan_sg->sg_str,
263 vxlan_sg->iif?vxlan_sg->iif->name:"-");
264
265 }
266
267 /* For every VxLAN BUM multicast group we setup a SG-up that has the following
268 * "forced properties" -
269 * 1. Directly connected on a DR interface i.e. we must act as an FHR
270 * 2. We prime the pump i.e. no multicast data is needed to register this
271 * source with the FHR. To do that we send periodic null registers if
272 * the SG entry is in a register-join state. We also prevent expiry of
273 * KAT.
274 * 3. As this SG is setup without data there is no need to register encapsulate
275 * data traffic. This encapsulation is explicitly skipped for the following
276 * reasons -
277 * a) Many levels of encapsulation are needed creating MTU disc challenges.
278 * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then
279 * encapsulated again in a pim-register header.
280 * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if
281 * they both reg encapsulated traffic the RP will accept the duplicates
282 * as there are no RPF checks for this encapsulated data.
283 * a), b) can be workarounded if needed, but there is really no need because
284 * of (2) i.e. the pump is primed without data.
285 */
286 static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
287 {
288 struct pim_upstream *up;
289 struct pim_interface *term_ifp;
290 int flags = 0;
291 struct pim_instance *pim = vxlan_sg->pim;
292
293 if (vxlan_sg->up) {
294 /* nothing to do */
295 return;
296 }
297
298 if (PIM_DEBUG_VXLAN)
299 zlog_debug("vxlan SG %s orig mroute-up add with iif %s",
300 vxlan_sg->sg_str,
301 vxlan_sg->iif?vxlan_sg->iif->name:"-");
302
303 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags);
304 /* pin the IIF to lo or peerlink-subinterface and disable NHT */
305 PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags);
306 /* Fake traffic by setting SRC_STREAM and starting KAT */
307 /* We intentionally skip updating ref count for SRC_STREAM/FHR.
308 * Setting SRC_VXLAN should have already created a reference
309 * preventing the entry from being deleted
310 */
311 PIM_UPSTREAM_FLAG_SET_FHR(flags);
312 PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags);
313 /* Force pimreg even if non-DR. This is needed on a MLAG setup for
314 * VxLAN AA
315 */
316 PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags);
317 /* prevent KAT expiry. we want the MDT setup even if there is no BUM
318 * traffic
319 */
320 PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags);
321 /* SPT for vxlan BUM groups is primed and maintained via NULL
322 * registers so there is no need to reg-encapsulate
323 * vxlan-encapsulated overlay data traffic
324 */
325 PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags);
326 /* On a MLAG setup we force a copy to the MLAG peer while also
327 * accepting traffic from the peer. To do this we set peerlink-rif as
328 * the IIF and also add it to the OIL
329 */
330 PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags);
331
332 /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */
333 up = pim_upstream_find(vxlan_sg->pim, &vxlan_sg->sg);
334 if (up) {
335 /* if the iif is set to something other than the vxlan_sg->iif
336 * we must dereg the old nexthop and force to new "static"
337 * iif
338 */
339 if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) {
340 pim_delete_tracked_nexthop(vxlan_sg->pim,
341 up->upstream_addr, up, NULL);
342 }
343 /* We are acting FHR; clear out use_rpt setting if any */
344 pim_upstream_update_use_rpt(up, false /*update_mroute*/);
345 pim_upstream_ref(up, flags, __func__);
346 vxlan_sg->up = up;
347 term_ifp = pim_vxlan_get_term_ifp(pim);
348 /* mute termination device on origination mroutes */
349 if (term_ifp)
350 pim_channel_update_oif_mute(up->channel_oil,
351 term_ifp);
352 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
353 /* mute pimreg on origination mroutes */
354 if (pim->regiface)
355 pim_channel_update_oif_mute(up->channel_oil,
356 pim->regiface->info);
357 } else {
358 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg,
359 vxlan_sg->iif, flags, __func__, NULL);
360 vxlan_sg->up = up;
361 }
362
363 if (!up) {
364 if (PIM_DEBUG_VXLAN)
365 zlog_debug("vxlan SG %s orig mroute-up add failed",
366 vxlan_sg->sg_str);
367 return;
368 }
369
370 pim_upstream_keep_alive_timer_start(up, vxlan_sg->pim->keep_alive_time);
371
372 /* register the source with the RP */
373 switch (up->reg_state) {
374
375 case PIM_REG_NOINFO:
376 pim_register_join(up);
377 pim_null_register_send(up);
378 break;
379
380 case PIM_REG_JOIN:
381 /* if the pim upstream entry is already in reg-join state
382 * send null_register right away and add to the register
383 * worklist
384 */
385 pim_null_register_send(up);
386 pim_vxlan_update_sg_reg_state(pim, up, true);
387 break;
388
389 case PIM_REG_JOIN_PENDING:
390 case PIM_REG_PRUNE:
391 break;
392 }
393
394 /* update the inherited OIL */
395 pim_upstream_inherited_olist(vxlan_sg->pim, up);
396 if (!up->channel_oil->installed)
397 pim_upstream_mroute_add(up->channel_oil, __func__);
398 }
399
400 static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
401 {
402 if (!vxlan_sg->up || !vxlan_sg->orig_oif)
403 return;
404
405 if (PIM_DEBUG_VXLAN)
406 zlog_debug("vxlan SG %s oif %s add",
407 vxlan_sg->sg_str, vxlan_sg->orig_oif->name);
408
409 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
410 pim_channel_add_oif(vxlan_sg->up->channel_oil,
411 vxlan_sg->orig_oif, PIM_OIF_FLAG_PROTO_VXLAN,
412 __func__);
413 }
414
415 static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
416 {
417 struct interface *orig_oif;
418
419 orig_oif = vxlan_sg->orig_oif;
420 vxlan_sg->orig_oif = NULL;
421
422 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
423 return;
424
425 if (PIM_DEBUG_VXLAN)
426 zlog_debug("vxlan SG %s oif %s del",
427 vxlan_sg->sg_str, orig_oif->name);
428
429 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
430 pim_channel_del_oif(vxlan_sg->up->channel_oil,
431 orig_oif, PIM_OIF_FLAG_PROTO_VXLAN, __func__);
432 }
433
434 static inline struct interface *pim_vxlan_orig_mr_oif_get(
435 struct pim_instance *pim)
436 {
437 return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) ?
438 pim->vxlan.peerlink_rif : NULL;
439 }
440
441 /* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if
442 * the mroute is in a non-default vrf).
443 * Anycast VTEPs: IIF is the MLAG ISL/peerlink.
444 */
445 static inline struct interface *pim_vxlan_orig_mr_iif_get(
446 struct pim_instance *pim)
447 {
448 return ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
449 pim->vxlan.peerlink_rif) ?
450 pim->vxlan.peerlink_rif : pim->vxlan.default_iif;
451 }
452
453 static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg *vxlan_sg)
454 {
455 struct pim_interface *pim_ifp;
456
457 vxlan_sg->iif = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim);
458 if (!vxlan_sg->iif)
459 return false;
460
461 pim_ifp = (struct pim_interface *)vxlan_sg->iif->info;
462 if (!pim_ifp || (pim_ifp->mroute_vif_index < 0))
463 return false;
464
465 return true;
466 }
467
468 static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg *vxlan_sg)
469 {
470 pim_vxlan_orig_mr_up_add(vxlan_sg);
471
472 vxlan_sg->orig_oif = pim_vxlan_orig_mr_oif_get(vxlan_sg->pim);
473 pim_vxlan_orig_mr_oif_add(vxlan_sg);
474 }
475
476 static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg *vxlan_sg)
477 {
478 if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg))
479 return;
480
481 if (PIM_DEBUG_VXLAN)
482 zlog_debug("vxlan SG %s orig-mr add", vxlan_sg->sg_str);
483
484 pim_vxlan_orig_mr_install(vxlan_sg);
485 }
486
487 static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg *vxlan_sg)
488 {
489 if (PIM_DEBUG_VXLAN)
490 zlog_debug("vxlan SG %s orig-mr del", vxlan_sg->sg_str);
491
492 pim_vxlan_orig_mr_oif_del(vxlan_sg);
493 pim_vxlan_orig_mr_up_del(vxlan_sg);
494 }
495
496 static void pim_vxlan_orig_mr_iif_update(struct hash_bucket *bucket, void *arg)
497 {
498 struct interface *ifp;
499 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
500 struct interface *old_iif = vxlan_sg->iif;
501
502 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
503 return;
504
505 ifp = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim);
506 if (PIM_DEBUG_VXLAN)
507 zlog_debug("vxlan SG %s iif changed from %s to %s",
508 vxlan_sg->sg_str,
509 old_iif ? old_iif->name : "-",
510 ifp ? ifp->name : "-");
511
512 if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg)) {
513 if (vxlan_sg->up) {
514 /* upstream exists but iif changed */
515 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
516 } else {
517 /* install mroute */
518 pim_vxlan_orig_mr_install(vxlan_sg);
519 }
520 } else {
521 pim_vxlan_orig_mr_del(vxlan_sg);
522 }
523 }
524
525 /**************************** vxlan termination mroutes ***********************
526 * For every bum-mcast-grp registered by evpn a *G termination
527 * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan
528 * packets with the bum-mcast-grp dip from the underlay and terminate the
529 * tunnel. This is done by including the vxlan termination device (ipmr-lo) in
530 * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay
531 * bridging.
532 *
533 * Sample mroute:
534 * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1
535 *****************************************************************************/
536 struct pim_interface *pim_vxlan_get_term_ifp(struct pim_instance *pim)
537 {
538 return pim->vxlan.term_if ?
539 (struct pim_interface *)pim->vxlan.term_if->info : NULL;
540 }
541
542 static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
543 {
544 if (vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED)
545 return;
546
547 if (PIM_DEBUG_VXLAN)
548 zlog_debug("vxlan SG %s term-oif %s add",
549 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
550
551 if (pim_ifchannel_local_membership_add(vxlan_sg->term_oif,
552 &vxlan_sg->sg, true /*is_vxlan */)) {
553 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
554 /* update the inherited OIL */
555 /* XXX - I don't see the inherited OIL updated when a local
556 * member is added. And that probably needs to be fixed. Till
557 * that happens we do a force update on the inherited OIL
558 * here.
559 */
560 pim_upstream_inherited_olist(vxlan_sg->pim, vxlan_sg->up);
561 } else {
562 zlog_warn("vxlan SG %s term-oif %s add failed",
563 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
564 }
565 }
566
567 static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
568 {
569 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
570 return;
571
572 if (PIM_DEBUG_VXLAN)
573 zlog_debug("vxlan SG %s oif %s del",
574 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
575
576 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
577 pim_ifchannel_local_membership_del(vxlan_sg->term_oif, &vxlan_sg->sg);
578 /* update the inherited OIL */
579 /* XXX - I don't see the inherited OIL updated when a local member
580 * is deleted. And that probably needs to be fixed. Till that happens
581 * we do a force update on the inherited OIL here.
582 */
583 pim_upstream_inherited_olist(vxlan_sg->pim, vxlan_sg->up);
584 }
585
586 static void pim_vxlan_update_sg_entry_mlag(struct pim_instance *pim,
587 struct pim_upstream *up, bool inherit)
588 {
589 bool is_df = true;
590
591 if (inherit && up->parent &&
592 PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->parent->flags) &&
593 PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->parent->flags))
594 is_df = false;
595
596 pim_mlag_up_df_role_update(pim, up, is_df, "inherit_xg_df");
597 }
598
599 /* We run MLAG DF election only on mroutes that have the termination
600 * device ipmr-lo in the immediate OIL. This is only (*, G) entries at the
601 * moment. For (S, G) entries that (with ipmr-lo in the inherited OIL) we
602 * inherit the DF role from the (*, G) entry.
603 */
604 void pim_vxlan_inherit_mlag_flags(struct pim_instance *pim,
605 struct pim_upstream *up, bool inherit)
606 {
607 struct listnode *listnode;
608 struct pim_upstream *child;
609
610 for (ALL_LIST_ELEMENTS_RO(up->sources, listnode,
611 child)) {
612 pim_vxlan_update_sg_entry_mlag(pim,
613 child, true /* inherit */);
614 }
615 }
616
617 static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
618 {
619 struct pim_upstream *up;
620 int flags = 0;
621
622 if (vxlan_sg->up) {
623 /* nothing to do */
624 return;
625 }
626
627 if (PIM_DEBUG_VXLAN)
628 zlog_debug("vxlan SG %s term mroute-up add",
629 vxlan_sg->sg_str);
630
631 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags);
632 /* enable MLAG designated-forwarder election on termination mroutes */
633 PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags);
634
635 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg, NULL /* iif */,
636 flags, __func__, NULL);
637 vxlan_sg->up = up;
638
639 if (!up) {
640 zlog_warn("vxlan SG %s term mroute-up add failed",
641 vxlan_sg->sg_str);
642 return;
643 }
644
645 /* update existing SG entries with the parent's MLAG flag */
646 pim_vxlan_inherit_mlag_flags(vxlan_sg->pim, up, true /*enable*/);
647 }
648
649 static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
650 {
651 struct pim_upstream *up = vxlan_sg->up;
652
653 if (!up)
654 return;
655
656 if (PIM_DEBUG_VXLAN)
657 zlog_debug("vxlan SG %s term mroute-up del",
658 vxlan_sg->sg_str);
659 vxlan_sg->up = NULL;
660 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) {
661 /* update SG entries that are inheriting from this XG entry */
662 pim_vxlan_inherit_mlag_flags(vxlan_sg->pim, up,
663 false /*enable*/);
664 /* clear out all the vxlan related flags */
665 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM |
666 PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN);
667 pim_mlag_up_local_del(vxlan_sg->pim, up);
668 pim_upstream_del(vxlan_sg->pim, up, __func__);
669 }
670 }
671
672 static void pim_vxlan_term_mr_add(struct pim_vxlan_sg *vxlan_sg)
673 {
674 if (PIM_DEBUG_VXLAN)
675 zlog_debug("vxlan SG %s term mroute add", vxlan_sg->sg_str);
676
677 vxlan_sg->term_oif = vxlan_sg->pim->vxlan.term_if;
678 if (!vxlan_sg->term_oif)
679 /* defer termination mroute till we have a termination device */
680 return;
681
682 pim_vxlan_term_mr_up_add(vxlan_sg);
683 /* set up local membership for the term-oif */
684 pim_vxlan_term_mr_oif_add(vxlan_sg);
685 }
686
687 static void pim_vxlan_term_mr_del(struct pim_vxlan_sg *vxlan_sg)
688 {
689 if (PIM_DEBUG_VXLAN)
690 zlog_debug("vxlan SG %s term mroute del", vxlan_sg->sg_str);
691
692 /* remove local membership associated with the term oif */
693 pim_vxlan_term_mr_oif_del(vxlan_sg);
694 /* remove references to the upstream entry */
695 pim_vxlan_term_mr_up_del(vxlan_sg);
696 }
697
698 /************************** vxlan SG cache management ************************/
699 static unsigned int pim_vxlan_sg_hash_key_make(const void *p)
700 {
701 const struct pim_vxlan_sg *vxlan_sg = p;
702
703 return pim_sgaddr_hash(vxlan_sg->sg, 0);
704 }
705
706 static bool pim_vxlan_sg_hash_eq(const void *p1, const void *p2)
707 {
708 const struct pim_vxlan_sg *sg1 = p1;
709 const struct pim_vxlan_sg *sg2 = p2;
710
711 return !pim_sgaddr_cmp(sg1->sg, sg2->sg);
712 }
713
714 static struct pim_vxlan_sg *pim_vxlan_sg_new(struct pim_instance *pim,
715 pim_sgaddr *sg)
716 {
717 struct pim_vxlan_sg *vxlan_sg;
718
719 vxlan_sg = XCALLOC(MTYPE_PIM_VXLAN_SG, sizeof(*vxlan_sg));
720
721 vxlan_sg->pim = pim;
722 vxlan_sg->sg = *sg;
723 snprintfrr(vxlan_sg->sg_str, sizeof(vxlan_sg->sg_str), "%pSG", sg);
724
725 if (PIM_DEBUG_VXLAN)
726 zlog_debug("vxlan SG %s alloc", vxlan_sg->sg_str);
727
728 vxlan_sg = hash_get(pim->vxlan.sg_hash, vxlan_sg, hash_alloc_intern);
729
730 /* we register with the MLAG daemon in the first VxLAN SG and never
731 * de-register during that life of the pimd
732 */
733 if (pim->vxlan.sg_hash->count == 1) {
734 vxlan_mlag.flags |= PIM_VXLAN_MLAGF_DO_REG;
735 pim_mlag_register();
736 }
737
738 return vxlan_sg;
739 }
740
741 struct pim_vxlan_sg *pim_vxlan_sg_find(struct pim_instance *pim, pim_sgaddr *sg)
742 {
743 struct pim_vxlan_sg lookup;
744
745 lookup.sg = *sg;
746 return hash_lookup(pim->vxlan.sg_hash, &lookup);
747 }
748
749 struct pim_vxlan_sg *pim_vxlan_sg_add(struct pim_instance *pim, pim_sgaddr *sg)
750 {
751 struct pim_vxlan_sg *vxlan_sg;
752
753 vxlan_sg = pim_vxlan_sg_find(pim, sg);
754 if (vxlan_sg)
755 return vxlan_sg;
756
757 vxlan_sg = pim_vxlan_sg_new(pim, sg);
758
759 if (pim_vxlan_is_orig_mroute(vxlan_sg))
760 pim_vxlan_orig_mr_add(vxlan_sg);
761 else
762 pim_vxlan_term_mr_add(vxlan_sg);
763
764 return vxlan_sg;
765 }
766
767 static void pim_vxlan_sg_del_item(struct pim_vxlan_sg *vxlan_sg)
768 {
769 vxlan_sg->flags |= PIM_VXLAN_SGF_DEL_IN_PROG;
770
771 pim_vxlan_del_work(vxlan_sg);
772
773 if (pim_vxlan_is_orig_mroute(vxlan_sg))
774 pim_vxlan_orig_mr_del(vxlan_sg);
775 else
776 pim_vxlan_term_mr_del(vxlan_sg);
777
778 if (PIM_DEBUG_VXLAN)
779 zlog_debug("vxlan SG %s free", vxlan_sg->sg_str);
780
781 XFREE(MTYPE_PIM_VXLAN_SG, vxlan_sg);
782 }
783
784 void pim_vxlan_sg_del(struct pim_instance *pim, pim_sgaddr *sg)
785 {
786 struct pim_vxlan_sg *vxlan_sg;
787
788 vxlan_sg = pim_vxlan_sg_find(pim, sg);
789 if (!vxlan_sg)
790 return;
791
792 hash_release(pim->vxlan.sg_hash, vxlan_sg);
793 pim_vxlan_sg_del_item(vxlan_sg);
794 }
795
796 /******************************* MLAG handling *******************************/
797 bool pim_vxlan_do_mlag_reg(void)
798 {
799 return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_DO_REG);
800 }
801
802 /* The peerlink sub-interface is added as an OIF to the origination-mroute.
803 * This is done to send a copy of the multicast-vxlan encapsulated traffic
804 * to the MLAG peer which may mroute it over the underlay if there are any
805 * interested receivers.
806 */
807 static void pim_vxlan_sg_peerlink_oif_update(struct hash_bucket *bucket,
808 void *arg)
809 {
810 struct interface *new_oif = (struct interface *)arg;
811 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
812
813 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
814 return;
815
816 if (vxlan_sg->orig_oif == new_oif)
817 return;
818
819 pim_vxlan_orig_mr_oif_del(vxlan_sg);
820
821 vxlan_sg->orig_oif = new_oif;
822 pim_vxlan_orig_mr_oif_add(vxlan_sg);
823 }
824
825 /* In the case of anycast VTEPs the VTEP-PIP must be used as the
826 * register source.
827 */
828 bool pim_vxlan_get_register_src(struct pim_instance *pim,
829 struct pim_upstream *up, struct in_addr *src_p)
830 {
831 if (!(vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED))
832 return true;
833
834 /* if address is not available suppress the pim-register */
835 if (vxlan_mlag.reg_addr.s_addr == INADDR_ANY)
836 return false;
837
838 *src_p = vxlan_mlag.reg_addr;
839 return true;
840 }
841
842 void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role,
843 struct interface *peerlink_rif,
844 struct in_addr *reg_addr)
845 {
846 struct pim_instance *pim;
847 char addr_buf[INET_ADDRSTRLEN];
848 struct pim_interface *pim_ifp = NULL;
849
850 if (PIM_DEBUG_VXLAN) {
851 inet_ntop(AF_INET, reg_addr,
852 addr_buf, INET_ADDRSTRLEN);
853 zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s",
854 enable ? "enable" : "disable",
855 peer_state ? "up" : "down",
856 role,
857 peerlink_rif ? peerlink_rif->name : "-",
858 addr_buf);
859 }
860
861 /* XXX: for now vxlan termination is only possible in the default VRF
862 * when that changes this will need to change to iterate all VRFs
863 */
864 pim = pim_get_pim_instance(VRF_DEFAULT);
865
866 if (!pim) {
867 if (PIM_DEBUG_VXLAN)
868 zlog_debug("%s: Unable to find pim instance", __func__);
869 return;
870 }
871
872 if (enable)
873 vxlan_mlag.flags |= PIM_VXLAN_MLAGF_ENABLED;
874 else
875 vxlan_mlag.flags &= ~PIM_VXLAN_MLAGF_ENABLED;
876
877 if (vxlan_mlag.peerlink_rif != peerlink_rif)
878 vxlan_mlag.peerlink_rif = peerlink_rif;
879
880 vxlan_mlag.reg_addr = *reg_addr;
881 vxlan_mlag.peer_state = peer_state;
882 vxlan_mlag.role = role;
883
884 /* process changes */
885 if (vxlan_mlag.peerlink_rif)
886 pim_ifp = (struct pim_interface *)vxlan_mlag.peerlink_rif->info;
887 if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
888 pim_ifp && (pim_ifp->mroute_vif_index > 0))
889 pim_vxlan_set_peerlink_rif(pim, peerlink_rif);
890 else
891 pim_vxlan_set_peerlink_rif(pim, NULL);
892 }
893
894 /****************************** misc callbacks *******************************/
895 static void pim_vxlan_set_default_iif(struct pim_instance *pim,
896 struct interface *ifp)
897 {
898 struct interface *old_iif;
899
900 if (pim->vxlan.default_iif == ifp)
901 return;
902
903 old_iif = pim->vxlan.default_iif;
904 if (PIM_DEBUG_VXLAN)
905 zlog_debug("%s: vxlan default iif changed from %s to %s",
906 __func__, old_iif ? old_iif->name : "-",
907 ifp ? ifp->name : "-");
908
909 old_iif = pim_vxlan_orig_mr_iif_get(pim);
910 pim->vxlan.default_iif = ifp;
911 ifp = pim_vxlan_orig_mr_iif_get(pim);
912 if (old_iif == ifp)
913 return;
914
915 if (PIM_DEBUG_VXLAN)
916 zlog_debug("%s: vxlan orig iif changed from %s to %s", __func__,
917 old_iif ? old_iif->name : "-",
918 ifp ? ifp->name : "-");
919
920 /* add/del upstream entries for the existing vxlan SG when the
921 * interface becomes available
922 */
923 if (pim->vxlan.sg_hash)
924 hash_iterate(pim->vxlan.sg_hash,
925 pim_vxlan_orig_mr_iif_update, NULL);
926 }
927
928 static void pim_vxlan_up_cost_update(struct pim_instance *pim,
929 struct pim_upstream *up,
930 struct interface *old_peerlink_rif)
931 {
932 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags))
933 return;
934
935 if (up->rpf.source_nexthop.interface &&
936 ((up->rpf.source_nexthop.interface ==
937 pim->vxlan.peerlink_rif) ||
938 (up->rpf.source_nexthop.interface ==
939 old_peerlink_rif))) {
940 if (PIM_DEBUG_VXLAN)
941 zlog_debug("RPF cost adjust for %s on peerlink-rif (old: %s, new: %s) change",
942 up->sg_str,
943 old_peerlink_rif ?
944 old_peerlink_rif->name : "-",
945 pim->vxlan.peerlink_rif ?
946 pim->vxlan.peerlink_rif->name : "-");
947 pim_mlag_up_local_add(pim, up);
948 }
949 }
950
951 static void pim_vxlan_term_mr_cost_update(struct hash_bucket *bucket, void *arg)
952 {
953 struct interface *old_peerlink_rif = (struct interface *)arg;
954 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
955 struct pim_upstream *up;
956 struct listnode *listnode;
957 struct pim_upstream *child;
958
959 if (pim_vxlan_is_orig_mroute(vxlan_sg))
960 return;
961
962 /* Lookup all XG and SG entries with RPF-interface peerlink_rif */
963 up = vxlan_sg->up;
964 if (!up)
965 return;
966
967 pim_vxlan_up_cost_update(vxlan_sg->pim, up,
968 old_peerlink_rif);
969
970 for (ALL_LIST_ELEMENTS_RO(up->sources, listnode,
971 child))
972 pim_vxlan_up_cost_update(vxlan_sg->pim, child,
973 old_peerlink_rif);
974 }
975
976 static void pim_vxlan_sg_peerlink_rif_update(struct hash_bucket *bucket,
977 void *arg)
978 {
979 pim_vxlan_orig_mr_iif_update(bucket, NULL);
980 pim_vxlan_term_mr_cost_update(bucket, arg);
981 }
982
983 static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
984 struct interface *ifp)
985 {
986 struct interface *old_iif;
987 struct interface *new_iif;
988 struct interface *old_oif;
989 struct interface *new_oif;
990
991 if (pim->vxlan.peerlink_rif == ifp)
992 return;
993
994 old_iif = pim->vxlan.peerlink_rif;
995 if (PIM_DEBUG_VXLAN)
996 zlog_debug("%s: vxlan peerlink_rif changed from %s to %s",
997 __func__, old_iif ? old_iif->name : "-",
998 ifp ? ifp->name : "-");
999
1000 old_iif = pim_vxlan_orig_mr_iif_get(pim);
1001 old_oif = pim_vxlan_orig_mr_oif_get(pim);
1002 pim->vxlan.peerlink_rif = ifp;
1003
1004 new_iif = pim_vxlan_orig_mr_iif_get(pim);
1005 if (old_iif != new_iif) {
1006 if (PIM_DEBUG_VXLAN)
1007 zlog_debug("%s: vxlan orig iif changed from %s to %s",
1008 __func__, old_iif ? old_iif->name : "-",
1009 new_iif ? new_iif->name : "-");
1010
1011 /* add/del upstream entries for the existing vxlan SG when the
1012 * interface becomes available
1013 */
1014 if (pim->vxlan.sg_hash)
1015 hash_iterate(pim->vxlan.sg_hash,
1016 pim_vxlan_sg_peerlink_rif_update,
1017 old_iif);
1018 }
1019
1020 new_oif = pim_vxlan_orig_mr_oif_get(pim);
1021 if (old_oif != new_oif) {
1022 if (PIM_DEBUG_VXLAN)
1023 zlog_debug("%s: vxlan orig oif changed from %s to %s",
1024 __func__, old_oif ? old_oif->name : "-",
1025 new_oif ? new_oif->name : "-");
1026 if (pim->vxlan.sg_hash)
1027 hash_iterate(pim->vxlan.sg_hash,
1028 pim_vxlan_sg_peerlink_oif_update,
1029 new_oif);
1030 }
1031 }
1032
1033 static void pim_vxlan_term_mr_oif_update(struct hash_bucket *bucket, void *arg)
1034 {
1035 struct interface *ifp = (struct interface *)arg;
1036 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
1037
1038 if (pim_vxlan_is_orig_mroute(vxlan_sg))
1039 return;
1040
1041 if (vxlan_sg->term_oif == ifp)
1042 return;
1043
1044 if (PIM_DEBUG_VXLAN)
1045 zlog_debug("vxlan SG %s term oif changed from %s to %s",
1046 vxlan_sg->sg_str,
1047 vxlan_sg->term_oif ? vxlan_sg->term_oif->name : "-",
1048 ifp ? ifp->name : "-");
1049
1050 pim_vxlan_term_mr_del(vxlan_sg);
1051 vxlan_sg->term_oif = ifp;
1052 pim_vxlan_term_mr_add(vxlan_sg);
1053 }
1054
1055 static void pim_vxlan_term_oif_update(struct pim_instance *pim,
1056 struct interface *ifp)
1057 {
1058 if (pim->vxlan.term_if == ifp)
1059 return;
1060
1061 if (PIM_DEBUG_VXLAN)
1062 zlog_debug("vxlan term oif changed from %s to %s",
1063 pim->vxlan.term_if ? pim->vxlan.term_if->name : "-",
1064 ifp ? ifp->name : "-");
1065
1066 pim->vxlan.term_if = ifp;
1067 if (pim->vxlan.sg_hash)
1068 hash_iterate(pim->vxlan.sg_hash,
1069 pim_vxlan_term_mr_oif_update, ifp);
1070 }
1071
1072 void pim_vxlan_add_vif(struct interface *ifp)
1073 {
1074 struct pim_interface *pim_ifp = ifp->info;
1075 struct pim_instance *pim = pim_ifp->pim;
1076
1077 if (pim->vrf->vrf_id != VRF_DEFAULT)
1078 return;
1079
1080 if (if_is_loopback(ifp))
1081 pim_vxlan_set_default_iif(pim, ifp);
1082
1083 if (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED &&
1084 (ifp == vxlan_mlag.peerlink_rif))
1085 pim_vxlan_set_peerlink_rif(pim, ifp);
1086
1087 if (pim->vxlan.term_if_cfg == ifp)
1088 pim_vxlan_term_oif_update(pim, ifp);
1089 }
1090
1091 void pim_vxlan_del_vif(struct interface *ifp)
1092 {
1093 struct pim_interface *pim_ifp = ifp->info;
1094 struct pim_instance *pim = pim_ifp->pim;
1095
1096 if (pim->vrf->vrf_id != VRF_DEFAULT)
1097 return;
1098
1099 if (pim->vxlan.default_iif == ifp)
1100 pim_vxlan_set_default_iif(pim, NULL);
1101
1102 if (pim->vxlan.peerlink_rif == ifp)
1103 pim_vxlan_set_peerlink_rif(pim, NULL);
1104
1105 if (pim->vxlan.term_if == ifp)
1106 pim_vxlan_term_oif_update(pim, NULL);
1107 }
1108
1109 /* enable pim implicitly on the termination device add */
1110 void pim_vxlan_add_term_dev(struct pim_instance *pim,
1111 struct interface *ifp)
1112 {
1113 struct pim_interface *pim_ifp;
1114
1115 if (pim->vxlan.term_if_cfg == ifp)
1116 return;
1117
1118 if (PIM_DEBUG_VXLAN)
1119 zlog_debug("vxlan term oif cfg changed from %s to %s",
1120 pim->vxlan.term_if_cfg ?
1121 pim->vxlan.term_if_cfg->name : "-",
1122 ifp->name);
1123
1124 pim->vxlan.term_if_cfg = ifp;
1125
1126 /* enable pim on the term ifp */
1127 pim_ifp = (struct pim_interface *)ifp->info;
1128 if (pim_ifp) {
1129 pim_ifp->pim_enable = true;
1130 /* ifp is already oper up; activate it as a term dev */
1131 if (pim_ifp->mroute_vif_index >= 0)
1132 pim_vxlan_term_oif_update(pim, ifp);
1133 } else {
1134 /* ensure that pimreg exists before using the newly created
1135 * vxlan termination device
1136 */
1137 pim_if_create_pimreg(pim);
1138 (void)pim_if_new(ifp, false /*igmp*/, true /*pim*/,
1139 false /*pimreg*/, true /*vxlan_term*/);
1140 }
1141 }
1142
1143 /* disable pim implicitly, if needed, on the termination device deletion */
1144 void pim_vxlan_del_term_dev(struct pim_instance *pim)
1145 {
1146 struct interface *ifp = pim->vxlan.term_if_cfg;
1147 struct pim_interface *pim_ifp;
1148
1149 if (PIM_DEBUG_VXLAN)
1150 zlog_debug("vxlan term oif cfg changed from %s to -",
1151 ifp->name);
1152
1153 pim->vxlan.term_if_cfg = NULL;
1154
1155 pim_ifp = (struct pim_interface *)ifp->info;
1156 if (pim_ifp) {
1157 pim_ifp->pim_enable = false;
1158 if (!pim_ifp->gm_enable)
1159 pim_if_delete(ifp);
1160 }
1161 }
1162
1163 void pim_vxlan_init(struct pim_instance *pim)
1164 {
1165 char hash_name[64];
1166
1167 snprintf(hash_name, sizeof(hash_name),
1168 "PIM %s vxlan SG hash", pim->vrf->name);
1169 pim->vxlan.sg_hash = hash_create(pim_vxlan_sg_hash_key_make,
1170 pim_vxlan_sg_hash_eq, hash_name);
1171 }
1172
1173 void pim_vxlan_exit(struct pim_instance *pim)
1174 {
1175 if (pim->vxlan.sg_hash) {
1176 hash_clean(pim->vxlan.sg_hash,
1177 (void (*)(void *))pim_vxlan_sg_del_item);
1178 hash_free(pim->vxlan.sg_hash);
1179 pim->vxlan.sg_hash = NULL;
1180 }
1181 }
1182
/* Global vxlan shutdown hook: invokes the work timer setup routine with
 * start == false.
 */
void pim_vxlan_terminate(void)
{
	pim_vxlan_work_timer_setup(false /*start*/);
}