]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_vxlan.c
Merge pull request #9610 from iqras23/best_path
[mirror_frr.git] / pimd / pim_vxlan.c
1 /* PIM support for VxLAN BUM flooding
2 *
3 * Copyright (C) 2019 Cumulus Networks, Inc.
4 *
5 * This file is part of FRR.
6 *
7 * FRR is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
10 * later version.
11 *
12 * FRR is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 */
21
22 #include <zebra.h>
23
24 #include <hash.h>
25 #include <jhash.h>
26 #include <log.h>
27 #include <prefix.h>
28 #include <vrf.h>
29
30 #include "pimd.h"
31 #include "pim_iface.h"
32 #include "pim_memory.h"
33 #include "pim_oil.h"
34 #include "pim_register.h"
35 #include "pim_str.h"
36 #include "pim_upstream.h"
37 #include "pim_ifchannel.h"
38 #include "pim_nht.h"
39 #include "pim_zebra.h"
40 #include "pim_vxlan.h"
41 #include "pim_mlag.h"
42
43 /* pim-vxlan global info */
44 struct pim_vxlan vxlan_info, *pim_vxlan_p = &vxlan_info;
45
46 static void pim_vxlan_work_timer_setup(bool start);
47 static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
48 struct interface *ifp);
49
50 /*************************** vxlan work list **********************************
51 * A work list is maintained for staggered generation of pim null register
52 * messages for vxlan SG entries that are in a reg_join state.
53 *
54 * A max of 500 NULL registers are generated at one shot. If paused reg
55 * generation continues on the next second and so on till all register
56 * messages have been sent out. And the process is restarted every 60s.
57 *
58 * The purpose of this null register generation is to set up the SPT and
59 * maintain it independent of the presence of overlay BUM traffic.
60 ****************************************************************************/
61 static void pim_vxlan_do_reg_work(void)
62 {
63 struct listnode *listnode;
64 int work_cnt = 0;
65 struct pim_vxlan_sg *vxlan_sg;
66 static int sec_count;
67
68 ++sec_count;
69
70 if (sec_count > PIM_VXLAN_NULL_REG_INTERVAL) {
71 sec_count = 0;
72 listnode = vxlan_info.next_work ?
73 vxlan_info.next_work :
74 vxlan_info.work_list->head;
75 if (PIM_DEBUG_VXLAN && listnode)
76 zlog_debug("vxlan SG work %s",
77 vxlan_info.next_work ? "continues" : "starts");
78 } else {
79 listnode = vxlan_info.next_work;
80 }
81
82 for (; listnode; listnode = listnode->next) {
83 vxlan_sg = (struct pim_vxlan_sg *)listnode->data;
84 if (vxlan_sg->up && (vxlan_sg->up->reg_state == PIM_REG_JOIN)) {
85 if (PIM_DEBUG_VXLAN)
86 zlog_debug("vxlan SG %s periodic NULL register",
87 vxlan_sg->sg_str);
88
89 /*
90 * If we are on the work queue *and* the rpf
91 * has been lost on the vxlan_sg->up let's
92 * make sure that we don't send it.
93 */
94 if (vxlan_sg->up->rpf.source_nexthop.interface) {
95 pim_null_register_send(vxlan_sg->up);
96 ++work_cnt;
97 }
98 }
99
100 if (work_cnt > vxlan_info.max_work_cnt) {
101 vxlan_info.next_work = listnode->next;
102 if (PIM_DEBUG_VXLAN)
103 zlog_debug("vxlan SG %d work items proc and pause",
104 work_cnt);
105 return;
106 }
107 }
108
109 if (work_cnt) {
110 if (PIM_DEBUG_VXLAN)
111 zlog_debug("vxlan SG %d work items proc", work_cnt);
112 }
113 vxlan_info.next_work = NULL;
114 }
115
116 /* Staggered work related info is initialized when the first work comes
117 * along
118 */
119 static void pim_vxlan_init_work(void)
120 {
121 if (vxlan_info.flags & PIM_VXLANF_WORK_INITED)
122 return;
123
124 vxlan_info.max_work_cnt = PIM_VXLAN_WORK_MAX;
125 vxlan_info.flags |= PIM_VXLANF_WORK_INITED;
126 vxlan_info.work_list = list_new();
127 pim_vxlan_work_timer_setup(true/* start */);
128 }
129
130 static void pim_vxlan_add_work(struct pim_vxlan_sg *vxlan_sg)
131 {
132 if (vxlan_sg->flags & PIM_VXLAN_SGF_DEL_IN_PROG) {
133 if (PIM_DEBUG_VXLAN)
134 zlog_debug("vxlan SG %s skip work list; del-in-prog",
135 vxlan_sg->sg_str);
136 return;
137 }
138
139 pim_vxlan_init_work();
140
141 /* already a part of the work list */
142 if (vxlan_sg->work_node)
143 return;
144
145 if (PIM_DEBUG_VXLAN)
146 zlog_debug("vxlan SG %s work list add",
147 vxlan_sg->sg_str);
148 vxlan_sg->work_node = listnode_add(vxlan_info.work_list, vxlan_sg);
149 /* XXX: adjust max_work_cnt if needed */
150 }
151
152 static void pim_vxlan_del_work(struct pim_vxlan_sg *vxlan_sg)
153 {
154 if (!vxlan_sg->work_node)
155 return;
156
157 if (PIM_DEBUG_VXLAN)
158 zlog_debug("vxlan SG %s work list del",
159 vxlan_sg->sg_str);
160
161 if (vxlan_sg->work_node == vxlan_info.next_work)
162 vxlan_info.next_work = vxlan_sg->work_node->next;
163
164 list_delete_node(vxlan_info.work_list, vxlan_sg->work_node);
165 vxlan_sg->work_node = NULL;
166 }
167
168 void pim_vxlan_update_sg_reg_state(struct pim_instance *pim,
169 struct pim_upstream *up, bool reg_join)
170 {
171 struct pim_vxlan_sg *vxlan_sg;
172
173 vxlan_sg = pim_vxlan_sg_find(pim, &up->sg);
174 if (!vxlan_sg)
175 return;
176
177 /* add the vxlan sg entry to a work list for periodic reg joins.
178 * the entry will stay in the list as long as the register state is
179 * PIM_REG_JOIN
180 */
181 if (reg_join)
182 pim_vxlan_add_work(vxlan_sg);
183 else
184 pim_vxlan_del_work(vxlan_sg);
185 }
186
187 static int pim_vxlan_work_timer_cb(struct thread *t)
188 {
189 pim_vxlan_do_reg_work();
190 pim_vxlan_work_timer_setup(true /* start */);
191 return 0;
192 }
193
194 /* global 1second timer used for periodic processing */
195 static void pim_vxlan_work_timer_setup(bool start)
196 {
197 THREAD_OFF(vxlan_info.work_timer);
198 if (start)
199 thread_add_timer(router->master, pim_vxlan_work_timer_cb, NULL,
200 PIM_VXLAN_WORK_TIME, &vxlan_info.work_timer);
201 }
202
203 /**************************** vxlan origination mroutes ***********************
204 * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination
205 * mroute is set up by pimd. The purpose of this mroute is to forward vxlan
206 * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast) packets
207 * over the underlay.
208 *
209 * Sample mroute (single VTEP):
210 * (27.0.0.7, 239.1.1.100) Iif: lo Oifs: uplink-1
211 *
212 * Sample mroute (anycast VTEP):
213 * (36.0.0.9, 239.1.1.100) Iif: peerlink-3.4094\
214 * Oifs: peerlink-3.4094 uplink-1
215 ***************************************************************************/
216 static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
217 {
218 struct pim_upstream *up = vxlan_sg->up;
219
220 if (!up)
221 return;
222
223 if (PIM_DEBUG_VXLAN)
224 zlog_debug("vxlan SG %s orig mroute-up del",
225 vxlan_sg->sg_str);
226
227 vxlan_sg->up = NULL;
228
229 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) {
230 /* clear out all the vxlan properties */
231 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG |
232 PIM_UPSTREAM_FLAG_MASK_STATIC_IIF |
233 PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY |
234 PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG |
235 PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA |
236 PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL);
237
238 /* We bring things to a grinding halt by force expirying
239 * the kat. Doing this will also remove the reference we
240 * created as a "vxlan" source and delete the upstream entry
241 * if there are no other references.
242 */
243 if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up->flags)) {
244 THREAD_OFF(up->t_ka_timer);
245 up = pim_upstream_keep_alive_timer_proc(up);
246 } else {
247 /* this is really unexpected as we force vxlan
248 * origination mroutes active sources but just in
249 * case
250 */
251 up = pim_upstream_del(vxlan_sg->pim, up, __func__);
252 }
253 /* if there are other references register the source
254 * for nht
255 */
256 if (up) {
257 enum pim_rpf_result r;
258
259 r = pim_rpf_update(vxlan_sg->pim, up, NULL, __func__);
260 if (r == PIM_RPF_FAILURE) {
261 if (PIM_DEBUG_VXLAN)
262 zlog_debug(
263 "vxlan SG %s rpf_update failure",
264 vxlan_sg->sg_str);
265 }
266 }
267 }
268 }
269
270 static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg *vxlan_sg)
271 {
272 /* update MFC with the new IIF */
273 pim_upstream_fill_static_iif(vxlan_sg->up, vxlan_sg->iif);
274 pim_upstream_mroute_iif_update(vxlan_sg->up->channel_oil, __func__);
275
276 if (PIM_DEBUG_VXLAN)
277 zlog_debug("vxlan SG %s orig mroute-up updated with iif %s",
278 vxlan_sg->sg_str,
279 vxlan_sg->iif?vxlan_sg->iif->name:"-");
280
281 }
282
283 /* For every VxLAN BUM multicast group we setup a SG-up that has the following
284 * "forced properties" -
285 * 1. Directly connected on a DR interface i.e. we must act as an FHR
286 * 2. We prime the pump i.e. no multicast data is needed to register this
287 * source with the FHR. To do that we send periodic null registers if
288 * the SG entry is in a register-join state. We also prevent expiry of
289 * KAT.
290 * 3. As this SG is setup without data there is no need to register encapsulate
291 * data traffic. This encapsulation is explicitly skipped for the following
292 * reasons -
293 * a) Many levels of encapsulation are needed creating MTU disc challenges.
294 * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then
295 * encapsulated again in a pim-register header.
296 * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if
297 * they both reg encapsulated traffic the RP will accept the duplicates
298 * as there are no RPF checks for this encapsulated data.
299 * a), b) can be workarounded if needed, but there is really no need because
300 * of (2) i.e. the pump is primed without data.
301 */
302 static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
303 {
304 struct pim_upstream *up;
305 struct pim_interface *term_ifp;
306 int flags = 0;
307 struct prefix nht_p;
308 struct pim_instance *pim = vxlan_sg->pim;
309
310 if (vxlan_sg->up) {
311 /* nothing to do */
312 return;
313 }
314
315 if (PIM_DEBUG_VXLAN)
316 zlog_debug("vxlan SG %s orig mroute-up add with iif %s",
317 vxlan_sg->sg_str,
318 vxlan_sg->iif?vxlan_sg->iif->name:"-");
319
320 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags);
321 /* pin the IIF to lo or peerlink-subinterface and disable NHT */
322 PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags);
323 /* Fake traffic by setting SRC_STREAM and starting KAT */
324 /* We intentionally skip updating ref count for SRC_STREAM/FHR.
325 * Setting SRC_VXLAN should have already created a reference
326 * preventing the entry from being deleted
327 */
328 PIM_UPSTREAM_FLAG_SET_FHR(flags);
329 PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags);
330 /* Force pimreg even if non-DR. This is needed on a MLAG setup for
331 * VxLAN AA
332 */
333 PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags);
334 /* prevent KAT expiry. we want the MDT setup even if there is no BUM
335 * traffic
336 */
337 PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags);
338 /* SPT for vxlan BUM groups is primed and maintained via NULL
339 * registers so there is no need to reg-encapsulate
340 * vxlan-encapsulated overlay data traffic
341 */
342 PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags);
343 /* On a MLAG setup we force a copy to the MLAG peer while also
344 * accepting traffic from the peer. To do this we set peerlink-rif as
345 * the IIF and also add it to the OIL
346 */
347 PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags);
348
349 /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */
350 up = pim_upstream_find(vxlan_sg->pim, &vxlan_sg->sg);
351 if (up) {
352 /* if the iif is set to something other than the vxlan_sg->iif
353 * we must dereg the old nexthop and force to new "static"
354 * iif
355 */
356 if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) {
357 nht_p.family = AF_INET;
358 nht_p.prefixlen = IPV4_MAX_BITLEN;
359 nht_p.u.prefix4 = up->upstream_addr;
360 pim_delete_tracked_nexthop(vxlan_sg->pim, &nht_p, up,
361 NULL);
362 }
363 /* We are acting FHR; clear out use_rpt setting if any */
364 pim_upstream_update_use_rpt(up, false /*update_mroute*/);
365 pim_upstream_ref(up, flags, __func__);
366 vxlan_sg->up = up;
367 term_ifp = pim_vxlan_get_term_ifp(pim);
368 /* mute termination device on origination mroutes */
369 if (term_ifp)
370 pim_channel_update_oif_mute(up->channel_oil,
371 term_ifp);
372 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
373 /* mute pimreg on origination mroutes */
374 if (pim->regiface)
375 pim_channel_update_oif_mute(up->channel_oil,
376 pim->regiface->info);
377 } else {
378 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg,
379 vxlan_sg->iif, flags, __func__, NULL);
380 vxlan_sg->up = up;
381 }
382
383 if (!up) {
384 if (PIM_DEBUG_VXLAN)
385 zlog_debug("vxlan SG %s orig mroute-up add failed",
386 vxlan_sg->sg_str);
387 return;
388 }
389
390 pim_upstream_keep_alive_timer_start(up, vxlan_sg->pim->keep_alive_time);
391
392 /* register the source with the RP */
393 if (up->reg_state == PIM_REG_NOINFO) {
394 pim_register_join(up);
395 pim_null_register_send(up);
396 }
397
398 /* update the inherited OIL */
399 pim_upstream_inherited_olist(vxlan_sg->pim, up);
400 if (!up->channel_oil->installed)
401 pim_upstream_mroute_add(up->channel_oil, __func__);
402 }
403
404 static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
405 {
406 if (!vxlan_sg->up || !vxlan_sg->orig_oif)
407 return;
408
409 if (PIM_DEBUG_VXLAN)
410 zlog_debug("vxlan SG %s oif %s add",
411 vxlan_sg->sg_str, vxlan_sg->orig_oif->name);
412
413 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
414 pim_channel_add_oif(vxlan_sg->up->channel_oil,
415 vxlan_sg->orig_oif, PIM_OIF_FLAG_PROTO_VXLAN,
416 __func__);
417 }
418
419 static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
420 {
421 struct interface *orig_oif;
422
423 orig_oif = vxlan_sg->orig_oif;
424 vxlan_sg->orig_oif = NULL;
425
426 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
427 return;
428
429 if (PIM_DEBUG_VXLAN)
430 zlog_debug("vxlan SG %s oif %s del",
431 vxlan_sg->sg_str, orig_oif->name);
432
433 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
434 pim_channel_del_oif(vxlan_sg->up->channel_oil,
435 orig_oif, PIM_OIF_FLAG_PROTO_VXLAN, __func__);
436 }
437
438 static inline struct interface *pim_vxlan_orig_mr_oif_get(
439 struct pim_instance *pim)
440 {
441 return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) ?
442 pim->vxlan.peerlink_rif : NULL;
443 }
444
445 /* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if
446 * the mroute is in a non-default vrf).
447 * Anycast VTEPs: IIF is the MLAG ISL/peerlink.
448 */
449 static inline struct interface *pim_vxlan_orig_mr_iif_get(
450 struct pim_instance *pim)
451 {
452 return ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
453 pim->vxlan.peerlink_rif) ?
454 pim->vxlan.peerlink_rif : pim->vxlan.default_iif;
455 }
456
457 static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg *vxlan_sg)
458 {
459 struct pim_interface *pim_ifp;
460
461 vxlan_sg->iif = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim);
462 if (!vxlan_sg->iif)
463 return false;
464
465 pim_ifp = (struct pim_interface *)vxlan_sg->iif->info;
466 if (!pim_ifp || (pim_ifp->mroute_vif_index < 0))
467 return false;
468
469 return true;
470 }
471
472 static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg *vxlan_sg)
473 {
474 pim_vxlan_orig_mr_up_add(vxlan_sg);
475
476 vxlan_sg->orig_oif = pim_vxlan_orig_mr_oif_get(vxlan_sg->pim);
477 pim_vxlan_orig_mr_oif_add(vxlan_sg);
478 }
479
480 static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg *vxlan_sg)
481 {
482 if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg))
483 return;
484
485 if (PIM_DEBUG_VXLAN)
486 zlog_debug("vxlan SG %s orig-mr add", vxlan_sg->sg_str);
487
488 pim_vxlan_orig_mr_install(vxlan_sg);
489 }
490
491 static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg *vxlan_sg)
492 {
493 if (PIM_DEBUG_VXLAN)
494 zlog_debug("vxlan SG %s orig-mr del", vxlan_sg->sg_str);
495
496 pim_vxlan_orig_mr_oif_del(vxlan_sg);
497 pim_vxlan_orig_mr_up_del(vxlan_sg);
498 }
499
500 static void pim_vxlan_orig_mr_iif_update(struct hash_bucket *bucket, void *arg)
501 {
502 struct interface *ifp;
503 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
504 struct interface *old_iif = vxlan_sg->iif;
505
506 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
507 return;
508
509 ifp = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim);
510 if (PIM_DEBUG_VXLAN)
511 zlog_debug("vxlan SG %s iif changed from %s to %s",
512 vxlan_sg->sg_str,
513 old_iif ? old_iif->name : "-",
514 ifp ? ifp->name : "-");
515
516 if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg)) {
517 if (vxlan_sg->up) {
518 /* upstream exists but iif changed */
519 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
520 } else {
521 /* install mroute */
522 pim_vxlan_orig_mr_install(vxlan_sg);
523 }
524 } else {
525 pim_vxlan_orig_mr_del(vxlan_sg);
526 }
527 }
528
529 /**************************** vxlan termination mroutes ***********************
530 * For every bum-mcast-grp registered by evpn a *G termination
531 * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan
532 * packets with the bum-mcast-grp dip from the underlay and terminate the
533 * tunnel. This is done by including the vxlan termination device (ipmr-lo) in
534 * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay
535 * bridging.
536 *
537 * Sample mroute:
538 * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1
539 *****************************************************************************/
540 struct pim_interface *pim_vxlan_get_term_ifp(struct pim_instance *pim)
541 {
542 return pim->vxlan.term_if ?
543 (struct pim_interface *)pim->vxlan.term_if->info : NULL;
544 }
545
546 static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
547 {
548 if (vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED)
549 return;
550
551 if (PIM_DEBUG_VXLAN)
552 zlog_debug("vxlan SG %s term-oif %s add",
553 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
554
555 if (pim_ifchannel_local_membership_add(vxlan_sg->term_oif,
556 &vxlan_sg->sg, true /*is_vxlan */)) {
557 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
558 /* update the inherited OIL */
559 /* XXX - I don't see the inherited OIL updated when a local
560 * member is added. And that probably needs to be fixed. Till
561 * that happens we do a force update on the inherited OIL
562 * here.
563 */
564 pim_upstream_inherited_olist(vxlan_sg->pim, vxlan_sg->up);
565 } else {
566 zlog_warn("vxlan SG %s term-oif %s add failed",
567 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
568 }
569 }
570
571 static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
572 {
573 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
574 return;
575
576 if (PIM_DEBUG_VXLAN)
577 zlog_debug("vxlan SG %s oif %s del",
578 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
579
580 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
581 pim_ifchannel_local_membership_del(vxlan_sg->term_oif, &vxlan_sg->sg);
582 /* update the inherited OIL */
583 /* XXX - I don't see the inherited OIL updated when a local member
584 * is deleted. And that probably needs to be fixed. Till that happens
585 * we do a force update on the inherited OIL here.
586 */
587 pim_upstream_inherited_olist(vxlan_sg->pim, vxlan_sg->up);
588 }
589
590 static void pim_vxlan_update_sg_entry_mlag(struct pim_instance *pim,
591 struct pim_upstream *up, bool inherit)
592 {
593 bool is_df = true;
594
595 if (inherit && up->parent &&
596 PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->parent->flags) &&
597 PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->parent->flags))
598 is_df = false;
599
600 pim_mlag_up_df_role_update(pim, up, is_df, "inherit_xg_df");
601 }
602
603 /* We run MLAG DF election only on mroutes that have the termination
604 * device ipmr-lo in the immediate OIL. This is only (*, G) entries at the
605 * moment. For (S, G) entries that (with ipmr-lo in the inherited OIL) we
606 * inherit the DF role from the (*, G) entry.
607 */
608 void pim_vxlan_inherit_mlag_flags(struct pim_instance *pim,
609 struct pim_upstream *up, bool inherit)
610 {
611 struct listnode *listnode;
612 struct pim_upstream *child;
613
614 for (ALL_LIST_ELEMENTS_RO(up->sources, listnode,
615 child)) {
616 pim_vxlan_update_sg_entry_mlag(pim,
617 child, true /* inherit */);
618 }
619 }
620
621 static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
622 {
623 struct pim_upstream *up;
624 int flags = 0;
625
626 if (vxlan_sg->up) {
627 /* nothing to do */
628 return;
629 }
630
631 if (PIM_DEBUG_VXLAN)
632 zlog_debug("vxlan SG %s term mroute-up add",
633 vxlan_sg->sg_str);
634
635 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags);
636 /* enable MLAG designated-forwarder election on termination mroutes */
637 PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags);
638
639 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg, NULL /* iif */,
640 flags, __func__, NULL);
641 vxlan_sg->up = up;
642
643 if (!up) {
644 zlog_warn("vxlan SG %s term mroute-up add failed",
645 vxlan_sg->sg_str);
646 return;
647 }
648
649 /* update existing SG entries with the parent's MLAG flag */
650 pim_vxlan_inherit_mlag_flags(vxlan_sg->pim, up, true /*enable*/);
651 }
652
653 static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
654 {
655 struct pim_upstream *up = vxlan_sg->up;
656
657 if (!up)
658 return;
659
660 if (PIM_DEBUG_VXLAN)
661 zlog_debug("vxlan SG %s term mroute-up del",
662 vxlan_sg->sg_str);
663 vxlan_sg->up = NULL;
664 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) {
665 /* update SG entries that are inheriting from this XG entry */
666 pim_vxlan_inherit_mlag_flags(vxlan_sg->pim, up,
667 false /*enable*/);
668 /* clear out all the vxlan related flags */
669 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM |
670 PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN);
671 pim_mlag_up_local_del(vxlan_sg->pim, up);
672 pim_upstream_del(vxlan_sg->pim, up, __func__);
673 }
674 }
675
676 static void pim_vxlan_term_mr_add(struct pim_vxlan_sg *vxlan_sg)
677 {
678 if (PIM_DEBUG_VXLAN)
679 zlog_debug("vxlan SG %s term mroute add", vxlan_sg->sg_str);
680
681 vxlan_sg->term_oif = vxlan_sg->pim->vxlan.term_if;
682 if (!vxlan_sg->term_oif)
683 /* defer termination mroute till we have a termination device */
684 return;
685
686 pim_vxlan_term_mr_up_add(vxlan_sg);
687 /* set up local membership for the term-oif */
688 pim_vxlan_term_mr_oif_add(vxlan_sg);
689 }
690
691 static void pim_vxlan_term_mr_del(struct pim_vxlan_sg *vxlan_sg)
692 {
693 if (PIM_DEBUG_VXLAN)
694 zlog_debug("vxlan SG %s term mroute del", vxlan_sg->sg_str);
695
696 /* remove local membership associated with the term oif */
697 pim_vxlan_term_mr_oif_del(vxlan_sg);
698 /* remove references to the upstream entry */
699 pim_vxlan_term_mr_up_del(vxlan_sg);
700 }
701
702 /************************** vxlan SG cache management ************************/
703 static unsigned int pim_vxlan_sg_hash_key_make(const void *p)
704 {
705 const struct pim_vxlan_sg *vxlan_sg = p;
706
707 return (jhash_2words(vxlan_sg->sg.src.s_addr,
708 vxlan_sg->sg.grp.s_addr, 0));
709 }
710
711 static bool pim_vxlan_sg_hash_eq(const void *p1, const void *p2)
712 {
713 const struct pim_vxlan_sg *sg1 = p1;
714 const struct pim_vxlan_sg *sg2 = p2;
715
716 return ((sg1->sg.src.s_addr == sg2->sg.src.s_addr)
717 && (sg1->sg.grp.s_addr == sg2->sg.grp.s_addr));
718 }
719
720 static struct pim_vxlan_sg *pim_vxlan_sg_new(struct pim_instance *pim,
721 struct prefix_sg *sg)
722 {
723 struct pim_vxlan_sg *vxlan_sg;
724
725 vxlan_sg = XCALLOC(MTYPE_PIM_VXLAN_SG, sizeof(*vxlan_sg));
726
727 vxlan_sg->pim = pim;
728 vxlan_sg->sg = *sg;
729 pim_str_sg_set(sg, vxlan_sg->sg_str);
730
731 if (PIM_DEBUG_VXLAN)
732 zlog_debug("vxlan SG %s alloc", vxlan_sg->sg_str);
733
734 vxlan_sg = hash_get(pim->vxlan.sg_hash, vxlan_sg, hash_alloc_intern);
735
736 /* we register with the MLAG daemon in the first VxLAN SG and never
737 * de-register during that life of the pimd
738 */
739 if (pim->vxlan.sg_hash->count == 1) {
740 vxlan_mlag.flags |= PIM_VXLAN_MLAGF_DO_REG;
741 pim_mlag_register();
742 }
743
744 return vxlan_sg;
745 }
746
747 struct pim_vxlan_sg *pim_vxlan_sg_find(struct pim_instance *pim,
748 struct prefix_sg *sg)
749 {
750 struct pim_vxlan_sg lookup;
751
752 lookup.sg = *sg;
753 return hash_lookup(pim->vxlan.sg_hash, &lookup);
754 }
755
/* Return the vxlan SG entry for sg, creating it if it does not exist yet.
 * A newly created entry gets its origination or termination mroute set up,
 * depending on what pim_vxlan_is_orig_mroute() reports for it.
 */
struct pim_vxlan_sg *pim_vxlan_sg_add(struct pim_instance *pim,
		struct prefix_sg *sg)
{
	struct pim_vxlan_sg *entry = pim_vxlan_sg_find(pim, sg);

	if (!entry) {
		entry = pim_vxlan_sg_new(pim, sg);

		if (pim_vxlan_is_orig_mroute(entry))
			pim_vxlan_orig_mr_add(entry);
		else
			pim_vxlan_term_mr_add(entry);
	}

	return entry;
}
774
775 static void pim_vxlan_sg_del_item(struct pim_vxlan_sg *vxlan_sg)
776 {
777 vxlan_sg->flags |= PIM_VXLAN_SGF_DEL_IN_PROG;
778
779 pim_vxlan_del_work(vxlan_sg);
780
781 if (pim_vxlan_is_orig_mroute(vxlan_sg))
782 pim_vxlan_orig_mr_del(vxlan_sg);
783 else
784 pim_vxlan_term_mr_del(vxlan_sg);
785
786 if (PIM_DEBUG_VXLAN)
787 zlog_debug("vxlan SG %s free", vxlan_sg->sg_str);
788
789 XFREE(MTYPE_PIM_VXLAN_SG, vxlan_sg);
790 }
791
792 void pim_vxlan_sg_del(struct pim_instance *pim, struct prefix_sg *sg)
793 {
794 struct pim_vxlan_sg *vxlan_sg;
795
796 vxlan_sg = pim_vxlan_sg_find(pim, sg);
797 if (!vxlan_sg)
798 return;
799
800 hash_release(pim->vxlan.sg_hash, vxlan_sg);
801 pim_vxlan_sg_del_item(vxlan_sg);
802 }
803
804 /******************************* MLAG handling *******************************/
805 bool pim_vxlan_do_mlag_reg(void)
806 {
807 return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_DO_REG);
808 }
809
810 /* The peerlink sub-interface is added as an OIF to the origination-mroute.
811 * This is done to send a copy of the multicast-vxlan encapsulated traffic
812 * to the MLAG peer which may mroute it over the underlay if there are any
813 * interested receivers.
814 */
815 static void pim_vxlan_sg_peerlink_oif_update(struct hash_bucket *bucket,
816 void *arg)
817 {
818 struct interface *new_oif = (struct interface *)arg;
819 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
820
821 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
822 return;
823
824 if (vxlan_sg->orig_oif == new_oif)
825 return;
826
827 pim_vxlan_orig_mr_oif_del(vxlan_sg);
828
829 vxlan_sg->orig_oif = new_oif;
830 pim_vxlan_orig_mr_oif_add(vxlan_sg);
831 }
832
833 /* In the case of anycast VTEPs the VTEP-PIP must be used as the
834 * register source.
835 */
836 bool pim_vxlan_get_register_src(struct pim_instance *pim,
837 struct pim_upstream *up, struct in_addr *src_p)
838 {
839 if (!(vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED))
840 return true;
841
842 /* if address is not available suppress the pim-register */
843 if (vxlan_mlag.reg_addr.s_addr == INADDR_ANY)
844 return false;
845
846 *src_p = vxlan_mlag.reg_addr;
847 return true;
848 }
849
850 void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role,
851 struct interface *peerlink_rif,
852 struct in_addr *reg_addr)
853 {
854 struct pim_instance *pim;
855 char addr_buf[INET_ADDRSTRLEN];
856 struct pim_interface *pim_ifp = NULL;
857
858 if (PIM_DEBUG_VXLAN) {
859 inet_ntop(AF_INET, reg_addr,
860 addr_buf, INET_ADDRSTRLEN);
861 zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s",
862 enable ? "enable" : "disable",
863 peer_state ? "up" : "down",
864 role,
865 peerlink_rif ? peerlink_rif->name : "-",
866 addr_buf);
867 }
868
869 /* XXX: for now vxlan termination is only possible in the default VRF
870 * when that changes this will need to change to iterate all VRFs
871 */
872 pim = pim_get_pim_instance(VRF_DEFAULT);
873
874 if (enable)
875 vxlan_mlag.flags |= PIM_VXLAN_MLAGF_ENABLED;
876 else
877 vxlan_mlag.flags &= ~PIM_VXLAN_MLAGF_ENABLED;
878
879 if (vxlan_mlag.peerlink_rif != peerlink_rif)
880 vxlan_mlag.peerlink_rif = peerlink_rif;
881
882 vxlan_mlag.reg_addr = *reg_addr;
883 vxlan_mlag.peer_state = peer_state;
884 vxlan_mlag.role = role;
885
886 /* process changes */
887 if (vxlan_mlag.peerlink_rif)
888 pim_ifp = (struct pim_interface *)vxlan_mlag.peerlink_rif->info;
889 if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
890 pim_ifp && (pim_ifp->mroute_vif_index > 0))
891 pim_vxlan_set_peerlink_rif(pim, peerlink_rif);
892 else
893 pim_vxlan_set_peerlink_rif(pim, NULL);
894 }
895
896 /****************************** misc callbacks *******************************/
897 static void pim_vxlan_set_default_iif(struct pim_instance *pim,
898 struct interface *ifp)
899 {
900 struct interface *old_iif;
901
902 if (pim->vxlan.default_iif == ifp)
903 return;
904
905 old_iif = pim->vxlan.default_iif;
906 if (PIM_DEBUG_VXLAN)
907 zlog_debug("%s: vxlan default iif changed from %s to %s",
908 __func__, old_iif ? old_iif->name : "-",
909 ifp ? ifp->name : "-");
910
911 old_iif = pim_vxlan_orig_mr_iif_get(pim);
912 pim->vxlan.default_iif = ifp;
913 ifp = pim_vxlan_orig_mr_iif_get(pim);
914 if (old_iif == ifp)
915 return;
916
917 if (PIM_DEBUG_VXLAN)
918 zlog_debug("%s: vxlan orig iif changed from %s to %s", __func__,
919 old_iif ? old_iif->name : "-",
920 ifp ? ifp->name : "-");
921
922 /* add/del upstream entries for the existing vxlan SG when the
923 * interface becomes available
924 */
925 if (pim->vxlan.sg_hash)
926 hash_iterate(pim->vxlan.sg_hash,
927 pim_vxlan_orig_mr_iif_update, NULL);
928 }
929
930 static void pim_vxlan_up_cost_update(struct pim_instance *pim,
931 struct pim_upstream *up,
932 struct interface *old_peerlink_rif)
933 {
934 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags))
935 return;
936
937 if (up->rpf.source_nexthop.interface &&
938 ((up->rpf.source_nexthop.interface ==
939 pim->vxlan.peerlink_rif) ||
940 (up->rpf.source_nexthop.interface ==
941 old_peerlink_rif))) {
942 if (PIM_DEBUG_VXLAN)
943 zlog_debug("RPF cost adjust for %s on peerlink-rif (old: %s, new: %s) change",
944 up->sg_str,
945 old_peerlink_rif ?
946 old_peerlink_rif->name : "-",
947 pim->vxlan.peerlink_rif ?
948 pim->vxlan.peerlink_rif->name : "-");
949 pim_mlag_up_local_add(pim, up);
950 }
951 }
952
953 static void pim_vxlan_term_mr_cost_update(struct hash_bucket *bucket, void *arg)
954 {
955 struct interface *old_peerlink_rif = (struct interface *)arg;
956 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
957 struct pim_upstream *up;
958 struct listnode *listnode;
959 struct pim_upstream *child;
960
961 if (pim_vxlan_is_orig_mroute(vxlan_sg))
962 return;
963
964 /* Lookup all XG and SG entries with RPF-interface peerlink_rif */
965 up = vxlan_sg->up;
966 if (!up)
967 return;
968
969 pim_vxlan_up_cost_update(vxlan_sg->pim, up,
970 old_peerlink_rif);
971
972 for (ALL_LIST_ELEMENTS_RO(up->sources, listnode,
973 child))
974 pim_vxlan_up_cost_update(vxlan_sg->pim, child,
975 old_peerlink_rif);
976 }
977
978 static void pim_vxlan_sg_peerlink_rif_update(struct hash_bucket *bucket,
979 void *arg)
980 {
981 pim_vxlan_orig_mr_iif_update(bucket, NULL);
982 pim_vxlan_term_mr_cost_update(bucket, arg);
983 }
984
985 static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
986 struct interface *ifp)
987 {
988 struct interface *old_iif;
989 struct interface *new_iif;
990 struct interface *old_oif;
991 struct interface *new_oif;
992
993 if (pim->vxlan.peerlink_rif == ifp)
994 return;
995
996 old_iif = pim->vxlan.peerlink_rif;
997 if (PIM_DEBUG_VXLAN)
998 zlog_debug("%s: vxlan peerlink_rif changed from %s to %s",
999 __func__, old_iif ? old_iif->name : "-",
1000 ifp ? ifp->name : "-");
1001
1002 old_iif = pim_vxlan_orig_mr_iif_get(pim);
1003 old_oif = pim_vxlan_orig_mr_oif_get(pim);
1004 pim->vxlan.peerlink_rif = ifp;
1005
1006 new_iif = pim_vxlan_orig_mr_iif_get(pim);
1007 if (old_iif != new_iif) {
1008 if (PIM_DEBUG_VXLAN)
1009 zlog_debug("%s: vxlan orig iif changed from %s to %s",
1010 __func__, old_iif ? old_iif->name : "-",
1011 new_iif ? new_iif->name : "-");
1012
1013 /* add/del upstream entries for the existing vxlan SG when the
1014 * interface becomes available
1015 */
1016 if (pim->vxlan.sg_hash)
1017 hash_iterate(pim->vxlan.sg_hash,
1018 pim_vxlan_sg_peerlink_rif_update,
1019 old_iif);
1020 }
1021
1022 new_oif = pim_vxlan_orig_mr_oif_get(pim);
1023 if (old_oif != new_oif) {
1024 if (PIM_DEBUG_VXLAN)
1025 zlog_debug("%s: vxlan orig oif changed from %s to %s",
1026 __func__, old_oif ? old_oif->name : "-",
1027 new_oif ? new_oif->name : "-");
1028 if (pim->vxlan.sg_hash)
1029 hash_iterate(pim->vxlan.sg_hash,
1030 pim_vxlan_sg_peerlink_oif_update,
1031 new_oif);
1032 }
1033 }
1034
1035 static void pim_vxlan_term_mr_oif_update(struct hash_bucket *bucket, void *arg)
1036 {
1037 struct interface *ifp = (struct interface *)arg;
1038 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)bucket->data;
1039
1040 if (pim_vxlan_is_orig_mroute(vxlan_sg))
1041 return;
1042
1043 if (vxlan_sg->term_oif == ifp)
1044 return;
1045
1046 if (PIM_DEBUG_VXLAN)
1047 zlog_debug("vxlan SG %s term oif changed from %s to %s",
1048 vxlan_sg->sg_str,
1049 vxlan_sg->term_oif ? vxlan_sg->term_oif->name : "-",
1050 ifp ? ifp->name : "-");
1051
1052 pim_vxlan_term_mr_del(vxlan_sg);
1053 vxlan_sg->term_oif = ifp;
1054 pim_vxlan_term_mr_add(vxlan_sg);
1055 }
1056
1057 static void pim_vxlan_term_oif_update(struct pim_instance *pim,
1058 struct interface *ifp)
1059 {
1060 if (pim->vxlan.term_if == ifp)
1061 return;
1062
1063 if (PIM_DEBUG_VXLAN)
1064 zlog_debug("vxlan term oif changed from %s to %s",
1065 pim->vxlan.term_if ? pim->vxlan.term_if->name : "-",
1066 ifp ? ifp->name : "-");
1067
1068 pim->vxlan.term_if = ifp;
1069 if (pim->vxlan.sg_hash)
1070 hash_iterate(pim->vxlan.sg_hash,
1071 pim_vxlan_term_mr_oif_update, ifp);
1072 }
1073
1074 void pim_vxlan_add_vif(struct interface *ifp)
1075 {
1076 struct pim_interface *pim_ifp = ifp->info;
1077 struct pim_instance *pim = pim_ifp->pim;
1078
1079 if (pim->vrf->vrf_id != VRF_DEFAULT)
1080 return;
1081
1082 if (if_is_loopback(ifp))
1083 pim_vxlan_set_default_iif(pim, ifp);
1084
1085 if (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED &&
1086 (ifp == vxlan_mlag.peerlink_rif))
1087 pim_vxlan_set_peerlink_rif(pim, ifp);
1088
1089 if (pim->vxlan.term_if_cfg == ifp)
1090 pim_vxlan_term_oif_update(pim, ifp);
1091 }
1092
1093 void pim_vxlan_del_vif(struct interface *ifp)
1094 {
1095 struct pim_interface *pim_ifp = ifp->info;
1096 struct pim_instance *pim = pim_ifp->pim;
1097
1098 if (pim->vrf->vrf_id != VRF_DEFAULT)
1099 return;
1100
1101 if (pim->vxlan.default_iif == ifp)
1102 pim_vxlan_set_default_iif(pim, NULL);
1103
1104 if (pim->vxlan.peerlink_rif == ifp)
1105 pim_vxlan_set_peerlink_rif(pim, NULL);
1106
1107 if (pim->vxlan.term_if == ifp)
1108 pim_vxlan_term_oif_update(pim, NULL);
1109 }
1110
1111 /* enable pim implicitly on the termination device add */
1112 void pim_vxlan_add_term_dev(struct pim_instance *pim,
1113 struct interface *ifp)
1114 {
1115 struct pim_interface *pim_ifp;
1116
1117 if (pim->vxlan.term_if_cfg == ifp)
1118 return;
1119
1120 if (PIM_DEBUG_VXLAN)
1121 zlog_debug("vxlan term oif cfg changed from %s to %s",
1122 pim->vxlan.term_if_cfg ?
1123 pim->vxlan.term_if_cfg->name : "-",
1124 ifp->name);
1125
1126 pim->vxlan.term_if_cfg = ifp;
1127
1128 /* enable pim on the term ifp */
1129 pim_ifp = (struct pim_interface *)ifp->info;
1130 if (pim_ifp) {
1131 PIM_IF_DO_PIM(pim_ifp->options);
1132 /* ifp is already oper up; activate it as a term dev */
1133 if (pim_ifp->mroute_vif_index >= 0)
1134 pim_vxlan_term_oif_update(pim, ifp);
1135 } else {
1136 /* ensure that pimreg exists before using the newly created
1137 * vxlan termination device
1138 */
1139 pim_if_create_pimreg(pim);
1140 (void)pim_if_new(ifp, false /*igmp*/, true /*pim*/,
1141 false /*pimreg*/, true /*vxlan_term*/);
1142 }
1143 }
1144
1145 /* disable pim implicitly, if needed, on the termination device deletion */
1146 void pim_vxlan_del_term_dev(struct pim_instance *pim)
1147 {
1148 struct interface *ifp = pim->vxlan.term_if_cfg;
1149 struct pim_interface *pim_ifp;
1150
1151 if (PIM_DEBUG_VXLAN)
1152 zlog_debug("vxlan term oif cfg changed from %s to -",
1153 ifp->name);
1154
1155 pim->vxlan.term_if_cfg = NULL;
1156
1157 pim_ifp = (struct pim_interface *)ifp->info;
1158 if (pim_ifp) {
1159 PIM_IF_DONT_PIM(pim_ifp->options);
1160 if (!PIM_IF_TEST_IGMP(pim_ifp->options))
1161 pim_if_delete(ifp);
1162 }
1163 }
1164
1165 void pim_vxlan_init(struct pim_instance *pim)
1166 {
1167 char hash_name[64];
1168
1169 snprintf(hash_name, sizeof(hash_name),
1170 "PIM %s vxlan SG hash", pim->vrf->name);
1171 pim->vxlan.sg_hash = hash_create(pim_vxlan_sg_hash_key_make,
1172 pim_vxlan_sg_hash_eq, hash_name);
1173 }
1174
1175 void pim_vxlan_exit(struct pim_instance *pim)
1176 {
1177 if (pim->vxlan.sg_hash) {
1178 hash_clean(pim->vxlan.sg_hash,
1179 (void (*)(void *))pim_vxlan_sg_del_item);
1180 hash_free(pim->vxlan.sg_hash);
1181 pim->vxlan.sg_hash = NULL;
1182 }
1183 }
1184
1185 void pim_vxlan_terminate(void)
1186 {
1187 pim_vxlan_work_timer_setup(false);
1188 }