]> git.proxmox.com Git - mirror_frr.git/blame - pimd/pim_vxlan.c
Merge pull request #5498 from mjstapp/sharp_with_labels
[mirror_frr.git] / pimd / pim_vxlan.c
CommitLineData
b583b035
AK
1/* PIM support for VxLAN BUM flooding
2 *
3 * Copyright (C) 2019 Cumulus Networks, Inc.
4 *
5 * This file is part of FRR.
6 *
7 * FRR is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
10 * later version.
11 *
12 * FRR is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 */
21
22#include <zebra.h>
23
24#include <hash.h>
25#include <jhash.h>
26#include <log.h>
27#include <prefix.h>
28#include <vrf.h>
29
30#include "pimd.h"
31#include "pim_iface.h"
32#include "pim_memory.h"
33#include "pim_oil.h"
34#include "pim_register.h"
35#include "pim_str.h"
36#include "pim_upstream.h"
37#include "pim_ifchannel.h"
38#include "pim_nht.h"
39#include "pim_zebra.h"
40#include "pim_vxlan.h"
41
d889ab75
AK
42/* pim-vxlan global info */
43struct pim_vxlan vxlan_info, *pim_vxlan_p = &vxlan_info;
b583b035 44
b9f3a51c 45static void pim_vxlan_work_timer_setup(bool start);
48b33862
AK
46static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
47 struct interface *ifp);
b9f3a51c
AK
48
49/*************************** vxlan work list **********************************
50 * A work list is maintained for staggered generation of pim null register
51 * messages for vxlan SG entries that are in a reg_join state.
52 *
53 * A max of 500 NULL registers are generated at one shot. If paused reg
54 * generation continues on the next second and so on till all register
55 * messages have been sent out. And the process is restarted every 60s.
56 *
57 * The purpose of this null register generation is to set up the SPT and
58 * maintain it independent of the presence of overlay BUM traffic.
59 ****************************************************************************/
60static void pim_vxlan_do_reg_work(void)
61{
62 struct listnode *listnode;
63 int work_cnt = 0;
64 struct pim_vxlan_sg *vxlan_sg;
65 static int sec_count;
66
67 ++sec_count;
68
69 if (sec_count > PIM_VXLAN_NULL_REG_INTERVAL) {
70 sec_count = 0;
71 listnode = vxlan_info.next_work ?
72 vxlan_info.next_work :
73 vxlan_info.work_list->head;
74 if (PIM_DEBUG_VXLAN && listnode)
75 zlog_debug("vxlan SG work %s",
76 vxlan_info.next_work ? "continues" : "starts");
77 } else {
78 listnode = vxlan_info.next_work;
79 }
80
81 for (; listnode; listnode = listnode->next) {
82 vxlan_sg = (struct pim_vxlan_sg *)listnode->data;
83 if (vxlan_sg->up && (vxlan_sg->up->reg_state == PIM_REG_JOIN)) {
84 if (PIM_DEBUG_VXLAN)
85 zlog_debug("vxlan SG %s periodic NULL register",
86 vxlan_sg->sg_str);
87 pim_null_register_send(vxlan_sg->up);
88 ++work_cnt;
89 }
90
91 if (work_cnt > vxlan_info.max_work_cnt) {
92 vxlan_info.next_work = listnode->next;
93 if (PIM_DEBUG_VXLAN)
94 zlog_debug("vxlan SG %d work items proc and pause",
95 work_cnt);
96 return;
97 }
98 }
99
100 if (work_cnt) {
101 if (PIM_DEBUG_VXLAN)
102 zlog_debug("vxlan SG %d work items proc", work_cnt);
103 }
104 vxlan_info.next_work = NULL;
105}
106
107/* Staggered work related info is initialized when the first work comes
108 * along
109 */
110static void pim_vxlan_init_work(void)
111{
112 if (vxlan_info.flags & PIM_VXLANF_WORK_INITED)
113 return;
114
115 vxlan_info.max_work_cnt = PIM_VXLAN_WORK_MAX;
116 vxlan_info.flags |= PIM_VXLANF_WORK_INITED;
117 vxlan_info.work_list = list_new();
2951a7a4 118 pim_vxlan_work_timer_setup(true/* start */);
b9f3a51c
AK
119}
120
121static void pim_vxlan_add_work(struct pim_vxlan_sg *vxlan_sg)
122{
123 if (vxlan_sg->flags & PIM_VXLAN_SGF_DEL_IN_PROG) {
124 if (PIM_DEBUG_VXLAN)
125 zlog_debug("vxlan SG %s skip work list; del-in-prog",
126 vxlan_sg->sg_str);
127 return;
128 }
129
130 pim_vxlan_init_work();
131
132 /* already a part of the work list */
133 if (vxlan_sg->work_node)
134 return;
135
136 if (PIM_DEBUG_VXLAN)
137 zlog_debug("vxlan SG %s work list add",
138 vxlan_sg->sg_str);
139 vxlan_sg->work_node = listnode_add(vxlan_info.work_list, vxlan_sg);
140 /* XXX: adjust max_work_cnt if needed */
141}
142
143static void pim_vxlan_del_work(struct pim_vxlan_sg *vxlan_sg)
144{
145 if (!vxlan_sg->work_node)
146 return;
147
148 if (PIM_DEBUG_VXLAN)
149 zlog_debug("vxlan SG %s work list del",
150 vxlan_sg->sg_str);
151
152 if (vxlan_sg->work_node == vxlan_info.next_work)
153 vxlan_info.next_work = vxlan_sg->work_node->next;
154
155 list_delete_node(vxlan_info.work_list, vxlan_sg->work_node);
156 vxlan_sg->work_node = NULL;
157}
158
159void pim_vxlan_update_sg_reg_state(struct pim_instance *pim,
160 struct pim_upstream *up, bool reg_join)
161{
162 struct pim_vxlan_sg *vxlan_sg;
163
164 vxlan_sg = pim_vxlan_sg_find(pim, &up->sg);
165 if (!vxlan_sg)
166 return;
167
168 /* add the vxlan sg entry to a work list for periodic reg joins.
169 * the entry will stay in the list as long as the register state is
170 * PIM_REG_JOIN
171 */
172 if (reg_join)
173 pim_vxlan_add_work(vxlan_sg);
174 else
175 pim_vxlan_del_work(vxlan_sg);
176}
177
178static int pim_vxlan_work_timer_cb(struct thread *t)
179{
180 pim_vxlan_do_reg_work();
181 pim_vxlan_work_timer_setup(true /* start */);
182 return 0;
183}
184
185/* global 1second timer used for periodic processing */
186static void pim_vxlan_work_timer_setup(bool start)
187{
188 THREAD_OFF(vxlan_info.work_timer);
189 if (start)
190 thread_add_timer(router->master, pim_vxlan_work_timer_cb, NULL,
191 PIM_VXLAN_WORK_TIME, &vxlan_info.work_timer);
192}
193
a513da36
AK
194/**************************** vxlan origination mroutes ***********************
195 * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination
196 * mroute is setup by pimd. The purpose of this mroute is to forward vxlan
197 * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast packets
198 * over the underlay.)
199 *
200 * Sample mroute (single VTEP):
201 * (27.0.0.7, 239.1.1.100) Iif: lo Oifs: uplink-1
202 *
203 * Sample mroute (anycast VTEP):
204 * (36.0.0.9, 239.1.1.100) Iif: peerlink-3.4094\
205 * Oifs: peerlink-3.4094 uplink-1
206 ***************************************************************************/
207static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
208{
209 struct pim_upstream *up = vxlan_sg->up;
210
211 if (!up)
212 return;
213
214 if (PIM_DEBUG_VXLAN)
215 zlog_debug("vxlan SG %s orig mroute-up del",
216 vxlan_sg->sg_str);
217
218 vxlan_sg->up = NULL;
219 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) {
220 /* clear out all the vxlan properties */
221 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG |
222 PIM_UPSTREAM_FLAG_MASK_STATIC_IIF |
223 PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY |
224 PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG |
225 PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA |
226 PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL);
227
228 /* We bring things to a grinding halt by force expirying
229 * the kat. Doing this will also remove the reference we
230 * created as a "vxlan" source and delete the upstream entry
231 * if there are no other references.
232 */
233 if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up->flags)) {
234 THREAD_OFF(up->t_ka_timer);
235 up = pim_upstream_keep_alive_timer_proc(up);
236 } else {
237 /* this is really unexpected as we force vxlan
238 * origination mroutes active sources but just in
239 * case
240 */
241 up = pim_upstream_del(vxlan_sg->pim, up,
242 __PRETTY_FUNCTION__);
243 }
244 /* if there are other references register the source
245 * for nht
246 */
247 if (up)
8c55c132 248 pim_rpf_update(vxlan_sg->pim, up, NULL, __func__);
a513da36
AK
249 }
250}
251
252static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg *vxlan_sg)
253{
a513da36
AK
254 /* update MFC with the new IIF */
255 pim_upstream_fill_static_iif(vxlan_sg->up, vxlan_sg->iif);
7984af18 256 pim_upstream_mroute_iif_update(vxlan_sg->up->channel_oil, __func__);
a513da36
AK
257
258 if (PIM_DEBUG_VXLAN)
7984af18 259 zlog_debug("vxlan SG %s orig mroute-up updated with iif %s",
a513da36 260 vxlan_sg->sg_str,
7984af18 261 vxlan_sg->iif?vxlan_sg->iif->name:"-");
a513da36
AK
262
263}
264
265/* For every VxLAN BUM multicast group we setup a SG-up that has the following
266 * "forced properties" -
267 * 1. Directly connected on a DR interface i.e. we must act as an FHR
268 * 2. We prime the pump i.e. no multicast data is needed to register this
269 * source with the FHR. To do that we send periodic null registers if
270 * the SG entry is in a register-join state. We also prevent expiry of
271 * KAT.
272 * 3. As this SG is setup without data there is no need to register encapsulate
273 * data traffic. This encapsulation is explicitly skipped for the following
274 * reasons -
275 * a) Many levels of encapsulation are needed creating MTU disc challenges.
276 * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then
277 * encapsulated again in a pim-register header.
278 * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if
279 * they both reg encapsulated traffic the RP will accept the duplicates
280 * as there are no RPF checks for this encapsulated data.
281 * a), b) can be worked around if needed, but there is really no need because
282 * of (2) i.e. the pump is primed without data.
283 */
284static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
285{
286 struct pim_upstream *up;
287 int flags = 0;
288 struct prefix nht_p;
5923b739 289 struct pim_instance *pim = vxlan_sg->pim;
a513da36
AK
290
291 if (vxlan_sg->up) {
292 /* nothing to do */
293 return;
294 }
295
296 if (PIM_DEBUG_VXLAN)
297 zlog_debug("vxlan SG %s orig mroute-up add with iif %s",
298 vxlan_sg->sg_str,
299 vxlan_sg->iif?vxlan_sg->iif->name:"-");
300
301 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags);
302 /* pin the IIF to lo or peerlink-subinterface and disable NHT */
303 PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags);
304 /* Fake traffic by setting SRC_STREAM and starting KAT */
305 /* We intentionally skip updating ref count for SRC_STREAM/FHR.
306 * Setting SRC_VXLAN should have already created a reference
307 * preventing the entry from being deleted
308 */
309 PIM_UPSTREAM_FLAG_SET_FHR(flags);
310 PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags);
311 /* Force pimreg even if non-DR. This is needed on a MLAG setup for
312 * VxLAN AA
313 */
314 PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags);
315 /* prevent KAT expiry. we want the MDT setup even if there is no BUM
316 * traffic
317 */
318 PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags);
319 /* SPT for vxlan BUM groups is primed and maintained via NULL
320 * registers so there is no need to reg-encapsulate
321 * vxlan-encapsulated overlay data traffic
322 */
323 PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags);
324 /* On a MLAG setup we force a copy to the MLAG peer while also
325 * accepting traffic from the peer. To do this we set peerlink-rif as
326 * the IIF and also add it to the OIL
327 */
328 PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags);
329
330 /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */
331 up = pim_upstream_find(vxlan_sg->pim, &vxlan_sg->sg);
332 if (up) {
333 /* if the iif is set to something other than the vxlan_sg->iif
334 * we must dereg the old nexthop and force to new "static"
335 * iif
336 */
337 if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) {
338 nht_p.family = AF_INET;
339 nht_p.prefixlen = IPV4_MAX_BITLEN;
340 nht_p.u.prefix4 = up->upstream_addr;
341 pim_delete_tracked_nexthop(vxlan_sg->pim,
4533b847 342 &nht_p, up, NULL, false);
a513da36 343 }
69e3538c
AK
344 /* We are acting FHR; clear out use_rpt setting if any */
345 pim_upstream_update_use_rpt(up, false /*update_mroute*/);
a513da36
AK
346 pim_upstream_ref(up, flags, __PRETTY_FUNCTION__);
347 vxlan_sg->up = up;
348 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
5923b739
AK
349 /* mute pimreg on origination mroutes */
350 if (pim->regiface)
351 pim_channel_update_oif_mute(up->channel_oil,
352 pim->regiface->info);
a513da36
AK
353 } else {
354 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg,
355 vxlan_sg->iif, flags,
356 __PRETTY_FUNCTION__, NULL);
357 vxlan_sg->up = up;
358 }
359
360 if (!up) {
361 if (PIM_DEBUG_VXLAN)
362 zlog_debug("vxlan SG %s orig mroute-up add failed",
363 vxlan_sg->sg_str);
364 return;
365 }
366
367 pim_upstream_keep_alive_timer_start(up, vxlan_sg->pim->keep_alive_time);
368
369 /* register the source with the RP */
370 if (up->reg_state == PIM_REG_NOINFO) {
371 pim_register_join(up);
372 pim_null_register_send(up);
373 }
374
375 /* update the inherited OIL */
376 pim_upstream_inherited_olist(vxlan_sg->pim, up);
69e3538c
AK
377 if (!up->channel_oil->installed)
378 pim_upstream_mroute_add(up->channel_oil, __func__);
a513da36
AK
379}
380
39df629a
AK
381static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
382{
383 if (!vxlan_sg->up || !vxlan_sg->orig_oif)
384 return;
385
386 if (PIM_DEBUG_VXLAN)
387 zlog_debug("vxlan SG %s oif %s add",
388 vxlan_sg->sg_str, vxlan_sg->orig_oif->name);
389
390 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
391 pim_channel_add_oif(vxlan_sg->up->channel_oil,
1b249e70
AK
392 vxlan_sg->orig_oif, PIM_OIF_FLAG_PROTO_VXLAN,
393 __func__);
39df629a
AK
394}
395
396static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
397{
398 struct interface *orig_oif;
399
400 orig_oif = vxlan_sg->orig_oif;
401 vxlan_sg->orig_oif = NULL;
402
403 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
404 return;
405
406 if (PIM_DEBUG_VXLAN)
407 zlog_debug("vxlan SG %s oif %s del",
408 vxlan_sg->sg_str, orig_oif->name);
409
410 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
411 pim_channel_del_oif(vxlan_sg->up->channel_oil,
1b249e70 412 orig_oif, PIM_OIF_FLAG_PROTO_VXLAN, __func__);
39df629a
AK
413}
414
415static inline struct interface *pim_vxlan_orig_mr_oif_get(
416 struct pim_instance *pim)
417{
418 return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) ?
419 pim->vxlan.peerlink_rif : NULL;
420}
421
a513da36
AK
422/* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if
423 * the mroute is in a non-default vrf).
424 * Anycast VTEPs: IIF is the MLAG ISL/peerlink.
425 */
426static inline struct interface *pim_vxlan_orig_mr_iif_get(
427 struct pim_instance *pim)
428{
429 return ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
430 pim->vxlan.peerlink_rif) ?
431 pim->vxlan.peerlink_rif : pim->vxlan.default_iif;
432}
433
434static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg *vxlan_sg)
435{
436 struct pim_interface *pim_ifp;
437
438 vxlan_sg->iif = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim);
439 if (!vxlan_sg->iif)
440 return false;
441
442 pim_ifp = (struct pim_interface *)vxlan_sg->iif->info;
443 if (!pim_ifp || (pim_ifp->mroute_vif_index < 0))
444 return false;
445
446 return true;
447}
448
449static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg *vxlan_sg)
450{
451 pim_vxlan_orig_mr_up_add(vxlan_sg);
39df629a
AK
452
453 vxlan_sg->orig_oif = pim_vxlan_orig_mr_oif_get(vxlan_sg->pim);
454 pim_vxlan_orig_mr_oif_add(vxlan_sg);
a513da36
AK
455}
456
457static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg *vxlan_sg)
458{
459 if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg))
460 return;
461
462 if (PIM_DEBUG_VXLAN)
463 zlog_debug("vxlan SG %s orig-mr add", vxlan_sg->sg_str);
464
465 pim_vxlan_orig_mr_install(vxlan_sg);
466}
467
468static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg *vxlan_sg)
469{
470 if (PIM_DEBUG_VXLAN)
471 zlog_debug("vxlan SG %s orig-mr del", vxlan_sg->sg_str);
39df629a
AK
472
473 pim_vxlan_orig_mr_oif_del(vxlan_sg);
a513da36
AK
474 pim_vxlan_orig_mr_up_del(vxlan_sg);
475}
476
269c1fe1
AK
477static void pim_vxlan_orig_mr_iif_update(struct hash_backet *backet, void *arg)
478{
479 struct interface *ifp = (struct interface *)arg;
480 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data;
481 struct interface *old_iif = vxlan_sg->iif;
482
483 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
484 return;
485
486 if (PIM_DEBUG_VXLAN)
487 zlog_debug("vxlan SG %s iif changed from %s to %s",
488 vxlan_sg->sg_str,
489 old_iif ? old_iif->name : "-",
490 ifp ? ifp->name : "-");
491
492 if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg)) {
493 if (vxlan_sg->up) {
494 /* upstream exists but iif changed */
495 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
496 } else {
497 /* install mroute */
498 pim_vxlan_orig_mr_install(vxlan_sg);
499 }
500 } else {
501 pim_vxlan_orig_mr_del(vxlan_sg);
502 }
503}
504
332087df
AK
505/**************************** vxlan termination mroutes ***********************
506 * For every bum-mcast-grp registered by evpn a *G termination
507 * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan
508 * packets with the bum-mcast-grp dip from the underlay and terminate the
509 * tunnel. This is done by including the vxlan termination device (ipmr-lo) in
510 * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay
511 * bridging.
512 *
513 * Sample mroute:
514 * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1
515 *****************************************************************************/
516struct pim_interface *pim_vxlan_get_term_ifp(struct pim_instance *pim)
517{
518 return pim->vxlan.term_if ?
519 (struct pim_interface *)pim->vxlan.term_if->info : NULL;
520}
521
522static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
523{
524 if (vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED)
525 return;
526
527 if (PIM_DEBUG_VXLAN)
528 zlog_debug("vxlan SG %s term-oif %s add",
529 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
530
531 if (pim_ifchannel_local_membership_add(vxlan_sg->term_oif,
532 &vxlan_sg->sg)) {
533 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
534 } else {
535 zlog_warn("vxlan SG %s term-oif %s add failed",
536 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
537 }
538}
539
540static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
541{
542 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
543 return;
544
545 if (PIM_DEBUG_VXLAN)
546 zlog_debug("vxlan SG %s oif %s del",
547 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
548
549 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
550 pim_ifchannel_local_membership_del(vxlan_sg->term_oif, &vxlan_sg->sg);
551}
552
553static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
554{
555 struct pim_upstream *up;
556 int flags = 0;
557
558 if (vxlan_sg->up) {
559 /* nothing to do */
560 return;
561 }
562
563 if (PIM_DEBUG_VXLAN)
564 zlog_debug("vxlan SG %s term mroute-up add",
565 vxlan_sg->sg_str);
566
567 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags);
568 /* enable MLAG designated-forwarder election on termination mroutes */
569 PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags);
570
571 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg,
572 NULL /* iif */, flags,
573 __PRETTY_FUNCTION__, NULL);
574 vxlan_sg->up = up;
575
576 if (!up) {
577 zlog_warn("vxlan SG %s term mroute-up add failed",
578 vxlan_sg->sg_str);
579 }
580}
581
582static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
583{
584 struct pim_upstream *up = vxlan_sg->up;
585
586 if (!up)
587 return;
588
589 if (PIM_DEBUG_VXLAN)
590 zlog_debug("vxlan SG %s term mroute-up del",
591 vxlan_sg->sg_str);
592 vxlan_sg->up = NULL;
593 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) {
594 /* clear out all the vxlan related flags */
595 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM |
596 PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN);
597
598 pim_upstream_del(vxlan_sg->pim, up,
599 __PRETTY_FUNCTION__);
600 }
601}
602
603static void pim_vxlan_term_mr_add(struct pim_vxlan_sg *vxlan_sg)
604{
605 if (PIM_DEBUG_VXLAN)
606 zlog_debug("vxlan SG %s term mroute add", vxlan_sg->sg_str);
607
608 vxlan_sg->term_oif = vxlan_sg->pim->vxlan.term_if;
609 if (!vxlan_sg->term_oif)
610 /* defer termination mroute till we have a termination device */
611 return;
612
613 pim_vxlan_term_mr_up_add(vxlan_sg);
614 /* set up local membership for the term-oif */
615 pim_vxlan_term_mr_oif_add(vxlan_sg);
616}
617
618static void pim_vxlan_term_mr_del(struct pim_vxlan_sg *vxlan_sg)
619{
620 if (PIM_DEBUG_VXLAN)
621 zlog_debug("vxlan SG %s term mroute del", vxlan_sg->sg_str);
622
623 /* remove local membership associated with the term oif */
624 pim_vxlan_term_mr_oif_del(vxlan_sg);
625 /* remove references to the upstream entry */
626 pim_vxlan_term_mr_up_del(vxlan_sg);
627}
628
b583b035 629/************************** vxlan SG cache management ************************/
d8b87afe 630static unsigned int pim_vxlan_sg_hash_key_make(const void *p)
b583b035 631{
d8b87afe 632 const struct pim_vxlan_sg *vxlan_sg = p;
b583b035
AK
633
634 return (jhash_2words(vxlan_sg->sg.src.s_addr,
635 vxlan_sg->sg.grp.s_addr, 0));
636}
637
638static bool pim_vxlan_sg_hash_eq(const void *p1, const void *p2)
639{
640 const struct pim_vxlan_sg *sg1 = p1;
641 const struct pim_vxlan_sg *sg2 = p2;
642
643 return ((sg1->sg.src.s_addr == sg2->sg.src.s_addr)
644 && (sg1->sg.grp.s_addr == sg2->sg.grp.s_addr));
645}
646
647static struct pim_vxlan_sg *pim_vxlan_sg_new(struct pim_instance *pim,
648 struct prefix_sg *sg)
649{
650 struct pim_vxlan_sg *vxlan_sg;
651
652 vxlan_sg = XCALLOC(MTYPE_PIM_VXLAN_SG, sizeof(*vxlan_sg));
653
654 vxlan_sg->pim = pim;
655 vxlan_sg->sg = *sg;
656 pim_str_sg_set(sg, vxlan_sg->sg_str);
657
658 if (PIM_DEBUG_VXLAN)
659 zlog_debug("vxlan SG %s alloc", vxlan_sg->sg_str);
660
661 vxlan_sg = hash_get(pim->vxlan.sg_hash, vxlan_sg, hash_alloc_intern);
662
663 return vxlan_sg;
664}
665
666struct pim_vxlan_sg *pim_vxlan_sg_find(struct pim_instance *pim,
667 struct prefix_sg *sg)
668{
669 struct pim_vxlan_sg lookup;
670
671 lookup.sg = *sg;
672 return hash_lookup(pim->vxlan.sg_hash, &lookup);
673}
674
675struct pim_vxlan_sg *pim_vxlan_sg_add(struct pim_instance *pim,
676 struct prefix_sg *sg)
677{
678 struct pim_vxlan_sg *vxlan_sg;
679
680 vxlan_sg = pim_vxlan_sg_find(pim, sg);
681 if (vxlan_sg)
682 return vxlan_sg;
683
684 vxlan_sg = pim_vxlan_sg_new(pim, sg);
685
a513da36
AK
686 if (pim_vxlan_is_orig_mroute(vxlan_sg))
687 pim_vxlan_orig_mr_add(vxlan_sg);
332087df
AK
688 else
689 pim_vxlan_term_mr_add(vxlan_sg);
a513da36 690
b583b035
AK
691 return vxlan_sg;
692}
693
694void pim_vxlan_sg_del(struct pim_instance *pim, struct prefix_sg *sg)
695{
696 struct pim_vxlan_sg *vxlan_sg;
697
698 vxlan_sg = pim_vxlan_sg_find(pim, sg);
699 if (!vxlan_sg)
700 return;
701
a513da36
AK
702 vxlan_sg->flags |= PIM_VXLAN_SGF_DEL_IN_PROG;
703
b9f3a51c
AK
704 pim_vxlan_del_work(vxlan_sg);
705
a513da36
AK
706 if (pim_vxlan_is_orig_mroute(vxlan_sg))
707 pim_vxlan_orig_mr_del(vxlan_sg);
332087df
AK
708 else
709 pim_vxlan_term_mr_del(vxlan_sg);
a513da36 710
b583b035
AK
711 hash_release(vxlan_sg->pim->vxlan.sg_hash, vxlan_sg);
712
713 if (PIM_DEBUG_VXLAN)
714 zlog_debug("vxlan SG %s free", vxlan_sg->sg_str);
715
716 XFREE(MTYPE_PIM_VXLAN_SG, vxlan_sg);
717}
718
48b33862
AK
719/******************************* MLAG handling *******************************/
720/* The peerlink sub-interface is added as an OIF to the origination-mroute.
721 * This is done to send a copy of the multicast-vxlan encapsulated traffic
722 * to the MLAG peer which may mroute it over the underlay if there are any
723 * interested receivers.
724 */
725static void pim_vxlan_sg_peerlink_update(struct hash_backet *backet, void *arg)
726{
727 struct interface *new_oif = (struct interface *)arg;
728 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data;
729
730 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
731 return;
732
733 if (vxlan_sg->orig_oif == new_oif)
734 return;
735
736 pim_vxlan_orig_mr_oif_del(vxlan_sg);
737
738 vxlan_sg->orig_oif = new_oif;
739 pim_vxlan_orig_mr_oif_add(vxlan_sg);
740}
741
742/* In the case of anycast VTEPs the VTEP-PIP must be used as the
743 * register source.
744 */
745bool pim_vxlan_get_register_src(struct pim_instance *pim,
746 struct pim_upstream *up, struct in_addr *src_p)
747{
748 if (!(vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED))
749 return true;
750
751 /* if address is not available suppress the pim-register */
752 if (vxlan_mlag.reg_addr.s_addr == INADDR_ANY)
753 return false;
754
755 *src_p = vxlan_mlag.reg_addr;
756 return true;
757}
758
759void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role,
760 struct interface *peerlink_rif,
761 struct in_addr *reg_addr)
762{
763 struct pim_instance *pim;
764 struct interface *old_oif;
765 struct interface *new_oif;
766 char addr_buf[INET_ADDRSTRLEN];
767 struct pim_interface *pim_ifp = NULL;
768
769 if (PIM_DEBUG_VXLAN) {
770 inet_ntop(AF_INET, reg_addr,
771 addr_buf, INET_ADDRSTRLEN);
772 zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s",
773 enable ? "enable" : "disable",
774 peer_state ? "up" : "down",
775 role,
776 peerlink_rif ? peerlink_rif->name : "-",
777 addr_buf);
778 }
779
780 /* XXX: for now vxlan termination is only possible in the default VRF
781 * when that changes this will need to change to iterate all VRFs
782 */
783 pim = pim_get_pim_instance(VRF_DEFAULT);
784
785 old_oif = pim_vxlan_orig_mr_oif_get(pim);
786
787 if (enable)
788 vxlan_mlag.flags |= PIM_VXLAN_MLAGF_ENABLED;
789 else
790 vxlan_mlag.flags &= ~PIM_VXLAN_MLAGF_ENABLED;
791
792 if (vxlan_mlag.peerlink_rif != peerlink_rif)
793 vxlan_mlag.peerlink_rif = peerlink_rif;
794
795 vxlan_mlag.reg_addr = *reg_addr;
796 vxlan_mlag.peer_state = peer_state;
797 vxlan_mlag.role = role;
798
799 /* process changes */
800 if (vxlan_mlag.peerlink_rif)
801 pim_ifp = (struct pim_interface *)vxlan_mlag.peerlink_rif->info;
802 if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
803 pim_ifp && (pim_ifp->mroute_vif_index > 0))
804 pim_vxlan_set_peerlink_rif(pim, peerlink_rif);
805 else
806 pim_vxlan_set_peerlink_rif(pim, NULL);
807
808 new_oif = pim_vxlan_orig_mr_oif_get(pim);
809 if (old_oif != new_oif)
810 hash_iterate(pim->vxlan.sg_hash, pim_vxlan_sg_peerlink_update,
811 new_oif);
812}
813
269c1fe1 814/****************************** misc callbacks *******************************/
7c85225c
AK
815void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes)
816{
817 char addr_buf[INET_ADDRSTRLEN];
818
819 if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
820 vxlan_mlag.peerlink_rif) {
821
822 inet_ntop(AF_INET, &vxlan_mlag.reg_addr,
823 addr_buf, sizeof(addr_buf));
824 vty_out(vty,
825 "%sip pim mlag %s role %s state %s addr %s\n",
826 spaces,
827 vxlan_mlag.peerlink_rif->name,
828 (vxlan_mlag.role == PIM_VXLAN_MLAG_ROLE_PRIMARY) ?
829 "primary":"secondary",
830 vxlan_mlag.peer_state ? "up" : "down",
831 addr_buf);
832 *writes += 1;
833 }
834}
835
269c1fe1
AK
836static void pim_vxlan_set_default_iif(struct pim_instance *pim,
837 struct interface *ifp)
838{
839 struct interface *old_iif;
840
841 if (pim->vxlan.default_iif == ifp)
842 return;
843
844 old_iif = pim->vxlan.default_iif;
845 if (PIM_DEBUG_VXLAN)
846 zlog_debug("%s: vxlan default iif changed from %s to %s",
847 __PRETTY_FUNCTION__,
848 old_iif ? old_iif->name : "-",
849 ifp ? ifp->name : "-");
850
851 old_iif = pim_vxlan_orig_mr_iif_get(pim);
852 pim->vxlan.default_iif = ifp;
853 ifp = pim_vxlan_orig_mr_iif_get(pim);
854 if (old_iif == ifp)
855 return;
856
857 if (PIM_DEBUG_VXLAN)
858 zlog_debug("%s: vxlan orig iif changed from %s to %s",
859 __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-",
860 ifp ? ifp->name : "-");
861
862 /* add/del upstream entries for the existing vxlan SG when the
863 * interface becomes available
864 */
b21e0c93
AK
865 if (pim->vxlan.sg_hash)
866 hash_iterate(pim->vxlan.sg_hash,
867 pim_vxlan_orig_mr_iif_update, ifp);
269c1fe1
AK
868}
869
870static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
871 struct interface *ifp)
872{
873 struct interface *old_iif;
874
875 if (pim->vxlan.peerlink_rif == ifp)
876 return;
877
878 old_iif = pim->vxlan.peerlink_rif;
879 if (PIM_DEBUG_VXLAN)
880 zlog_debug("%s: vxlan peerlink_rif changed from %s to %s",
881 __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-",
882 ifp ? ifp->name : "-");
883
884 old_iif = pim_vxlan_orig_mr_iif_get(pim);
885 pim->vxlan.peerlink_rif = ifp;
886 ifp = pim_vxlan_orig_mr_iif_get(pim);
887 if (old_iif == ifp)
888 return;
889
890 if (PIM_DEBUG_VXLAN)
891 zlog_debug("%s: vxlan orig iif changed from %s to %s",
892 __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-",
893 ifp ? ifp->name : "-");
894
895 /* add/del upstream entries for the existing vxlan SG when the
896 * interface becomes available
897 */
b21e0c93
AK
898 if (pim->vxlan.sg_hash)
899 hash_iterate(pim->vxlan.sg_hash,
900 pim_vxlan_orig_mr_iif_update, ifp);
269c1fe1
AK
901}
902
903void pim_vxlan_add_vif(struct interface *ifp)
904{
905 struct pim_interface *pim_ifp = ifp->info;
906 struct pim_instance *pim = pim_ifp->pim;
907
269c1fe1
AK
908 if (pim->vrf_id != VRF_DEFAULT)
909 return;
910
911 if (if_is_loopback_or_vrf(ifp))
912 pim_vxlan_set_default_iif(pim, ifp);
913
914 if (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED &&
915 (ifp == vxlan_mlag.peerlink_rif))
916 pim_vxlan_set_peerlink_rif(pim, ifp);
917}
918
919void pim_vxlan_del_vif(struct interface *ifp)
920{
921 struct pim_interface *pim_ifp = ifp->info;
922 struct pim_instance *pim = pim_ifp->pim;
923
924 if (pim->vrf_id != VRF_DEFAULT)
925 return;
926
927 if (pim->vxlan.default_iif == ifp)
928 pim_vxlan_set_default_iif(pim, NULL);
929
930 if (pim->vxlan.peerlink_rif == ifp)
931 pim_vxlan_set_peerlink_rif(pim, NULL);
932}
933
0a2dcc1c
AK
934static void pim_vxlan_term_mr_oif_update(struct hash_backet *backet, void *arg)
935{
936 struct interface *ifp = (struct interface *)arg;
937 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data;
938
939 if (pim_vxlan_is_orig_mroute(vxlan_sg))
940 return;
941
942 if (vxlan_sg->term_oif == ifp)
943 return;
944
945 if (PIM_DEBUG_VXLAN)
946 zlog_debug("vxlan SG %s term oif changed from %s to %s",
947 vxlan_sg->sg_str,
948 vxlan_sg->term_oif ? vxlan_sg->term_oif->name : "-",
949 ifp ? ifp->name : "-");
950
951 pim_vxlan_term_mr_del(vxlan_sg);
952 vxlan_sg->term_oif = ifp;
953 pim_vxlan_term_mr_add(vxlan_sg);
954}
955
956void pim_vxlan_add_term_dev(struct pim_instance *pim,
957 struct interface *ifp)
958{
959 struct pim_interface *pim_ifp;
960
961 if (pim->vxlan.term_if == ifp)
962 return;
963
964 if (PIM_DEBUG_VXLAN)
965 zlog_debug("vxlan term oif changed from %s to %s",
966 pim->vxlan.term_if ? pim->vxlan.term_if->name : "-",
7b227daf 967 ifp->name);
0a2dcc1c
AK
968
969 /* enable pim on the term ifp */
970 pim_ifp = (struct pim_interface *)ifp->info;
5121278e 971 if (pim_ifp) {
0a2dcc1c 972 PIM_IF_DO_PIM(pim_ifp->options);
5121278e 973 } else {
0a2dcc1c
AK
974 pim_ifp = pim_if_new(ifp, false /*igmp*/, true /*pim*/,
975 false /*pimreg*/, true /*vxlan_term*/);
5121278e
AK
976 /* ensure that pimreg existss before using the newly created
977 * vxlan termination device
978 */
979 pim_if_create_pimreg(pim);
980 }
0a2dcc1c
AK
981
982 pim->vxlan.term_if = ifp;
b21e0c93
AK
983
984 if (pim->vxlan.sg_hash)
985 hash_iterate(pim_ifp->pim->vxlan.sg_hash,
986 pim_vxlan_term_mr_oif_update, ifp);
0a2dcc1c
AK
987}
988
989void pim_vxlan_del_term_dev(struct pim_instance *pim)
990{
991 struct interface *ifp = pim->vxlan.term_if;
992 struct pim_interface *pim_ifp;
993
994 if (PIM_DEBUG_VXLAN)
995 zlog_debug("vxlan term oif changed from %s to -", ifp->name);
996
997 pim->vxlan.term_if = NULL;
b21e0c93
AK
998
999 if (pim->vxlan.sg_hash)
1000 hash_iterate(pim->vxlan.sg_hash,
1001 pim_vxlan_term_mr_oif_update, NULL);
0a2dcc1c
AK
1002
1003 pim_ifp = (struct pim_interface *)ifp->info;
1004 if (pim_ifp) {
1005 PIM_IF_DONT_PIM(pim_ifp->options);
1006 if (!PIM_IF_TEST_IGMP(pim_ifp->options))
1007 pim_if_delete(ifp);
1008 }
1009
1010}
1011
b583b035
AK
1012void pim_vxlan_init(struct pim_instance *pim)
1013{
1014 char hash_name[64];
1015
1016 snprintf(hash_name, sizeof(hash_name),
1017 "PIM %s vxlan SG hash", pim->vrf->name);
1018 pim->vxlan.sg_hash = hash_create(pim_vxlan_sg_hash_key_make,
1019 pim_vxlan_sg_hash_eq, hash_name);
1020}
1021
1022void pim_vxlan_exit(struct pim_instance *pim)
1023{
1024 if (pim->vxlan.sg_hash) {
1025 hash_clean(pim->vxlan.sg_hash, NULL);
1026 hash_free(pim->vxlan.sg_hash);
1027 pim->vxlan.sg_hash = NULL;
1028 }
1029}