]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_vxlan.c
af76c6d732cbf548c77741076b81c60b4f8752a2
[mirror_frr.git] / pimd / pim_vxlan.c
1 /* PIM support for VxLAN BUM flooding
2 *
3 * Copyright (C) 2019 Cumulus Networks, Inc.
4 *
5 * This file is part of FRR.
6 *
7 * FRR is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; either version 2, or (at your option) any
10 * later version.
11 *
12 * FRR is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License as published by
18 * the Free Software Foundation; either version 2 of the License, or
19 * (at your option) any later version.
20 */
21
22 #include <zebra.h>
23
24 #include <hash.h>
25 #include <jhash.h>
26 #include <log.h>
27 #include <prefix.h>
28 #include <vrf.h>
29
30 #include "pimd.h"
31 #include "pim_iface.h"
32 #include "pim_memory.h"
33 #include "pim_oil.h"
34 #include "pim_register.h"
35 #include "pim_str.h"
36 #include "pim_upstream.h"
37 #include "pim_ifchannel.h"
38 #include "pim_nht.h"
39 #include "pim_zebra.h"
40 #include "pim_vxlan.h"
41
42 /* pim-vxlan global info */
43 struct pim_vxlan vxlan_info, *pim_vxlan_p = &vxlan_info;
44
45 static void pim_vxlan_work_timer_setup(bool start);
46 static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
47 struct interface *ifp);
48
49 /*************************** vxlan work list **********************************
50 * A work list is maintained for staggered generation of pim null register
51 * messages for vxlan SG entries that are in a reg_join state.
52 *
 * A max of 500 NULL registers are generated in one shot. If generation is
 * paused, it continues on the next second, and so on, until all register
 * messages have been sent out. The whole process is restarted every 60s.
56 *
 * The purpose of this null register generation is to set up the SPT and
 * maintain it independent of the presence of overlay BUM traffic.
59 ****************************************************************************/
60 static void pim_vxlan_do_reg_work(void)
61 {
62 struct listnode *listnode;
63 int work_cnt = 0;
64 struct pim_vxlan_sg *vxlan_sg;
65 static int sec_count;
66
67 ++sec_count;
68
69 if (sec_count > PIM_VXLAN_NULL_REG_INTERVAL) {
70 sec_count = 0;
71 listnode = vxlan_info.next_work ?
72 vxlan_info.next_work :
73 vxlan_info.work_list->head;
74 if (PIM_DEBUG_VXLAN && listnode)
75 zlog_debug("vxlan SG work %s",
76 vxlan_info.next_work ? "continues" : "starts");
77 } else {
78 listnode = vxlan_info.next_work;
79 }
80
81 for (; listnode; listnode = listnode->next) {
82 vxlan_sg = (struct pim_vxlan_sg *)listnode->data;
83 if (vxlan_sg->up && (vxlan_sg->up->reg_state == PIM_REG_JOIN)) {
84 if (PIM_DEBUG_VXLAN)
85 zlog_debug("vxlan SG %s periodic NULL register",
86 vxlan_sg->sg_str);
87 pim_null_register_send(vxlan_sg->up);
88 ++work_cnt;
89 }
90
91 if (work_cnt > vxlan_info.max_work_cnt) {
92 vxlan_info.next_work = listnode->next;
93 if (PIM_DEBUG_VXLAN)
94 zlog_debug("vxlan SG %d work items proc and pause",
95 work_cnt);
96 return;
97 }
98 }
99
100 if (work_cnt) {
101 if (PIM_DEBUG_VXLAN)
102 zlog_debug("vxlan SG %d work items proc", work_cnt);
103 }
104 vxlan_info.next_work = NULL;
105 }
106
107 /* Staggered work related info is initialized when the first work comes
108 * along
109 */
110 static void pim_vxlan_init_work(void)
111 {
112 if (vxlan_info.flags & PIM_VXLANF_WORK_INITED)
113 return;
114
115 vxlan_info.max_work_cnt = PIM_VXLAN_WORK_MAX;
116 vxlan_info.flags |= PIM_VXLANF_WORK_INITED;
117 vxlan_info.work_list = list_new();
118 pim_vxlan_work_timer_setup(TRUE /* start */);
119 }
120
121 static void pim_vxlan_add_work(struct pim_vxlan_sg *vxlan_sg)
122 {
123 if (vxlan_sg->flags & PIM_VXLAN_SGF_DEL_IN_PROG) {
124 if (PIM_DEBUG_VXLAN)
125 zlog_debug("vxlan SG %s skip work list; del-in-prog",
126 vxlan_sg->sg_str);
127 return;
128 }
129
130 pim_vxlan_init_work();
131
132 /* already a part of the work list */
133 if (vxlan_sg->work_node)
134 return;
135
136 if (PIM_DEBUG_VXLAN)
137 zlog_debug("vxlan SG %s work list add",
138 vxlan_sg->sg_str);
139 vxlan_sg->work_node = listnode_add(vxlan_info.work_list, vxlan_sg);
140 /* XXX: adjust max_work_cnt if needed */
141 }
142
143 static void pim_vxlan_del_work(struct pim_vxlan_sg *vxlan_sg)
144 {
145 if (!vxlan_sg->work_node)
146 return;
147
148 if (PIM_DEBUG_VXLAN)
149 zlog_debug("vxlan SG %s work list del",
150 vxlan_sg->sg_str);
151
152 if (vxlan_sg->work_node == vxlan_info.next_work)
153 vxlan_info.next_work = vxlan_sg->work_node->next;
154
155 list_delete_node(vxlan_info.work_list, vxlan_sg->work_node);
156 vxlan_sg->work_node = NULL;
157 }
158
159 void pim_vxlan_update_sg_reg_state(struct pim_instance *pim,
160 struct pim_upstream *up, bool reg_join)
161 {
162 struct pim_vxlan_sg *vxlan_sg;
163
164 vxlan_sg = pim_vxlan_sg_find(pim, &up->sg);
165 if (!vxlan_sg)
166 return;
167
168 /* add the vxlan sg entry to a work list for periodic reg joins.
169 * the entry will stay in the list as long as the register state is
170 * PIM_REG_JOIN
171 */
172 if (reg_join)
173 pim_vxlan_add_work(vxlan_sg);
174 else
175 pim_vxlan_del_work(vxlan_sg);
176 }
177
/* Timer callback: performs this tick's share of register work and then
 * re-arms the timer. The thread argument is not used. Always returns 0.
 */
static int pim_vxlan_work_timer_cb(struct thread *t)
{
	pim_vxlan_do_reg_work();

	/* periodic: schedule the next tick */
	pim_vxlan_work_timer_setup(true /* start */);

	return 0;
}
184
185 /* global 1second timer used for periodic processing */
186 static void pim_vxlan_work_timer_setup(bool start)
187 {
188 THREAD_OFF(vxlan_info.work_timer);
189 if (start)
190 thread_add_timer(router->master, pim_vxlan_work_timer_cb, NULL,
191 PIM_VXLAN_WORK_TIME, &vxlan_info.work_timer);
192 }
193
194 /**************************** vxlan origination mroutes ***********************
195 * For every (local-vtep-ip, bum-mcast-grp) registered by evpn an origination
 * mroute is setup by pimd. The purpose of this mroute is to forward vxlan
 * encapsulated BUM (broadcast, unknown-unicast and unknown-multicast) packets
 * over the underlay.
199 *
200 * Sample mroute (single VTEP):
201 * (27.0.0.7, 239.1.1.100) Iif: lo Oifs: uplink-1
202 *
203 * Sample mroute (anycast VTEP):
204 * (36.0.0.9, 239.1.1.100) Iif: peerlink-3.4094\
205 * Oifs: peerlink-3.4094 uplink-1
206 ***************************************************************************/
207 static void pim_vxlan_orig_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
208 {
209 struct pim_upstream *up = vxlan_sg->up;
210
211 if (!up)
212 return;
213
214 if (PIM_DEBUG_VXLAN)
215 zlog_debug("vxlan SG %s orig mroute-up del",
216 vxlan_sg->sg_str);
217
218 vxlan_sg->up = NULL;
219 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG) {
220 /* clear out all the vxlan properties */
221 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_ORIG |
222 PIM_UPSTREAM_FLAG_MASK_STATIC_IIF |
223 PIM_UPSTREAM_FLAG_MASK_DISABLE_KAT_EXPIRY |
224 PIM_UPSTREAM_FLAG_MASK_FORCE_PIMREG |
225 PIM_UPSTREAM_FLAG_MASK_NO_PIMREG_DATA |
226 PIM_UPSTREAM_FLAG_MASK_ALLOW_IIF_IN_OIL);
227
228 /* We bring things to a grinding halt by force expirying
229 * the kat. Doing this will also remove the reference we
230 * created as a "vxlan" source and delete the upstream entry
231 * if there are no other references.
232 */
233 if (PIM_UPSTREAM_FLAG_TEST_SRC_STREAM(up->flags)) {
234 THREAD_OFF(up->t_ka_timer);
235 up = pim_upstream_keep_alive_timer_proc(up);
236 } else {
237 /* this is really unexpected as we force vxlan
238 * origination mroutes active sources but just in
239 * case
240 */
241 up = pim_upstream_del(vxlan_sg->pim, up,
242 __PRETTY_FUNCTION__);
243 }
244 /* if there are other references register the source
245 * for nht
246 */
247 if (up)
248 pim_rpf_update(vxlan_sg->pim, up, NULL, 1 /* is_new */);
249 }
250 }
251
252 static void pim_vxlan_orig_mr_up_iif_update(struct pim_vxlan_sg *vxlan_sg)
253 {
254 int vif_index;
255
256 /* update MFC with the new IIF */
257 pim_upstream_fill_static_iif(vxlan_sg->up, vxlan_sg->iif);
258 vif_index = pim_if_find_vifindex_by_ifindex(vxlan_sg->pim,
259 vxlan_sg->iif->ifindex);
260 if (vif_index > 0)
261 pim_scan_individual_oil(vxlan_sg->up->channel_oil,
262 vif_index);
263
264 if (PIM_DEBUG_VXLAN)
265 zlog_debug("vxlan SG %s orig mroute-up updated with iif %s vifi %d",
266 vxlan_sg->sg_str,
267 vxlan_sg->iif?vxlan_sg->iif->name:"-", vif_index);
268
269 }
270
271 /* For every VxLAN BUM multicast group we setup a SG-up that has the following
272 * "forced properties" -
273 * 1. Directly connected on a DR interface i.e. we must act as an FHR
274 * 2. We prime the pump i.e. no multicast data is needed to register this
275 * source with the FHR. To do that we send periodic null registers if
276 * the SG entry is in a register-join state. We also prevent expiry of
277 * KAT.
278 * 3. As this SG is setup without data there is no need to register encapsulate
279 * data traffic. This encapsulation is explicitly skipped for the following
280 * reasons -
281 * a) Many levels of encapsulation are needed creating MTU disc challenges.
282 * Overlay BUM is encapsulated in a vxlan/UDP/IP header and then
283 * encapsulated again in a pim-register header.
284 * b) On a vxlan-aa setup both switches rx a copy of each BUM packet. if
285 * they both reg encapsulated traffic the RP will accept the duplicates
286 * as there are no RPF checks for this encapsulated data.
 * Both a) and b) can be worked around if needed, but there is really no
 * need because of (2), i.e. the pump is primed without data.
289 */
290 static void pim_vxlan_orig_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
291 {
292 struct pim_upstream *up;
293 int flags = 0;
294 struct prefix nht_p;
295
296 if (vxlan_sg->up) {
297 /* nothing to do */
298 return;
299 }
300
301 if (PIM_DEBUG_VXLAN)
302 zlog_debug("vxlan SG %s orig mroute-up add with iif %s",
303 vxlan_sg->sg_str,
304 vxlan_sg->iif?vxlan_sg->iif->name:"-");
305
306 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_ORIG(flags);
307 /* pin the IIF to lo or peerlink-subinterface and disable NHT */
308 PIM_UPSTREAM_FLAG_SET_STATIC_IIF(flags);
309 /* Fake traffic by setting SRC_STREAM and starting KAT */
310 /* We intentionally skip updating ref count for SRC_STREAM/FHR.
311 * Setting SRC_VXLAN should have already created a reference
312 * preventing the entry from being deleted
313 */
314 PIM_UPSTREAM_FLAG_SET_FHR(flags);
315 PIM_UPSTREAM_FLAG_SET_SRC_STREAM(flags);
316 /* Force pimreg even if non-DR. This is needed on a MLAG setup for
317 * VxLAN AA
318 */
319 PIM_UPSTREAM_FLAG_SET_FORCE_PIMREG(flags);
320 /* prevent KAT expiry. we want the MDT setup even if there is no BUM
321 * traffic
322 */
323 PIM_UPSTREAM_FLAG_SET_DISABLE_KAT_EXPIRY(flags);
324 /* SPT for vxlan BUM groups is primed and maintained via NULL
325 * registers so there is no need to reg-encapsulate
326 * vxlan-encapsulated overlay data traffic
327 */
328 PIM_UPSTREAM_FLAG_SET_NO_PIMREG_DATA(flags);
329 /* On a MLAG setup we force a copy to the MLAG peer while also
330 * accepting traffic from the peer. To do this we set peerlink-rif as
331 * the IIF and also add it to the OIL
332 */
333 PIM_UPSTREAM_FLAG_SET_ALLOW_IIF_IN_OIL(flags);
334
335 /* XXX: todo: defer pim_upstream add if pim is not enabled on the iif */
336 up = pim_upstream_find(vxlan_sg->pim, &vxlan_sg->sg);
337 if (up) {
338 /* if the iif is set to something other than the vxlan_sg->iif
339 * we must dereg the old nexthop and force to new "static"
340 * iif
341 */
342 if (!PIM_UPSTREAM_FLAG_TEST_STATIC_IIF(up->flags)) {
343 nht_p.family = AF_INET;
344 nht_p.prefixlen = IPV4_MAX_BITLEN;
345 nht_p.u.prefix4 = up->upstream_addr;
346 pim_delete_tracked_nexthop(vxlan_sg->pim,
347 &nht_p, up, NULL);
348 }
349 pim_upstream_ref(up, flags, __PRETTY_FUNCTION__);
350 vxlan_sg->up = up;
351 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
352 } else {
353 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg,
354 vxlan_sg->iif, flags,
355 __PRETTY_FUNCTION__, NULL);
356 vxlan_sg->up = up;
357 }
358
359 if (!up) {
360 if (PIM_DEBUG_VXLAN)
361 zlog_debug("vxlan SG %s orig mroute-up add failed",
362 vxlan_sg->sg_str);
363 return;
364 }
365
366 pim_upstream_keep_alive_timer_start(up, vxlan_sg->pim->keep_alive_time);
367
368 /* register the source with the RP */
369 if (up->reg_state == PIM_REG_NOINFO) {
370 pim_register_join(up);
371 pim_null_register_send(up);
372 }
373
374 /* update the inherited OIL */
375 pim_upstream_inherited_olist(vxlan_sg->pim, up);
376 }
377
378 static void pim_vxlan_orig_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
379 {
380 if (!vxlan_sg->up || !vxlan_sg->orig_oif)
381 return;
382
383 if (PIM_DEBUG_VXLAN)
384 zlog_debug("vxlan SG %s oif %s add",
385 vxlan_sg->sg_str, vxlan_sg->orig_oif->name);
386
387 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
388 pim_channel_add_oif(vxlan_sg->up->channel_oil,
389 vxlan_sg->orig_oif, PIM_OIF_FLAG_PROTO_VXLAN);
390 }
391
392 static void pim_vxlan_orig_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
393 {
394 struct interface *orig_oif;
395
396 orig_oif = vxlan_sg->orig_oif;
397 vxlan_sg->orig_oif = NULL;
398
399 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
400 return;
401
402 if (PIM_DEBUG_VXLAN)
403 zlog_debug("vxlan SG %s oif %s del",
404 vxlan_sg->sg_str, orig_oif->name);
405
406 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
407 pim_channel_del_oif(vxlan_sg->up->channel_oil,
408 orig_oif, PIM_OIF_FLAG_PROTO_VXLAN);
409 }
410
411 static inline struct interface *pim_vxlan_orig_mr_oif_get(
412 struct pim_instance *pim)
413 {
414 return (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) ?
415 pim->vxlan.peerlink_rif : NULL;
416 }
417
418 /* Single VTEPs: IIF for the vxlan-origination-mroutes is lo or vrf-dev (if
419 * the mroute is in a non-default vrf).
420 * Anycast VTEPs: IIF is the MLAG ISL/peerlink.
421 */
422 static inline struct interface *pim_vxlan_orig_mr_iif_get(
423 struct pim_instance *pim)
424 {
425 return ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
426 pim->vxlan.peerlink_rif) ?
427 pim->vxlan.peerlink_rif : pim->vxlan.default_iif;
428 }
429
430 static bool pim_vxlan_orig_mr_add_is_ok(struct pim_vxlan_sg *vxlan_sg)
431 {
432 struct pim_interface *pim_ifp;
433
434 vxlan_sg->iif = pim_vxlan_orig_mr_iif_get(vxlan_sg->pim);
435 if (!vxlan_sg->iif)
436 return false;
437
438 pim_ifp = (struct pim_interface *)vxlan_sg->iif->info;
439 if (!pim_ifp || (pim_ifp->mroute_vif_index < 0))
440 return false;
441
442 return true;
443 }
444
445 static void pim_vxlan_orig_mr_install(struct pim_vxlan_sg *vxlan_sg)
446 {
447 pim_vxlan_orig_mr_up_add(vxlan_sg);
448
449 vxlan_sg->orig_oif = pim_vxlan_orig_mr_oif_get(vxlan_sg->pim);
450 pim_vxlan_orig_mr_oif_add(vxlan_sg);
451 }
452
453 static void pim_vxlan_orig_mr_add(struct pim_vxlan_sg *vxlan_sg)
454 {
455 if (!pim_vxlan_orig_mr_add_is_ok(vxlan_sg))
456 return;
457
458 if (PIM_DEBUG_VXLAN)
459 zlog_debug("vxlan SG %s orig-mr add", vxlan_sg->sg_str);
460
461 pim_vxlan_orig_mr_install(vxlan_sg);
462 }
463
464 static void pim_vxlan_orig_mr_del(struct pim_vxlan_sg *vxlan_sg)
465 {
466 if (PIM_DEBUG_VXLAN)
467 zlog_debug("vxlan SG %s orig-mr del", vxlan_sg->sg_str);
468
469 pim_vxlan_orig_mr_oif_del(vxlan_sg);
470 pim_vxlan_orig_mr_up_del(vxlan_sg);
471 }
472
473 static void pim_vxlan_orig_mr_iif_update(struct hash_backet *backet, void *arg)
474 {
475 struct interface *ifp = (struct interface *)arg;
476 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data;
477 struct interface *old_iif = vxlan_sg->iif;
478
479 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
480 return;
481
482 if (PIM_DEBUG_VXLAN)
483 zlog_debug("vxlan SG %s iif changed from %s to %s",
484 vxlan_sg->sg_str,
485 old_iif ? old_iif->name : "-",
486 ifp ? ifp->name : "-");
487
488 if (pim_vxlan_orig_mr_add_is_ok(vxlan_sg)) {
489 if (vxlan_sg->up) {
490 /* upstream exists but iif changed */
491 pim_vxlan_orig_mr_up_iif_update(vxlan_sg);
492 } else {
493 /* install mroute */
494 pim_vxlan_orig_mr_install(vxlan_sg);
495 }
496 } else {
497 pim_vxlan_orig_mr_del(vxlan_sg);
498 }
499 }
500
501 /**************************** vxlan termination mroutes ***********************
502 * For every bum-mcast-grp registered by evpn a *G termination
503 * mroute is setup by pimd. The purpose of this mroute is to pull down vxlan
504 * packets with the bum-mcast-grp dip from the underlay and terminate the
505 * tunnel. This is done by including the vxlan termination device (ipmr-lo) in
506 * its OIL. The vxlan de-capsulated packets are subject to subsequent overlay
507 * bridging.
508 *
509 * Sample mroute:
510 * (0.0.0.0, 239.1.1.100) Iif: uplink-1 Oifs: ipmr-lo, uplink-1
511 *****************************************************************************/
512 struct pim_interface *pim_vxlan_get_term_ifp(struct pim_instance *pim)
513 {
514 return pim->vxlan.term_if ?
515 (struct pim_interface *)pim->vxlan.term_if->info : NULL;
516 }
517
518 static void pim_vxlan_term_mr_oif_add(struct pim_vxlan_sg *vxlan_sg)
519 {
520 if (vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED)
521 return;
522
523 if (PIM_DEBUG_VXLAN)
524 zlog_debug("vxlan SG %s term-oif %s add",
525 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
526
527 if (pim_ifchannel_local_membership_add(vxlan_sg->term_oif,
528 &vxlan_sg->sg)) {
529 vxlan_sg->flags |= PIM_VXLAN_SGF_OIF_INSTALLED;
530 } else {
531 zlog_warn("vxlan SG %s term-oif %s add failed",
532 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
533 }
534 }
535
536 static void pim_vxlan_term_mr_oif_del(struct pim_vxlan_sg *vxlan_sg)
537 {
538 if (!(vxlan_sg->flags & PIM_VXLAN_SGF_OIF_INSTALLED))
539 return;
540
541 if (PIM_DEBUG_VXLAN)
542 zlog_debug("vxlan SG %s oif %s del",
543 vxlan_sg->sg_str, vxlan_sg->term_oif->name);
544
545 vxlan_sg->flags &= ~PIM_VXLAN_SGF_OIF_INSTALLED;
546 pim_ifchannel_local_membership_del(vxlan_sg->term_oif, &vxlan_sg->sg);
547 }
548
549 static void pim_vxlan_term_mr_up_add(struct pim_vxlan_sg *vxlan_sg)
550 {
551 struct pim_upstream *up;
552 int flags = 0;
553
554 if (vxlan_sg->up) {
555 /* nothing to do */
556 return;
557 }
558
559 if (PIM_DEBUG_VXLAN)
560 zlog_debug("vxlan SG %s term mroute-up add",
561 vxlan_sg->sg_str);
562
563 PIM_UPSTREAM_FLAG_SET_SRC_VXLAN_TERM(flags);
564 /* enable MLAG designated-forwarder election on termination mroutes */
565 PIM_UPSTREAM_FLAG_SET_MLAG_VXLAN(flags);
566
567 up = pim_upstream_add(vxlan_sg->pim, &vxlan_sg->sg,
568 NULL /* iif */, flags,
569 __PRETTY_FUNCTION__, NULL);
570 vxlan_sg->up = up;
571
572 if (!up) {
573 zlog_warn("vxlan SG %s term mroute-up add failed",
574 vxlan_sg->sg_str);
575 }
576 }
577
578 static void pim_vxlan_term_mr_up_del(struct pim_vxlan_sg *vxlan_sg)
579 {
580 struct pim_upstream *up = vxlan_sg->up;
581
582 if (!up)
583 return;
584
585 if (PIM_DEBUG_VXLAN)
586 zlog_debug("vxlan SG %s term mroute-up del",
587 vxlan_sg->sg_str);
588 vxlan_sg->up = NULL;
589 if (up->flags & PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM) {
590 /* clear out all the vxlan related flags */
591 up->flags &= ~(PIM_UPSTREAM_FLAG_MASK_SRC_VXLAN_TERM |
592 PIM_UPSTREAM_FLAG_MASK_MLAG_VXLAN);
593
594 pim_upstream_del(vxlan_sg->pim, up,
595 __PRETTY_FUNCTION__);
596 }
597 }
598
599 static void pim_vxlan_term_mr_add(struct pim_vxlan_sg *vxlan_sg)
600 {
601 if (PIM_DEBUG_VXLAN)
602 zlog_debug("vxlan SG %s term mroute add", vxlan_sg->sg_str);
603
604 vxlan_sg->term_oif = vxlan_sg->pim->vxlan.term_if;
605 if (!vxlan_sg->term_oif)
606 /* defer termination mroute till we have a termination device */
607 return;
608
609 pim_vxlan_term_mr_up_add(vxlan_sg);
610 /* set up local membership for the term-oif */
611 pim_vxlan_term_mr_oif_add(vxlan_sg);
612 }
613
614 static void pim_vxlan_term_mr_del(struct pim_vxlan_sg *vxlan_sg)
615 {
616 if (PIM_DEBUG_VXLAN)
617 zlog_debug("vxlan SG %s term mroute del", vxlan_sg->sg_str);
618
619 /* remove local membership associated with the term oif */
620 pim_vxlan_term_mr_oif_del(vxlan_sg);
621 /* remove references to the upstream entry */
622 pim_vxlan_term_mr_up_del(vxlan_sg);
623 }
624
625 /************************** vxlan SG cache management ************************/
626 static unsigned int pim_vxlan_sg_hash_key_make(void *p)
627 {
628 struct pim_vxlan_sg *vxlan_sg = p;
629
630 return (jhash_2words(vxlan_sg->sg.src.s_addr,
631 vxlan_sg->sg.grp.s_addr, 0));
632 }
633
634 static bool pim_vxlan_sg_hash_eq(const void *p1, const void *p2)
635 {
636 const struct pim_vxlan_sg *sg1 = p1;
637 const struct pim_vxlan_sg *sg2 = p2;
638
639 return ((sg1->sg.src.s_addr == sg2->sg.src.s_addr)
640 && (sg1->sg.grp.s_addr == sg2->sg.grp.s_addr));
641 }
642
643 static struct pim_vxlan_sg *pim_vxlan_sg_new(struct pim_instance *pim,
644 struct prefix_sg *sg)
645 {
646 struct pim_vxlan_sg *vxlan_sg;
647
648 vxlan_sg = XCALLOC(MTYPE_PIM_VXLAN_SG, sizeof(*vxlan_sg));
649
650 vxlan_sg->pim = pim;
651 vxlan_sg->sg = *sg;
652 pim_str_sg_set(sg, vxlan_sg->sg_str);
653
654 if (PIM_DEBUG_VXLAN)
655 zlog_debug("vxlan SG %s alloc", vxlan_sg->sg_str);
656
657 vxlan_sg = hash_get(pim->vxlan.sg_hash, vxlan_sg, hash_alloc_intern);
658
659 return vxlan_sg;
660 }
661
662 struct pim_vxlan_sg *pim_vxlan_sg_find(struct pim_instance *pim,
663 struct prefix_sg *sg)
664 {
665 struct pim_vxlan_sg lookup;
666
667 lookup.sg = *sg;
668 return hash_lookup(pim->vxlan.sg_hash, &lookup);
669 }
670
/* Returns the vxlan SG entry for sg, creating it if necessary. A newly
 * created entry gets its origination or termination mroute set up,
 * depending on pim_vxlan_is_orig_mroute().
 */
struct pim_vxlan_sg *pim_vxlan_sg_add(struct pim_instance *pim,
		struct prefix_sg *sg)
{
	struct pim_vxlan_sg *vxlan_sg = pim_vxlan_sg_find(pim, sg);

	if (vxlan_sg == NULL) {
		vxlan_sg = pim_vxlan_sg_new(pim, sg);

		if (pim_vxlan_is_orig_mroute(vxlan_sg))
			pim_vxlan_orig_mr_add(vxlan_sg);
		else
			pim_vxlan_term_mr_add(vxlan_sg);
	}

	return vxlan_sg;
}
689
690 void pim_vxlan_sg_del(struct pim_instance *pim, struct prefix_sg *sg)
691 {
692 struct pim_vxlan_sg *vxlan_sg;
693
694 vxlan_sg = pim_vxlan_sg_find(pim, sg);
695 if (!vxlan_sg)
696 return;
697
698 vxlan_sg->flags |= PIM_VXLAN_SGF_DEL_IN_PROG;
699
700 pim_vxlan_del_work(vxlan_sg);
701
702 if (pim_vxlan_is_orig_mroute(vxlan_sg))
703 pim_vxlan_orig_mr_del(vxlan_sg);
704 else
705 pim_vxlan_term_mr_del(vxlan_sg);
706
707 hash_release(vxlan_sg->pim->vxlan.sg_hash, vxlan_sg);
708
709 if (PIM_DEBUG_VXLAN)
710 zlog_debug("vxlan SG %s free", vxlan_sg->sg_str);
711
712 XFREE(MTYPE_PIM_VXLAN_SG, vxlan_sg);
713 }
714
715 /******************************* MLAG handling *******************************/
716 /* The peerlink sub-interface is added as an OIF to the origination-mroute.
717 * This is done to send a copy of the multicast-vxlan encapsulated traffic
718 * to the MLAG peer which may mroute it over the underlay if there are any
719 * interested receivers.
720 */
721 static void pim_vxlan_sg_peerlink_update(struct hash_backet *backet, void *arg)
722 {
723 struct interface *new_oif = (struct interface *)arg;
724 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data;
725
726 if (!pim_vxlan_is_orig_mroute(vxlan_sg))
727 return;
728
729 if (vxlan_sg->orig_oif == new_oif)
730 return;
731
732 pim_vxlan_orig_mr_oif_del(vxlan_sg);
733
734 vxlan_sg->orig_oif = new_oif;
735 pim_vxlan_orig_mr_oif_add(vxlan_sg);
736 }
737
738 /* In the case of anycast VTEPs the VTEP-PIP must be used as the
739 * register source.
740 */
741 bool pim_vxlan_get_register_src(struct pim_instance *pim,
742 struct pim_upstream *up, struct in_addr *src_p)
743 {
744 if (!(vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED))
745 return true;
746
747 /* if address is not available suppress the pim-register */
748 if (vxlan_mlag.reg_addr.s_addr == INADDR_ANY)
749 return false;
750
751 *src_p = vxlan_mlag.reg_addr;
752 return true;
753 }
754
755 void pim_vxlan_mlag_update(bool enable, bool peer_state, uint32_t role,
756 struct interface *peerlink_rif,
757 struct in_addr *reg_addr)
758 {
759 struct pim_instance *pim;
760 struct interface *old_oif;
761 struct interface *new_oif;
762 char addr_buf[INET_ADDRSTRLEN];
763 struct pim_interface *pim_ifp = NULL;
764
765 if (PIM_DEBUG_VXLAN) {
766 inet_ntop(AF_INET, reg_addr,
767 addr_buf, INET_ADDRSTRLEN);
768 zlog_debug("vxlan MLAG update %s state %s role %d rif %s addr %s",
769 enable ? "enable" : "disable",
770 peer_state ? "up" : "down",
771 role,
772 peerlink_rif ? peerlink_rif->name : "-",
773 addr_buf);
774 }
775
776 /* XXX: for now vxlan termination is only possible in the default VRF
777 * when that changes this will need to change to iterate all VRFs
778 */
779 pim = pim_get_pim_instance(VRF_DEFAULT);
780
781 old_oif = pim_vxlan_orig_mr_oif_get(pim);
782
783 if (enable)
784 vxlan_mlag.flags |= PIM_VXLAN_MLAGF_ENABLED;
785 else
786 vxlan_mlag.flags &= ~PIM_VXLAN_MLAGF_ENABLED;
787
788 if (vxlan_mlag.peerlink_rif != peerlink_rif)
789 vxlan_mlag.peerlink_rif = peerlink_rif;
790
791 vxlan_mlag.reg_addr = *reg_addr;
792 vxlan_mlag.peer_state = peer_state;
793 vxlan_mlag.role = role;
794
795 /* process changes */
796 if (vxlan_mlag.peerlink_rif)
797 pim_ifp = (struct pim_interface *)vxlan_mlag.peerlink_rif->info;
798 if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
799 pim_ifp && (pim_ifp->mroute_vif_index > 0))
800 pim_vxlan_set_peerlink_rif(pim, peerlink_rif);
801 else
802 pim_vxlan_set_peerlink_rif(pim, NULL);
803
804 new_oif = pim_vxlan_orig_mr_oif_get(pim);
805 if (old_oif != new_oif)
806 hash_iterate(pim->vxlan.sg_hash, pim_vxlan_sg_peerlink_update,
807 new_oif);
808 }
809
810 /****************************** misc callbacks *******************************/
811 void pim_vxlan_config_write(struct vty *vty, char *spaces, int *writes)
812 {
813 char addr_buf[INET_ADDRSTRLEN];
814
815 if ((vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED) &&
816 vxlan_mlag.peerlink_rif) {
817
818 inet_ntop(AF_INET, &vxlan_mlag.reg_addr,
819 addr_buf, sizeof(addr_buf));
820 vty_out(vty,
821 "%sip pim mlag %s role %s state %s addr %s\n",
822 spaces,
823 vxlan_mlag.peerlink_rif->name,
824 (vxlan_mlag.role == PIM_VXLAN_MLAG_ROLE_PRIMARY) ?
825 "primary":"secondary",
826 vxlan_mlag.peer_state ? "up" : "down",
827 addr_buf);
828 *writes += 1;
829 }
830 }
831
832 static void pim_vxlan_set_default_iif(struct pim_instance *pim,
833 struct interface *ifp)
834 {
835 struct interface *old_iif;
836
837 if (pim->vxlan.default_iif == ifp)
838 return;
839
840 old_iif = pim->vxlan.default_iif;
841 if (PIM_DEBUG_VXLAN)
842 zlog_debug("%s: vxlan default iif changed from %s to %s",
843 __PRETTY_FUNCTION__,
844 old_iif ? old_iif->name : "-",
845 ifp ? ifp->name : "-");
846
847 old_iif = pim_vxlan_orig_mr_iif_get(pim);
848 pim->vxlan.default_iif = ifp;
849 ifp = pim_vxlan_orig_mr_iif_get(pim);
850 if (old_iif == ifp)
851 return;
852
853 if (PIM_DEBUG_VXLAN)
854 zlog_debug("%s: vxlan orig iif changed from %s to %s",
855 __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-",
856 ifp ? ifp->name : "-");
857
858 /* add/del upstream entries for the existing vxlan SG when the
859 * interface becomes available
860 */
861 if (pim->vxlan.sg_hash)
862 hash_iterate(pim->vxlan.sg_hash,
863 pim_vxlan_orig_mr_iif_update, ifp);
864 }
865
866 static void pim_vxlan_set_peerlink_rif(struct pim_instance *pim,
867 struct interface *ifp)
868 {
869 struct interface *old_iif;
870
871 if (pim->vxlan.peerlink_rif == ifp)
872 return;
873
874 old_iif = pim->vxlan.peerlink_rif;
875 if (PIM_DEBUG_VXLAN)
876 zlog_debug("%s: vxlan peerlink_rif changed from %s to %s",
877 __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-",
878 ifp ? ifp->name : "-");
879
880 old_iif = pim_vxlan_orig_mr_iif_get(pim);
881 pim->vxlan.peerlink_rif = ifp;
882 ifp = pim_vxlan_orig_mr_iif_get(pim);
883 if (old_iif == ifp)
884 return;
885
886 if (PIM_DEBUG_VXLAN)
887 zlog_debug("%s: vxlan orig iif changed from %s to %s",
888 __PRETTY_FUNCTION__, old_iif ? old_iif->name : "-",
889 ifp ? ifp->name : "-");
890
891 /* add/del upstream entries for the existing vxlan SG when the
892 * interface becomes available
893 */
894 if (pim->vxlan.sg_hash)
895 hash_iterate(pim->vxlan.sg_hash,
896 pim_vxlan_orig_mr_iif_update, ifp);
897 }
898
899 void pim_vxlan_add_vif(struct interface *ifp)
900 {
901 struct pim_interface *pim_ifp = ifp->info;
902 struct pim_instance *pim = pim_ifp->pim;
903
904 if (pim->vrf_id != VRF_DEFAULT)
905 return;
906
907 if (if_is_loopback_or_vrf(ifp))
908 pim_vxlan_set_default_iif(pim, ifp);
909
910 if (vxlan_mlag.flags & PIM_VXLAN_MLAGF_ENABLED &&
911 (ifp == vxlan_mlag.peerlink_rif))
912 pim_vxlan_set_peerlink_rif(pim, ifp);
913 }
914
915 void pim_vxlan_del_vif(struct interface *ifp)
916 {
917 struct pim_interface *pim_ifp = ifp->info;
918 struct pim_instance *pim = pim_ifp->pim;
919
920 if (pim->vrf_id != VRF_DEFAULT)
921 return;
922
923 if (pim->vxlan.default_iif == ifp)
924 pim_vxlan_set_default_iif(pim, NULL);
925
926 if (pim->vxlan.peerlink_rif == ifp)
927 pim_vxlan_set_peerlink_rif(pim, NULL);
928 }
929
930 static void pim_vxlan_term_mr_oif_update(struct hash_backet *backet, void *arg)
931 {
932 struct interface *ifp = (struct interface *)arg;
933 struct pim_vxlan_sg *vxlan_sg = (struct pim_vxlan_sg *)backet->data;
934
935 if (pim_vxlan_is_orig_mroute(vxlan_sg))
936 return;
937
938 if (vxlan_sg->term_oif == ifp)
939 return;
940
941 if (PIM_DEBUG_VXLAN)
942 zlog_debug("vxlan SG %s term oif changed from %s to %s",
943 vxlan_sg->sg_str,
944 vxlan_sg->term_oif ? vxlan_sg->term_oif->name : "-",
945 ifp ? ifp->name : "-");
946
947 pim_vxlan_term_mr_del(vxlan_sg);
948 vxlan_sg->term_oif = ifp;
949 pim_vxlan_term_mr_add(vxlan_sg);
950 }
951
952 void pim_vxlan_add_term_dev(struct pim_instance *pim,
953 struct interface *ifp)
954 {
955 struct pim_interface *pim_ifp;
956
957 if (pim->vxlan.term_if == ifp)
958 return;
959
960 if (PIM_DEBUG_VXLAN)
961 zlog_debug("vxlan term oif changed from %s to %s",
962 pim->vxlan.term_if ? pim->vxlan.term_if->name : "-",
963 ifp->name);
964
965 /* enable pim on the term ifp */
966 pim_ifp = (struct pim_interface *)ifp->info;
967 if (pim_ifp) {
968 PIM_IF_DO_PIM(pim_ifp->options);
969 } else {
970 pim_ifp = pim_if_new(ifp, false /*igmp*/, true /*pim*/,
971 false /*pimreg*/, true /*vxlan_term*/);
972 /* ensure that pimreg existss before using the newly created
973 * vxlan termination device
974 */
975 pim_if_create_pimreg(pim);
976 }
977
978 pim->vxlan.term_if = ifp;
979
980 if (pim->vxlan.sg_hash)
981 hash_iterate(pim_ifp->pim->vxlan.sg_hash,
982 pim_vxlan_term_mr_oif_update, ifp);
983 }
984
985 void pim_vxlan_del_term_dev(struct pim_instance *pim)
986 {
987 struct interface *ifp = pim->vxlan.term_if;
988 struct pim_interface *pim_ifp;
989
990 if (PIM_DEBUG_VXLAN)
991 zlog_debug("vxlan term oif changed from %s to -", ifp->name);
992
993 pim->vxlan.term_if = NULL;
994
995 if (pim->vxlan.sg_hash)
996 hash_iterate(pim->vxlan.sg_hash,
997 pim_vxlan_term_mr_oif_update, NULL);
998
999 pim_ifp = (struct pim_interface *)ifp->info;
1000 if (pim_ifp) {
1001 PIM_IF_DONT_PIM(pim_ifp->options);
1002 if (!PIM_IF_TEST_IGMP(pim_ifp->options))
1003 pim_if_delete(ifp);
1004 }
1005
1006 }
1007
1008 void pim_vxlan_init(struct pim_instance *pim)
1009 {
1010 char hash_name[64];
1011
1012 snprintf(hash_name, sizeof(hash_name),
1013 "PIM %s vxlan SG hash", pim->vrf->name);
1014 pim->vxlan.sg_hash = hash_create(pim_vxlan_sg_hash_key_make,
1015 pim_vxlan_sg_hash_eq, hash_name);
1016 }
1017
1018 void pim_vxlan_exit(struct pim_instance *pim)
1019 {
1020 if (pim->vxlan.sg_hash) {
1021 hash_clean(pim->vxlan.sg_hash, NULL);
1022 hash_free(pim->vxlan.sg_hash);
1023 pim->vxlan.sg_hash = NULL;
1024 }
1025 }