]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_mlag.c
*: auto-convert to SPDX License IDs
[mirror_frr.git] / pimd / pim_mlag.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * This is an implementation of PIM MLAG Functionality
4 *
5 * Module name: PIM MLAG
6 *
7 * Author: sathesh Kumar karra <sathk@cumulusnetworks.com>
8 *
9 * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com
10 */
11 #include <zebra.h>
12
13 #include "pimd.h"
14 #include "pim_mlag.h"
15 #include "pim_upstream.h"
16 #include "pim_vxlan.h"
17
18 extern struct zclient *zclient;
19
20 #define PIM_MLAG_METADATA_LEN 4
21
/*********************Actual Data processing *****************************/
23 /* TBD: There can be duplicate updates to FIB***/
/* Add a dual-active member interface's OIF to the channel oil.
 * NOTE(review): ADD and DEL expand to the *same*
 * pim_channel_update_oif_mute() call — presumably that helper re-derives
 * the mute state from the current DF flags; only the debug text differs.
 * Confirm against pim_channel_update_oif_mute().
 */
#define PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil)                                    \
	do {                                                                   \
		if (PIM_DEBUG_MLAG)                                            \
			zlog_debug(                                            \
				"%s: add Dual-active Interface to %s "         \
				"to oil:%s",                                   \
				__func__, ch->interface->name, ch->sg_str);    \
		pim_channel_update_oif_mute(ch_oil, ch->interface->info);      \
	} while (0)
33
/* Remove a dual-active member interface's OIF from the channel oil.
 * See the note on the ADD variant: the body is identical; the mute helper
 * decides add-vs-del from current state.
 */
#define PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil)                                    \
	do {                                                                   \
		if (PIM_DEBUG_MLAG)                                            \
			zlog_debug(                                            \
				"%s: del Dual-active Interface to %s "         \
				"to oil:%s",                                   \
				__func__, ch->interface->name, ch->sg_str);    \
		pim_channel_update_oif_mute(ch_oil, ch->interface->info);      \
	} while (0)
43
44
45 static void pim_mlag_calculate_df_for_ifchannels(struct pim_upstream *up,
46 bool is_df)
47 {
48 struct listnode *chnode;
49 struct listnode *chnextnode;
50 struct pim_ifchannel *ch;
51 struct pim_interface *pim_ifp = NULL;
52 struct channel_oil *ch_oil = NULL;
53
54 ch_oil = (up) ? up->channel_oil : NULL;
55
56 if (!ch_oil)
57 return;
58
59 if (PIM_DEBUG_MLAG)
60 zlog_debug("%s: Calculating DF for Dual active if-channel%s",
61 __func__, up->sg_str);
62
63 for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) {
64 pim_ifp = (ch->interface) ? ch->interface->info : NULL;
65 if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp))
66 continue;
67
68 if (is_df)
69 PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil);
70 else
71 PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil);
72 }
73 }
74
75 static void pim_mlag_inherit_mlag_flags(struct pim_upstream *up, bool is_df)
76 {
77 struct listnode *listnode;
78 struct pim_upstream *child;
79 struct listnode *chnode;
80 struct listnode *chnextnode;
81 struct pim_ifchannel *ch;
82 struct pim_interface *pim_ifp = NULL;
83 struct channel_oil *ch_oil = NULL;
84
85 if (PIM_DEBUG_MLAG)
86 zlog_debug("%s: Updating DF for uptream:%s children", __func__,
87 up->sg_str);
88
89
90 for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) {
91 pim_ifp = (ch->interface) ? ch->interface->info : NULL;
92 if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp))
93 continue;
94
95 for (ALL_LIST_ELEMENTS_RO(up->sources, listnode, child)) {
96 if (PIM_DEBUG_MLAG)
97 zlog_debug("%s: Updating DF for child:%s",
98 __func__, child->sg_str);
99 ch_oil = (child) ? child->channel_oil : NULL;
100
101 if (!ch_oil)
102 continue;
103
104 if (is_df)
105 PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil);
106 else
107 PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil);
108 }
109 }
110 }
111
/******************************* pim upstream sync **************************/
/* Update DF role for the upstream entry and return true on role change.
 *
 * On a real change this (in order):
 *  1. flips the MLAG_NON_DF flag,
 *  2. pushes the new role into dual-active if-channels (and inherited
 *     children) if the entry was synced because of dual-active interfaces,
 *  3. re-evaluates muting of the vxlan termination device (ipmr-lo),
 *  4. for (*,G) vxlan termination entries, re-inherits flags on (S,G)s.
 */
bool pim_mlag_up_df_role_update(struct pim_instance *pim,
		struct pim_upstream *up, bool is_df, const char *reason)
{
	struct channel_oil *c_oil = up->channel_oil;
	bool old_is_df = !PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags);
	struct pim_interface *vxlan_ifp;

	if (is_df == old_is_df) {
		if (PIM_DEBUG_MLAG)
			zlog_debug(
				"%s: Ignoring Role update for %s, since no change",
				__func__, up->sg_str);
		return false;
	}

	if (PIM_DEBUG_MLAG)
		zlog_debug("local MLAG mroute %s role changed to %s based on %s",
			   up->sg_str, is_df ? "df" : "non-df", reason);

	if (is_df)
		PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(up->flags);
	else
		PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags);


	/*
	 * This Upstream entry synced to peer Because of Dual-active
	 * Interface configuration
	 */
	if (PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) {
		pim_mlag_inherit_mlag_flags(up, is_df);
		pim_mlag_calculate_df_for_ifchannels(up, is_df);
	}

	/* If the DF role has changed check if ipmr-lo needs to be
	 * muted/un-muted. Active-Active devices and vxlan termination
	 * devices (ipmr-lo) are suppressed on the non-DF.
	 * This may leave the mroute with the empty OIL in which case the
	 * forwarding entry's sole purpose is to just blackhole the flow
	 * headed to the switch.
	 */
	if (c_oil) {
		vxlan_ifp = pim_vxlan_get_term_ifp(pim);
		if (vxlan_ifp)
			pim_channel_update_oif_mute(c_oil, vxlan_ifp);
	}

	/* If DF role changed on a (*,G) termination mroute update the
	 * associated DF role on the inherited (S,G) entries
	 */
	if (pim_addr_is_any(up->sg.src) &&
	    PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags))
		pim_vxlan_inherit_mlag_flags(pim, up, true /* inherit */);

	return true;
}
170
171 /* Run per-upstream entry DF election and return true on role change */
172 static bool pim_mlag_up_df_role_elect(struct pim_instance *pim,
173 struct pim_upstream *up)
174 {
175 bool is_df;
176 uint32_t peer_cost;
177 uint32_t local_cost;
178 bool rv;
179
180 if (!pim_up_mlag_is_local(up))
181 return false;
182
183 /* We are yet to rx a status update from the local MLAG daemon so
184 * we will assume DF status.
185 */
186 if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED))
187 return pim_mlag_up_df_role_update(pim, up,
188 true /*is_df*/, "mlagd-down");
189
190 /* If not connected to peer assume DF role on the MLAG primary
191 * switch (and non-DF on the secondary switch.
192 */
193 if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
194 is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
195 return pim_mlag_up_df_role_update(pim, up,
196 is_df, "peer-down");
197 }
198
199 /* If MLAG peer session is up but zebra is down on the peer
200 * assume DF role.
201 */
202 if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP))
203 return pim_mlag_up_df_role_update(pim, up,
204 true /*is_df*/, "zebra-down");
205
206 /* If we are connected to peer switch but don't have a mroute
207 * from it we have to assume non-DF role to avoid duplicates.
208 * Note: When the peer connection comes up we wait for initial
209 * replay to complete before moving "strays" i.e. local-mlag-mroutes
210 * without a peer reference to non-df role.
211 */
212 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
213 return pim_mlag_up_df_role_update(pim, up,
214 false /*is_df*/, "no-peer-mroute");
215
216 /* switch with the lowest RPF cost wins. if both switches have the same
217 * cost MLAG role is used as a tie breaker (MLAG primary wins).
218 */
219 peer_cost = up->mlag.peer_mrib_metric;
220 local_cost = pim_up_mlag_local_cost(up);
221 if (local_cost == peer_cost) {
222 is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
223 rv = pim_mlag_up_df_role_update(pim, up, is_df, "equal-cost");
224 } else {
225 is_df = (local_cost < peer_cost) ? true : false;
226 rv = pim_mlag_up_df_role_update(pim, up, is_df, "cost");
227 }
228
229 return rv;
230 }
231
/* Handle upstream entry add from the peer MLAG switch -
 * - if a local entry doesn't exist one is created with reference
 *   _MLAG_PEER
 * - if a local entry exists and has a MLAG OIF DF election is run.
 *   the non-DF switch stop forwarding traffic to MLAG devices.
 */
static void pim_mlag_up_peer_add(struct mlag_mroute_add *msg)
{
	struct pim_upstream *up;
	struct pim_instance *pim;
	int flags = 0;
	pim_sgaddr sg;
	struct vrf *vrf;

	/* source/group arrive in host order from the decoder; convert to
	 * the network-order in_addr the SG key expects
	 */
	memset(&sg, 0, sizeof(sg));
	sg.src.s_addr = htonl(msg->source_ip);
	sg.grp.s_addr = htonl(msg->group_ip);

	if (PIM_DEBUG_MLAG)
		zlog_debug("peer MLAG mroute add %s:%pSG cost %d",
			   msg->vrf_name, &sg, msg->cost_to_rp);

	/* XXX - this is not correct. we MUST cache updates to avoid losing
	 * an entry because of race conditions with the peer switch.
	 */
	vrf = vrf_lookup_by_name(msg->vrf_name);
	if (!vrf) {
		if (PIM_DEBUG_MLAG)
			zlog_debug(
				"peer MLAG mroute add failed %s:%pSG; no vrf",
				msg->vrf_name, &sg);
		return;
	}
	pim = vrf->info;

	up = pim_upstream_find(pim, &sg);
	if (up) {
		/* upstream already exists; create peer reference if it
		 * doesn't already exist.
		 */
		if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
			pim_upstream_ref(up, PIM_UPSTREAM_FLAG_MASK_MLAG_PEER,
					 __func__);
	} else {
		PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags);
		up = pim_upstream_add(pim, &sg, NULL /*iif*/, flags, __func__,
				      NULL /*if_ch*/);

		if (!up) {
			if (PIM_DEBUG_MLAG)
				zlog_debug(
					"peer MLAG mroute add failed %s:%pSG",
					vrf->name, &sg);
			return;
		}
	}
	/* remember the peer's cost and re-run DF election with it */
	up->mlag.peer_mrib_metric = msg->cost_to_rp;
	pim_mlag_up_df_role_elect(pim, up);
}
291
/* Handle upstream entry del from the peer MLAG switch -
 * - peer reference is removed. this can result in the upstream
 *   being deleted altogether.
 * - if a local entry continues to exist and has a MLAG OIF DF election
 *   is re-run (at the end of which the local entry will be the DF).
 *
 * Returns the upstream if it survived the deref, NULL if it was freed.
 */
static struct pim_upstream *pim_mlag_up_peer_deref(struct pim_instance *pim,
						   struct pim_upstream *up)
{
	if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
		return up;

	PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(up->flags);
	up = pim_upstream_del(pim, up, __func__);
	if (up)
		pim_mlag_up_df_role_elect(pim, up);

	return up;
}
311
/* Process an mroute-del received from the peer MLAG switch: look up the
 * (S,G) in the named VRF and drop the peer reference.
 */
static void pim_mlag_up_peer_del(struct mlag_mroute_del *msg)
{
	struct pim_upstream *up;
	struct pim_instance *pim;
	pim_sgaddr sg;
	struct vrf *vrf;

	/* addresses arrive in host order; SG key wants network order */
	memset(&sg, 0, sizeof(sg));
	sg.src.s_addr = htonl(msg->source_ip);
	sg.grp.s_addr = htonl(msg->group_ip);

	if (PIM_DEBUG_MLAG)
		zlog_debug("peer MLAG mroute del %s:%pSG", msg->vrf_name, &sg);

	vrf = vrf_lookup_by_name(msg->vrf_name);
	if (!vrf) {
		if (PIM_DEBUG_MLAG)
			zlog_debug(
				"peer MLAG mroute del skipped %s:%pSG; no vrf",
				msg->vrf_name, &sg);
		return;
	}
	pim = vrf->info;

	up = pim_upstream_find(pim, &sg);
	if (!up) {
		if (PIM_DEBUG_MLAG)
			zlog_debug(
				"peer MLAG mroute del skipped %s:%pSG; no up",
				vrf->name, &sg);
		return;
	}

	(void)pim_mlag_up_peer_deref(pim, up);
}
347
/* When we lose connection to the local MLAG daemon we can drop all peer
 * references.
 */
static void pim_mlag_up_peer_del_all(void)
{
	struct list *temp = list_new();
	struct pim_upstream *up;
	struct vrf *vrf;
	struct pim_instance *pim;

	/*
	 * So why these gyrations?
	 * pim->upstream_head has the list of *,G and S,G
	 * that are in the system. The problem of course
	 * is that it is an ordered list:
	 * (*,G1) -> (S1,G1) -> (S2,G2) -> (S3, G2) -> (*,G2) -> (S1,G2)
	 * And the *,G1 has pointers to S1,G1 and S2,G1
	 * if we delete *,G1 then we have a situation where
	 * S1,G1 and S2,G2 can be deleted as well. Then a
	 * simple ALL_LIST_ELEMENTS will have the next listnode
	 * pointer become invalid and we crash.
	 * So let's grab the list of MLAG_PEER upstreams
	 * add a refcount put on another list and delete safely
	 */
	RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
		pim = vrf->info;
		frr_each (rb_pim_upstream, &pim->upstream_head, up) {
			if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
				continue;
			listnode_add(temp, up);
			/*
			 * Add a reference since we are adding to this
			 * list for deletion
			 */
			up->ref_count++;
		}

		/* drain the temp list: deref the peer flag, then drop the
		 * extra reference taken above (which may free the entry)
		 */
		while (temp->count) {
			up = listnode_head(temp);
			listnode_delete(temp, up);

			up = pim_mlag_up_peer_deref(pim, up);
			/*
			 * This is the deletion of the reference added
			 * above
			 */
			if (up)
				pim_upstream_del(pim, up, __func__);
		}
	}

	list_delete(&temp);
}
401
/* Send upstream entry to the local MLAG daemon (which will subsequently
 * send it to the peer MLAG switch).
 *
 * The stream layout below must stay in lock-step with the peer-side
 * mlag_lib_decode_mroute_add(): type, vrf name, S, G, cost, owner,
 * am_i_dr, am_i_dual_active (here: current DF state), vrf_id, intf name.
 */
static void pim_mlag_up_local_add_send(struct pim_instance *pim,
		struct pim_upstream *up)
{
	struct stream *s = NULL;
	struct vrf *vrf = pim->vrf;

	/* don't queue anything while the mlagd connection is down */
	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
		return;

	s = stream_new(sizeof(struct mlag_mroute_add) + PIM_MLAG_METADATA_LEN);
	if (!s)
		return;

	if (PIM_DEBUG_MLAG)
		zlog_debug("local MLAG mroute add %s:%s",
			   vrf->name, up->sg_str);

	++router->mlag_stats.msg.mroute_add_tx;

	stream_putl(s, MLAG_MROUTE_ADD);
	stream_put(s, vrf->name, VRF_NAMSIZ);
	/* wire format carries addresses in host order */
	stream_putl(s, ntohl(up->sg.src.s_addr));
	stream_putl(s, ntohl(up->sg.grp.s_addr));

	stream_putl(s, pim_up_mlag_local_cost(up));
	/* XXX - who is adding */
	stream_putl(s, MLAG_OWNER_VXLAN);
	/* XXX - am_i_DR field should be removed */
	stream_putc(s, false);
	stream_putc(s, !(PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags)));
	stream_putl(s, vrf->vrf_id);
	/* XXX - this field is a No-op for VXLAN*/
	stream_put(s, NULL, INTERFACE_NAMSIZ);

	/* hand off to the zebra pthread for async transmission */
	stream_fifo_push_safe(router->mlag_fifo, s);
	pim_mlag_signal_zpthread();
}
442
/* Send an mroute-del for a local upstream entry to the local MLAG daemon.
 * Wire layout must match mlag_lib_decode_mroute_del() on the peer.
 */
static void pim_mlag_up_local_del_send(struct pim_instance *pim,
		struct pim_upstream *up)
{
	struct stream *s = NULL;
	struct vrf *vrf = pim->vrf;

	/* don't queue anything while the mlagd connection is down */
	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
		return;

	s = stream_new(sizeof(struct mlag_mroute_del) + PIM_MLAG_METADATA_LEN);
	if (!s)
		return;

	if (PIM_DEBUG_MLAG)
		zlog_debug("local MLAG mroute del %s:%s",
			   vrf->name, up->sg_str);

	++router->mlag_stats.msg.mroute_del_tx;

	stream_putl(s, MLAG_MROUTE_DEL);
	stream_put(s, vrf->name, VRF_NAMSIZ);
	/* wire format carries addresses in host order */
	stream_putl(s, ntohl(up->sg.src.s_addr));
	stream_putl(s, ntohl(up->sg.grp.s_addr));
	/* XXX - who is adding */
	stream_putl(s, MLAG_OWNER_VXLAN);
	stream_putl(s, vrf->vrf_id);
	/* XXX - this field is a No-op for VXLAN */
	stream_put(s, NULL, INTERFACE_NAMSIZ);

	/* XXX - is this the most optimal way to do things */
	stream_fifo_push_safe(router->mlag_fifo, s);
	pim_mlag_signal_zpthread();
}
476
477
/* Called when a local upstream entry is created or if its cost changes:
 * re-run DF election first so the advertised DF state is current, then
 * push the (possibly updated) entry to the local MLAG daemon.
 */
void pim_mlag_up_local_add(struct pim_instance *pim,
		struct pim_upstream *up)
{
	pim_mlag_up_df_role_elect(pim, up);
	/* XXX - need to add some dup checks here */
	pim_mlag_up_local_add_send(pim, up);
}
486
/* Called when local MLAG reference is removed from an upstream entry:
 * re-elect (the entry may still exist via other references) and notify
 * the local MLAG daemon of the removal.
 */
void pim_mlag_up_local_del(struct pim_instance *pim,
		struct pim_upstream *up)
{
	pim_mlag_up_df_role_elect(pim, up);
	pim_mlag_up_local_del_send(pim, up);
}
494
/* When connection to local MLAG daemon is established all the local
 * MLAG upstream entries are replayed to it.  Walks every VRF's upstream
 * tree and re-sends entries that have a local MLAG reference.
 */
static void pim_mlag_up_local_replay(void)
{
	struct pim_upstream *up;
	struct vrf *vrf;
	struct pim_instance *pim;

	RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
		pim = vrf->info;
		frr_each (rb_pim_upstream, &pim->upstream_head, up) {
			if (pim_up_mlag_is_local(up))
				pim_mlag_up_local_add_send(pim, up);
		}
	}
}
512
/* on local/peer mlag connection and role changes the DF status needs
 * to be re-evaluated.
 *
 * mlagd_send: when true, entries whose role changed are re-advertised to
 * the MLAG daemon (so the peer learns the new cost/DF state).
 * reason_code: free-form string used only for debug logging.
 */
static void pim_mlag_up_local_reeval(bool mlagd_send, const char *reason_code)
{
	struct pim_upstream *up;
	struct vrf *vrf;
	struct pim_instance *pim;

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s re-run DF election because of %s",
			   __func__, reason_code);
	RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
		pim = vrf->info;
		frr_each (rb_pim_upstream, &pim->upstream_head, up) {
			if (!pim_up_mlag_is_local(up))
				continue;
			/* if role changes re-send to peer */
			if (pim_mlag_up_df_role_elect(pim, up) &&
			    mlagd_send)
				pim_mlag_up_local_add_send(pim, up);
		}
	}
}
537
538 /*****************PIM Actions for MLAG state changes**********************/
539
540 /* notify the anycast VTEP component about state changes */
541 static inline void pim_mlag_vxlan_state_update(void)
542 {
543 bool enable = !!(router->mlag_flags & PIM_MLAGF_STATUS_RXED);
544 bool peer_state = !!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP);
545
546 pim_vxlan_mlag_update(enable, peer_state, router->mlag_role,
547 router->peerlink_rif_p, &router->local_vtep_ip);
548
549 }
550
551 /**************End of PIM Actions for MLAG State changes******************/
552
553
554 /********************API to process PIM MLAG Data ************************/
555
/* Process a status update from the local MLAG daemon: detect role,
 * peerlink and peer-session changes, then (in this order) notify the
 * vxlan component and re-run DF election.  The very first status update
 * after connect additionally triggers a full replay of local entries.
 */
static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
{
	bool role_chg = false;
	bool state_chg = false;
	bool notify_vxlan = false;
	struct interface *peerlink_rif_p;
	char buf[MLAG_ROLE_STRSIZE];

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: msg dump: my_role: %s, peer_state: %s",
			   __func__,
			   mlag_role2str(msg.my_role, buf, sizeof(buf)),
			   (msg.peer_state == MLAG_STATE_RUNNING ? "RUNNING"
								 : "DOWN"));

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}
	++router->mlag_stats.msg.mlag_status_updates;

	/* evaluate the changes first */
	if (router->mlag_role != msg.my_role) {
		role_chg = true;
		notify_vxlan = true;
		router->mlag_role = msg.my_role;
	}

	strlcpy(router->peerlink_rif, msg.peerlink_rif,
		sizeof(router->peerlink_rif));

	/* XXX - handle the case where we may rx the interface name from the
	 * MLAG daemon before we get the interface from zebra.
	 */
	peerlink_rif_p = if_lookup_by_name(router->peerlink_rif, VRF_DEFAULT);
	if (router->peerlink_rif_p != peerlink_rif_p) {
		router->peerlink_rif_p = peerlink_rif_p;
		notify_vxlan = true;
	}

	if (msg.peer_state == MLAG_STATE_RUNNING) {
		if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
			state_chg = true;
			notify_vxlan = true;
			router->mlag_flags |= PIM_MLAGF_PEER_CONN_UP;
		}
		router->connected_to_mlag = true;
	} else {
		if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) {
			++router->mlag_stats.peer_session_downs;
			state_chg = true;
			notify_vxlan = true;
			router->mlag_flags &= ~PIM_MLAGF_PEER_CONN_UP;
		}
		router->connected_to_mlag = false;
	}

	/* apply the changes */
	/* when connection to mlagd comes up we hold send mroutes till we have
	 * rxed the status and had a chance to re-valuate DF state
	 */
	if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) {
		router->mlag_flags |= PIM_MLAGF_STATUS_RXED;
		pim_mlag_vxlan_state_update();
		/* on session up re-eval DF status */
		pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_up");
		/* replay all the upstream entries to the local MLAG daemon */
		pim_mlag_up_local_replay();
		return;
	}

	if (notify_vxlan)
		pim_mlag_vxlan_state_update();

	if (state_chg) {
		if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP))
			/* when a connection goes down the primary takes over
			 * DF role for all entries
			 */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
						 "peer_down");
		else
			/* XXX - when session comes up we need to wait for
			 * PEER_REPLAY_DONE before running re-election on
			 * local-mlag entries that are missing peer reference
			 */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
						 "peer_up");
	} else if (role_chg) {
		/* MLAG role changed without a state change */
		pim_mlag_up_local_reeval(true /*mlagd_send*/, "role_chg");
	}
}
651
/* Process peer-zebra up/down notifications: track the PEER_ZEBRA_UP flag
 * and re-run DF election (assuming DF ourselves while peer zebra is down).
 */
static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
{
	if (PIM_DEBUG_MLAG)
		zlog_debug(
			"%s: msg dump: peer_frr_state: %s", __func__,
			(msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN"));

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}
	++router->mlag_stats.msg.peer_zebra_status_updates;

	/* evaluate the changes first */
	if (msg.frr_state == MLAG_FRR_STATE_UP) {
		if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) {
			router->mlag_flags |= PIM_MLAGF_PEER_ZEBRA_UP;
			/* XXX - when peer zebra comes up we need to wait
			 * for some time to let the peer setup MDTs before
			 * relinquishing DF status
			 */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
						 "zebra_up");
		}
	} else {
		if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) {
			++router->mlag_stats.peer_zebra_downs;
			router->mlag_flags &= ~PIM_MLAGF_PEER_ZEBRA_UP;
			/* when a peer zebra goes down we assume DF role */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
						 "zebra_down");
		}
	}
}
688
/* Process a VXLAN update from the MLAG daemon: cache the anycast VTEP IP
 * and, when the local VTEP IP changed, push the new state to the vxlan
 * component.
 */
static void pim_mlag_process_vxlan_update(struct mlag_vxlan *msg)
{
	char addr_buf1[INET_ADDRSTRLEN];
	char addr_buf2[INET_ADDRSTRLEN];
	uint32_t local_ip;

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}

	++router->mlag_stats.msg.vxlan_updates;
	/* message carries addresses in host order; cache in network order */
	router->anycast_vtep_ip.s_addr = htonl(msg->anycast_ip);
	local_ip = htonl(msg->local_ip);
	if (router->local_vtep_ip.s_addr != local_ip) {
		router->local_vtep_ip.s_addr = local_ip;
		pim_mlag_vxlan_state_update();
	}

	if (PIM_DEBUG_MLAG) {
		inet_ntop(AF_INET, &router->local_vtep_ip,
			  addr_buf1, INET_ADDRSTRLEN);
		inet_ntop(AF_INET, &router->anycast_vtep_ip,
			  addr_buf2, INET_ADDRSTRLEN);

		zlog_debug("%s: msg dump: local-ip:%s, anycast-ip:%s",
			   __func__, addr_buf1, addr_buf2);
	}
}
720
/* Entry point for a decoded mroute-add from the MLAG daemon: log, bump
 * stats, and hand off to pim_mlag_up_peer_add().
 */
static void pim_mlag_process_mroute_add(struct mlag_mroute_add msg)
{
	if (PIM_DEBUG_MLAG) {
		pim_sgaddr sg;

		sg.grp.s_addr = ntohl(msg.group_ip);
		sg.src.s_addr = ntohl(msg.source_ip);

		zlog_debug(
			"%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x (%pSG) cost: %u",
			__func__, msg.vrf_name, msg.source_ip, msg.group_ip,
			&sg, msg.cost_to_rp);
		zlog_debug(
			"(%pSG)owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s",
			&sg, msg.owner_id, msg.am_i_dr, msg.am_i_dual_active,
			msg.vrf_id, msg.intf_name);
	}

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}

	++router->mlag_stats.msg.mroute_add_rx;

	pim_mlag_up_peer_add(&msg);
}
750
/* Entry point for a decoded mroute-del from the MLAG daemon: log, bump
 * stats, and hand off to pim_mlag_up_peer_del().
 */
static void pim_mlag_process_mroute_del(struct mlag_mroute_del msg)
{
	if (PIM_DEBUG_MLAG) {
		pim_sgaddr sg;

		sg.grp.s_addr = ntohl(msg.group_ip);
		sg.src.s_addr = ntohl(msg.source_ip);
		zlog_debug(
			"%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x(%pSG)",
			__func__, msg.vrf_name, msg.source_ip, msg.group_ip,
			&sg);
		zlog_debug("(%pSG)owner_id: %d, vrf_id: 0x%x intf_name: %s",
			   &sg, msg.owner_id, msg.vrf_id, msg.intf_name);
	}

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}

	++router->mlag_stats.msg.mroute_del_rx;

	pim_mlag_up_peer_del(&msg);
}
777
/* Demultiplex an MLAG message received over zebra: decode the common
 * header, then decode and dispatch by message type.  Bulk variants decode
 * and process msg_cnt entries back to back from the same stream.
 *
 * Returns 0 on success or the decoder's error code on a malformed message.
 */
int pim_zebra_mlag_handle_msg(int cmd, struct zclient *zclient,
		uint16_t zapi_length, vrf_id_t vrf_id)
{
	struct stream *s = zclient->ibuf;
	struct mlag_msg mlag_msg;
	char buf[80];
	int rc = 0;
	size_t length;

	rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg, &length);
	if (rc)
		return (rc);

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: Received msg type: %s length: %d, bulk_cnt: %d",
			   __func__,
			   mlag_lib_msgid_to_str(mlag_msg.msg_type, buf,
						 sizeof(buf)),
			   mlag_msg.data_len, mlag_msg.msg_cnt);

	switch (mlag_msg.msg_type) {
	case MLAG_STATUS_UPDATE: {
		struct mlag_status msg;

		rc = mlag_lib_decode_mlag_status(s, &msg);
		if (rc)
			return (rc);
		pim_mlag_process_mlagd_state_change(msg);
	} break;
	case MLAG_PEER_FRR_STATUS: {
		struct mlag_frr_status msg;

		rc = mlag_lib_decode_frr_status(s, &msg);
		if (rc)
			return (rc);
		pim_mlag_process_peer_frr_state_change(msg);
	} break;
	case MLAG_VXLAN_UPDATE: {
		struct mlag_vxlan msg;

		rc = mlag_lib_decode_vxlan_update(s, &msg);
		if (rc)
			return rc;
		pim_mlag_process_vxlan_update(&msg);
	} break;
	case MLAG_MROUTE_ADD: {
		struct mlag_mroute_add msg;

		rc = mlag_lib_decode_mroute_add(s, &msg, &length);
		if (rc)
			return (rc);
		pim_mlag_process_mroute_add(msg);
	} break;
	case MLAG_MROUTE_DEL: {
		struct mlag_mroute_del msg;

		rc = mlag_lib_decode_mroute_del(s, &msg, &length);
		if (rc)
			return (rc);
		pim_mlag_process_mroute_del(msg);
	} break;
	case MLAG_MROUTE_ADD_BULK: {
		struct mlag_mroute_add msg;
		int i;

		for (i = 0; i < mlag_msg.msg_cnt; i++) {
			rc = mlag_lib_decode_mroute_add(s, &msg, &length);
			if (rc)
				return (rc);
			pim_mlag_process_mroute_add(msg);
		}
	} break;
	case MLAG_MROUTE_DEL_BULK: {
		struct mlag_mroute_del msg;
		int i;

		for (i = 0; i < mlag_msg.msg_cnt; i++) {
			rc = mlag_lib_decode_mroute_del(s, &msg, &length);
			if (rc)
				return (rc);
			pim_mlag_process_mroute_del(msg);
		}
	} break;
	/* registration/dump message types are not expected inbound here */
	case MLAG_MSG_NONE:
	case MLAG_REGISTER:
	case MLAG_DEREGISTER:
	case MLAG_DUMP:
	case MLAG_PIM_CFG_DUMP:
		break;
	}
	return 0;
}
870
/****************End of PIM Message processing handler********************/
872
/* zebra callback: the local MLAG daemon process came up.  Mark the local
 * connection as established; mroute sends are still held until the first
 * status update is received (see pim_mlag_process_mlagd_state_change).
 */
int pim_zebra_mlag_process_up(ZAPI_CALLBACK_ARGS)
{
	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: Received Process-Up from Mlag", __func__);

	/*
	 * Incase of local MLAG restart, PIM needs to replay all the data
	 * since MLAG is empty.
	 */
	router->connected_to_mlag = true;
	router->mlag_flags |= PIM_MLAGF_LOCAL_CONN_UP;
	return 0;
}
886
887 static void pim_mlag_param_reset(void)
888 {
889 /* reset the cached params and stats */
890 router->mlag_flags &= ~(PIM_MLAGF_STATUS_RXED |
891 PIM_MLAGF_LOCAL_CONN_UP |
892 PIM_MLAGF_PEER_CONN_UP |
893 PIM_MLAGF_PEER_ZEBRA_UP);
894 router->local_vtep_ip.s_addr = INADDR_ANY;
895 router->anycast_vtep_ip.s_addr = INADDR_ANY;
896 router->mlag_role = MLAG_ROLE_NONE;
897 memset(&router->mlag_stats.msg, 0, sizeof(router->mlag_stats.msg));
898 router->peerlink_rif[0] = '\0';
899 }
900
/* zebra callback: the local MLAG daemon went down.  Reset cached state,
 * re-run DF election (without sends — there is nobody to send to), drop
 * all peer references, and notify the vxlan component.
 */
int pim_zebra_mlag_process_down(ZAPI_CALLBACK_ARGS)
{
	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: Received Process-Down from Mlag", __func__);

	/* Local CLAG is down, reset peer data and forward the traffic if
	 * we are DR
	 */
	if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)
		++router->mlag_stats.peer_session_downs;
	if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)
		++router->mlag_stats.peer_zebra_downs;
	router->connected_to_mlag = false;
	pim_mlag_param_reset();
	/* on mlagd session down re-eval DF status */
	pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_down");
	/* flush all peer references */
	pim_mlag_up_peer_del_all();
	/* notify the vxlan component */
	pim_mlag_vxlan_state_update();
	return 0;
}
923
/* Deferred-event handler: register with zebra for the set of MLAG message
 * types PIM cares about.  The mask is a bitfield of (1 << msg_type).
 */
static void pim_mlag_register_handler(struct thread *thread)
{
	uint32_t bit_mask = 0;

	if (!zclient)
		return;

	SET_FLAG(bit_mask, (1 << MLAG_STATUS_UPDATE));
	SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD));
	SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL));
	SET_FLAG(bit_mask, (1 << MLAG_DUMP));
	SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD_BULK));
	SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL_BULK));
	SET_FLAG(bit_mask, (1 << MLAG_PIM_CFG_DUMP));
	SET_FLAG(bit_mask, (1 << MLAG_VXLAN_UPDATE));
	SET_FLAG(bit_mask, (1 << MLAG_PEER_FRR_STATUS));

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: Posting Client Register to MLAG mask: 0x%x",
			   __func__, bit_mask);

	zclient_send_mlag_register(zclient, bit_mask);
}
947
/* Schedule MLAG registration with zebra (idempotent: a second call while
 * already registered is a no-op).  Actual registration happens in the
 * event handler on the main thread.
 */
void pim_mlag_register(void)
{
	if (router->mlag_process_register)
		return;

	router->mlag_process_register = true;

	thread_add_event(router->master, pim_mlag_register_handler, NULL, 0,
			 NULL);
}
958
/* Deferred-event handler: de-register the MLAG channel with zebra and
 * mark the connection as down.
 */
static void pim_mlag_deregister_handler(struct thread *thread)
{
	if (!zclient)
		return;

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: Posting Client De-Register to MLAG from PIM",
			   __func__);
	router->connected_to_mlag = false;
	zclient_send_mlag_deregister(zclient);
}
970
/* Schedule MLAG de-registration — but only when no MLAG interface and no
 * vxlan user still needs the channel, and only if we actually registered.
 */
void pim_mlag_deregister(void)
{
	/* if somebody still interested in the MLAG channel skip de-reg */
	if (router->pim_mlag_intf_cnt || pim_vxlan_do_mlag_reg())
		return;

	/* not registered; nothing do */
	if (!router->mlag_process_register)
		return;

	router->mlag_process_register = false;

	thread_add_event(router->master, pim_mlag_deregister_handler, NULL, 0,
			 NULL);
}
986
987 void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp)
988 {
989 if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == true)
990 return;
991
992 if (PIM_DEBUG_MLAG)
993 zlog_debug("%s: Configuring active-active on Interface: %s",
994 __func__, "NULL");
995
996 pim_ifp->activeactive = true;
997 if (pim_ifp->pim)
998 pim_ifp->pim->inst_mlag_intf_cnt++;
999
1000 router->pim_mlag_intf_cnt++;
1001 if (PIM_DEBUG_MLAG)
1002 zlog_debug(
1003 "%s: Total MLAG configured Interfaces on router: %d, Inst: %d",
1004 __func__, router->pim_mlag_intf_cnt,
1005 pim_ifp->pim->inst_mlag_intf_cnt);
1006
1007 if (router->pim_mlag_intf_cnt == 1) {
1008 /*
1009 * at least one Interface is configured for MLAG, send register
1010 * to Zebra for receiving MLAG Updates
1011 */
1012 pim_mlag_register();
1013 }
1014 }
1015
/* Disable dual-active mode on an interface: decrement the counters and,
 * when the last MLAG interface goes away, de-register from zebra and
 * reset cached MLAG parameters.  No-op if not currently enabled.
 */
void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp)
{
	if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == false)
		return;

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: UnConfiguring active-active on Interface: %s",
			   __func__, "NULL");

	pim_ifp->activeactive = false;
	pim_ifp->pim->inst_mlag_intf_cnt--;

	router->pim_mlag_intf_cnt--;
	if (PIM_DEBUG_MLAG)
		zlog_debug(
			"%s: Total MLAG configured Interfaces on router: %d, Inst: %d",
			__func__, router->pim_mlag_intf_cnt,
			pim_ifp->pim->inst_mlag_intf_cnt);

	if (router->pim_mlag_intf_cnt == 0) {
		/*
		 * all the Interfaces are MLAG un-configured, post MLAG
		 * De-register to Zebra
		 */
		pim_mlag_deregister();
		pim_mlag_param_reset();
	}
}
1044
1045
1046 void pim_instance_mlag_init(struct pim_instance *pim)
1047 {
1048 if (!pim)
1049 return;
1050
1051 pim->inst_mlag_intf_cnt = 0;
1052 }
1053
1054
/* Per-VRF-instance MLAG teardown: unconfigure dual-active mode on every
 * interface of the instance, then clear the counter.
 */
void pim_instance_mlag_terminate(struct pim_instance *pim)
{
	struct interface *ifp;

	if (!pim)
		return;

	FOR_ALL_INTERFACES (pim->vrf, ifp) {
		struct pim_interface *pim_ifp = ifp->info;

		if (!pim_ifp || pim_ifp->activeactive == false)
			continue;

		pim_if_unconfigure_mlag_dualactive(pim_ifp);
	}
	pim->inst_mlag_intf_cnt = 0;
}
1072
/* Global MLAG teardown: release the tx stream and fifo allocated in
 * pim_mlag_init() (stream_free/stream_fifo_free tolerate their input;
 * pointers are NULLed to prevent use-after-free).
 */
void pim_mlag_terminate(void)
{
	stream_free(router->mlag_stream);
	router->mlag_stream = NULL;
	stream_fifo_free(router->mlag_fifo);
	router->mlag_fifo = NULL;
}
1080
/* Global MLAG init: reset cached parameters and allocate the tx fifo and
 * stream used to queue messages toward the MLAG daemon.
 */
void pim_mlag_init(void)
{
	pim_mlag_param_reset();
	router->pim_mlag_intf_cnt = 0;
	router->connected_to_mlag = false;
	router->mlag_fifo = stream_fifo_new();
	router->zpthread_mlag_write = NULL;
	router->mlag_stream = stream_new(MLAG_BUF_LIMIT);
}