]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_mlag.c
Merge pull request #7669 from ranjanyash54/2370
[mirror_frr.git] / pimd / pim_mlag.c
1 /*
2 * This is an implementation of PIM MLAG Functionality
3 *
4 * Module name: PIM MLAG
5 *
6 * Author: sathesh Kumar karra <sathk@cumulusnetworks.com>
7 *
8 * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; see the file COPYING; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24 #include <zebra.h>
25
26 #include "pimd.h"
27 #include "pim_mlag.h"
28 #include "pim_upstream.h"
29 #include "pim_vxlan.h"
30
31 extern struct zclient *zclient;
32
33 #define PIM_MLAG_METADATA_LEN 4
34
/*********************Actual Data processing *****************************/
36 /* TBD: There can be duplicate updates to FIB***/
/*
 * Mark a dual-active (MLAG) interface for (un)mute re-evaluation in the
 * given channel oil when this switch becomes the DF.
 * NOTE(review): both the ADD and DEL macros invoke the same helper;
 * pim_channel_update_oif_mute() presumably re-derives the mute state from
 * the current DF flags rather than acting on add/del directly — confirm.
 */
#define PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil)                                    \
	do {                                                                   \
		if (PIM_DEBUG_MLAG)                                            \
			zlog_debug(                                            \
				"%s: add Dual-active Interface to %s "         \
				"to oil:%s",                                   \
				__func__, ch->interface->name, ch->sg_str);    \
		pim_channel_update_oif_mute(ch_oil, ch->interface->info);      \
	} while (0)
46
/*
 * Counterpart of PIM_MLAG_ADD_OIF_TO_OIL for the non-DF transition.
 * Deliberately identical apart from the debug text: the mute helper is
 * driven by the flag state, not by which macro called it.
 */
#define PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil)                                    \
	do {                                                                   \
		if (PIM_DEBUG_MLAG)                                            \
			zlog_debug(                                            \
				"%s: del Dual-active Interface to %s "         \
				"to oil:%s",                                   \
				__func__, ch->interface->name, ch->sg_str);    \
		pim_channel_update_oif_mute(ch_oil, ch->interface->info);      \
	} while (0)
56
57
58 static void pim_mlag_calculate_df_for_ifchannels(struct pim_upstream *up,
59 bool is_df)
60 {
61 struct listnode *chnode;
62 struct listnode *chnextnode;
63 struct pim_ifchannel *ch;
64 struct pim_interface *pim_ifp = NULL;
65 struct channel_oil *ch_oil = NULL;
66
67 ch_oil = (up) ? up->channel_oil : NULL;
68
69 if (!ch_oil)
70 return;
71
72 if (PIM_DEBUG_MLAG)
73 zlog_debug("%s: Calculating DF for Dual active if-channel%s",
74 __func__, up->sg_str);
75
76 for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) {
77 pim_ifp = (ch->interface) ? ch->interface->info : NULL;
78 if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp))
79 continue;
80
81 if (is_df)
82 PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil);
83 else
84 PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil);
85 }
86 }
87
88 static void pim_mlag_inherit_mlag_flags(struct pim_upstream *up, bool is_df)
89 {
90 struct listnode *listnode;
91 struct pim_upstream *child;
92 struct listnode *chnode;
93 struct listnode *chnextnode;
94 struct pim_ifchannel *ch;
95 struct pim_interface *pim_ifp = NULL;
96 struct channel_oil *ch_oil = NULL;
97
98 if (PIM_DEBUG_MLAG)
99 zlog_debug("%s: Updating DF for uptream:%s children", __func__,
100 up->sg_str);
101
102
103 for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) {
104 pim_ifp = (ch->interface) ? ch->interface->info : NULL;
105 if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp))
106 continue;
107
108 for (ALL_LIST_ELEMENTS_RO(up->sources, listnode, child)) {
109 if (PIM_DEBUG_MLAG)
110 zlog_debug("%s: Updating DF for child:%s",
111 __func__, child->sg_str);
112 ch_oil = (child) ? child->channel_oil : NULL;
113
114 if (!ch_oil)
115 continue;
116
117 if (is_df)
118 PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil);
119 else
120 PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil);
121 }
122 }
123 }
124
125 /******************************* pim upstream sync **************************/
/* Update DF role for the upstream entry and return true on role change.
 * Note: DF-ness is stored inverted — the MLAG_NON_DF flag is SET when we
 * are the non-DF; is_df==true therefore clears the flag.
 */
bool pim_mlag_up_df_role_update(struct pim_instance *pim,
		struct pim_upstream *up, bool is_df, const char *reason)
{
	struct channel_oil *c_oil = up->channel_oil;
	bool old_is_df = !PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags);
	struct pim_interface *vxlan_ifp;

	/* no transition - nothing to do */
	if (is_df == old_is_df) {
		if (PIM_DEBUG_MLAG)
			zlog_debug(
				"%s: Ignoring Role update for %s, since no change",
				__func__, up->sg_str);
		return false;
	}

	if (PIM_DEBUG_MLAG)
		zlog_debug("local MLAG mroute %s role changed to %s based on %s",
			   up->sg_str, is_df ? "df" : "non-df", reason);

	if (is_df)
		PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(up->flags);
	else
		PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags);


	/*
	 * This upstream entry was synced to the peer because of a
	 * dual-active interface configuration; fan the new role out to
	 * the inherited children and the if-channels themselves.
	 */
	if (PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) {
		pim_mlag_inherit_mlag_flags(up, is_df);
		pim_mlag_calculate_df_for_ifchannels(up, is_df);
	}

	/* If the DF role has changed check if ipmr-lo needs to be
	 * muted/un-muted. Active-Active devices and vxlan termination
	 * devices (ipmr-lo) are suppressed on the non-DF.
	 * This may leave the mroute with the empty OIL in which case the
	 * forwarding entry's sole purpose is to just blackhole the flow
	 * headed to the switch.
	 */
	if (c_oil) {
		vxlan_ifp = pim_vxlan_get_term_ifp(pim);
		if (vxlan_ifp)
			pim_channel_update_oif_mute(c_oil, vxlan_ifp);
	}

	/* If DF role changed on a (*,G) termination mroute update the
	 * associated DF role on the inherited (S,G) entries
	 */
	if ((up->sg.src.s_addr == INADDR_ANY) &&
	    PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags))
		pim_vxlan_inherit_mlag_flags(pim, up, true /* inherit */);

	return true;
}
183
184 /* Run per-upstream entry DF election and return true on role change */
185 static bool pim_mlag_up_df_role_elect(struct pim_instance *pim,
186 struct pim_upstream *up)
187 {
188 bool is_df;
189 uint32_t peer_cost;
190 uint32_t local_cost;
191 bool rv;
192
193 if (!pim_up_mlag_is_local(up))
194 return false;
195
196 /* We are yet to rx a status update from the local MLAG daemon so
197 * we will assume DF status.
198 */
199 if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED))
200 return pim_mlag_up_df_role_update(pim, up,
201 true /*is_df*/, "mlagd-down");
202
203 /* If not connected to peer assume DF role on the MLAG primary
204 * switch (and non-DF on the secondary switch.
205 */
206 if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
207 is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
208 return pim_mlag_up_df_role_update(pim, up,
209 is_df, "peer-down");
210 }
211
212 /* If MLAG peer session is up but zebra is down on the peer
213 * assume DF role.
214 */
215 if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP))
216 return pim_mlag_up_df_role_update(pim, up,
217 true /*is_df*/, "zebra-down");
218
219 /* If we are connected to peer switch but don't have a mroute
220 * from it we have to assume non-DF role to avoid duplicates.
221 * Note: When the peer connection comes up we wait for initial
222 * replay to complete before moving "strays" i.e. local-mlag-mroutes
223 * without a peer reference to non-df role.
224 */
225 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
226 return pim_mlag_up_df_role_update(pim, up,
227 false /*is_df*/, "no-peer-mroute");
228
229 /* switch with the lowest RPF cost wins. if both switches have the same
230 * cost MLAG role is used as a tie breaker (MLAG primary wins).
231 */
232 peer_cost = up->mlag.peer_mrib_metric;
233 local_cost = pim_up_mlag_local_cost(up);
234 if (local_cost == peer_cost) {
235 is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
236 rv = pim_mlag_up_df_role_update(pim, up, is_df, "equal-cost");
237 } else {
238 is_df = (local_cost < peer_cost) ? true : false;
239 rv = pim_mlag_up_df_role_update(pim, up, is_df, "cost");
240 }
241
242 return rv;
243 }
244
/* Handle upstream entry add from the peer MLAG switch -
 * - if a local entry doesn't exist one is created with reference
 *   _MLAG_PEER
 * - if a local entry exists and has a MLAG OIF DF election is run.
 *   the non-DF switch stops forwarding traffic to MLAG devices.
 */
static void pim_mlag_up_peer_add(struct mlag_mroute_add *msg)
{
	struct pim_upstream *up;
	struct pim_instance *pim;
	int flags = 0;
	struct prefix_sg sg;
	struct vrf *vrf;
	char sg_str[PIM_SG_LEN];

	/* msg addresses appear to be host byte order (note the htonl) -
	 * TODO confirm against the mlagd wire format
	 */
	memset(&sg, 0, sizeof(struct prefix_sg));
	sg.src.s_addr = htonl(msg->source_ip);
	sg.grp.s_addr = htonl(msg->group_ip);
	if (PIM_DEBUG_MLAG)
		pim_str_sg_set(&sg, sg_str);

	if (PIM_DEBUG_MLAG)
		zlog_debug("peer MLAG mroute add %s:%s cost %d",
			   msg->vrf_name, sg_str, msg->cost_to_rp);

	/* XXX - this is not correct. we MUST cache updates to avoid losing
	 * an entry because of race conditions with the peer switch.
	 */
	vrf = vrf_lookup_by_name(msg->vrf_name);
	if (!vrf) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("peer MLAG mroute add failed %s:%s; no vrf",
				   msg->vrf_name, sg_str);
		return;
	}
	pim = vrf->info;

	up = pim_upstream_find(pim, &sg);
	if (up) {
		/* upstream already exists; create peer reference if it
		 * doesn't already exist.
		 */
		if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
			pim_upstream_ref(up, PIM_UPSTREAM_FLAG_MASK_MLAG_PEER,
					 __func__);
	} else {
		PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags);
		up = pim_upstream_add(pim, &sg, NULL /*iif*/, flags, __func__,
				      NULL /*if_ch*/);

		if (!up) {
			if (PIM_DEBUG_MLAG)
				zlog_debug("peer MLAG mroute add failed %s:%s",
					   vrf->name, sg_str);
			return;
		}
	}
	/* cache the peer's RPF cost; it feeds the cost comparison in the
	 * DF election below
	 */
	up->mlag.peer_mrib_metric = msg->cost_to_rp;
	pim_mlag_up_df_role_elect(pim, up);
}
305
306 /* Handle upstream entry del from the peer MLAG switch -
307 * - peer reference is removed. this can result in the upstream
308 * being deleted altogether.
309 * - if a local entry continues to exisy and has a MLAG OIF DF election
310 * is re-run (at the end of which the local entry will be the DF).
311 */
312 static struct pim_upstream *pim_mlag_up_peer_deref(struct pim_instance *pim,
313 struct pim_upstream *up)
314 {
315 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
316 return up;
317
318 PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(up->flags);
319 up = pim_upstream_del(pim, up, __func__);
320 if (up)
321 pim_mlag_up_df_role_elect(pim, up);
322
323 return up;
324 }
325
326 static void pim_mlag_up_peer_del(struct mlag_mroute_del *msg)
327 {
328 struct pim_upstream *up;
329 struct pim_instance *pim;
330 struct prefix_sg sg;
331 struct vrf *vrf;
332 char sg_str[PIM_SG_LEN];
333
334 memset(&sg, 0, sizeof(struct prefix_sg));
335 sg.src.s_addr = htonl(msg->source_ip);
336 sg.grp.s_addr = htonl(msg->group_ip);
337 if (PIM_DEBUG_MLAG)
338 pim_str_sg_set(&sg, sg_str);
339
340 if (PIM_DEBUG_MLAG)
341 zlog_debug("peer MLAG mroute del %s:%s", msg->vrf_name,
342 sg_str);
343
344 vrf = vrf_lookup_by_name(msg->vrf_name);
345 if (!vrf) {
346 if (PIM_DEBUG_MLAG)
347 zlog_debug("peer MLAG mroute del skipped %s:%s; no vrf",
348 msg->vrf_name, sg_str);
349 return;
350 }
351 pim = vrf->info;
352
353 up = pim_upstream_find(pim, &sg);
354 if (!up) {
355 if (PIM_DEBUG_MLAG)
356 zlog_debug("peer MLAG mroute del skipped %s:%s; no up",
357 vrf->name, sg_str);
358 return;
359 }
360
361 (void)pim_mlag_up_peer_deref(pim, up);
362 }
363
/* When we lose connection to the local MLAG daemon we can drop all peer
 * references.
 */
static void pim_mlag_up_peer_del_all(void)
{
	struct list *temp = list_new();
	struct pim_upstream *up;
	struct vrf *vrf;
	struct pim_instance *pim;

	/*
	 * So why these gyrations?
	 * pim->upstream_head has the list of *,G and S,G
	 * that are in the system. The problem of course
	 * is that it is an ordered list:
	 * (*,G1) -> (S1,G1) -> (S2,G2) -> (S3, G2) -> (*,G2) -> (S1,G2)
	 * And the *,G1 has pointers to S1,G1 and S2,G1
	 * if we delete *,G1 then we have a situation where
	 * S1,G1 and S2,G2 can be deleted as well. Then a
	 * simple ALL_LIST_ELEMENTS will have the next listnode
	 * pointer become invalid and we crash.
	 * So let's grab the list of MLAG_PEER upstreams
	 * add a refcount put on another list and delete safely
	 */
	RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
		pim = vrf->info;
		frr_each (rb_pim_upstream, &pim->upstream_head, up) {
			if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
				continue;
			listnode_add(temp, up);
			/*
			 * Add a reference since we are adding to this
			 * list for deletion; it keeps the entry alive
			 * until we get to it below.
			 */
			up->ref_count++;
		}

		/* drain the temp list: drop the peer reference, then the
		 * extra reference taken above
		 */
		while (temp->count) {
			up = listnode_head(temp);
			listnode_delete(temp, up);

			up = pim_mlag_up_peer_deref(pim, up);
			/*
			 * This is the deletion of the reference added
			 * above
			 */
			if (up)
				pim_upstream_del(pim, up, __func__);
		}
	}

	list_delete(&temp);
}
417
/* Send upstream entry to the local MLAG daemon (which will subsequently
 * send it to the peer MLAG switch).
 * The message is serialized onto the MLAG fifo and the zebra pthread is
 * signalled to drain it; nothing is sent if the local mlagd session is
 * down. Field order below must match struct mlag_mroute_add on the wire.
 */
static void pim_mlag_up_local_add_send(struct pim_instance *pim,
		struct pim_upstream *up)
{
	struct stream *s = NULL;
	struct vrf *vrf = pim->vrf;

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
		return;

	s = stream_new(sizeof(struct mlag_mroute_add) + PIM_MLAG_METADATA_LEN);
	if (!s)
		return;

	if (PIM_DEBUG_MLAG)
		zlog_debug("local MLAG mroute add %s:%s",
			   vrf->name, up->sg_str);

	++router->mlag_stats.msg.mroute_add_tx;

	stream_putl(s, MLAG_MROUTE_ADD);
	stream_put(s, vrf->name, VRF_NAMSIZ);
	stream_putl(s, ntohl(up->sg.src.s_addr));
	stream_putl(s, ntohl(up->sg.grp.s_addr));

	stream_putl(s, pim_up_mlag_local_cost(up));
	/* XXX - who is adding */
	stream_putl(s, MLAG_OWNER_VXLAN);
	/* XXX - am_i_DR field should be removed */
	stream_putc(s, false);
	stream_putc(s, !(PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags)));
	stream_putl(s, vrf->vrf_id);
	/* XXX - this field is a No-op for VXLAN*/
	stream_put(s, NULL, INTERFACE_NAMSIZ);

	stream_fifo_push_safe(router->mlag_fifo, s);
	pim_mlag_signal_zpthread();
}
458
459 static void pim_mlag_up_local_del_send(struct pim_instance *pim,
460 struct pim_upstream *up)
461 {
462 struct stream *s = NULL;
463 struct vrf *vrf = pim->vrf;
464
465 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
466 return;
467
468 s = stream_new(sizeof(struct mlag_mroute_del) + PIM_MLAG_METADATA_LEN);
469 if (!s)
470 return;
471
472 if (PIM_DEBUG_MLAG)
473 zlog_debug("local MLAG mroute del %s:%s",
474 vrf->name, up->sg_str);
475
476 ++router->mlag_stats.msg.mroute_del_tx;
477
478 stream_putl(s, MLAG_MROUTE_DEL);
479 stream_put(s, vrf->name, VRF_NAMSIZ);
480 stream_putl(s, ntohl(up->sg.src.s_addr));
481 stream_putl(s, ntohl(up->sg.grp.s_addr));
482 /* XXX - who is adding */
483 stream_putl(s, MLAG_OWNER_VXLAN);
484 stream_putl(s, vrf->vrf_id);
485 /* XXX - this field is a No-op for VXLAN */
486 stream_put(s, NULL, INTERFACE_NAMSIZ);
487
488 /* XXX - is this the the most optimal way to do things */
489 stream_fifo_push_safe(router->mlag_fifo, s);
490 pim_mlag_signal_zpthread();
491 }
492
493
/* Called when a local upstream entry is created or if its cost changes.
 * Election runs first so the DF bit carried in the add message is current.
 */
void pim_mlag_up_local_add(struct pim_instance *pim,
		struct pim_upstream *up)
{
	pim_mlag_up_df_role_elect(pim, up);
	/* XXX - need to add some dup checks here */
	pim_mlag_up_local_add_send(pim, up);
}
502
/* Called when local MLAG reference is removed from an upstream entry.
 * Re-run the election (the entry may no longer be MLAG-local) and then
 * withdraw it from the local MLAG daemon.
 */
void pim_mlag_up_local_del(struct pim_instance *pim,
		struct pim_upstream *up)
{
	pim_mlag_up_df_role_elect(pim, up);
	pim_mlag_up_local_del_send(pim, up);
}
510
511 /* When connection to local MLAG daemon is established all the local
512 * MLAG upstream entries are replayed to it.
513 */
514 static void pim_mlag_up_local_replay(void)
515 {
516 struct pim_upstream *up;
517 struct vrf *vrf;
518 struct pim_instance *pim;
519
520 RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
521 pim = vrf->info;
522 frr_each (rb_pim_upstream, &pim->upstream_head, up) {
523 if (pim_up_mlag_is_local(up))
524 pim_mlag_up_local_add_send(pim, up);
525 }
526 }
527 }
528
529 /* on local/peer mlag connection and role changes the DF status needs
530 * to be re-evaluated
531 */
532 static void pim_mlag_up_local_reeval(bool mlagd_send, const char *reason_code)
533 {
534 struct pim_upstream *up;
535 struct vrf *vrf;
536 struct pim_instance *pim;
537
538 if (PIM_DEBUG_MLAG)
539 zlog_debug("%s re-run DF election because of %s",
540 __func__, reason_code);
541 RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
542 pim = vrf->info;
543 frr_each (rb_pim_upstream, &pim->upstream_head, up) {
544 if (!pim_up_mlag_is_local(up))
545 continue;
546 /* if role changes re-send to peer */
547 if (pim_mlag_up_df_role_elect(pim, up) &&
548 mlagd_send)
549 pim_mlag_up_local_add_send(pim, up);
550 }
551 }
552 }
553
554 /*****************PIM Actions for MLAG state changes**********************/
555
556 /* notify the anycast VTEP component about state changes */
557 static inline void pim_mlag_vxlan_state_update(void)
558 {
559 bool enable = !!(router->mlag_flags & PIM_MLAGF_STATUS_RXED);
560 bool peer_state = !!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP);
561
562 pim_vxlan_mlag_update(enable, peer_state, router->mlag_role,
563 router->peerlink_rif_p, &router->local_vtep_ip);
564
565 }
566
567 /**************End of PIM Actions for MLAG State changes******************/
568
569
570 /********************API to process PIM MLAG Data ************************/
571
/*
 * Process a MLAG_STATUS_UPDATE from the local MLAG daemon: cache the new
 * role/peer-session/peerlink state, notify the vxlan component, and
 * re-run DF election where the change warrants it.
 */
static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
{
	bool role_chg = false;
	bool state_chg = false;
	bool notify_vxlan = false;
	struct interface *peerlink_rif_p;
	char buf[MLAG_ROLE_STRSIZE];

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: msg dump: my_role: %s, peer_state: %s",
			   __func__,
			   mlag_role2str(msg.my_role, buf, sizeof(buf)),
			   (msg.peer_state == MLAG_STATE_RUNNING ? "RUNNING"
								 : "DOWN"));

	/* status updates are only meaningful while the local session is up */
	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}
	++router->mlag_stats.msg.mlag_status_updates;

	/* evaluate the changes first */
	if (router->mlag_role != msg.my_role) {
		role_chg = true;
		notify_vxlan = true;
		router->mlag_role = msg.my_role;
	}

	strlcpy(router->peerlink_rif, msg.peerlink_rif,
		sizeof(router->peerlink_rif));

	/* XXX - handle the case where we may rx the interface name from the
	 * MLAG daemon before we get the interface from zebra.
	 */
	peerlink_rif_p = if_lookup_by_name(router->peerlink_rif, VRF_DEFAULT);
	if (router->peerlink_rif_p != peerlink_rif_p) {
		router->peerlink_rif_p = peerlink_rif_p;
		notify_vxlan = true;
	}

	if (msg.peer_state == MLAG_STATE_RUNNING) {
		if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
			state_chg = true;
			notify_vxlan = true;
			router->mlag_flags |= PIM_MLAGF_PEER_CONN_UP;
		}
		router->connected_to_mlag = true;
	} else {
		if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) {
			++router->mlag_stats.peer_session_downs;
			state_chg = true;
			notify_vxlan = true;
			router->mlag_flags &= ~PIM_MLAGF_PEER_CONN_UP;
		}
		router->connected_to_mlag = false;
	}

	/* apply the changes */
	/* when connection to mlagd comes up we hold send mroutes till we have
	 * rxed the status and had a chance to re-valuate DF state
	 */
	if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) {
		/* first status after session-up: full re-eval and replay */
		router->mlag_flags |= PIM_MLAGF_STATUS_RXED;
		pim_mlag_vxlan_state_update();
		/* on session up re-eval DF status */
		pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_up");
		/* replay all the upstream entries to the local MLAG daemon */
		pim_mlag_up_local_replay();
		return;
	}

	if (notify_vxlan)
		pim_mlag_vxlan_state_update();

	if (state_chg) {
		if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP))
			/* when a connection goes down the primary takes over
			 * DF role for all entries
			 */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
					"peer_down");
		else
			/* XXX - when session comes up we need to wait for
			 * PEER_REPLAY_DONE before running re-election on
			 * local-mlag entries that are missing peer reference
			 */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
					"peer_up");
	} else if (role_chg) {
		/* MLAG role changed without a state change */
		pim_mlag_up_local_reeval(true /*mlagd_send*/, "role_chg");
	}
}
667
/*
 * Process a MLAG_PEER_FRR_STATUS update: track whether zebra is up on the
 * peer switch and re-run DF election on transitions.
 */
static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
{
	if (PIM_DEBUG_MLAG)
		zlog_debug(
			"%s: msg dump: peer_frr_state: %s", __func__,
			(msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN"));

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}
	++router->mlag_stats.msg.peer_zebra_status_updates;

	/* evaluate the changes first */
	if (msg.frr_state == MLAG_FRR_STATE_UP) {
		if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) {
			router->mlag_flags |= PIM_MLAGF_PEER_ZEBRA_UP;
			/* XXX - when peer zebra comes up we need to wait
			 * for some time to let the peer setup MDTs before
			 * relinquishing DF status
			 */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
					"zebra_up");
		}
	} else {
		if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) {
			++router->mlag_stats.peer_zebra_downs;
			router->mlag_flags &= ~PIM_MLAGF_PEER_ZEBRA_UP;
			/* when a peer zebra goes down we assume DF role */
			pim_mlag_up_local_reeval(true /*mlagd_send*/,
					"zebra_down");
		}
	}
}
704
705 static void pim_mlag_process_vxlan_update(struct mlag_vxlan *msg)
706 {
707 char addr_buf1[INET_ADDRSTRLEN];
708 char addr_buf2[INET_ADDRSTRLEN];
709 uint32_t local_ip;
710
711 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
712 if (PIM_DEBUG_MLAG)
713 zlog_debug("%s: msg ignored mlagd process state down",
714 __func__);
715 return;
716 }
717
718 ++router->mlag_stats.msg.vxlan_updates;
719 router->anycast_vtep_ip.s_addr = htonl(msg->anycast_ip);
720 local_ip = htonl(msg->local_ip);
721 if (router->local_vtep_ip.s_addr != local_ip) {
722 router->local_vtep_ip.s_addr = local_ip;
723 pim_mlag_vxlan_state_update();
724 }
725
726 if (PIM_DEBUG_MLAG) {
727 inet_ntop(AF_INET, &router->local_vtep_ip,
728 addr_buf1, INET_ADDRSTRLEN);
729 inet_ntop(AF_INET, &router->anycast_vtep_ip,
730 addr_buf2, INET_ADDRSTRLEN);
731
732 zlog_debug("%s: msg dump: local-ip:%s, anycast-ip:%s",
733 __func__, addr_buf1, addr_buf2);
734 }
735 }
736
/*
 * Process a MLAG_MROUTE_ADD from the peer switch. The debug dump runs
 * before the session-up guard so dropped messages are still visible.
 */
static void pim_mlag_process_mroute_add(struct mlag_mroute_add msg)
{
	if (PIM_DEBUG_MLAG) {
		struct prefix_sg sg;

		sg.grp.s_addr = ntohl(msg.group_ip);
		sg.src.s_addr = ntohl(msg.source_ip);

		zlog_debug(
			"%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x (%pSG4) cost: %u",
			__func__, msg.vrf_name, msg.source_ip, msg.group_ip,
			&sg, msg.cost_to_rp);
		zlog_debug(
			"(%pSG4)owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s",
			&sg, msg.owner_id, msg.am_i_dr, msg.am_i_dual_active,
			msg.vrf_id, msg.intf_name);
	}

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}

	++router->mlag_stats.msg.mroute_add_rx;

	pim_mlag_up_peer_add(&msg);
}
766
/*
 * Process a MLAG_MROUTE_DEL from the peer switch; mirrors
 * pim_mlag_process_mroute_add.
 */
static void pim_mlag_process_mroute_del(struct mlag_mroute_del msg)
{
	if (PIM_DEBUG_MLAG) {
		struct prefix_sg sg;

		sg.grp.s_addr = ntohl(msg.group_ip);
		sg.src.s_addr = ntohl(msg.source_ip);
		zlog_debug(
			"%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x(%pSG4)",
			__func__, msg.vrf_name, msg.source_ip, msg.group_ip,
			&sg);
		zlog_debug("(%pSG4)owner_id: %d, vrf_id: 0x%x intf_name: %s",
			   &sg, msg.owner_id, msg.vrf_id, msg.intf_name);
	}

	if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
		if (PIM_DEBUG_MLAG)
			zlog_debug("%s: msg ignored mlagd process state down",
				   __func__);
		return;
	}

	++router->mlag_stats.msg.mroute_del_rx;

	pim_mlag_up_peer_del(&msg);
}
793
/*
 * Decode and dispatch one MLAG message received from zebra.
 * Returns 0 on success (unknown types are silently ignored) or the
 * non-zero decode error from the mlag_lib helpers.
 * Bulk variants carry msg_cnt back-to-back payloads in one stream.
 */
int pim_zebra_mlag_handle_msg(struct stream *s, int len)
{
	struct mlag_msg mlag_msg;
	char buf[80];
	int rc = 0;
	size_t length;

	rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg, &length);
	if (rc)
		return (rc);

	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: Received msg type: %s length: %d, bulk_cnt: %d",
			   __func__,
			   mlag_lib_msgid_to_str(mlag_msg.msg_type, buf,
						 sizeof(buf)),
			   mlag_msg.data_len, mlag_msg.msg_cnt);

	switch (mlag_msg.msg_type) {
	case MLAG_STATUS_UPDATE: {
		struct mlag_status msg;

		rc = mlag_lib_decode_mlag_status(s, &msg);
		if (rc)
			return (rc);
		pim_mlag_process_mlagd_state_change(msg);
	} break;
	case MLAG_PEER_FRR_STATUS: {
		struct mlag_frr_status msg;

		rc = mlag_lib_decode_frr_status(s, &msg);
		if (rc)
			return (rc);
		pim_mlag_process_peer_frr_state_change(msg);
	} break;
	case MLAG_VXLAN_UPDATE: {
		struct mlag_vxlan msg;

		rc = mlag_lib_decode_vxlan_update(s, &msg);
		if (rc)
			return rc;
		pim_mlag_process_vxlan_update(&msg);
	} break;
	case MLAG_MROUTE_ADD: {
		struct mlag_mroute_add msg;

		rc = mlag_lib_decode_mroute_add(s, &msg, &length);
		if (rc)
			return (rc);
		pim_mlag_process_mroute_add(msg);
	} break;
	case MLAG_MROUTE_DEL: {
		struct mlag_mroute_del msg;

		rc = mlag_lib_decode_mroute_del(s, &msg, &length);
		if (rc)
			return (rc);
		pim_mlag_process_mroute_del(msg);
	} break;
	case MLAG_MROUTE_ADD_BULK: {
		struct mlag_mroute_add msg;
		int i;

		/* decode each bulked payload in sequence */
		for (i = 0; i < mlag_msg.msg_cnt; i++) {
			rc = mlag_lib_decode_mroute_add(s, &msg, &length);
			if (rc)
				return (rc);
			pim_mlag_process_mroute_add(msg);
		}
	} break;
	case MLAG_MROUTE_DEL_BULK: {
		struct mlag_mroute_del msg;
		int i;

		for (i = 0; i < mlag_msg.msg_cnt; i++) {
			rc = mlag_lib_decode_mroute_del(s, &msg, &length);
			if (rc)
				return (rc);
			pim_mlag_process_mroute_del(msg);
		}
	} break;
	default:
		break;
	}
	return 0;
}
880
/****************End of PIM Message processing handler********************/
882
883 int pim_zebra_mlag_process_up(void)
884 {
885 if (PIM_DEBUG_MLAG)
886 zlog_debug("%s: Received Process-Up from Mlag", __func__);
887
888 /*
889 * Incase of local MLAG restart, PIM needs to replay all the data
890 * since MLAG is empty.
891 */
892 router->connected_to_mlag = true;
893 router->mlag_flags |= PIM_MLAGF_LOCAL_CONN_UP;
894 return 0;
895 }
896
897 static void pim_mlag_param_reset(void)
898 {
899 /* reset the cached params and stats */
900 router->mlag_flags &= ~(PIM_MLAGF_STATUS_RXED |
901 PIM_MLAGF_LOCAL_CONN_UP |
902 PIM_MLAGF_PEER_CONN_UP |
903 PIM_MLAGF_PEER_ZEBRA_UP);
904 router->local_vtep_ip.s_addr = INADDR_ANY;
905 router->anycast_vtep_ip.s_addr = INADDR_ANY;
906 router->mlag_role = MLAG_ROLE_NONE;
907 memset(&router->mlag_stats.msg, 0, sizeof(router->mlag_stats.msg));
908 router->peerlink_rif[0] = '\0';
909 }
910
/*
 * The local MLAG daemon session went down: account the implied peer
 * session/zebra downs, reset cached state, re-run DF election (we assume
 * DF) and flush all peer-originated upstream references.
 */
int pim_zebra_mlag_process_down(void)
{
	if (PIM_DEBUG_MLAG)
		zlog_debug("%s: Received Process-Down from Mlag", __func__);

	/* Local MLAG is down, reset peer data and forward the traffic if
	 * we are DR
	 */
	if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)
		++router->mlag_stats.peer_session_downs;
	if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)
		++router->mlag_stats.peer_zebra_downs;
	router->connected_to_mlag = false;
	pim_mlag_param_reset();
	/* on mlagd session down re-eval DF status */
	pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_down");
	/* flush all peer references */
	pim_mlag_up_peer_del_all();
	/* notify the vxlan component */
	pim_mlag_vxlan_state_update();
	return 0;
}
933
934 static int pim_mlag_register_handler(struct thread *thread)
935 {
936 uint32_t bit_mask = 0;
937
938 if (!zclient)
939 return -1;
940
941 SET_FLAG(bit_mask, (1 << MLAG_STATUS_UPDATE));
942 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD));
943 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL));
944 SET_FLAG(bit_mask, (1 << MLAG_DUMP));
945 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD_BULK));
946 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL_BULK));
947 SET_FLAG(bit_mask, (1 << MLAG_PIM_CFG_DUMP));
948 SET_FLAG(bit_mask, (1 << MLAG_VXLAN_UPDATE));
949 SET_FLAG(bit_mask, (1 << MLAG_PEER_FRR_STATUS));
950
951 if (PIM_DEBUG_MLAG)
952 zlog_debug("%s: Posting Client Register to MLAG mask: 0x%x",
953 __func__, bit_mask);
954
955 zclient_send_mlag_register(zclient, bit_mask);
956 return 0;
957 }
958
959 void pim_mlag_register(void)
960 {
961 if (router->mlag_process_register)
962 return;
963
964 router->mlag_process_register = true;
965
966 thread_add_event(router->master, pim_mlag_register_handler, NULL, 0,
967 NULL);
968 }
969
970 static int pim_mlag_deregister_handler(struct thread *thread)
971 {
972 if (!zclient)
973 return -1;
974
975 if (PIM_DEBUG_MLAG)
976 zlog_debug("%s: Posting Client De-Register to MLAG from PIM",
977 __func__);
978 router->connected_to_mlag = false;
979 zclient_send_mlag_deregister(zclient);
980 return 0;
981 }
982
983 void pim_mlag_deregister(void)
984 {
985 /* if somebody still interested in the MLAG channel skip de-reg */
986 if (router->pim_mlag_intf_cnt || pim_vxlan_do_mlag_reg())
987 return;
988
989 /* not registered; nothing do */
990 if (!router->mlag_process_register)
991 return;
992
993 router->mlag_process_register = false;
994
995 thread_add_event(router->master, pim_mlag_deregister_handler, NULL, 0,
996 NULL);
997 }
998
999 void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp)
1000 {
1001 if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == true)
1002 return;
1003
1004 if (PIM_DEBUG_MLAG)
1005 zlog_debug("%s: Configuring active-active on Interface: %s",
1006 __func__, "NULL");
1007
1008 pim_ifp->activeactive = true;
1009 if (pim_ifp->pim)
1010 pim_ifp->pim->inst_mlag_intf_cnt++;
1011
1012 router->pim_mlag_intf_cnt++;
1013 if (PIM_DEBUG_MLAG)
1014 zlog_debug(
1015 "%s: Total MLAG configured Interfaces on router: %d, Inst: %d",
1016 __func__, router->pim_mlag_intf_cnt,
1017 pim_ifp->pim->inst_mlag_intf_cnt);
1018
1019 if (router->pim_mlag_intf_cnt == 1) {
1020 /*
1021 * atleast one Interface is configured for MLAG, send register
1022 * to Zebra for receiving MLAG Updates
1023 */
1024 pim_mlag_register();
1025 }
1026 }
1027
1028 void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp)
1029 {
1030 if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == false)
1031 return;
1032
1033 if (PIM_DEBUG_MLAG)
1034 zlog_debug("%s: UnConfiguring active-active on Interface: %s",
1035 __func__, "NULL");
1036
1037 pim_ifp->activeactive = false;
1038 pim_ifp->pim->inst_mlag_intf_cnt--;
1039
1040 router->pim_mlag_intf_cnt--;
1041 if (PIM_DEBUG_MLAG)
1042 zlog_debug(
1043 "%s: Total MLAG configured Interfaces on router: %d, Inst: %d",
1044 __func__, router->pim_mlag_intf_cnt,
1045 pim_ifp->pim->inst_mlag_intf_cnt);
1046
1047 if (router->pim_mlag_intf_cnt == 0) {
1048 /*
1049 * all the Interfaces are MLAG un-configured, post MLAG
1050 * De-register to Zebra
1051 */
1052 pim_mlag_deregister();
1053 pim_mlag_param_reset();
1054 }
1055 }
1056
1057
1058 void pim_instance_mlag_init(struct pim_instance *pim)
1059 {
1060 if (!pim)
1061 return;
1062
1063 pim->inst_mlag_intf_cnt = 0;
1064 }
1065
1066
1067 void pim_instance_mlag_terminate(struct pim_instance *pim)
1068 {
1069 struct interface *ifp;
1070
1071 if (!pim)
1072 return;
1073
1074 FOR_ALL_INTERFACES (pim->vrf, ifp) {
1075 struct pim_interface *pim_ifp = ifp->info;
1076
1077 if (!pim_ifp || pim_ifp->activeactive == false)
1078 continue;
1079
1080 pim_if_unconfigure_mlag_dualactive(pim_ifp);
1081 }
1082 pim->inst_mlag_intf_cnt = 0;
1083 }
1084
1085 void pim_mlag_terminate(void)
1086 {
1087 stream_free(router->mlag_stream);
1088 router->mlag_stream = NULL;
1089 stream_fifo_free(router->mlag_fifo);
1090 router->mlag_fifo = NULL;
1091 }
1092
1093 void pim_mlag_init(void)
1094 {
1095 pim_mlag_param_reset();
1096 router->pim_mlag_intf_cnt = 0;
1097 router->connected_to_mlag = false;
1098 router->mlag_fifo = stream_fifo_new();
1099 router->zpthread_mlag_write = NULL;
1100 router->mlag_stream = stream_new(MLAG_BUF_LIMIT);
1101 }