]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_mlag.c
Merge pull request #10445 from ton31337/fix/frr-reload_stop_disabled_daemons
[mirror_frr.git] / pimd / pim_mlag.c
1 /*
2 * This is an implementation of PIM MLAG Functionality
3 *
4 * Module name: PIM MLAG
5 *
6 * Author: sathesh Kumar karra <sathk@cumulusnetworks.com>
7 *
8 * Copyright (C) 2019 Cumulus Networks http://www.cumulusnetworks.com
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 * more details.
19 *
20 * You should have received a copy of the GNU General Public License along
21 * with this program; see the file COPYING; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24 #include <zebra.h>
25
26 #include "pimd.h"
27 #include "pim_mlag.h"
28 #include "pim_upstream.h"
29 #include "pim_vxlan.h"
30
31 extern struct zclient *zclient;
32
33 #define PIM_MLAG_METADATA_LEN 4
34
/*********************Actual Data processing *****************************/
36 /* TBD: There can be duplicate updates to FIB***/
/*
 * Log (when MLAG debugging is enabled) that the dual-active interface of
 * if-channel `ch` is being added, then call pim_channel_update_oif_mute()
 * for that interface on `ch_oil`.
 *
 * Both arguments are parenthesized in the expansion so that any pointer
 * expression (e.g. `&chan` or `p + 1`) can be passed safely.  `ch` is
 * expanded more than once, so it must not have side effects.
 */
#define PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil) \
	do { \
		if (PIM_DEBUG_MLAG) \
			zlog_debug( \
				"%s: add Dual-active Interface to %s " \
				"to oil:%s", \
				__func__, (ch)->interface->name, \
				(ch)->sg_str); \
		pim_channel_update_oif_mute((ch_oil), \
					    (ch)->interface->info); \
	} while (0)
46
/*
 * Log (when MLAG debugging is enabled) that the dual-active interface of
 * if-channel `ch` is being removed, then call pim_channel_update_oif_mute()
 * for that interface on `ch_oil`.
 *
 * Both arguments are parenthesized in the expansion so that any pointer
 * expression (e.g. `&chan` or `p + 1`) can be passed safely.  `ch` is
 * expanded more than once, so it must not have side effects.
 */
#define PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil) \
	do { \
		if (PIM_DEBUG_MLAG) \
			zlog_debug( \
				"%s: del Dual-active Interface to %s " \
				"to oil:%s", \
				__func__, (ch)->interface->name, \
				(ch)->sg_str); \
		pim_channel_update_oif_mute((ch_oil), \
					    (ch)->interface->info); \
	} while (0)
56
57
58 static void pim_mlag_calculate_df_for_ifchannels(struct pim_upstream *up,
59 bool is_df)
60 {
61 struct listnode *chnode;
62 struct listnode *chnextnode;
63 struct pim_ifchannel *ch;
64 struct pim_interface *pim_ifp = NULL;
65 struct channel_oil *ch_oil = NULL;
66
67 ch_oil = (up) ? up->channel_oil : NULL;
68
69 if (!ch_oil)
70 return;
71
72 if (PIM_DEBUG_MLAG)
73 zlog_debug("%s: Calculating DF for Dual active if-channel%s",
74 __func__, up->sg_str);
75
76 for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) {
77 pim_ifp = (ch->interface) ? ch->interface->info : NULL;
78 if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp))
79 continue;
80
81 if (is_df)
82 PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil);
83 else
84 PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil);
85 }
86 }
87
88 static void pim_mlag_inherit_mlag_flags(struct pim_upstream *up, bool is_df)
89 {
90 struct listnode *listnode;
91 struct pim_upstream *child;
92 struct listnode *chnode;
93 struct listnode *chnextnode;
94 struct pim_ifchannel *ch;
95 struct pim_interface *pim_ifp = NULL;
96 struct channel_oil *ch_oil = NULL;
97
98 if (PIM_DEBUG_MLAG)
99 zlog_debug("%s: Updating DF for uptream:%s children", __func__,
100 up->sg_str);
101
102
103 for (ALL_LIST_ELEMENTS(up->ifchannels, chnode, chnextnode, ch)) {
104 pim_ifp = (ch->interface) ? ch->interface->info : NULL;
105 if (!pim_ifp || !PIM_I_am_DualActive(pim_ifp))
106 continue;
107
108 for (ALL_LIST_ELEMENTS_RO(up->sources, listnode, child)) {
109 if (PIM_DEBUG_MLAG)
110 zlog_debug("%s: Updating DF for child:%s",
111 __func__, child->sg_str);
112 ch_oil = (child) ? child->channel_oil : NULL;
113
114 if (!ch_oil)
115 continue;
116
117 if (is_df)
118 PIM_MLAG_ADD_OIF_TO_OIL(ch, ch_oil);
119 else
120 PIM_MLAG_DEL_OIF_TO_OIL(ch, ch_oil);
121 }
122 }
123 }
124
125 /******************************* pim upstream sync **************************/
126 /* Update DF role for the upstream entry and return true on role change */
127 bool pim_mlag_up_df_role_update(struct pim_instance *pim,
128 struct pim_upstream *up, bool is_df, const char *reason)
129 {
130 struct channel_oil *c_oil = up->channel_oil;
131 bool old_is_df = !PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags);
132 struct pim_interface *vxlan_ifp;
133
134 if (is_df == old_is_df) {
135 if (PIM_DEBUG_MLAG)
136 zlog_debug(
137 "%s: Ignoring Role update for %s, since no change",
138 __func__, up->sg_str);
139 return false;
140 }
141
142 if (PIM_DEBUG_MLAG)
143 zlog_debug("local MLAG mroute %s role changed to %s based on %s",
144 up->sg_str, is_df ? "df" : "non-df", reason);
145
146 if (is_df)
147 PIM_UPSTREAM_FLAG_UNSET_MLAG_NON_DF(up->flags);
148 else
149 PIM_UPSTREAM_FLAG_SET_MLAG_NON_DF(up->flags);
150
151
152 /*
153 * This Upstream entry synced to peer Because of Dual-active
154 * Interface configuration
155 */
156 if (PIM_UPSTREAM_FLAG_TEST_MLAG_INTERFACE(up->flags)) {
157 pim_mlag_inherit_mlag_flags(up, is_df);
158 pim_mlag_calculate_df_for_ifchannels(up, is_df);
159 }
160
161 /* If the DF role has changed check if ipmr-lo needs to be
162 * muted/un-muted. Active-Active devices and vxlan termination
163 * devices (ipmr-lo) are suppressed on the non-DF.
164 * This may leave the mroute with the empty OIL in which case the
165 * the forwarding entry's sole purpose is to just blackhole the flow
166 * headed to the switch.
167 */
168 if (c_oil) {
169 vxlan_ifp = pim_vxlan_get_term_ifp(pim);
170 if (vxlan_ifp)
171 pim_channel_update_oif_mute(c_oil, vxlan_ifp);
172 }
173
174 /* If DF role changed on a (*,G) termination mroute update the
175 * associated DF role on the inherited (S,G) entries
176 */
177 if (pim_addr_is_any(up->sg.src) &&
178 PIM_UPSTREAM_FLAG_TEST_MLAG_VXLAN(up->flags))
179 pim_vxlan_inherit_mlag_flags(pim, up, true /* inherit */);
180
181 return true;
182 }
183
184 /* Run per-upstream entry DF election and return true on role change */
185 static bool pim_mlag_up_df_role_elect(struct pim_instance *pim,
186 struct pim_upstream *up)
187 {
188 bool is_df;
189 uint32_t peer_cost;
190 uint32_t local_cost;
191 bool rv;
192
193 if (!pim_up_mlag_is_local(up))
194 return false;
195
196 /* We are yet to rx a status update from the local MLAG daemon so
197 * we will assume DF status.
198 */
199 if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED))
200 return pim_mlag_up_df_role_update(pim, up,
201 true /*is_df*/, "mlagd-down");
202
203 /* If not connected to peer assume DF role on the MLAG primary
204 * switch (and non-DF on the secondary switch.
205 */
206 if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
207 is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
208 return pim_mlag_up_df_role_update(pim, up,
209 is_df, "peer-down");
210 }
211
212 /* If MLAG peer session is up but zebra is down on the peer
213 * assume DF role.
214 */
215 if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP))
216 return pim_mlag_up_df_role_update(pim, up,
217 true /*is_df*/, "zebra-down");
218
219 /* If we are connected to peer switch but don't have a mroute
220 * from it we have to assume non-DF role to avoid duplicates.
221 * Note: When the peer connection comes up we wait for initial
222 * replay to complete before moving "strays" i.e. local-mlag-mroutes
223 * without a peer reference to non-df role.
224 */
225 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
226 return pim_mlag_up_df_role_update(pim, up,
227 false /*is_df*/, "no-peer-mroute");
228
229 /* switch with the lowest RPF cost wins. if both switches have the same
230 * cost MLAG role is used as a tie breaker (MLAG primary wins).
231 */
232 peer_cost = up->mlag.peer_mrib_metric;
233 local_cost = pim_up_mlag_local_cost(up);
234 if (local_cost == peer_cost) {
235 is_df = (router->mlag_role == MLAG_ROLE_PRIMARY) ? true : false;
236 rv = pim_mlag_up_df_role_update(pim, up, is_df, "equal-cost");
237 } else {
238 is_df = (local_cost < peer_cost) ? true : false;
239 rv = pim_mlag_up_df_role_update(pim, up, is_df, "cost");
240 }
241
242 return rv;
243 }
244
245 /* Handle upstream entry add from the peer MLAG switch -
246 * - if a local entry doesn't exist one is created with reference
247 * _MLAG_PEER
248 * - if a local entry exists and has a MLAG OIF DF election is run.
249 * the non-DF switch stop forwarding traffic to MLAG devices.
250 */
251 static void pim_mlag_up_peer_add(struct mlag_mroute_add *msg)
252 {
253 struct pim_upstream *up;
254 struct pim_instance *pim;
255 int flags = 0;
256 pim_sgaddr sg;
257 struct vrf *vrf;
258
259 memset(&sg, 0, sizeof(sg));
260 sg.src.s_addr = htonl(msg->source_ip);
261 sg.grp.s_addr = htonl(msg->group_ip);
262
263 if (PIM_DEBUG_MLAG)
264 zlog_debug("peer MLAG mroute add %s:%pSG cost %d",
265 msg->vrf_name, &sg, msg->cost_to_rp);
266
267 /* XXX - this is not correct. we MUST cache updates to avoid losing
268 * an entry because of race conditions with the peer switch.
269 */
270 vrf = vrf_lookup_by_name(msg->vrf_name);
271 if (!vrf) {
272 if (PIM_DEBUG_MLAG)
273 zlog_debug(
274 "peer MLAG mroute add failed %s:%pSG; no vrf",
275 msg->vrf_name, &sg);
276 return;
277 }
278 pim = vrf->info;
279
280 up = pim_upstream_find(pim, &sg);
281 if (up) {
282 /* upstream already exists; create peer reference if it
283 * doesn't already exist.
284 */
285 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
286 pim_upstream_ref(up, PIM_UPSTREAM_FLAG_MASK_MLAG_PEER,
287 __func__);
288 } else {
289 PIM_UPSTREAM_FLAG_SET_MLAG_PEER(flags);
290 up = pim_upstream_add(pim, &sg, NULL /*iif*/, flags, __func__,
291 NULL /*if_ch*/);
292
293 if (!up) {
294 if (PIM_DEBUG_MLAG)
295 zlog_debug(
296 "peer MLAG mroute add failed %s:%pSG",
297 vrf->name, &sg);
298 return;
299 }
300 }
301 up->mlag.peer_mrib_metric = msg->cost_to_rp;
302 pim_mlag_up_df_role_elect(pim, up);
303 }
304
305 /* Handle upstream entry del from the peer MLAG switch -
306 * - peer reference is removed. this can result in the upstream
307 * being deleted altogether.
308 * - if a local entry continues to exisy and has a MLAG OIF DF election
309 * is re-run (at the end of which the local entry will be the DF).
310 */
311 static struct pim_upstream *pim_mlag_up_peer_deref(struct pim_instance *pim,
312 struct pim_upstream *up)
313 {
314 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
315 return up;
316
317 PIM_UPSTREAM_FLAG_UNSET_MLAG_PEER(up->flags);
318 up = pim_upstream_del(pim, up, __func__);
319 if (up)
320 pim_mlag_up_df_role_elect(pim, up);
321
322 return up;
323 }
324
325 static void pim_mlag_up_peer_del(struct mlag_mroute_del *msg)
326 {
327 struct pim_upstream *up;
328 struct pim_instance *pim;
329 pim_sgaddr sg;
330 struct vrf *vrf;
331
332 memset(&sg, 0, sizeof(sg));
333 sg.src.s_addr = htonl(msg->source_ip);
334 sg.grp.s_addr = htonl(msg->group_ip);
335
336 if (PIM_DEBUG_MLAG)
337 zlog_debug("peer MLAG mroute del %s:%pSG", msg->vrf_name, &sg);
338
339 vrf = vrf_lookup_by_name(msg->vrf_name);
340 if (!vrf) {
341 if (PIM_DEBUG_MLAG)
342 zlog_debug(
343 "peer MLAG mroute del skipped %s:%pSG; no vrf",
344 msg->vrf_name, &sg);
345 return;
346 }
347 pim = vrf->info;
348
349 up = pim_upstream_find(pim, &sg);
350 if (!up) {
351 if (PIM_DEBUG_MLAG)
352 zlog_debug(
353 "peer MLAG mroute del skipped %s:%pSG; no up",
354 vrf->name, &sg);
355 return;
356 }
357
358 (void)pim_mlag_up_peer_deref(pim, up);
359 }
360
361 /* When we lose connection to the local MLAG daemon we can drop all peer
362 * references.
363 */
364 static void pim_mlag_up_peer_del_all(void)
365 {
366 struct list *temp = list_new();
367 struct pim_upstream *up;
368 struct vrf *vrf;
369 struct pim_instance *pim;
370
371 /*
372 * So why these gyrations?
373 * pim->upstream_head has the list of *,G and S,G
374 * that are in the system. The problem of course
375 * is that it is an ordered list:
376 * (*,G1) -> (S1,G1) -> (S2,G2) -> (S3, G2) -> (*,G2) -> (S1,G2)
377 * And the *,G1 has pointers to S1,G1 and S2,G1
378 * if we delete *,G1 then we have a situation where
379 * S1,G1 and S2,G2 can be deleted as well. Then a
380 * simple ALL_LIST_ELEMENTS will have the next listnode
381 * pointer become invalid and we crash.
382 * So let's grab the list of MLAG_PEER upstreams
383 * add a refcount put on another list and delete safely
384 */
385 RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
386 pim = vrf->info;
387 frr_each (rb_pim_upstream, &pim->upstream_head, up) {
388 if (!PIM_UPSTREAM_FLAG_TEST_MLAG_PEER(up->flags))
389 continue;
390 listnode_add(temp, up);
391 /*
392 * Add a reference since we are adding to this
393 * list for deletion
394 */
395 up->ref_count++;
396 }
397
398 while (temp->count) {
399 up = listnode_head(temp);
400 listnode_delete(temp, up);
401
402 up = pim_mlag_up_peer_deref(pim, up);
403 /*
404 * This is the deletion of the reference added
405 * above
406 */
407 if (up)
408 pim_upstream_del(pim, up, __func__);
409 }
410 }
411
412 list_delete(&temp);
413 }
414
415 /* Send upstream entry to the local MLAG daemon (which will subsequently
416 * send it to the peer MLAG switch).
417 */
418 static void pim_mlag_up_local_add_send(struct pim_instance *pim,
419 struct pim_upstream *up)
420 {
421 struct stream *s = NULL;
422 struct vrf *vrf = pim->vrf;
423
424 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
425 return;
426
427 s = stream_new(sizeof(struct mlag_mroute_add) + PIM_MLAG_METADATA_LEN);
428 if (!s)
429 return;
430
431 if (PIM_DEBUG_MLAG)
432 zlog_debug("local MLAG mroute add %s:%s",
433 vrf->name, up->sg_str);
434
435 ++router->mlag_stats.msg.mroute_add_tx;
436
437 stream_putl(s, MLAG_MROUTE_ADD);
438 stream_put(s, vrf->name, VRF_NAMSIZ);
439 stream_putl(s, ntohl(up->sg.src.s_addr));
440 stream_putl(s, ntohl(up->sg.grp.s_addr));
441
442 stream_putl(s, pim_up_mlag_local_cost(up));
443 /* XXX - who is addding*/
444 stream_putl(s, MLAG_OWNER_VXLAN);
445 /* XXX - am_i_DR field should be removed */
446 stream_putc(s, false);
447 stream_putc(s, !(PIM_UPSTREAM_FLAG_TEST_MLAG_NON_DF(up->flags)));
448 stream_putl(s, vrf->vrf_id);
449 /* XXX - this field is a No-op for VXLAN*/
450 stream_put(s, NULL, INTERFACE_NAMSIZ);
451
452 stream_fifo_push_safe(router->mlag_fifo, s);
453 pim_mlag_signal_zpthread();
454 }
455
456 static void pim_mlag_up_local_del_send(struct pim_instance *pim,
457 struct pim_upstream *up)
458 {
459 struct stream *s = NULL;
460 struct vrf *vrf = pim->vrf;
461
462 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP))
463 return;
464
465 s = stream_new(sizeof(struct mlag_mroute_del) + PIM_MLAG_METADATA_LEN);
466 if (!s)
467 return;
468
469 if (PIM_DEBUG_MLAG)
470 zlog_debug("local MLAG mroute del %s:%s",
471 vrf->name, up->sg_str);
472
473 ++router->mlag_stats.msg.mroute_del_tx;
474
475 stream_putl(s, MLAG_MROUTE_DEL);
476 stream_put(s, vrf->name, VRF_NAMSIZ);
477 stream_putl(s, ntohl(up->sg.src.s_addr));
478 stream_putl(s, ntohl(up->sg.grp.s_addr));
479 /* XXX - who is adding */
480 stream_putl(s, MLAG_OWNER_VXLAN);
481 stream_putl(s, vrf->vrf_id);
482 /* XXX - this field is a No-op for VXLAN */
483 stream_put(s, NULL, INTERFACE_NAMSIZ);
484
485 /* XXX - is this the the most optimal way to do things */
486 stream_fifo_push_safe(router->mlag_fifo, s);
487 pim_mlag_signal_zpthread();
488 }
489
490
/*
 * Called when a local upstream entry is created or when its cost changes:
 * re-run the DF election, then send the entry to the local MLAG daemon.
 */
void pim_mlag_up_local_add(struct pim_instance *pim,
			   struct pim_upstream *up)
{
	/* election result is not needed; the add is sent regardless */
	(void)pim_mlag_up_df_role_elect(pim, up);

	/* XXX - need to add some dup checks here */
	pim_mlag_up_local_add_send(pim, up);
}
499
/*
 * Called when the local MLAG reference is removed from an upstream entry:
 * re-run the DF election, then send the delete to the local MLAG daemon.
 */
void pim_mlag_up_local_del(struct pim_instance *pim,
			   struct pim_upstream *up)
{
	/* election result is not needed; the del is sent regardless */
	(void)pim_mlag_up_df_role_elect(pim, up);

	pim_mlag_up_local_del_send(pim, up);
}
507
508 /* When connection to local MLAG daemon is established all the local
509 * MLAG upstream entries are replayed to it.
510 */
511 static void pim_mlag_up_local_replay(void)
512 {
513 struct pim_upstream *up;
514 struct vrf *vrf;
515 struct pim_instance *pim;
516
517 RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
518 pim = vrf->info;
519 frr_each (rb_pim_upstream, &pim->upstream_head, up) {
520 if (pim_up_mlag_is_local(up))
521 pim_mlag_up_local_add_send(pim, up);
522 }
523 }
524 }
525
526 /* on local/peer mlag connection and role changes the DF status needs
527 * to be re-evaluated
528 */
529 static void pim_mlag_up_local_reeval(bool mlagd_send, const char *reason_code)
530 {
531 struct pim_upstream *up;
532 struct vrf *vrf;
533 struct pim_instance *pim;
534
535 if (PIM_DEBUG_MLAG)
536 zlog_debug("%s re-run DF election because of %s",
537 __func__, reason_code);
538 RB_FOREACH(vrf, vrf_name_head, &vrfs_by_name) {
539 pim = vrf->info;
540 frr_each (rb_pim_upstream, &pim->upstream_head, up) {
541 if (!pim_up_mlag_is_local(up))
542 continue;
543 /* if role changes re-send to peer */
544 if (pim_mlag_up_df_role_elect(pim, up) &&
545 mlagd_send)
546 pim_mlag_up_local_add_send(pim, up);
547 }
548 }
549 }
550
551 /*****************PIM Actions for MLAG state changes**********************/
552
553 /* notify the anycast VTEP component about state changes */
554 static inline void pim_mlag_vxlan_state_update(void)
555 {
556 bool enable = !!(router->mlag_flags & PIM_MLAGF_STATUS_RXED);
557 bool peer_state = !!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP);
558
559 pim_vxlan_mlag_update(enable, peer_state, router->mlag_role,
560 router->peerlink_rif_p, &router->local_vtep_ip);
561
562 }
563
564 /**************End of PIM Actions for MLAG State changes******************/
565
566
567 /********************API to process PIM MLAG Data ************************/
568
569 static void pim_mlag_process_mlagd_state_change(struct mlag_status msg)
570 {
571 bool role_chg = false;
572 bool state_chg = false;
573 bool notify_vxlan = false;
574 struct interface *peerlink_rif_p;
575 char buf[MLAG_ROLE_STRSIZE];
576
577 if (PIM_DEBUG_MLAG)
578 zlog_debug("%s: msg dump: my_role: %s, peer_state: %s",
579 __func__,
580 mlag_role2str(msg.my_role, buf, sizeof(buf)),
581 (msg.peer_state == MLAG_STATE_RUNNING ? "RUNNING"
582 : "DOWN"));
583
584 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
585 if (PIM_DEBUG_MLAG)
586 zlog_debug("%s: msg ignored mlagd process state down",
587 __func__);
588 return;
589 }
590 ++router->mlag_stats.msg.mlag_status_updates;
591
592 /* evaluate the changes first */
593 if (router->mlag_role != msg.my_role) {
594 role_chg = true;
595 notify_vxlan = true;
596 router->mlag_role = msg.my_role;
597 }
598
599 strlcpy(router->peerlink_rif, msg.peerlink_rif,
600 sizeof(router->peerlink_rif));
601
602 /* XXX - handle the case where we may rx the interface name from the
603 * MLAG daemon before we get the interface from zebra.
604 */
605 peerlink_rif_p = if_lookup_by_name(router->peerlink_rif, VRF_DEFAULT);
606 if (router->peerlink_rif_p != peerlink_rif_p) {
607 router->peerlink_rif_p = peerlink_rif_p;
608 notify_vxlan = true;
609 }
610
611 if (msg.peer_state == MLAG_STATE_RUNNING) {
612 if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)) {
613 state_chg = true;
614 notify_vxlan = true;
615 router->mlag_flags |= PIM_MLAGF_PEER_CONN_UP;
616 }
617 router->connected_to_mlag = true;
618 } else {
619 if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP) {
620 ++router->mlag_stats.peer_session_downs;
621 state_chg = true;
622 notify_vxlan = true;
623 router->mlag_flags &= ~PIM_MLAGF_PEER_CONN_UP;
624 }
625 router->connected_to_mlag = false;
626 }
627
628 /* apply the changes */
629 /* when connection to mlagd comes up we hold send mroutes till we have
630 * rxed the status and had a chance to re-valuate DF state
631 */
632 if (!(router->mlag_flags & PIM_MLAGF_STATUS_RXED)) {
633 router->mlag_flags |= PIM_MLAGF_STATUS_RXED;
634 pim_mlag_vxlan_state_update();
635 /* on session up re-eval DF status */
636 pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_up");
637 /* replay all the upstream entries to the local MLAG daemon */
638 pim_mlag_up_local_replay();
639 return;
640 }
641
642 if (notify_vxlan)
643 pim_mlag_vxlan_state_update();
644
645 if (state_chg) {
646 if (!(router->mlag_flags & PIM_MLAGF_PEER_CONN_UP))
647 /* when a connection goes down the primary takes over
648 * DF role for all entries
649 */
650 pim_mlag_up_local_reeval(true /*mlagd_send*/,
651 "peer_down");
652 else
653 /* XXX - when session comes up we need to wait for
654 * PEER_REPLAY_DONE before running re-election on
655 * local-mlag entries that are missing peer reference
656 */
657 pim_mlag_up_local_reeval(true /*mlagd_send*/,
658 "peer_up");
659 } else if (role_chg) {
660 /* MLAG role changed without a state change */
661 pim_mlag_up_local_reeval(true /*mlagd_send*/, "role_chg");
662 }
663 }
664
665 static void pim_mlag_process_peer_frr_state_change(struct mlag_frr_status msg)
666 {
667 if (PIM_DEBUG_MLAG)
668 zlog_debug(
669 "%s: msg dump: peer_frr_state: %s", __func__,
670 (msg.frr_state == MLAG_FRR_STATE_UP ? "UP" : "DOWN"));
671
672 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
673 if (PIM_DEBUG_MLAG)
674 zlog_debug("%s: msg ignored mlagd process state down",
675 __func__);
676 return;
677 }
678 ++router->mlag_stats.msg.peer_zebra_status_updates;
679
680 /* evaluate the changes first */
681 if (msg.frr_state == MLAG_FRR_STATE_UP) {
682 if (!(router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)) {
683 router->mlag_flags |= PIM_MLAGF_PEER_ZEBRA_UP;
684 /* XXX - when peer zebra comes up we need to wait for
685 * for some time to let the peer setup MDTs before
686 * before relinquishing DF status
687 */
688 pim_mlag_up_local_reeval(true /*mlagd_send*/,
689 "zebra_up");
690 }
691 } else {
692 if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP) {
693 ++router->mlag_stats.peer_zebra_downs;
694 router->mlag_flags &= ~PIM_MLAGF_PEER_ZEBRA_UP;
695 /* when a peer zebra goes down we assume DF role */
696 pim_mlag_up_local_reeval(true /*mlagd_send*/,
697 "zebra_down");
698 }
699 }
700 }
701
702 static void pim_mlag_process_vxlan_update(struct mlag_vxlan *msg)
703 {
704 char addr_buf1[INET_ADDRSTRLEN];
705 char addr_buf2[INET_ADDRSTRLEN];
706 uint32_t local_ip;
707
708 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
709 if (PIM_DEBUG_MLAG)
710 zlog_debug("%s: msg ignored mlagd process state down",
711 __func__);
712 return;
713 }
714
715 ++router->mlag_stats.msg.vxlan_updates;
716 router->anycast_vtep_ip.s_addr = htonl(msg->anycast_ip);
717 local_ip = htonl(msg->local_ip);
718 if (router->local_vtep_ip.s_addr != local_ip) {
719 router->local_vtep_ip.s_addr = local_ip;
720 pim_mlag_vxlan_state_update();
721 }
722
723 if (PIM_DEBUG_MLAG) {
724 inet_ntop(AF_INET, &router->local_vtep_ip,
725 addr_buf1, INET_ADDRSTRLEN);
726 inet_ntop(AF_INET, &router->anycast_vtep_ip,
727 addr_buf2, INET_ADDRSTRLEN);
728
729 zlog_debug("%s: msg dump: local-ip:%s, anycast-ip:%s",
730 __func__, addr_buf1, addr_buf2);
731 }
732 }
733
734 static void pim_mlag_process_mroute_add(struct mlag_mroute_add msg)
735 {
736 if (PIM_DEBUG_MLAG) {
737 pim_sgaddr sg;
738
739 sg.grp.s_addr = ntohl(msg.group_ip);
740 sg.src.s_addr = ntohl(msg.source_ip);
741
742 zlog_debug(
743 "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x (%pSG) cost: %u",
744 __func__, msg.vrf_name, msg.source_ip, msg.group_ip,
745 &sg, msg.cost_to_rp);
746 zlog_debug(
747 "(%pSG)owner_id: %d, DR: %d, Dual active: %d, vrf_id: 0x%x intf_name: %s",
748 &sg, msg.owner_id, msg.am_i_dr, msg.am_i_dual_active,
749 msg.vrf_id, msg.intf_name);
750 }
751
752 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
753 if (PIM_DEBUG_MLAG)
754 zlog_debug("%s: msg ignored mlagd process state down",
755 __func__);
756 return;
757 }
758
759 ++router->mlag_stats.msg.mroute_add_rx;
760
761 pim_mlag_up_peer_add(&msg);
762 }
763
764 static void pim_mlag_process_mroute_del(struct mlag_mroute_del msg)
765 {
766 if (PIM_DEBUG_MLAG) {
767 pim_sgaddr sg;
768
769 sg.grp.s_addr = ntohl(msg.group_ip);
770 sg.src.s_addr = ntohl(msg.source_ip);
771 zlog_debug(
772 "%s: msg dump: vrf_name: %s, s.ip: 0x%x, g.ip: 0x%x(%pSG)",
773 __func__, msg.vrf_name, msg.source_ip, msg.group_ip,
774 &sg);
775 zlog_debug("(%pSG)owner_id: %d, vrf_id: 0x%x intf_name: %s",
776 &sg, msg.owner_id, msg.vrf_id, msg.intf_name);
777 }
778
779 if (!(router->mlag_flags & PIM_MLAGF_LOCAL_CONN_UP)) {
780 if (PIM_DEBUG_MLAG)
781 zlog_debug("%s: msg ignored mlagd process state down",
782 __func__);
783 return;
784 }
785
786 ++router->mlag_stats.msg.mroute_del_rx;
787
788 pim_mlag_up_peer_del(&msg);
789 }
790
791 int pim_zebra_mlag_handle_msg(int cmd, struct zclient *zclient,
792 uint16_t zapi_length, vrf_id_t vrf_id)
793 {
794 struct stream *s = zclient->ibuf;
795 struct mlag_msg mlag_msg;
796 char buf[80];
797 int rc = 0;
798 size_t length;
799
800 rc = mlag_lib_decode_mlag_hdr(s, &mlag_msg, &length);
801 if (rc)
802 return (rc);
803
804 if (PIM_DEBUG_MLAG)
805 zlog_debug("%s: Received msg type: %s length: %d, bulk_cnt: %d",
806 __func__,
807 mlag_lib_msgid_to_str(mlag_msg.msg_type, buf,
808 sizeof(buf)),
809 mlag_msg.data_len, mlag_msg.msg_cnt);
810
811 switch (mlag_msg.msg_type) {
812 case MLAG_STATUS_UPDATE: {
813 struct mlag_status msg;
814
815 rc = mlag_lib_decode_mlag_status(s, &msg);
816 if (rc)
817 return (rc);
818 pim_mlag_process_mlagd_state_change(msg);
819 } break;
820 case MLAG_PEER_FRR_STATUS: {
821 struct mlag_frr_status msg;
822
823 rc = mlag_lib_decode_frr_status(s, &msg);
824 if (rc)
825 return (rc);
826 pim_mlag_process_peer_frr_state_change(msg);
827 } break;
828 case MLAG_VXLAN_UPDATE: {
829 struct mlag_vxlan msg;
830
831 rc = mlag_lib_decode_vxlan_update(s, &msg);
832 if (rc)
833 return rc;
834 pim_mlag_process_vxlan_update(&msg);
835 } break;
836 case MLAG_MROUTE_ADD: {
837 struct mlag_mroute_add msg;
838
839 rc = mlag_lib_decode_mroute_add(s, &msg, &length);
840 if (rc)
841 return (rc);
842 pim_mlag_process_mroute_add(msg);
843 } break;
844 case MLAG_MROUTE_DEL: {
845 struct mlag_mroute_del msg;
846
847 rc = mlag_lib_decode_mroute_del(s, &msg, &length);
848 if (rc)
849 return (rc);
850 pim_mlag_process_mroute_del(msg);
851 } break;
852 case MLAG_MROUTE_ADD_BULK: {
853 struct mlag_mroute_add msg;
854 int i;
855
856 for (i = 0; i < mlag_msg.msg_cnt; i++) {
857 rc = mlag_lib_decode_mroute_add(s, &msg, &length);
858 if (rc)
859 return (rc);
860 pim_mlag_process_mroute_add(msg);
861 }
862 } break;
863 case MLAG_MROUTE_DEL_BULK: {
864 struct mlag_mroute_del msg;
865 int i;
866
867 for (i = 0; i < mlag_msg.msg_cnt; i++) {
868 rc = mlag_lib_decode_mroute_del(s, &msg, &length);
869 if (rc)
870 return (rc);
871 pim_mlag_process_mroute_del(msg);
872 }
873 } break;
874 default:
875 break;
876 }
877 return 0;
878 }
879
/****************End of PIM Message processing handler********************/
881
882 int pim_zebra_mlag_process_up(ZAPI_CALLBACK_ARGS)
883 {
884 if (PIM_DEBUG_MLAG)
885 zlog_debug("%s: Received Process-Up from Mlag", __func__);
886
887 /*
888 * Incase of local MLAG restart, PIM needs to replay all the data
889 * since MLAG is empty.
890 */
891 router->connected_to_mlag = true;
892 router->mlag_flags |= PIM_MLAGF_LOCAL_CONN_UP;
893 return 0;
894 }
895
896 static void pim_mlag_param_reset(void)
897 {
898 /* reset the cached params and stats */
899 router->mlag_flags &= ~(PIM_MLAGF_STATUS_RXED |
900 PIM_MLAGF_LOCAL_CONN_UP |
901 PIM_MLAGF_PEER_CONN_UP |
902 PIM_MLAGF_PEER_ZEBRA_UP);
903 router->local_vtep_ip.s_addr = INADDR_ANY;
904 router->anycast_vtep_ip.s_addr = INADDR_ANY;
905 router->mlag_role = MLAG_ROLE_NONE;
906 memset(&router->mlag_stats.msg, 0, sizeof(router->mlag_stats.msg));
907 router->peerlink_rif[0] = '\0';
908 }
909
910 int pim_zebra_mlag_process_down(ZAPI_CALLBACK_ARGS)
911 {
912 if (PIM_DEBUG_MLAG)
913 zlog_debug("%s: Received Process-Down from Mlag", __func__);
914
915 /* Local CLAG is down, reset peer data and forward the traffic if
916 * we are DR
917 */
918 if (router->mlag_flags & PIM_MLAGF_PEER_CONN_UP)
919 ++router->mlag_stats.peer_session_downs;
920 if (router->mlag_flags & PIM_MLAGF_PEER_ZEBRA_UP)
921 ++router->mlag_stats.peer_zebra_downs;
922 router->connected_to_mlag = false;
923 pim_mlag_param_reset();
924 /* on mlagd session down re-eval DF status */
925 pim_mlag_up_local_reeval(false /*mlagd_send*/, "mlagd_down");
926 /* flush all peer references */
927 pim_mlag_up_peer_del_all();
928 /* notify the vxlan component */
929 pim_mlag_vxlan_state_update();
930 return 0;
931 }
932
933 static int pim_mlag_register_handler(struct thread *thread)
934 {
935 uint32_t bit_mask = 0;
936
937 if (!zclient)
938 return -1;
939
940 SET_FLAG(bit_mask, (1 << MLAG_STATUS_UPDATE));
941 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD));
942 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL));
943 SET_FLAG(bit_mask, (1 << MLAG_DUMP));
944 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_ADD_BULK));
945 SET_FLAG(bit_mask, (1 << MLAG_MROUTE_DEL_BULK));
946 SET_FLAG(bit_mask, (1 << MLAG_PIM_CFG_DUMP));
947 SET_FLAG(bit_mask, (1 << MLAG_VXLAN_UPDATE));
948 SET_FLAG(bit_mask, (1 << MLAG_PEER_FRR_STATUS));
949
950 if (PIM_DEBUG_MLAG)
951 zlog_debug("%s: Posting Client Register to MLAG mask: 0x%x",
952 __func__, bit_mask);
953
954 zclient_send_mlag_register(zclient, bit_mask);
955 return 0;
956 }
957
958 void pim_mlag_register(void)
959 {
960 if (router->mlag_process_register)
961 return;
962
963 router->mlag_process_register = true;
964
965 thread_add_event(router->master, pim_mlag_register_handler, NULL, 0,
966 NULL);
967 }
968
969 static int pim_mlag_deregister_handler(struct thread *thread)
970 {
971 if (!zclient)
972 return -1;
973
974 if (PIM_DEBUG_MLAG)
975 zlog_debug("%s: Posting Client De-Register to MLAG from PIM",
976 __func__);
977 router->connected_to_mlag = false;
978 zclient_send_mlag_deregister(zclient);
979 return 0;
980 }
981
982 void pim_mlag_deregister(void)
983 {
984 /* if somebody still interested in the MLAG channel skip de-reg */
985 if (router->pim_mlag_intf_cnt || pim_vxlan_do_mlag_reg())
986 return;
987
988 /* not registered; nothing do */
989 if (!router->mlag_process_register)
990 return;
991
992 router->mlag_process_register = false;
993
994 thread_add_event(router->master, pim_mlag_deregister_handler, NULL, 0,
995 NULL);
996 }
997
998 void pim_if_configure_mlag_dualactive(struct pim_interface *pim_ifp)
999 {
1000 if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == true)
1001 return;
1002
1003 if (PIM_DEBUG_MLAG)
1004 zlog_debug("%s: Configuring active-active on Interface: %s",
1005 __func__, "NULL");
1006
1007 pim_ifp->activeactive = true;
1008 if (pim_ifp->pim)
1009 pim_ifp->pim->inst_mlag_intf_cnt++;
1010
1011 router->pim_mlag_intf_cnt++;
1012 if (PIM_DEBUG_MLAG)
1013 zlog_debug(
1014 "%s: Total MLAG configured Interfaces on router: %d, Inst: %d",
1015 __func__, router->pim_mlag_intf_cnt,
1016 pim_ifp->pim->inst_mlag_intf_cnt);
1017
1018 if (router->pim_mlag_intf_cnt == 1) {
1019 /*
1020 * atleast one Interface is configured for MLAG, send register
1021 * to Zebra for receiving MLAG Updates
1022 */
1023 pim_mlag_register();
1024 }
1025 }
1026
1027 void pim_if_unconfigure_mlag_dualactive(struct pim_interface *pim_ifp)
1028 {
1029 if (!pim_ifp || !pim_ifp->pim || pim_ifp->activeactive == false)
1030 return;
1031
1032 if (PIM_DEBUG_MLAG)
1033 zlog_debug("%s: UnConfiguring active-active on Interface: %s",
1034 __func__, "NULL");
1035
1036 pim_ifp->activeactive = false;
1037 pim_ifp->pim->inst_mlag_intf_cnt--;
1038
1039 router->pim_mlag_intf_cnt--;
1040 if (PIM_DEBUG_MLAG)
1041 zlog_debug(
1042 "%s: Total MLAG configured Interfaces on router: %d, Inst: %d",
1043 __func__, router->pim_mlag_intf_cnt,
1044 pim_ifp->pim->inst_mlag_intf_cnt);
1045
1046 if (router->pim_mlag_intf_cnt == 0) {
1047 /*
1048 * all the Interfaces are MLAG un-configured, post MLAG
1049 * De-register to Zebra
1050 */
1051 pim_mlag_deregister();
1052 pim_mlag_param_reset();
1053 }
1054 }
1055
1056
1057 void pim_instance_mlag_init(struct pim_instance *pim)
1058 {
1059 if (!pim)
1060 return;
1061
1062 pim->inst_mlag_intf_cnt = 0;
1063 }
1064
1065
1066 void pim_instance_mlag_terminate(struct pim_instance *pim)
1067 {
1068 struct interface *ifp;
1069
1070 if (!pim)
1071 return;
1072
1073 FOR_ALL_INTERFACES (pim->vrf, ifp) {
1074 struct pim_interface *pim_ifp = ifp->info;
1075
1076 if (!pim_ifp || pim_ifp->activeactive == false)
1077 continue;
1078
1079 pim_if_unconfigure_mlag_dualactive(pim_ifp);
1080 }
1081 pim->inst_mlag_intf_cnt = 0;
1082 }
1083
1084 void pim_mlag_terminate(void)
1085 {
1086 stream_free(router->mlag_stream);
1087 router->mlag_stream = NULL;
1088 stream_fifo_free(router->mlag_fifo);
1089 router->mlag_fifo = NULL;
1090 }
1091
1092 void pim_mlag_init(void)
1093 {
1094 pim_mlag_param_reset();
1095 router->pim_mlag_intf_cnt = 0;
1096 router->connected_to_mlag = false;
1097 router->mlag_fifo = stream_fifo_new();
1098 router->zpthread_mlag_write = NULL;
1099 router->mlag_stream = stream_new(MLAG_BUF_LIMIT);
1100 }