/*
 * Main implementation file for interface to Forwarding Plane Manager.
 *
 * Copyright (C) 2012 by Open Source Routing.
 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <zebra.h>

#include "log.h"
#include "libfrr.h"
#include "stream.h"
#include "thread.h"
#include "network.h"
#include "command.h"
#include "lib/version.h"
#include "jhash.h"

#include "zebra/rib.h"
#include "zebra/zserv.h"
#include "zebra/zebra_ns.h"
#include "zebra/zebra_vrf.h"
#include "zebra/zebra_errors.h"

#include "fpm/fpm.h"
#include "zebra_fpm_private.h"
#include "zebra/zebra_router.h"
#include "zebra_vxlan_private.h"

DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");

/*
 * Interval at which we attempt to connect to the FPM.
 */
#define ZFPM_CONNECT_RETRY_IVL 5

/*
 * Sizes of outgoing and incoming stream buffers for writing/reading
 * FPM messages.
 */
#define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
#define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)

/*
 * The maximum number of times the FPM socket write callback can call
 * 'write' before it yields.
 */
#define ZFPM_MAX_WRITES_PER_RUN 10

/*
 * Interval over which we collect statistics.
 */
#define ZFPM_STATS_IVL_SECS 10
#define FPM_MAX_MAC_MSG_LEN 512

static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args);

/*
 * Structure that holds state for iterating over all route_node
 * structures that are candidates for being communicated to the FPM.
 */
struct zfpm_rnodes_iter {
	rib_tables_iter_t tables_iter;
	route_table_iter_t iter;
};
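
/*
 * Illustrative usage sketch (editorial addition, not part of the
 * original file; it relies only on the iterator helpers defined
 * further below): walk every candidate route_node across all RIB
 * tables.
 *
 *	struct zfpm_rnodes_iter it;
 *	struct route_node *rn;
 *
 *	zfpm_rnodes_iter_init(&it);
 *	while ((rn = zfpm_rnodes_iter_next(&it)) != NULL) {
 *		... process rn, or zfpm_rnodes_iter_pause(&it) to yield ...
 *	}
 *	zfpm_rnodes_iter_cleanup(&it);
 */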

/*
 * Statistics.
 */
struct zfpm_stats {
	unsigned long connect_calls;
	unsigned long connect_no_sock;

	unsigned long read_cb_calls;

	unsigned long write_cb_calls;
	unsigned long write_calls;
	unsigned long partial_writes;
	unsigned long max_writes_hit;
	unsigned long t_write_yields;

	unsigned long nop_deletes_skipped;
	unsigned long route_adds;
	unsigned long route_dels;

	unsigned long updates_triggered;
	unsigned long redundant_triggers;

	unsigned long dests_del_after_update;

	unsigned long t_conn_down_starts;
	unsigned long t_conn_down_dests_processed;
	unsigned long t_conn_down_yields;
	unsigned long t_conn_down_finishes;

	unsigned long t_conn_up_starts;
	unsigned long t_conn_up_dests_processed;
	unsigned long t_conn_up_yields;
	unsigned long t_conn_up_aborts;
	unsigned long t_conn_up_finishes;
};

/*
 * States for the FPM state machine.
 */
enum zfpm_state {

	/*
	 * In this state we are not yet ready to connect to the FPM. This
	 * can happen when this module is disabled, or if we're cleaning up
	 * after a connection has gone down.
	 */
	ZFPM_STATE_IDLE,

	/*
	 * Ready to talk to the FPM and periodically trying to connect to
	 * it.
	 */
	ZFPM_STATE_ACTIVE,

	/*
	 * In the middle of bringing up a TCP connection. Specifically,
	 * waiting for a connect() call to complete asynchronously.
	 */
	ZFPM_STATE_CONNECTING,

	/*
	 * TCP connection to the FPM is up.
	 */
	ZFPM_STATE_ESTABLISHED

};
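
/*
 * State transition sketch (editorial addition, derived from the
 * assertions in zfpm_set_state() further below):
 *
 *	IDLE ----(connect timer armed)----> ACTIVE
 *	ACTIVE --(connect() EINPROGRESS)--> CONNECTING
 *	ACTIVE/CONNECTING --(connected)---> ESTABLISHED
 *	CONNECTING --(connect failed)-----> ACTIVE
 *	ESTABLISHED --(connection down)---> IDLE
 */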

/*
 * Message format to be used to communicate with the FPM.
 */
enum zfpm_msg_format {
	ZFPM_MSG_FORMAT_NONE,
	ZFPM_MSG_FORMAT_NETLINK,
	ZFPM_MSG_FORMAT_PROTOBUF,
};

/*
 * Globals.
 */
struct zfpm_glob {

	/*
	 * True if the FPM module has been enabled.
	 */
	int enabled;

	/*
	 * Message format to be used to communicate with the fpm.
	 */
	enum zfpm_msg_format message_format;

	struct thread_master *master;

	enum zfpm_state state;

	in_addr_t fpm_server;
	/*
	 * Port on which the FPM is running.
	 */
	int fpm_port;

	/*
	 * List of rib_dest_t structures to be processed
	 */
	TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;

	/*
	 * List of fpm_mac_info structures to be processed
	 */
	TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;

	/*
	 * Hash table of fpm_mac_info_t entries
	 *
	 * While adding fpm_mac_info_t for a MAC to the mac_q,
	 * it is possible that another fpm_mac_info_t node for this MAC
	 * is already present in the queue.
	 * This is possible in the case of consecutive add->delete operations.
	 * To avoid such duplicate insertions in the mac_q,
	 * define a hash table for fpm_mac_info_t which can be looked up
	 * to see if an fpm_mac_info_t node for a MAC is already present
	 * in the mac_q.
	 */
	struct hash *fpm_mac_info_table;

	/*
	 * Stream socket to the FPM.
	 */
	int sock;

	/*
	 * Buffers for messages to/from the FPM.
	 */
	struct stream *obuf;
	struct stream *ibuf;

	/*
	 * Threads for I/O.
	 */
	struct thread *t_connect;
	struct thread *t_write;
	struct thread *t_read;

	/*
	 * Thread to clean up after the TCP connection to the FPM goes down
	 * and the state that belongs to it.
	 */
	struct thread *t_conn_down;

	struct {
		struct zfpm_rnodes_iter iter;
	} t_conn_down_state;

	/*
	 * Thread to take actions once the TCP conn to the FPM comes up, and
	 * the state that belongs to it.
	 */
	struct thread *t_conn_up;

	struct {
		struct zfpm_rnodes_iter iter;
	} t_conn_up_state;

	unsigned long connect_calls;
	time_t last_connect_call_time;

	/*
	 * Stats from the start of the current statistics interval up to
	 * now. These are the counters we typically update in the code.
	 */
	struct zfpm_stats stats;

	/*
	 * Statistics that were gathered in the last collection interval.
	 */
	struct zfpm_stats last_ivl_stats;

	/*
	 * Cumulative stats from the last clear to the start of the current
	 * statistics interval.
	 */
	struct zfpm_stats cumulative_stats;

	/*
	 * Stats interval timer.
	 */
	struct thread *t_stats;

	/*
	 * If non-zero, the last time when statistics were cleared.
	 */
	time_t last_stats_clear_time;

	/*
	 * Flag to track the MAC dump status to FPM
	 */
	bool fpm_mac_dump_done;
};

static struct zfpm_glob zfpm_glob_space;
static struct zfpm_glob *zfpm_g = &zfpm_glob_space;

static int zfpm_trigger_update(struct route_node *rn, const char *reason);

static int zfpm_read_cb(struct thread *thread);
static int zfpm_write_cb(struct thread *thread);

static void zfpm_set_state(enum zfpm_state state, const char *reason);
static void zfpm_start_connect_timer(const char *reason);
static void zfpm_start_stats_timer(void);
static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);

/*
 * zfpm_thread_should_yield
 */
static inline int zfpm_thread_should_yield(struct thread *t)
{
	return thread_should_yield(t);
}

/*
 * zfpm_state_to_str
 */
static const char *zfpm_state_to_str(enum zfpm_state state)
{
	switch (state) {

	case ZFPM_STATE_IDLE:
		return "idle";

	case ZFPM_STATE_ACTIVE:
		return "active";

	case ZFPM_STATE_CONNECTING:
		return "connecting";

	case ZFPM_STATE_ESTABLISHED:
		return "established";

	default:
		return "unknown";
	}
}

/*
 * zfpm_get_elapsed_time
 *
 * Returns the time elapsed (in seconds) since the given time.
 */
static time_t zfpm_get_elapsed_time(time_t reference)
{
	time_t now;

	now = monotime(NULL);

	if (now < reference) {
		assert(0);
		return 0;
	}

	return now - reference;
}

/*
 * zfpm_rnodes_iter_init
 */
static inline void zfpm_rnodes_iter_init(struct zfpm_rnodes_iter *iter)
{
	memset(iter, 0, sizeof(*iter));
	rib_tables_iter_init(&iter->tables_iter);

	/*
	 * This is a hack, but it makes implementing 'next' easier by
	 * ensuring that route_table_iter_next() will return NULL the first
	 * time we call it.
	 */
	route_table_iter_init(&iter->iter, NULL);
	route_table_iter_cleanup(&iter->iter);
}

/*
 * zfpm_rnodes_iter_next
 */
static inline struct route_node *
zfpm_rnodes_iter_next(struct zfpm_rnodes_iter *iter)
{
	struct route_node *rn;
	struct route_table *table;

	while (1) {
		rn = route_table_iter_next(&iter->iter);
		if (rn)
			return rn;

		/*
		 * We've made our way through this table, go to the next one.
		 */
		route_table_iter_cleanup(&iter->iter);

		table = rib_tables_iter_next(&iter->tables_iter);

		if (!table)
			return NULL;

		route_table_iter_init(&iter->iter, table);
	}

	return NULL;
}

/*
 * zfpm_rnodes_iter_pause
 */
static inline void zfpm_rnodes_iter_pause(struct zfpm_rnodes_iter *iter)
{
	route_table_iter_pause(&iter->iter);
}

/*
 * zfpm_rnodes_iter_cleanup
 */
static inline void zfpm_rnodes_iter_cleanup(struct zfpm_rnodes_iter *iter)
{
	route_table_iter_cleanup(&iter->iter);
	rib_tables_iter_cleanup(&iter->tables_iter);
}

/*
 * zfpm_stats_init
 *
 * Initialize a statistics block.
 */
static inline void zfpm_stats_init(struct zfpm_stats *stats)
{
	memset(stats, 0, sizeof(*stats));
}

/*
 * zfpm_stats_reset
 */
static inline void zfpm_stats_reset(struct zfpm_stats *stats)
{
	zfpm_stats_init(stats);
}

/*
 * zfpm_stats_copy
 */
static inline void zfpm_stats_copy(const struct zfpm_stats *src,
				   struct zfpm_stats *dest)
{
	memcpy(dest, src, sizeof(*dest));
}

/*
 * zfpm_stats_compose
 *
 * Total up the statistics in two stats structures ('s1' and 's2') and
 * return the result in the third argument, 'result'. Note that the
 * pointer 'result' may be the same as 's1' or 's2'.
 *
 * For simplicity, the implementation below assumes that the stats
 * structure is composed entirely of counters. This can easily be
 * changed when necessary.
 */
static void zfpm_stats_compose(const struct zfpm_stats *s1,
			       const struct zfpm_stats *s2,
			       struct zfpm_stats *result)
{
	const unsigned long *p1, *p2;
	unsigned long *result_p;
	int i, num_counters;

	p1 = (const unsigned long *)s1;
	p2 = (const unsigned long *)s2;
	result_p = (unsigned long *)result;

	num_counters = (sizeof(struct zfpm_stats) / sizeof(unsigned long));

	for (i = 0; i < num_counters; i++) {
		result_p[i] = p1[i] + p2[i];
	}
}
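
/*
 * Compile-time check for the assumption documented above (an editorial
 * addition, not in the original file): the counter walk in
 * zfpm_stats_compose() is only safe if 'struct zfpm_stats' packs an
 * exact number of unsigned longs.
 */
_Static_assert(sizeof(struct zfpm_stats) % sizeof(unsigned long) == 0,
	       "struct zfpm_stats must contain only unsigned long counters");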

/*
 * zfpm_read_on
 */
static inline void zfpm_read_on(void)
{
	assert(!zfpm_g->t_read);
	assert(zfpm_g->sock >= 0);

	thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
			&zfpm_g->t_read);
}

/*
 * zfpm_write_on
 */
static inline void zfpm_write_on(void)
{
	assert(!zfpm_g->t_write);
	assert(zfpm_g->sock >= 0);

	thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
			 &zfpm_g->t_write);
}

/*
 * zfpm_read_off
 */
static inline void zfpm_read_off(void)
{
	thread_cancel(&zfpm_g->t_read);
}

/*
 * zfpm_write_off
 */
static inline void zfpm_write_off(void)
{
	thread_cancel(&zfpm_g->t_write);
}

static inline void zfpm_connect_off(void)
{
	thread_cancel(&zfpm_g->t_connect);
}

/*
 * zfpm_conn_up_thread_cb
 *
 * Callback for actions to be taken when the connection to the FPM
 * comes up.
 */
static int zfpm_conn_up_thread_cb(struct thread *thread)
{
	struct route_node *rnode;
	struct zfpm_rnodes_iter *iter;
	rib_dest_t *dest;

	iter = &zfpm_g->t_conn_up_state.iter;

	if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
		zfpm_debug(
			"Connection not up anymore, conn_up thread aborting");
		zfpm_g->stats.t_conn_up_aborts++;
		goto done;
	}

	if (!zfpm_g->fpm_mac_dump_done) {
		/* Enqueue FPM updates for all the RMAC entries */
		hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table,
			     NULL);
		/* mark dump done so that it's not repeated after yield */
		zfpm_g->fpm_mac_dump_done = true;
	}

	while ((rnode = zfpm_rnodes_iter_next(iter))) {
		dest = rib_dest_from_rnode(rnode);

		if (dest) {
			zfpm_g->stats.t_conn_up_dests_processed++;
			zfpm_trigger_update(rnode, NULL);
		}

		/*
		 * Yield if need be.
		 */
		if (!zfpm_thread_should_yield(thread))
			continue;

		zfpm_g->stats.t_conn_up_yields++;
		zfpm_rnodes_iter_pause(iter);
		thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
				      NULL, 0, &zfpm_g->t_conn_up);
		return 0;
	}

	zfpm_g->stats.t_conn_up_finishes++;

done:
	zfpm_rnodes_iter_cleanup(iter);
	return 0;
}

/*
 * zfpm_connection_up
 *
 * Called when the connection to the FPM comes up.
 */
static void zfpm_connection_up(const char *detail)
{
	assert(zfpm_g->sock >= 0);
	zfpm_read_on();
	zfpm_write_on();
	zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);

	/*
	 * Start thread to push existing routes to the FPM.
	 */
	thread_cancel(&zfpm_g->t_conn_up);

	zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);
	zfpm_g->fpm_mac_dump_done = false;

	zfpm_debug("Starting conn_up thread");

	thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
			      &zfpm_g->t_conn_up);
	zfpm_g->stats.t_conn_up_starts++;
}

/*
 * zfpm_connect_check
 *
 * Check if an asynchronous connect() to the FPM is complete.
 */
static void zfpm_connect_check(void)
{
	int status;
	socklen_t slen;
	int ret;

	zfpm_read_off();
	zfpm_write_off();

	slen = sizeof(status);
	ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
			 &slen);

	if (ret >= 0 && status == 0) {
		zfpm_connection_up("async connect complete");
		return;
	}

	/*
	 * getsockopt() failed or indicated an error on the socket.
	 */
	close(zfpm_g->sock);
	zfpm_g->sock = -1;

	zfpm_start_connect_timer("getsockopt() after async connect failed");
	return;
}

/*
 * zfpm_conn_down_thread_cb
 *
 * Callback that is invoked to clean up state after the TCP connection
 * to the FPM goes down.
 */
static int zfpm_conn_down_thread_cb(struct thread *thread)
{
	struct route_node *rnode;
	struct zfpm_rnodes_iter *iter;
	rib_dest_t *dest;
	struct fpm_mac_info_t *mac = NULL;

	assert(zfpm_g->state == ZFPM_STATE_IDLE);

	/*
	 * Delink and free all fpm_mac_info_t nodes
	 * in the mac_q and fpm_mac_info_hash
	 */
	while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
		zfpm_mac_info_del(mac);

	zfpm_g->t_conn_down = NULL;

	iter = &zfpm_g->t_conn_down_state.iter;

	while ((rnode = zfpm_rnodes_iter_next(iter))) {
		dest = rib_dest_from_rnode(rnode);

		if (dest) {
			if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
				TAILQ_REMOVE(&zfpm_g->dest_q, dest,
					     fpm_q_entries);
			}

			UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
			UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);

			zfpm_g->stats.t_conn_down_dests_processed++;

			/*
			 * Check if the dest should be deleted.
			 */
			rib_gc_dest(rnode);
		}

		/*
		 * Yield if need be.
		 */
		if (!zfpm_thread_should_yield(thread))
			continue;

		zfpm_g->stats.t_conn_down_yields++;
		zfpm_rnodes_iter_pause(iter);
		zfpm_g->t_conn_down = NULL;
		thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
				      NULL, 0, &zfpm_g->t_conn_down);
		return 0;
	}

	zfpm_g->stats.t_conn_down_finishes++;
	zfpm_rnodes_iter_cleanup(iter);

	/*
	 * Start the process of connecting to the FPM again.
	 */
	zfpm_start_connect_timer("cleanup complete");
	return 0;
}

/*
 * zfpm_connection_down
 *
 * Called when the connection to the FPM has gone down.
 */
static void zfpm_connection_down(const char *detail)
{
	if (!detail)
		detail = "unknown";

	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);

	zlog_info("connection to the FPM has gone down: %s", detail);

	zfpm_read_off();
	zfpm_write_off();

	stream_reset(zfpm_g->ibuf);
	stream_reset(zfpm_g->obuf);

	if (zfpm_g->sock >= 0) {
		close(zfpm_g->sock);
		zfpm_g->sock = -1;
	}

	/*
	 * Start thread to clean up state after the connection goes down.
	 */
	assert(!zfpm_g->t_conn_down);
	zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
	zfpm_g->t_conn_down = NULL;
	thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
			      &zfpm_g->t_conn_down);
	zfpm_g->stats.t_conn_down_starts++;

	zfpm_set_state(ZFPM_STATE_IDLE, detail);
}

/*
 * zfpm_read_cb
 */
static int zfpm_read_cb(struct thread *thread)
{
	size_t already;
	struct stream *ibuf;
	uint16_t msg_len;
	fpm_msg_hdr_t *hdr;

	zfpm_g->stats.read_cb_calls++;

	/*
	 * Check if async connect is now done.
	 */
	if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
		zfpm_connect_check();
		return 0;
	}

	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
	assert(zfpm_g->sock >= 0);

	ibuf = zfpm_g->ibuf;

	already = stream_get_endp(ibuf);
	if (already < FPM_MSG_HDR_LEN) {
		ssize_t nbyte;

		nbyte = stream_read_try(ibuf, zfpm_g->sock,
					FPM_MSG_HDR_LEN - already);
		if (nbyte == 0 || nbyte == -1) {
			if (nbyte == -1) {
				char buffer[1024];

				snprintf(buffer, sizeof(buffer),
					 "closed socket in read(%d): %s", errno,
					 safe_strerror(errno));
				zfpm_connection_down(buffer);
			} else
				zfpm_connection_down("closed socket in read");
			return 0;
		}

		if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
			goto done;

		already = FPM_MSG_HDR_LEN;
	}

	stream_set_getp(ibuf, 0);

	hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);

	if (!fpm_msg_hdr_ok(hdr)) {
		zfpm_connection_down("invalid message header");
		return 0;
	}

	msg_len = fpm_msg_len(hdr);

	/*
	 * Read out the rest of the packet.
	 */
	if (already < msg_len) {
		ssize_t nbyte;

		nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);

		if (nbyte == 0 || nbyte == -1) {
			if (nbyte == -1) {
				char buffer[1024];

				snprintf(buffer, sizeof(buffer),
					 "failed to read message(%d) %s", errno,
					 safe_strerror(errno));
				zfpm_connection_down(buffer);
			} else
				zfpm_connection_down("failed to read message");
			return 0;
		}

		if (nbyte != (ssize_t)(msg_len - already))
			goto done;
	}

	/*
	 * Just throw it away for now.
	 */
	stream_reset(ibuf);

done:
	zfpm_read_on();
	return 0;
}
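
/*
 * Editorial note on framing (summarizing fpm/fpm.h, which defines the
 * header used above): each message on the stream starts with a 4-byte
 * fpm_msg_hdr_t,
 *
 *	uint8_t  version;   FPM_PROTO_VERSION
 *	uint8_t  msg_type;  FPM_MSG_TYPE_NETLINK / FPM_MSG_TYPE_PROTOBUF
 *	uint16_t msg_len;   network byte order, header + payload,
 *	                    a multiple of FPM_MSG_ALIGNTO
 *
 * which is why the reader first pulls FPM_MSG_HDR_LEN bytes, validates
 * the header with fpm_msg_hdr_ok(), and only then reads fpm_msg_len()
 * bytes in total.
 */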

static bool zfpm_updates_pending(void)
{
	if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
		return true;

	return false;
}

/*
 * zfpm_writes_pending
 *
 * Returns true if we may have something to write to the FPM.
 */
static int zfpm_writes_pending(void)
{

	/*
	 * Check if there is any data in the outbound buffer that has not
	 * been written to the socket yet.
	 */
	if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
		return 1;

	/*
	 * Check if there are any updates scheduled on the outbound queues.
	 */
	if (zfpm_updates_pending())
		return 1;

	return 0;
}

/*
 * zfpm_encode_route
 *
 * Encode a message to the FPM with information about the given route.
 *
 * Returns the number of bytes written to the buffer. 0 or a negative
 * value indicates an error.
 */
static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
				    char *in_buf, size_t in_buf_len,
				    fpm_msg_type_e *msg_type)
{
	size_t len;
#ifdef HAVE_NETLINK
	int cmd;
#endif
	len = 0;

	*msg_type = FPM_MSG_TYPE_NONE;

	switch (zfpm_g->message_format) {

	case ZFPM_MSG_FORMAT_PROTOBUF:
#ifdef HAVE_PROTOBUF
		len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
						 in_buf_len);
		*msg_type = FPM_MSG_TYPE_PROTOBUF;
#endif
		break;

	case ZFPM_MSG_FORMAT_NETLINK:
#ifdef HAVE_NETLINK
		*msg_type = FPM_MSG_TYPE_NETLINK;
		cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
		len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
						in_buf_len);
		assert(fpm_msg_align(len) == len);
#endif /* HAVE_NETLINK */
		break;

	default:
		break;
	}

	return len;
}

/*
 * zfpm_route_for_update
 *
 * Returns the re that is to be sent to the FPM for a given dest.
 */
struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
{
	return dest->selected_fib;
}

/*
 * Define an enum for return codes for queue processing functions
 *
 * FPM_WRITE_STOP: This return code indicates that the write buffer is full.
 * Stop processing all the queues and empty the buffer by writing its content
 * to the socket.
 *
 * FPM_GOTO_NEXT_Q: This return code indicates that either this queue is
 * empty or we have processed enough updates from this queue.
 * So, move on to the next queue.
 */
enum {
	FPM_WRITE_STOP = 0,
	FPM_GOTO_NEXT_Q = 1
};

#define FPM_QUEUE_PROCESS_LIMIT 10000

/*
 * zfpm_build_route_updates
 *
 * Process the dest_q queue and write FPM messages to the outbound buffer.
 */
static int zfpm_build_route_updates(void)
{
	struct stream *s;
	rib_dest_t *dest;
	unsigned char *buf, *data, *buf_end;
	size_t msg_len;
	size_t data_len;
	fpm_msg_hdr_t *hdr;
	struct route_entry *re;
	int is_add, write_msg;
	fpm_msg_type_e msg_type;
	uint16_t q_limit;

	if (TAILQ_EMPTY(&zfpm_g->dest_q))
		return FPM_GOTO_NEXT_Q;

	s = zfpm_g->obuf;
	q_limit = FPM_QUEUE_PROCESS_LIMIT;

	do {
		/*
		 * Make sure there is enough space to write another message.
		 */
		if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
			return FPM_WRITE_STOP;

		buf = STREAM_DATA(s) + stream_get_endp(s);
		buf_end = buf + STREAM_WRITEABLE(s);

		dest = TAILQ_FIRST(&zfpm_g->dest_q);
		if (!dest)
			return FPM_GOTO_NEXT_Q;

		assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));

		hdr = (fpm_msg_hdr_t *)buf;
		hdr->version = FPM_PROTO_VERSION;

		data = fpm_msg_data(hdr);

		re = zfpm_route_for_update(dest);
		is_add = re ? 1 : 0;

		write_msg = 1;

		/*
		 * If this is a route deletion, and we have not sent the
		 * route to the FPM previously, skip it.
		 */
		if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
			write_msg = 0;
			zfpm_g->stats.nop_deletes_skipped++;
		}

		if (write_msg) {
			data_len = zfpm_encode_route(dest, re, (char *)data,
						     buf_end - data, &msg_type);

			if (data_len) {
				hdr->msg_type = msg_type;
				msg_len = fpm_data_len_to_msg_len(data_len);
				hdr->msg_len = htons(msg_len);
				stream_forward_endp(s, msg_len);

				if (is_add)
					zfpm_g->stats.route_adds++;
				else
					zfpm_g->stats.route_dels++;
			} else {
				zlog_err("%s: Encoding Prefix: %pRN No valid nexthops",
					 __func__, dest->rnode);
			}
		}

		/*
		 * Remove the dest from the queue, and reset the flag.
		 */
		UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
		TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);

		if (is_add) {
			SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
		} else {
			UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
		}

		/*
		 * Delete the destination if necessary.
		 */
		if (rib_gc_dest(dest->rnode))
			zfpm_g->stats.dests_del_after_update++;

		q_limit--;
		if (q_limit == 0) {
			/*
			 * We have processed enough updates in this queue.
			 * Now yield for other queues.
			 */
			return FPM_GOTO_NEXT_Q;
		}
	} while (true);
}

/*
 * zfpm_encode_mac
 *
 * Encode a message to FPM with information about the given MAC.
 *
 * Returns the number of bytes written to the buffer.
 */
static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
				  size_t in_buf_len, fpm_msg_type_e *msg_type)
{
	size_t len = 0;

	*msg_type = FPM_MSG_TYPE_NONE;

	switch (zfpm_g->message_format) {

	case ZFPM_MSG_FORMAT_NONE:
		break;
	case ZFPM_MSG_FORMAT_NETLINK:
#ifdef HAVE_NETLINK
		len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
		assert(fpm_msg_align(len) == len);
		*msg_type = FPM_MSG_TYPE_NETLINK;
#endif /* HAVE_NETLINK */
		break;
	case ZFPM_MSG_FORMAT_PROTOBUF:
		break;
	}
	return len;
}

static int zfpm_build_mac_updates(void)
{
	struct stream *s;
	struct fpm_mac_info_t *mac;
	unsigned char *buf, *data, *buf_end;
	fpm_msg_hdr_t *hdr;
	size_t data_len, msg_len;
	fpm_msg_type_e msg_type;
	uint16_t q_limit;

	if (TAILQ_EMPTY(&zfpm_g->mac_q))
		return FPM_GOTO_NEXT_Q;

	s = zfpm_g->obuf;
	q_limit = FPM_QUEUE_PROCESS_LIMIT;

	do {
		/* Make sure there is enough space to write another message. */
		if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
			return FPM_WRITE_STOP;

		buf = STREAM_DATA(s) + stream_get_endp(s);
		buf_end = buf + STREAM_WRITEABLE(s);

		mac = TAILQ_FIRST(&zfpm_g->mac_q);
		if (!mac)
			return FPM_GOTO_NEXT_Q;

		/* Check for no-op */
		if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
			zfpm_g->stats.nop_deletes_skipped++;
			zfpm_mac_info_del(mac);
			continue;
		}

		hdr = (fpm_msg_hdr_t *)buf;
		hdr->version = FPM_PROTO_VERSION;

		data = fpm_msg_data(hdr);
		data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
					   &msg_type);
		assert(data_len);

		hdr->msg_type = msg_type;
		msg_len = fpm_data_len_to_msg_len(data_len);
		hdr->msg_len = htons(msg_len);
		stream_forward_endp(s, msg_len);

		/* Remove the MAC from the queue, and delete it. */
		zfpm_mac_info_del(mac);

		q_limit--;
		if (q_limit == 0) {
			/*
			 * We have processed enough updates in this queue.
			 * Now yield for other queues.
			 */
			return FPM_GOTO_NEXT_Q;
		}
	} while (1);
}

/*
 * zfpm_build_updates
 *
 * Process the outgoing queues and write messages to the outbound
 * buffer.
 */
static void zfpm_build_updates(void)
{
	struct stream *s;

	s = zfpm_g->obuf;
	assert(stream_empty(s));

	do {
		/*
		 * Stop processing the queues if zfpm_g->obuf is full
		 * or we do not have more updates to process
		 */
		if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
			break;
		if (zfpm_build_route_updates() == FPM_WRITE_STOP)
			break;
	} while (zfpm_updates_pending());
}

/*
 * zfpm_write_cb
 */
static int zfpm_write_cb(struct thread *thread)
{
	struct stream *s;
	int num_writes;

	zfpm_g->stats.write_cb_calls++;

	/*
	 * Check if async connect is now done.
	 */
	if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
		zfpm_connect_check();
		return 0;
	}

	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
	assert(zfpm_g->sock >= 0);

	num_writes = 0;

	do {
		int bytes_to_write, bytes_written;

		s = zfpm_g->obuf;

		/*
		 * If the stream is empty, try fill it up with data.
		 */
		if (stream_empty(s)) {
			zfpm_build_updates();
		}

		bytes_to_write = stream_get_endp(s) - stream_get_getp(s);
		if (!bytes_to_write)
			break;

		bytes_written =
			write(zfpm_g->sock, stream_pnt(s), bytes_to_write);
		zfpm_g->stats.write_calls++;
		num_writes++;

		if (bytes_written < 0) {
			if (ERRNO_IO_RETRY(errno))
				break;

			zfpm_connection_down("failed to write to socket");
			return 0;
		}

		if (bytes_written != bytes_to_write) {

			/*
			 * Partial write.
			 */
			stream_forward_getp(s, bytes_written);
			zfpm_g->stats.partial_writes++;
			break;
		}

		/*
		 * We've written out the entire contents of the stream.
		 */
		stream_reset(s);

		if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) {
			zfpm_g->stats.max_writes_hit++;
			break;
		}

		if (zfpm_thread_should_yield(thread)) {
			zfpm_g->stats.t_write_yields++;
			break;
		}
	} while (1);

	if (zfpm_writes_pending())
		zfpm_write_on();

	return 0;
}

/*
 * zfpm_connect_cb
 */
static int zfpm_connect_cb(struct thread *t)
{
	int sock, ret;
	struct sockaddr_in serv;

	assert(zfpm_g->state == ZFPM_STATE_ACTIVE);

	sock = socket(AF_INET, SOCK_STREAM, 0);
	if (sock < 0) {
		zlog_err("Failed to create socket for connect(): %s",
			 strerror(errno));
		zfpm_g->stats.connect_no_sock++;
		return 0;
	}

	set_nonblocking(sock);

	/* Make server socket. */
	memset(&serv, 0, sizeof(serv));
	serv.sin_family = AF_INET;
	serv.sin_port = htons(zfpm_g->fpm_port);
#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
	serv.sin_len = sizeof(struct sockaddr_in);
#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
	if (!zfpm_g->fpm_server)
		serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	else
		serv.sin_addr.s_addr = (zfpm_g->fpm_server);

	/*
	 * Connect to the FPM.
	 */
	zfpm_g->connect_calls++;
	zfpm_g->stats.connect_calls++;
	zfpm_g->last_connect_call_time = monotime(NULL);

	ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv));
	if (ret >= 0) {
		zfpm_g->sock = sock;
		zfpm_connection_up("connect succeeded");
		return 1;
	}

	if (errno == EINPROGRESS) {
		zfpm_g->sock = sock;
		zfpm_read_on();
		zfpm_write_on();
		zfpm_set_state(ZFPM_STATE_CONNECTING,
			       "async connect in progress");
		return 0;
	}

	zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno));
	close(sock);

	/*
	 * Restart timer for retrying connection.
	 */
	zfpm_start_connect_timer("connect() failed");
	return 0;
}

/*
 * zfpm_set_state
 *
 * Move state machine into the given state.
 */
static void zfpm_set_state(enum zfpm_state state, const char *reason)
{
	enum zfpm_state cur_state = zfpm_g->state;

	if (!reason)
		reason = "Unknown";

	if (state == cur_state)
		return;

	zfpm_debug("beginning state transition %s -> %s. Reason: %s",
		   zfpm_state_to_str(cur_state), zfpm_state_to_str(state),
		   reason);

	switch (state) {

	case ZFPM_STATE_IDLE:
		assert(cur_state == ZFPM_STATE_ESTABLISHED);
		break;

	case ZFPM_STATE_ACTIVE:
		assert(cur_state == ZFPM_STATE_IDLE
		       || cur_state == ZFPM_STATE_CONNECTING);
		assert(zfpm_g->t_connect);
		break;

	case ZFPM_STATE_CONNECTING:
		assert(zfpm_g->sock);
		assert(cur_state == ZFPM_STATE_ACTIVE);
		assert(zfpm_g->t_read);
		assert(zfpm_g->t_write);
		break;

	case ZFPM_STATE_ESTABLISHED:
		assert(cur_state == ZFPM_STATE_ACTIVE
		       || cur_state == ZFPM_STATE_CONNECTING);
		assert(zfpm_g->sock);
		assert(zfpm_g->t_read);
		assert(zfpm_g->t_write);
		break;
	}

	zfpm_g->state = state;
}

/*
 * zfpm_calc_connect_delay
 *
 * Returns the number of seconds after which we should attempt to
 * reconnect to the FPM.
 */
static long zfpm_calc_connect_delay(void)
{
	time_t elapsed;

	/*
	 * Return 0 if this is our first attempt to connect.
	 */
	if (zfpm_g->connect_calls == 0) {
		return 0;
	}

	elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time);

	if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
		return 0;
	}

	return ZFPM_CONNECT_RETRY_IVL - elapsed;
}

/*
 * zfpm_start_connect_timer
 */
static void zfpm_start_connect_timer(const char *reason)
{
	long delay_secs;

	assert(!zfpm_g->t_connect);
	assert(zfpm_g->sock < 0);

	assert(zfpm_g->state == ZFPM_STATE_IDLE
	       || zfpm_g->state == ZFPM_STATE_ACTIVE
	       || zfpm_g->state == ZFPM_STATE_CONNECTING);

	delay_secs = zfpm_calc_connect_delay();
	zfpm_debug("scheduling connect in %ld seconds", delay_secs);

	thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs,
			 &zfpm_g->t_connect);
	zfpm_set_state(ZFPM_STATE_ACTIVE, reason);
}

/*
 * zfpm_is_enabled
 *
 * Returns true if the zebra FPM module has been enabled.
 */
static inline int zfpm_is_enabled(void)
{
	return zfpm_g->enabled;
}

/*
 * zfpm_conn_is_up
 *
 * Returns true if the connection to the FPM is up.
 */
static inline int zfpm_conn_is_up(void)
{
	if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
		return 0;

	assert(zfpm_g->sock >= 0);

	return 1;
}

/*
 * zfpm_trigger_update
 *
 * The zebra code invokes this function to indicate that we should
 * send an update to the FPM about the given route_node.
 */
static int zfpm_trigger_update(struct route_node *rn, const char *reason)
{
	rib_dest_t *dest;

	/*
	 * Ignore if the connection is down. We will update the FPM about
	 * all destinations once the connection comes up.
	 */
	if (!zfpm_conn_is_up())
		return 0;

	dest = rib_dest_from_rnode(rn);

	if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
		zfpm_g->stats.redundant_triggers++;
		return 0;
	}

	if (reason) {
		zfpm_debug("%pFX triggering update to FPM - Reason: %s", &rn->p,
			   reason);
	}

	SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
	TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries);
	zfpm_g->stats.updates_triggered++;

	/*
	 * Make sure that writes are enabled.
	 */
	if (zfpm_g->t_write)
		return 0;

	zfpm_write_on();
	return 0;
}

/*
 * Generate Key for FPM MAC info hash entry
 */
static unsigned int zfpm_mac_info_hash_keymake(const void *p)
{
	struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p;
	uint32_t mac_key;

	mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a);

	return jhash_2words(mac_key, fpm_mac->vni, 0);
}

/*
 * Compare function for FPM MAC info hash lookup
 */
static bool zfpm_mac_info_cmp(const void *p1, const void *p2)
{
	const struct fpm_mac_info_t *fpm_mac1 = p1;
	const struct fpm_mac_info_t *fpm_mac2 = p2;

	if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN)
	    != 0)
		return false;
	if (fpm_mac1->vni != fpm_mac2->vni)
		return false;

	return true;
}

/*
 * Lookup FPM MAC info hash entry.
 */
static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key)
{
	return hash_lookup(zfpm_g->fpm_mac_info_table, key);
}

/*
 * Callback to allocate fpm_mac_info_t structure.
 */
static void *zfpm_mac_info_alloc(void *p)
{
	const struct fpm_mac_info_t *key = p;
	struct fpm_mac_info_t *fpm_mac;

	fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t));

	memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN);
	fpm_mac->vni = key->vni;

	return (void *)fpm_mac;
}

/*
 * Delink and free fpm_mac_info_t.
 */
static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac)
{
	hash_release(zfpm_g->fpm_mac_info_table, fpm_mac);
	TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
	XFREE(MTYPE_FPM_MAC_INFO, fpm_mac);
}

/*
 * zfpm_trigger_rmac_update
 *
 * Zebra code invokes this function to indicate that we should
 * send an update to FPM for given MAC entry.
 *
 * This function checks if we already have enqueued an update for this RMAC,
 * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update.
 */
static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni,
				    bool delete, const char *reason)
{
	struct fpm_mac_info_t *fpm_mac, key;
	struct interface *vxlan_if, *svi_if;
	bool mac_found = false;

	/*
	 * Ignore if the connection is down. We will update the FPM about
	 * all destinations once the connection comes up.
	 */
	if (!zfpm_conn_is_up())
		return 0;

	if (reason) {
		zfpm_debug("triggering update to FPM - Reason: %s - %pEA",
			   reason, &rmac->macaddr);
	}

	vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);
	svi_if = zl3vni_map_to_svi_if(zl3vni);

	memset(&key, 0, sizeof(struct fpm_mac_info_t));

	memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN);
	key.vni = zl3vni->vni;

	/* Check if this MAC is already present in the queue. */
	fpm_mac = zfpm_mac_info_lookup(&key);

	if (fpm_mac) {
		mac_found = true;

		/*
		 * If the enqueued op is "add" and current op is "delete",
		 * this is a noop. So, Unset ZEBRA_MAC_UPDATE_FPM flag.
		 * While processing FPM queue, we will silently delete this
		 * MAC entry without sending any update for this MAC.
		 */
		if (!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) &&
		    delete == 1) {
			SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
			UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
			return 0;
		}
	} else {
		fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key,
				   zfpm_mac_info_alloc);
		if (!fpm_mac)
			return 0;
	}

	fpm_mac->r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr;
	fpm_mac->zebra_flags = rmac->flags;
	fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
	fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;

	SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
	if (delete)
		SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
	else
		UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);

	if (!mac_found)
		TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);

	zfpm_g->stats.updates_triggered++;

	/* If writes are already enabled, return. */
	if (zfpm_g->t_write)
		return 0;

	zfpm_write_on();
	return 0;
}
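
/*
 * Worked example of the no-op collapse described above (editorial
 * note, not in the original file): if an "add" for (MAC m, VNI v) is
 * still queued when a "delete" for the same (m, v) arrives, the
 * lookup finds the queued fpm_mac_info_t, sets ZEBRA_MAC_DELETE_FPM
 * and clears ZEBRA_MAC_UPDATE_FPM on it. zfpm_build_mac_updates()
 * then treats the node as a no-op, frees it via zfpm_mac_info_del()
 * and sends nothing, so the peer never sees the transient MAC.
 */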

/*
 * This function is called when the FPM connection is established.
 * Iterate over all the RMAC entries for the given L3VNI
 * and enqueue the RMAC for FPM processing.
 */
static void zfpm_trigger_rmac_update_wrapper(struct hash_bucket *bucket,
					     void *args)
{
	zebra_mac_t *zrmac = (zebra_mac_t *)bucket->data;
	zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)args;

	zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added");
}

/*
 * This function is called when the FPM connection is established.
 * This function iterates over all the L3VNIs to trigger
 * FPM updates for RMACs currently available.
 */
static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args)
{
	zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)bucket->data;

	hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper,
		     (void *)zl3vni);
}

/*
 * zfpm_stats_timer_cb
 */
static int zfpm_stats_timer_cb(struct thread *t)
{
	zfpm_g->t_stats = NULL;

	/*
	 * Remember the stats collected in the last interval for display
	 * purposes.
	 */
	zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats);

	/*
	 * Add the current set of stats into the cumulative statistics.
	 */
	zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
			   &zfpm_g->cumulative_stats);

	/*
	 * Start collecting stats afresh over the next interval.
	 */
	zfpm_stats_reset(&zfpm_g->stats);

	zfpm_start_stats_timer();

	return 0;
}

/*
 * zfpm_stop_stats_timer
 */
static void zfpm_stop_stats_timer(void)
{
	if (!zfpm_g->t_stats)
		return;

	zfpm_debug("Stopping existing stats timer");
	thread_cancel(&zfpm_g->t_stats);
}

/*
 * zfpm_start_stats_timer
 */
void zfpm_start_stats_timer(void)
{
	assert(!zfpm_g->t_stats);

	thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0,
			 ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats);
}

/*
 * Helper macro for zfpm_show_stats() below.
 */
#define ZFPM_SHOW_STAT(counter)                                               \
	do {                                                                  \
		vty_out(vty, "%-40s %10lu %16lu\n", #counter,                 \
			total_stats.counter, zfpm_g->last_ivl_stats.counter); \
	} while (0)

/*
 * zfpm_show_stats
 */
static void zfpm_show_stats(struct vty *vty)
{
	struct zfpm_stats total_stats;
	time_t elapsed;

	vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total",
		ZFPM_STATS_IVL_SECS);

	/*
	 * Compute the total stats up to this instant.
	 */
	zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
			   &total_stats);

	ZFPM_SHOW_STAT(connect_calls);
	ZFPM_SHOW_STAT(connect_no_sock);
	ZFPM_SHOW_STAT(read_cb_calls);
	ZFPM_SHOW_STAT(write_cb_calls);
	ZFPM_SHOW_STAT(write_calls);
	ZFPM_SHOW_STAT(partial_writes);
	ZFPM_SHOW_STAT(max_writes_hit);
	ZFPM_SHOW_STAT(t_write_yields);
	ZFPM_SHOW_STAT(nop_deletes_skipped);
	ZFPM_SHOW_STAT(route_adds);
	ZFPM_SHOW_STAT(route_dels);
	ZFPM_SHOW_STAT(updates_triggered);
	ZFPM_SHOW_STAT(redundant_triggers);
	ZFPM_SHOW_STAT(dests_del_after_update);
	ZFPM_SHOW_STAT(t_conn_down_starts);
	ZFPM_SHOW_STAT(t_conn_down_dests_processed);
	ZFPM_SHOW_STAT(t_conn_down_yields);
	ZFPM_SHOW_STAT(t_conn_down_finishes);
	ZFPM_SHOW_STAT(t_conn_up_starts);
	ZFPM_SHOW_STAT(t_conn_up_dests_processed);
	ZFPM_SHOW_STAT(t_conn_up_yields);
	ZFPM_SHOW_STAT(t_conn_up_aborts);
	ZFPM_SHOW_STAT(t_conn_up_finishes);

	if (!zfpm_g->last_stats_clear_time)
		return;

	elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time);

	vty_out(vty, "\nStats were cleared %lu seconds ago\n",
		(unsigned long)elapsed);
}

/*
 * zfpm_clear_stats
 */
static void zfpm_clear_stats(struct vty *vty)
{
	if (!zfpm_is_enabled()) {
		vty_out(vty, "The FPM module is not enabled...\n");
		return;
	}

	zfpm_stats_reset(&zfpm_g->stats);
	zfpm_stats_reset(&zfpm_g->last_ivl_stats);
	zfpm_stats_reset(&zfpm_g->cumulative_stats);

	zfpm_stop_stats_timer();
	zfpm_start_stats_timer();

	zfpm_g->last_stats_clear_time = monotime(NULL);

	vty_out(vty, "Cleared FPM stats\n");
}

/*
 * show_zebra_fpm_stats
 */
DEFUN (show_zebra_fpm_stats,
       show_zebra_fpm_stats_cmd,
       "show zebra fpm stats",
       SHOW_STR
       ZEBRA_STR
       "Forwarding Path Manager information\n"
       "Statistics\n")
{
	zfpm_show_stats(vty);
	return CMD_SUCCESS;
}

/*
 * clear_zebra_fpm_stats
 */
DEFUN (clear_zebra_fpm_stats,
       clear_zebra_fpm_stats_cmd,
       "clear zebra fpm stats",
       CLEAR_STR
       ZEBRA_STR
       "Clear Forwarding Path Manager information\n"
       "Statistics\n")
{
	zfpm_clear_stats(vty);
	return CMD_SUCCESS;
}

/*
 * update fpm connection information
 */
DEFUN (fpm_remote_ip,
       fpm_remote_ip_cmd,
       "fpm connection ip A.B.C.D port (1-65535)",
       "fpm connection remote ip and port\n"
       "Remote fpm server ip A.B.C.D\n"
       "Enter ip ")
{

	in_addr_t fpm_server;
	uint32_t port_no;

	fpm_server = inet_addr(argv[3]->arg);
	if (fpm_server == INADDR_NONE)
		return CMD_ERR_INCOMPLETE;

	port_no = atoi(argv[5]->arg);
	if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT)
		return CMD_ERR_INCOMPLETE;

	zfpm_g->fpm_server = fpm_server;
	zfpm_g->fpm_port = port_no;


	return CMD_SUCCESS;
}
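
/*
 * Example vtysh usage (editorial note; FPM_DEFAULT_PORT in fpm/fpm.h
 * is the default the module otherwise falls back to):
 *
 *	configure terminal
 *	 fpm connection ip 127.0.0.1 port 2620
 *
 * The address/port pair is stored in zfpm_g and is picked up on the
 * next (re)connect attempt; it does not tear down an established
 * session by itself.
 */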

DEFUN (no_fpm_remote_ip,
       no_fpm_remote_ip_cmd,
       "no fpm connection ip A.B.C.D port (1-65535)",
       "fpm connection remote ip and port\n"
       "Connection\n"
       "Remote fpm server ip A.B.C.D\n"
       "Enter ip ")
{
	if (zfpm_g->fpm_server != inet_addr(argv[4]->arg)
	    || zfpm_g->fpm_port != atoi(argv[6]->arg))
		return CMD_ERR_NO_MATCH;

	zfpm_g->fpm_server = FPM_DEFAULT_IP;
	zfpm_g->fpm_port = FPM_DEFAULT_PORT;

	return CMD_SUCCESS;
}

/*
 * zfpm_init_message_format
 */
static inline void zfpm_init_message_format(const char *format)
{
	int have_netlink, have_protobuf;

#ifdef HAVE_NETLINK
	have_netlink = 1;
#else
	have_netlink = 0;
#endif

#ifdef HAVE_PROTOBUF
	have_protobuf = 1;
#else
	have_protobuf = 0;
#endif

	zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE;

	if (!format) {
		if (have_netlink) {
			zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
		} else if (have_protobuf) {
			zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
		}
		return;
	}

	if (!strcmp("netlink", format)) {
		if (!have_netlink) {
			flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE,
				 "FPM netlink message format is not available");
			return;
		}
		zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
		return;
	}

	if (!strcmp("protobuf", format)) {
		if (!have_protobuf) {
			flog_err(
				EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
				"FPM protobuf message format is not available");
			return;
		}
		flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
			  "FPM protobuf message format is deprecated and scheduled to be removed. Please convert to using netlink format or contact dev@lists.frrouting.org with your use case.");
		zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
		return;
	}

	flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'",
		  format);
}
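
/*
 * Usage note (editorial addition): the format string above comes from
 * the module's load arguments, i.e. how zebra was started:
 *
 *	zebra -M fpm             # netlink if available, else protobuf
 *	zebra -M fpm:netlink     # force netlink framing
 *	zebra -M fpm:protobuf    # force protobuf framing (deprecated)
 */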

/**
 * fpm_remote_srv_write
 *
 * Config-write callback for the remote fpm connection.
 *
 * Returns ZERO on success.
 */

static int fpm_remote_srv_write(struct vty *vty)
{
	struct in_addr in;

	in.s_addr = zfpm_g->fpm_server;

	if ((zfpm_g->fpm_server != FPM_DEFAULT_IP
	     && zfpm_g->fpm_server != INADDR_ANY)
	    || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0))
		vty_out(vty, "fpm connection ip %pI4 port %d\n", &in,
			zfpm_g->fpm_port);

	return 0;
}


/* Zebra node */
static struct cmd_node zebra_node = {
	.name = "zebra",
	.node = ZEBRA_NODE,
	.parent_node = CONFIG_NODE,
	.prompt = "",
	.config_write = fpm_remote_srv_write,
};


/**
 * zfpm_init
 *
 * One-time initialization of the Zebra FPM module.
 *
 * @param[in] port port at which FPM is running.
 * @param[in] enable true if the zebra FPM module should be enabled
 * @param[in] format to use to talk to the FPM. Can be 'netlink' or 'protobuf'.
 *
 * Returns 0 on success.
 */
static int zfpm_init(struct thread_master *master)
{
	int enable = 1;
	uint16_t port = 0;
	const char *format = THIS_MODULE->load_args;

	memset(zfpm_g, 0, sizeof(*zfpm_g));
	zfpm_g->master = master;
	TAILQ_INIT(&zfpm_g->dest_q);
	TAILQ_INIT(&zfpm_g->mac_q);

	/* Create hash table for fpm_mac_info_t entries */
	zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake,
						 zfpm_mac_info_cmp,
						 "FPM MAC info hash table");

	zfpm_g->sock = -1;
	zfpm_g->state = ZFPM_STATE_IDLE;

	zfpm_stats_init(&zfpm_g->stats);
	zfpm_stats_init(&zfpm_g->last_ivl_stats);
	zfpm_stats_init(&zfpm_g->cumulative_stats);

	install_node(&zebra_node);
	install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd);
	install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
	install_element(CONFIG_NODE, &fpm_remote_ip_cmd);
	install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd);

	zfpm_init_message_format(format);

	/*
	 * Disable FPM interface if no suitable format is available.
	 */
	if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE)
		enable = 0;

	zfpm_g->enabled = enable;

	if (!zfpm_g->fpm_server)
		zfpm_g->fpm_server = FPM_DEFAULT_IP;

	if (!port)
		port = FPM_DEFAULT_PORT;

	zfpm_g->fpm_port = port;

	zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE);
	zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE);

	zfpm_start_stats_timer();
	zfpm_start_connect_timer("initialized");
	return 0;
}

static int zfpm_fini(void)
{
	zfpm_write_off();
	zfpm_read_off();
	zfpm_connect_off();

	zfpm_stop_stats_timer();

	hook_unregister(rib_update, zfpm_trigger_update);
	return 0;
}

static int zebra_fpm_module_init(void)
{
	hook_register(rib_update, zfpm_trigger_update);
	hook_register(zebra_rmac_update, zfpm_trigger_rmac_update);
	hook_register(frr_late_init, zfpm_init);
	hook_register(frr_early_fini, zfpm_fini);
	return 0;
}

FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION,
		 .description = "zebra FPM (Forwarding Plane Manager) module",
		 .init = zebra_fpm_module_init,
);