zebra/zebra_fpm.c
1 /*
2 * Main implementation file for interface to Forwarding Plane Manager.
3 *
4 * Copyright (C) 2012 by Open Source Routing.
5 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
6 *
7 * This file is part of GNU Zebra.
8 *
9 * GNU Zebra is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2, or (at your option) any
12 * later version.
13 *
14 * GNU Zebra is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; see the file COPYING; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <zebra.h>
25
26 #include "log.h"
27 #include "libfrr.h"
28 #include "stream.h"
29 #include "thread.h"
30 #include "network.h"
31 #include "command.h"
32 #include "version.h"
33 #include "jhash.h"
34
35 #include "zebra/rib.h"
36 #include "zebra/zserv.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/zebra_errors.h"
40 #include "zebra/zebra_memory.h"
41
42 #include "fpm/fpm.h"
43 #include "zebra_fpm_private.h"
44 #include "zebra/zebra_router.h"
45 #include "zebra_vxlan_private.h"
46
47 DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");
48
49 /*
50 * Interval at which we attempt to connect to the FPM.
51 */
52 #define ZFPM_CONNECT_RETRY_IVL 5
53
54 /*
55 * Sizes of outgoing and incoming stream buffers for writing/reading
56 * FPM messages.
57 */
58 #define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
59 #define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)
60
61 /*
62 * The maximum number of times the FPM socket write callback can call
63 * 'write' before it yields.
64 */
65 #define ZFPM_MAX_WRITES_PER_RUN 10
66
67 /*
68 * Interval over which we collect statistics.
69 */
70 #define ZFPM_STATS_IVL_SECS 10
71 #define FPM_MAX_MAC_MSG_LEN 512
72
73 static void zfpm_iterate_rmac_table(struct hash_bucket *backet, void *args);
74
75 /*
76 * Structure that holds state for iterating over all route_node
77 * structures that are candidates for being communicated to the FPM.
78 */
79 struct zfpm_rnodes_iter {
80 rib_tables_iter_t tables_iter;
81 route_table_iter_t iter;
82 };
83
84 /*
85 * Statistics.
86 */
87 struct zfpm_stats {
88 unsigned long connect_calls;
89 unsigned long connect_no_sock;
90
91 unsigned long read_cb_calls;
92
93 unsigned long write_cb_calls;
94 unsigned long write_calls;
95 unsigned long partial_writes;
96 unsigned long max_writes_hit;
97 unsigned long t_write_yields;
98
99 unsigned long nop_deletes_skipped;
100 unsigned long route_adds;
101 unsigned long route_dels;
102
103 unsigned long updates_triggered;
104 unsigned long redundant_triggers;
105
106 unsigned long dests_del_after_update;
107
108 unsigned long t_conn_down_starts;
109 unsigned long t_conn_down_dests_processed;
110 unsigned long t_conn_down_yields;
111 unsigned long t_conn_down_finishes;
112
113 unsigned long t_conn_up_starts;
114 unsigned long t_conn_up_dests_processed;
115 unsigned long t_conn_up_yields;
116 unsigned long t_conn_up_aborts;
117 unsigned long t_conn_up_finishes;
118 };
119
120 /*
121 * States for the FPM state machine.
122 */
123 enum zfpm_state {
124
125 /*
126 * In this state we are not yet ready to connect to the FPM. This
127 * can happen when this module is disabled, or if we're cleaning up
128 * after a connection has gone down.
129 */
130 ZFPM_STATE_IDLE,
131
132 /*
133 * Ready to talk to the FPM and periodically trying to connect to
134 * it.
135 */
136 ZFPM_STATE_ACTIVE,
137
138 /*
139 * In the middle of bringing up a TCP connection. Specifically,
140 * waiting for a connect() call to complete asynchronously.
141 */
142 ZFPM_STATE_CONNECTING,
143
144 /*
145 * TCP connection to the FPM is up.
146 */
147 ZFPM_STATE_ESTABLISHED
148
149 };
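/*
 * Typical transitions, as enforced by the assertions in
 * zfpm_set_state() below:
 *
 *   IDLE        -> ACTIVE       (connect retry timer armed)
 *   ACTIVE      -> CONNECTING   (asynchronous connect() in progress)
 *   ACTIVE      -> ESTABLISHED  (connect() succeeded immediately)
 *   CONNECTING  -> ESTABLISHED  (asynchronous connect() completed)
 *   CONNECTING  -> ACTIVE       (connect failed, retry scheduled)
 *   ESTABLISHED -> IDLE         (connection to the FPM went down)
 */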
150
151 /*
152 * Message format to be used to communicate with the FPM.
153 */
154 enum zfpm_msg_format {
155 ZFPM_MSG_FORMAT_NONE,
156 ZFPM_MSG_FORMAT_NETLINK,
157 ZFPM_MSG_FORMAT_PROTOBUF,
158 };
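/*
 * The format is picked in zfpm_init_message_format() from this
 * module's load arguments; for example, loading zebra with
 * "-M fpm:netlink" or "-M fpm:protobuf" requests a specific format,
 * while plain "-M fpm" picks netlink when the build supports it and
 * protobuf otherwise.
 */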
159
160 /*
161 * Globals.
162 */
163 struct zfpm_glob {
164
165 /*
166 * True if the FPM module has been enabled.
167 */
168 int enabled;
169
170 /*
171 * Message format to be used to communicate with the fpm.
172 */
173 enum zfpm_msg_format message_format;
174
175 struct thread_master *master;
176
177 enum zfpm_state state;
178
179 in_addr_t fpm_server;
180 /*
181 * Port on which the FPM is running.
182 */
183 int fpm_port;
184
185 /*
186 * List of rib_dest_t structures to be processed
187 */
188 TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;
189
190 /*
191 * List of fpm_mac_info structures to be processed
192 */
193 TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;
194
195 /*
196 * Hash table of fpm_mac_info_t entries
197 *
198 * While adding fpm_mac_info_t for a MAC to the mac_q,
199 * it is possible that another fpm_mac_info_t node for this MAC
200 * is already present in the queue.
201 * This is possible in the case of consecutive add->delete operations.
202 * To avoid such duplicate insertions in the mac_q,
203 * define a hash table for fpm_mac_info_t which can be looked up
204 * to see if an fpm_mac_info_t node for a MAC is already present
205 * in the mac_q.
206 */
207 struct hash *fpm_mac_info_table;
208
209 /*
210 * Stream socket to the FPM.
211 */
212 int sock;
213
214 /*
215 * Buffers for messages to/from the FPM.
216 */
217 struct stream *obuf;
218 struct stream *ibuf;
219
220 /*
221 * Threads for I/O.
222 */
223 struct thread *t_connect;
224 struct thread *t_write;
225 struct thread *t_read;
226
227 /*
228 * Thread to clean up after the TCP connection to the FPM goes down
229 * and the state that belongs to it.
230 */
231 struct thread *t_conn_down;
232
233 struct {
234 struct zfpm_rnodes_iter iter;
235 } t_conn_down_state;
236
237 /*
238 * Thread to take actions once the TCP conn to the FPM comes up, and
239 * the state that belongs to it.
240 */
241 struct thread *t_conn_up;
242
243 struct {
244 struct zfpm_rnodes_iter iter;
245 } t_conn_up_state;
246
247 unsigned long connect_calls;
248 time_t last_connect_call_time;
249
250 /*
251 * Stats from the start of the current statistics interval up to
252 * now. These are the counters we typically update in the code.
253 */
254 struct zfpm_stats stats;
255
256 /*
257 * Statistics that were gathered in the last collection interval.
258 */
259 struct zfpm_stats last_ivl_stats;
260
261 /*
262 * Cumulative stats from the last clear to the start of the current
263 * statistics interval.
264 */
265 struct zfpm_stats cumulative_stats;
266
267 /*
268 * Stats interval timer.
269 */
270 struct thread *t_stats;
271
272 /*
273 * If non-zero, the last time when statistics were cleared.
274 */
275 time_t last_stats_clear_time;
276 };
277
278 static struct zfpm_glob zfpm_glob_space;
279 static struct zfpm_glob *zfpm_g = &zfpm_glob_space;
280
281 static int zfpm_trigger_update(struct route_node *rn, const char *reason);
282
283 static int zfpm_read_cb(struct thread *thread);
284 static int zfpm_write_cb(struct thread *thread);
285
286 static void zfpm_set_state(enum zfpm_state state, const char *reason);
287 static void zfpm_start_connect_timer(const char *reason);
288 static void zfpm_start_stats_timer(void);
289 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);
290
291 /*
292 * zfpm_thread_should_yield
293 */
294 static inline int zfpm_thread_should_yield(struct thread *t)
295 {
296 return thread_should_yield(t);
297 }
298
299 /*
300 * zfpm_state_to_str
301 */
302 static const char *zfpm_state_to_str(enum zfpm_state state)
303 {
304 switch (state) {
305
306 case ZFPM_STATE_IDLE:
307 return "idle";
308
309 case ZFPM_STATE_ACTIVE:
310 return "active";
311
312 case ZFPM_STATE_CONNECTING:
313 return "connecting";
314
315 case ZFPM_STATE_ESTABLISHED:
316 return "established";
317
318 default:
319 return "unknown";
320 }
321 }
322
323 /*
324 * zfpm_get_elapsed_time
325 *
326 * Returns the time elapsed (in seconds) since the given time.
327 */
328 static time_t zfpm_get_elapsed_time(time_t reference)
329 {
330 time_t now;
331
332 now = monotime(NULL);
333
334 if (now < reference) {
335 assert(0);
336 return 0;
337 }
338
339 return now - reference;
340 }
341
342 /*
343 * zfpm_rnodes_iter_init
344 */
345 static inline void zfpm_rnodes_iter_init(struct zfpm_rnodes_iter *iter)
346 {
347 memset(iter, 0, sizeof(*iter));
348 rib_tables_iter_init(&iter->tables_iter);
349
350 /*
351 * This is a hack, but it makes implementing 'next' easier by
352 * ensuring that route_table_iter_next() will return NULL the first
353 * time we call it.
354 */
355 route_table_iter_init(&iter->iter, NULL);
356 route_table_iter_cleanup(&iter->iter);
357 }
358
359 /*
360 * zfpm_rnodes_iter_next
361 */
362 static inline struct route_node *
363 zfpm_rnodes_iter_next(struct zfpm_rnodes_iter *iter)
364 {
365 struct route_node *rn;
366 struct route_table *table;
367
368 while (1) {
369 rn = route_table_iter_next(&iter->iter);
370 if (rn)
371 return rn;
372
373 /*
374 * We've made our way through this table, go to the next one.
375 */
376 route_table_iter_cleanup(&iter->iter);
377
378 table = rib_tables_iter_next(&iter->tables_iter);
379
380 if (!table)
381 return NULL;
382
383 route_table_iter_init(&iter->iter, table);
384 }
385
386 return NULL;
387 }
388
389 /*
390 * zfpm_rnodes_iter_pause
391 */
392 static inline void zfpm_rnodes_iter_pause(struct zfpm_rnodes_iter *iter)
393 {
394 route_table_iter_pause(&iter->iter);
395 }
396
397 /*
398 * zfpm_rnodes_iter_cleanup
399 */
400 static inline void zfpm_rnodes_iter_cleanup(struct zfpm_rnodes_iter *iter)
401 {
402 route_table_iter_cleanup(&iter->iter);
403 rib_tables_iter_cleanup(&iter->tables_iter);
404 }
405
406 /*
407 * zfpm_stats_init
408 *
409 * Initialize a statistics block.
410 */
411 static inline void zfpm_stats_init(struct zfpm_stats *stats)
412 {
413 memset(stats, 0, sizeof(*stats));
414 }
415
416 /*
417 * zfpm_stats_reset
418 */
419 static inline void zfpm_stats_reset(struct zfpm_stats *stats)
420 {
421 zfpm_stats_init(stats);
422 }
423
424 /*
425 * zfpm_stats_copy
426 */
427 static inline void zfpm_stats_copy(const struct zfpm_stats *src,
428 struct zfpm_stats *dest)
429 {
430 memcpy(dest, src, sizeof(*dest));
431 }
432
433 /*
434 * zfpm_stats_compose
435 *
436 * Total up the statistics in two stats structures ('s1' and 's2') and
437 * return the result in the third argument, 'result'. Note that the
438 * pointer 'result' may be the same as 's1' or 's2'.
439 *
440 * For simplicity, the implementation below assumes that the stats
441 * structure is composed entirely of counters. This can easily be
442 * changed when necessary.
443 */
444 static void zfpm_stats_compose(const struct zfpm_stats *s1,
445 const struct zfpm_stats *s2,
446 struct zfpm_stats *result)
447 {
448 const unsigned long *p1, *p2;
449 unsigned long *result_p;
450 int i, num_counters;
451
452 p1 = (const unsigned long *)s1;
453 p2 = (const unsigned long *)s2;
454 result_p = (unsigned long *)result;
455
456 num_counters = (sizeof(struct zfpm_stats) / sizeof(unsigned long));
457
458 for (i = 0; i < num_counters; i++) {
459 result_p[i] = p1[i] + p2[i];
460 }
461 }
462
463 /*
464 * zfpm_read_on
465 */
466 static inline void zfpm_read_on(void)
467 {
468 assert(!zfpm_g->t_read);
469 assert(zfpm_g->sock >= 0);
470
471 thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
472 &zfpm_g->t_read);
473 }
474
475 /*
476 * zfpm_write_on
477 */
478 static inline void zfpm_write_on(void)
479 {
480 assert(!zfpm_g->t_write);
481 assert(zfpm_g->sock >= 0);
482
483 thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
484 &zfpm_g->t_write);
485 }
486
487 /*
488 * zfpm_read_off
489 */
490 static inline void zfpm_read_off(void)
491 {
492 thread_cancel(&zfpm_g->t_read);
493 }
494
495 /*
496 * zfpm_write_off
497 */
498 static inline void zfpm_write_off(void)
499 {
500 thread_cancel(&zfpm_g->t_write);
501 }
502
503 static inline void zfpm_connect_off(void)
504 {
505 thread_cancel(&zfpm_g->t_connect);
506 }
507
508 /*
509 * zfpm_conn_up_thread_cb
510 *
511 * Callback for actions to be taken when the connection to the FPM
512 * comes up.
513 */
514 static int zfpm_conn_up_thread_cb(struct thread *thread)
515 {
516 struct route_node *rnode;
517 struct zfpm_rnodes_iter *iter;
518 rib_dest_t *dest;
519
520 zfpm_g->t_conn_up = NULL;
521
522 iter = &zfpm_g->t_conn_up_state.iter;
523
524 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
525 zfpm_debug(
526 "Connection not up anymore, conn_up thread aborting");
527 zfpm_g->stats.t_conn_up_aborts++;
528 goto done;
529 }
530
531 /* Enqueue FPM updates for all the RMAC entries */
532 hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table, NULL);
533
534 while ((rnode = zfpm_rnodes_iter_next(iter))) {
535 dest = rib_dest_from_rnode(rnode);
536
537 if (dest) {
538 zfpm_g->stats.t_conn_up_dests_processed++;
539 zfpm_trigger_update(rnode, NULL);
540 }
541
542 /*
543 * Yield if need be.
544 */
545 if (!zfpm_thread_should_yield(thread))
546 continue;
547
548 zfpm_g->stats.t_conn_up_yields++;
549 zfpm_rnodes_iter_pause(iter);
550 zfpm_g->t_conn_up = NULL;
551 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
552 NULL, 0, &zfpm_g->t_conn_up);
553 return 0;
554 }
555
556 zfpm_g->stats.t_conn_up_finishes++;
557
558 done:
559 zfpm_rnodes_iter_cleanup(iter);
560 return 0;
561 }
562
563 /*
564 * zfpm_connection_up
565 *
566 * Called when the connection to the FPM comes up.
567 */
568 static void zfpm_connection_up(const char *detail)
569 {
570 assert(zfpm_g->sock >= 0);
571 zfpm_read_on();
572 zfpm_write_on();
573 zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);
574
575 /*
576 * Start thread to push existing routes to the FPM.
577 */
578 assert(!zfpm_g->t_conn_up);
579
580 zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);
581
582 zfpm_debug("Starting conn_up thread");
583 zfpm_g->t_conn_up = NULL;
584 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
585 &zfpm_g->t_conn_up);
586 zfpm_g->stats.t_conn_up_starts++;
587 }
588
589 /*
590 * zfpm_connect_check
591 *
592 * Check if an asynchronous connect() to the FPM is complete.
593 */
594 static void zfpm_connect_check(void)
595 {
596 int status;
597 socklen_t slen;
598 int ret;
599
600 zfpm_read_off();
601 zfpm_write_off();
602
603 slen = sizeof(status);
604 ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
605 &slen);
606
607 if (ret >= 0 && status == 0) {
608 zfpm_connection_up("async connect complete");
609 return;
610 }
611
612 /*
613 * getsockopt() failed or indicated an error on the socket.
614 */
615 close(zfpm_g->sock);
616 zfpm_g->sock = -1;
617
618 zfpm_start_connect_timer("getsockopt() after async connect failed");
619 return;
620 }
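/*
 * Note: reading SO_ERROR with getsockopt(), as done above, is the
 * standard way to retrieve the result of a non-blocking connect()
 * once the socket is reported readable or writable.
 */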
621
622 /*
623 * zfpm_conn_down_thread_cb
624 *
625 * Callback that is invoked to clean up state after the TCP connection
626 * to the FPM goes down.
627 */
628 static int zfpm_conn_down_thread_cb(struct thread *thread)
629 {
630 struct route_node *rnode;
631 struct zfpm_rnodes_iter *iter;
632 rib_dest_t *dest;
633 struct fpm_mac_info_t *mac = NULL;
634
635 assert(zfpm_g->state == ZFPM_STATE_IDLE);
636
637 /*
638 * Delink and free all fpm_mac_info_t nodes
639 * in the mac_q and fpm_mac_info_hash
640 */
641 while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
642 zfpm_mac_info_del(mac);
643
644 zfpm_g->t_conn_down = NULL;
645
646 iter = &zfpm_g->t_conn_down_state.iter;
647
648 while ((rnode = zfpm_rnodes_iter_next(iter))) {
649 dest = rib_dest_from_rnode(rnode);
650
651 if (dest) {
652 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
653 TAILQ_REMOVE(&zfpm_g->dest_q, dest,
654 fpm_q_entries);
655 }
656
657 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
658 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
659
660 zfpm_g->stats.t_conn_down_dests_processed++;
661
662 /*
663 * Check if the dest should be deleted.
664 */
665 rib_gc_dest(rnode);
666 }
667
668 /*
669 * Yield if need be.
670 */
671 if (!zfpm_thread_should_yield(thread))
672 continue;
673
674 zfpm_g->stats.t_conn_down_yields++;
675 zfpm_rnodes_iter_pause(iter);
676 zfpm_g->t_conn_down = NULL;
677 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
678 NULL, 0, &zfpm_g->t_conn_down);
679 return 0;
680 }
681
682 zfpm_g->stats.t_conn_down_finishes++;
683 zfpm_rnodes_iter_cleanup(iter);
684
685 /*
686 * Start the process of connecting to the FPM again.
687 */
688 zfpm_start_connect_timer("cleanup complete");
689 return 0;
690 }
691
692 /*
693 * zfpm_connection_down
694 *
695 * Called when the connection to the FPM has gone down.
696 */
697 static void zfpm_connection_down(const char *detail)
698 {
699 if (!detail)
700 detail = "unknown";
701
702 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
703
704 zlog_info("connection to the FPM has gone down: %s", detail);
705
706 zfpm_read_off();
707 zfpm_write_off();
708
709 stream_reset(zfpm_g->ibuf);
710 stream_reset(zfpm_g->obuf);
711
712 if (zfpm_g->sock >= 0) {
713 close(zfpm_g->sock);
714 zfpm_g->sock = -1;
715 }
716
717 /*
718 * Start thread to clean up state after the connection goes down.
719 */
720 assert(!zfpm_g->t_conn_down);
721 zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
722 zfpm_g->t_conn_down = NULL;
723 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
724 &zfpm_g->t_conn_down);
725 zfpm_g->stats.t_conn_down_starts++;
726
727 zfpm_set_state(ZFPM_STATE_IDLE, detail);
728 }
729
730 /*
731 * zfpm_read_cb
732 */
733 static int zfpm_read_cb(struct thread *thread)
734 {
735 size_t already;
736 struct stream *ibuf;
737 uint16_t msg_len;
738 fpm_msg_hdr_t *hdr;
739
740 zfpm_g->stats.read_cb_calls++;
741
742 /*
743 * Check if async connect is now done.
744 */
745 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
746 zfpm_connect_check();
747 return 0;
748 }
749
750 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
751 assert(zfpm_g->sock >= 0);
752
753 ibuf = zfpm_g->ibuf;
754
755 already = stream_get_endp(ibuf);
756 if (already < FPM_MSG_HDR_LEN) {
757 ssize_t nbyte;
758
759 nbyte = stream_read_try(ibuf, zfpm_g->sock,
760 FPM_MSG_HDR_LEN - already);
761 if (nbyte == 0 || nbyte == -1) {
762 if (nbyte == -1) {
763 char buffer[1024];
764
765 snprintf(buffer, sizeof(buffer),
766 "closed socket in read(%d): %s", errno,
767 safe_strerror(errno));
768 zfpm_connection_down(buffer);
769 } else
770 zfpm_connection_down("closed socket in read");
771 return 0;
772 }
773
774 if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
775 goto done;
776
777 already = FPM_MSG_HDR_LEN;
778 }
779
780 stream_set_getp(ibuf, 0);
781
782 hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);
783
784 if (!fpm_msg_hdr_ok(hdr)) {
785 zfpm_connection_down("invalid message header");
786 return 0;
787 }
788
789 msg_len = fpm_msg_len(hdr);
790
791 /*
792 * Read out the rest of the packet.
793 */
794 if (already < msg_len) {
795 ssize_t nbyte;
796
797 nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);
798
799 if (nbyte == 0 || nbyte == -1) {
800 if (nbyte == -1) {
801 char buffer[1024];
802
803 snprintf(buffer, sizeof(buffer),
804 "failed to read message(%d) %s", errno,
805 safe_strerror(errno));
806 zfpm_connection_down(buffer);
807 } else
808 zfpm_connection_down("failed to read message");
809 return 0;
810 }
811
812 if (nbyte != (ssize_t)(msg_len - already))
813 goto done;
814 }
815
816 /*
817 * Just throw it away for now.
818 */
819 stream_reset(ibuf);
820
821 done:
822 zfpm_read_on();
823 return 0;
824 }
825
826 static bool zfpm_updates_pending(void)
827 {
828 if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
829 return true;
830
831 return false;
832 }
833
834 /*
835 * zfpm_writes_pending
836 *
837 * Returns true if we may have something to write to the FPM.
838 */
839 static int zfpm_writes_pending(void)
840 {
841
842 /*
843 * Check if there is any data in the outbound buffer that has not
844 * been written to the socket yet.
845 */
846 if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
847 return 1;
848
849 /*
850 * Check if there are any updates scheduled on the outbound queues.
851 */
852 if (zfpm_updates_pending())
853 return 1;
854
855 return 0;
856 }
857
858 /*
859 * zfpm_encode_route
860 *
861 * Encode a message to the FPM with information about the given route.
862 *
863 * Returns the number of bytes written to the buffer. 0 or a negative
864 * value indicates an error.
865 */
866 static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
867 char *in_buf, size_t in_buf_len,
868 fpm_msg_type_e *msg_type)
869 {
870 size_t len;
871 #ifdef HAVE_NETLINK
872 int cmd;
873 #endif
874 len = 0;
875
876 *msg_type = FPM_MSG_TYPE_NONE;
877
878 switch (zfpm_g->message_format) {
879
880 case ZFPM_MSG_FORMAT_PROTOBUF:
881 #ifdef HAVE_PROTOBUF
882 len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
883 in_buf_len);
884 *msg_type = FPM_MSG_TYPE_PROTOBUF;
885 #endif
886 break;
887
888 case ZFPM_MSG_FORMAT_NETLINK:
889 #ifdef HAVE_NETLINK
890 *msg_type = FPM_MSG_TYPE_NETLINK;
891 cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
892 len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
893 in_buf_len);
894 assert(fpm_msg_align(len) == len);
896 #endif /* HAVE_NETLINK */
897 break;
898
899 default:
900 break;
901 }
902
903 return len;
904 }
905
906 /*
907 * zfpm_route_for_update
908 *
909 * Returns the re that is to be sent to the FPM for a given dest.
910 */
911 struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
912 {
913 return dest->selected_fib;
914 }
915
916 /*
917 * Define an enum for return codes for queue processing functions
918 *
919 * FPM_WRITE_STOP: This return code indicates that the write buffer is full.
920 * Stop processing all the queues and empty the buffer by writing its content
921 * to the socket.
922 *
923 * FPM_GOTO_NEXT_Q: This return code indicates that either this queue is
924 * empty or we have processed enough updates from this queue.
925 * So, move on to the next queue.
926 */
927 enum {
928 FPM_WRITE_STOP = 0,
929 FPM_GOTO_NEXT_Q = 1
930 };
931
932 #define FPM_QUEUE_PROCESS_LIMIT 10000
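/*
 * Rough sketch of how each message is framed in the output buffer by
 * the zfpm_build_*_updates() routines below (see fpm/fpm.h for the
 * authoritative definitions):
 *
 *   fpm_msg_hdr_t : version (1 byte), msg_type (1 byte),
 *                   msg_len (2 bytes, network byte order, covering
 *                   the header plus the payload)
 *   payload       : the netlink- or protobuf-encoded route/MAC data
 */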
933
934 /*
935 * zfpm_build_route_updates
936 *
937 * Process the dest_q queue and write FPM messages to the outbound buffer.
938 */
939 static int zfpm_build_route_updates(void)
940 {
941 struct stream *s;
942 rib_dest_t *dest;
943 unsigned char *buf, *data, *buf_end;
944 size_t msg_len;
945 size_t data_len;
946 fpm_msg_hdr_t *hdr;
947 struct route_entry *re;
948 int is_add, write_msg;
949 fpm_msg_type_e msg_type;
950 uint16_t q_limit;
951
952 if (TAILQ_EMPTY(&zfpm_g->dest_q))
953 return FPM_GOTO_NEXT_Q;
954
955 s = zfpm_g->obuf;
956 q_limit = FPM_QUEUE_PROCESS_LIMIT;
957
958 do {
959 /*
960 * Make sure there is enough space to write another message.
961 */
962 if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
963 return FPM_WRITE_STOP;
964
965 buf = STREAM_DATA(s) + stream_get_endp(s);
966 buf_end = buf + STREAM_WRITEABLE(s);
967
968 dest = TAILQ_FIRST(&zfpm_g->dest_q);
969 if (!dest)
970 return FPM_GOTO_NEXT_Q;
971
972 assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));
973
974 hdr = (fpm_msg_hdr_t *)buf;
975 hdr->version = FPM_PROTO_VERSION;
976
977 data = fpm_msg_data(hdr);
978
979 re = zfpm_route_for_update(dest);
980 is_add = re ? 1 : 0;
981
982 write_msg = 1;
983
984 /*
985 * If this is a route deletion, and we have not
986 * sent the route to the FPM previously,
987 * skip it.
988 */
989 if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
990 write_msg = 0;
991 zfpm_g->stats.nop_deletes_skipped++;
992 }
993
994 if (write_msg) {
995 data_len = zfpm_encode_route(dest, re, (char *)data,
996 buf_end - data, &msg_type);
997
998 assert(data_len);
999 if (data_len) {
1000 hdr->msg_type = msg_type;
1001 msg_len = fpm_data_len_to_msg_len(data_len);
1002 hdr->msg_len = htons(msg_len);
1003 stream_forward_endp(s, msg_len);
1004
1005 if (is_add)
1006 zfpm_g->stats.route_adds++;
1007 else
1008 zfpm_g->stats.route_dels++;
1009 }
1010 }
1011
1012 /*
1013 * Remove the dest from the queue, and reset the flag.
1014 */
1015 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1016 TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);
1017
1018 if (is_add) {
1019 SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1020 } else {
1021 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1022 }
1023
1024 /*
1025 * Delete the destination if necessary.
1026 */
1027 if (rib_gc_dest(dest->rnode))
1028 zfpm_g->stats.dests_del_after_update++;
1029
1030 q_limit--;
1031 if (q_limit == 0) {
1032 /*
1033 * We have processed enough updates in this queue.
1034 * Now yield for other queues.
1035 */
1036 return FPM_GOTO_NEXT_Q;
1037 }
1038 } while (true);
1039 }
1040
1041 /*
1042 * zfpm_encode_mac
1043 *
1044 * Encode a message to FPM with information about the given MAC.
1045 *
1046 * Returns the number of bytes written to the buffer.
1047 */
1048 static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
1049 size_t in_buf_len, fpm_msg_type_e *msg_type)
1050 {
1051 size_t len = 0;
1052
1053 *msg_type = FPM_MSG_TYPE_NONE;
1054
1055 switch (zfpm_g->message_format) {
1056
1057 case ZFPM_MSG_FORMAT_NONE:
1058 break;
1059 case ZFPM_MSG_FORMAT_NETLINK:
1060 #ifdef HAVE_NETLINK
1061 len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
1062 assert(fpm_msg_align(len) == len);
1063 *msg_type = FPM_MSG_TYPE_NETLINK;
1064 #endif /* HAVE_NETLINK */
1065 break;
1066 case ZFPM_MSG_FORMAT_PROTOBUF:
1067 break;
1068 }
1069 return len;
1070 }
1071
1072 static int zfpm_build_mac_updates(void)
1073 {
1074 struct stream *s;
1075 struct fpm_mac_info_t *mac;
1076 unsigned char *buf, *data, *buf_end;
1077 fpm_msg_hdr_t *hdr;
1078 size_t data_len, msg_len;
1079 fpm_msg_type_e msg_type;
1080 uint16_t q_limit;
1081
1082 if (TAILQ_EMPTY(&zfpm_g->mac_q))
1083 return FPM_GOTO_NEXT_Q;
1084
1085 s = zfpm_g->obuf;
1086 q_limit = FPM_QUEUE_PROCESS_LIMIT;
1087
1088 do {
1089 /* Make sure there is enough space to write another message. */
1090 if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
1091 return FPM_WRITE_STOP;
1092
1093 buf = STREAM_DATA(s) + stream_get_endp(s);
1094 buf_end = buf + STREAM_WRITEABLE(s);
1095
1096 mac = TAILQ_FIRST(&zfpm_g->mac_q);
1097 if (!mac)
1098 return FPM_GOTO_NEXT_Q;
1099
1100 /* Check for no-op */
1101 if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
1102 zfpm_g->stats.nop_deletes_skipped++;
1103 zfpm_mac_info_del(mac);
1104 continue;
1105 }
1106
1107 hdr = (fpm_msg_hdr_t *)buf;
1108 hdr->version = FPM_PROTO_VERSION;
1109
1110 data = fpm_msg_data(hdr);
1111 data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
1112 &msg_type);
1113 assert(data_len);
1114
1115 hdr->msg_type = msg_type;
1116 msg_len = fpm_data_len_to_msg_len(data_len);
1117 hdr->msg_len = htons(msg_len);
1118 stream_forward_endp(s, msg_len);
1119
1120 /* Remove the MAC from the queue, and delete it. */
1121 zfpm_mac_info_del(mac);
1122
1123 q_limit--;
1124 if (q_limit == 0) {
1125 /*
1126 * We have processed enough updates in this queue.
1127 * Now yield for other queues.
1128 */
1129 return FPM_GOTO_NEXT_Q;
1130 }
1131 } while (1);
1132 }
1133
1134 /*
1135 * zfpm_build_updates
1136 *
1137 * Process the outgoing queues and write messages to the outbound
1138 * buffer.
1139 */
1140 static void zfpm_build_updates(void)
1141 {
1142 struct stream *s;
1143
1144 s = zfpm_g->obuf;
1145 assert(stream_empty(s));
1146
1147 do {
1148 /*
1149 * Stop processing the queues if zfpm_g->obuf is full
1150 * or we do not have more updates to process
1151 */
1152 if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
1153 break;
1154 if (zfpm_build_route_updates() == FPM_WRITE_STOP)
1155 break;
1156 } while (zfpm_updates_pending());
1157 }
1158
1159 /*
1160 * zfpm_write_cb
1161 */
1162 static int zfpm_write_cb(struct thread *thread)
1163 {
1164 struct stream *s;
1165 int num_writes;
1166
1167 zfpm_g->stats.write_cb_calls++;
1168
1169 /*
1170 * Check if async connect is now done.
1171 */
1172 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
1173 zfpm_connect_check();
1174 return 0;
1175 }
1176
1177 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
1178 assert(zfpm_g->sock >= 0);
1179
1180 num_writes = 0;
1181
1182 do {
1183 int bytes_to_write, bytes_written;
1184
1185 s = zfpm_g->obuf;
1186
1187 /*
1188 * If the stream is empty, try fill it up with data.
1189 */
1190 if (stream_empty(s)) {
1191 zfpm_build_updates();
1192 }
1193
1194 bytes_to_write = stream_get_endp(s) - stream_get_getp(s);
1195 if (!bytes_to_write)
1196 break;
1197
1198 bytes_written =
1199 write(zfpm_g->sock, stream_pnt(s), bytes_to_write);
1200 zfpm_g->stats.write_calls++;
1201 num_writes++;
1202
1203 if (bytes_written < 0) {
1204 if (ERRNO_IO_RETRY(errno))
1205 break;
1206
1207 zfpm_connection_down("failed to write to socket");
1208 return 0;
1209 }
1210
1211 if (bytes_written != bytes_to_write) {
1212
1213 /*
1214 * Partial write.
1215 */
1216 stream_forward_getp(s, bytes_written);
1217 zfpm_g->stats.partial_writes++;
1218 break;
1219 }
1220
1221 /*
1222 * We've written out the entire contents of the stream.
1223 */
1224 stream_reset(s);
1225
1226 if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) {
1227 zfpm_g->stats.max_writes_hit++;
1228 break;
1229 }
1230
1231 if (zfpm_thread_should_yield(thread)) {
1232 zfpm_g->stats.t_write_yields++;
1233 break;
1234 }
1235 } while (1);
1236
1237 if (zfpm_writes_pending())
1238 zfpm_write_on();
1239
1240 return 0;
1241 }
1242
1243 /*
1244 * zfpm_connect_cb
1245 */
1246 static int zfpm_connect_cb(struct thread *t)
1247 {
1248 int sock, ret;
1249 struct sockaddr_in serv;
1250
1251 assert(zfpm_g->state == ZFPM_STATE_ACTIVE);
1252
1253 sock = socket(AF_INET, SOCK_STREAM, 0);
1254 if (sock < 0) {
1255 zlog_err("Failed to create socket for connect(): %s",
1256 strerror(errno));
1257 zfpm_g->stats.connect_no_sock++;
1258 return 0;
1259 }
1260
1261 set_nonblocking(sock);
1262
1263 /* Make server socket. */
1264 memset(&serv, 0, sizeof(serv));
1265 serv.sin_family = AF_INET;
1266 serv.sin_port = htons(zfpm_g->fpm_port);
1267 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
1268 serv.sin_len = sizeof(struct sockaddr_in);
1269 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
1270 if (!zfpm_g->fpm_server)
1271 serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1272 else
1273 serv.sin_addr.s_addr = (zfpm_g->fpm_server);
1274
1275 /*
1276 * Connect to the FPM.
1277 */
1278 zfpm_g->connect_calls++;
1279 zfpm_g->stats.connect_calls++;
1280 zfpm_g->last_connect_call_time = monotime(NULL);
1281
1282 ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv));
1283 if (ret >= 0) {
1284 zfpm_g->sock = sock;
1285 zfpm_connection_up("connect succeeded");
1286 return 1;
1287 }
1288
1289 if (errno == EINPROGRESS) {
1290 zfpm_g->sock = sock;
1291 zfpm_read_on();
1292 zfpm_write_on();
1293 zfpm_set_state(ZFPM_STATE_CONNECTING,
1294 "async connect in progress");
1295 return 0;
1296 }
1297
1298 zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno));
1299 close(sock);
1300
1301 /*
1302 * Restart timer for retrying connection.
1303 */
1304 zfpm_start_connect_timer("connect() failed");
1305 return 0;
1306 }
1307
1308 /*
1309 * zfpm_set_state
1310 *
1311 * Move state machine into the given state.
1312 */
1313 static void zfpm_set_state(enum zfpm_state state, const char *reason)
1314 {
1315 enum zfpm_state cur_state = zfpm_g->state;
1316
1317 if (!reason)
1318 reason = "Unknown";
1319
1320 if (state == cur_state)
1321 return;
1322
1323 zfpm_debug("beginning state transition %s -> %s. Reason: %s",
1324 zfpm_state_to_str(cur_state), zfpm_state_to_str(state),
1325 reason);
1326
1327 switch (state) {
1328
1329 case ZFPM_STATE_IDLE:
1330 assert(cur_state == ZFPM_STATE_ESTABLISHED);
1331 break;
1332
1333 case ZFPM_STATE_ACTIVE:
1334 assert(cur_state == ZFPM_STATE_IDLE
1335 || cur_state == ZFPM_STATE_CONNECTING);
1336 assert(zfpm_g->t_connect);
1337 break;
1338
1339 case ZFPM_STATE_CONNECTING:
1340 assert(zfpm_g->sock);
1341 assert(cur_state == ZFPM_STATE_ACTIVE);
1342 assert(zfpm_g->t_read);
1343 assert(zfpm_g->t_write);
1344 break;
1345
1346 case ZFPM_STATE_ESTABLISHED:
1347 assert(cur_state == ZFPM_STATE_ACTIVE
1348 || cur_state == ZFPM_STATE_CONNECTING);
1349 assert(zfpm_g->sock);
1350 assert(zfpm_g->t_read);
1351 assert(zfpm_g->t_write);
1352 break;
1353 }
1354
1355 zfpm_g->state = state;
1356 }
1357
1358 /*
1359 * zfpm_calc_connect_delay
1360 *
1361 * Returns the number of seconds after which we should attempt to
1362 * reconnect to the FPM.
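 *
 * For example, with ZFPM_CONNECT_RETRY_IVL at 5 seconds: if the last
 * connect() call was made 2 seconds ago, the next attempt is delayed
 * by 3 seconds; if 5 or more seconds have already elapsed, or this is
 * the first attempt, it is scheduled immediately.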
1363 */
1364 static long zfpm_calc_connect_delay(void)
1365 {
1366 time_t elapsed;
1367
1368 /*
1369 * Return 0 if this is our first attempt to connect.
1370 */
1371 if (zfpm_g->connect_calls == 0) {
1372 return 0;
1373 }
1374
1375 elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time);
1376
1377 if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
1378 return 0;
1379 }
1380
1381 return ZFPM_CONNECT_RETRY_IVL - elapsed;
1382 }
1383
1384 /*
1385 * zfpm_start_connect_timer
1386 */
1387 static void zfpm_start_connect_timer(const char *reason)
1388 {
1389 long delay_secs;
1390
1391 assert(!zfpm_g->t_connect);
1392 assert(zfpm_g->sock < 0);
1393
1394 assert(zfpm_g->state == ZFPM_STATE_IDLE
1395 || zfpm_g->state == ZFPM_STATE_ACTIVE
1396 || zfpm_g->state == ZFPM_STATE_CONNECTING);
1397
1398 delay_secs = zfpm_calc_connect_delay();
1399 zfpm_debug("scheduling connect in %ld seconds", delay_secs);
1400
1401 thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs,
1402 &zfpm_g->t_connect);
1403 zfpm_set_state(ZFPM_STATE_ACTIVE, reason);
1404 }
1405
1406 /*
1407 * zfpm_is_enabled
1408 *
1409 * Returns true if the zebra FPM module has been enabled.
1410 */
1411 static inline int zfpm_is_enabled(void)
1412 {
1413 return zfpm_g->enabled;
1414 }
1415
1416 /*
1417 * zfpm_conn_is_up
1418 *
1419 * Returns true if the connection to the FPM is up.
1420 */
1421 static inline int zfpm_conn_is_up(void)
1422 {
1423 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
1424 return 0;
1425
1426 assert(zfpm_g->sock >= 0);
1427
1428 return 1;
1429 }
1430
1431 /*
1432 * zfpm_trigger_update
1433 *
1434 * The zebra code invokes this function to indicate that we should
1435 * send an update to the FPM about the given route_node.
1436 */
1437 static int zfpm_trigger_update(struct route_node *rn, const char *reason)
1438 {
1439 rib_dest_t *dest;
1440
1441 /*
1442 * Ignore if the connection is down. We will update the FPM about
1443 * all destinations once the connection comes up.
1444 */
1445 if (!zfpm_conn_is_up())
1446 return 0;
1447
1448 dest = rib_dest_from_rnode(rn);
1449
1450 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
1451 zfpm_g->stats.redundant_triggers++;
1452 return 0;
1453 }
1454
1455 if (reason) {
1456 zfpm_debug("%pFX triggering update to FPM - Reason: %s", &rn->p,
1457 reason);
1458 }
1459
1460 SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1461 TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries);
1462 zfpm_g->stats.updates_triggered++;
1463
1464 /*
1465 * Make sure that writes are enabled.
1466 */
1467 if (zfpm_g->t_write)
1468 return 0;
1469
1470 zfpm_write_on();
1471 return 0;
1472 }
1473
1474 /*
1475 * Generate Key for FPM MAC info hash entry
1476 */
1477 static unsigned int zfpm_mac_info_hash_keymake(const void *p)
1478 {
1479 struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p;
1480 uint32_t mac_key;
1481
1482 mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a);
1483
1484 return jhash_2words(mac_key, fpm_mac->vni, 0);
1485 }
1486
1487 /*
1488 * Compare function for FPM MAC info hash lookup
1489 */
1490 static bool zfpm_mac_info_cmp(const void *p1, const void *p2)
1491 {
1492 const struct fpm_mac_info_t *fpm_mac1 = p1;
1493 const struct fpm_mac_info_t *fpm_mac2 = p2;
1494
1495 if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN)
1496 != 0)
1497 return false;
1498 if (fpm_mac1->vni != fpm_mac2->vni)
1499 return false;
1500
1501 return true;
1502 }
1503
1504 /*
1505 * Lookup FPM MAC info hash entry.
1506 */
1507 static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key)
1508 {
1509 return hash_lookup(zfpm_g->fpm_mac_info_table, key);
1510 }
1511
1512 /*
1513 * Callback to allocate fpm_mac_info_t structure.
1514 */
1515 static void *zfpm_mac_info_alloc(void *p)
1516 {
1517 const struct fpm_mac_info_t *key = p;
1518 struct fpm_mac_info_t *fpm_mac;
1519
1520 fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t));
1521
1522 memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN);
1523 fpm_mac->vni = key->vni;
1524
1525 return (void *)fpm_mac;
1526 }
1527
1528 /*
1529 * Delink and free fpm_mac_info_t.
1530 */
1531 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac)
1532 {
1533 hash_release(zfpm_g->fpm_mac_info_table, fpm_mac);
1534 TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1535 XFREE(MTYPE_FPM_MAC_INFO, fpm_mac);
1536 }
1537
1538 /*
1539 * zfpm_trigger_rmac_update
1540 *
1541 * Zebra code invokes this function to indicate that we should
1542 * send an update to the FPM for the given MAC entry.
1543 *
1544 * This function checks whether an update for this RMAC is already queued.
1545 * If so, it updates the existing fpm_mac_info_t; otherwise it creates and enqueues a new one.
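 *
 * For example, an "add" that is still queued followed by a "delete"
 * for the same (MAC, VNI) leaves a single entry with
 * ZEBRA_MAC_DELETE_FPM set and ZEBRA_MAC_UPDATE_FPM cleared;
 * zfpm_build_mac_updates() later drops that entry as a no-op instead
 * of sending anything to the FPM.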
1546 */
1547 static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni,
1548 bool delete, const char *reason)
1549 {
1550 char buf[ETHER_ADDR_STRLEN];
1551 struct fpm_mac_info_t *fpm_mac, key;
1552 struct interface *vxlan_if, *svi_if;
1553 bool mac_found = false;
1554
1555 /*
1556 * Ignore if the connection is down. We will update the FPM about
1557 * all destinations once the connection comes up.
1558 */
1559 if (!zfpm_conn_is_up())
1560 return 0;
1561
1562 if (reason) {
1563 zfpm_debug("triggering update to FPM - Reason: %s - %s",
1564 reason,
1565 prefix_mac2str(&rmac->macaddr, buf, sizeof(buf)));
1566 }
1567
1568 vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);
1569 svi_if = zl3vni_map_to_svi_if(zl3vni);
1570
1571 memset(&key, 0, sizeof(struct fpm_mac_info_t));
1572
1573 memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN);
1574 key.vni = zl3vni->vni;
1575
1576 /* Check if this MAC is already present in the queue. */
1577 fpm_mac = zfpm_mac_info_lookup(&key);
1578
1579 if (fpm_mac) {
1580 mac_found = true;
1581
1582 /*
1583 * If the enqueued op is "add" and current op is "delete",
1584 * this is a noop. So, Unset ZEBRA_MAC_UPDATE_FPM flag.
1585 * While processing FPM queue, we will silently delete this
1586 * MAC entry without sending any update for this MAC.
1587 */
1588 if (!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) &&
1589 delete == 1) {
1590 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1591 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1592 return 0;
1593 }
1594 } else {
1595 fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key,
1596 zfpm_mac_info_alloc);
1597 if (!fpm_mac)
1598 return 0;
1599 }
1600
1601 fpm_mac->r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr;
1602 fpm_mac->zebra_flags = rmac->flags;
1603 fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
1604 fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;
1605
1606 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1607 if (delete)
1608 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1609 else
1610 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1611
1612 if (!mac_found)
1613 TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1614
1615 zfpm_g->stats.updates_triggered++;
1616
1617 /* If writes are already enabled, return. */
1618 if (zfpm_g->t_write)
1619 return 0;
1620
1621 zfpm_write_on();
1622 return 0;
1623 }
1624
1625 /*
1626 * This function is called when the FPM connection is established.
1627 * Iterate over all the RMAC entries for the given L3VNI
1628 * and enqueue the RMAC for FPM processing.
1629 */
1630 static void zfpm_trigger_rmac_update_wrapper(struct hash_bucket *backet,
1631 void *args)
1632 {
1633 zebra_mac_t *zrmac = (zebra_mac_t *)backet->data;
1634 zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)args;
1635
1636 zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added");
1637 }
1638
1639 /*
1640 * This function is called when the FPM connection is established.
1641 * This function iterates over all the L3VNIs to trigger
1642 * FPM updates for RMACs currently available.
1643 */
1644 static void zfpm_iterate_rmac_table(struct hash_bucket *backet, void *args)
1645 {
1646 zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)backet->data;
1647
1648 hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper,
1649 (void *)zl3vni);
1650 }
1651
1652 /*
1653 * zfpm_stats_timer_cb
1654 */
1655 static int zfpm_stats_timer_cb(struct thread *t)
1656 {
1657 zfpm_g->t_stats = NULL;
1658
1659 /*
1660 * Remember the stats collected in the last interval for display
1661 * purposes.
1662 */
1663 zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats);
1664
1665 /*
1666 * Add the current set of stats into the cumulative statistics.
1667 */
1668 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1669 &zfpm_g->cumulative_stats);
1670
1671 /*
1672 * Start collecting stats afresh over the next interval.
1673 */
1674 zfpm_stats_reset(&zfpm_g->stats);
1675
1676 zfpm_start_stats_timer();
1677
1678 return 0;
1679 }
1680
1681 /*
1682 * zfpm_stop_stats_timer
1683 */
1684 static void zfpm_stop_stats_timer(void)
1685 {
1686 if (!zfpm_g->t_stats)
1687 return;
1688
1689 zfpm_debug("Stopping existing stats timer");
1690 thread_cancel(&zfpm_g->t_stats);
1691 }
1692
1693 /*
1694 * zfpm_start_stats_timer
1695 */
1696 void zfpm_start_stats_timer(void)
1697 {
1698 assert(!zfpm_g->t_stats);
1699
1700 thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0,
1701 ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats);
1702 }
1703
1704 /*
1705 * Helper macro for zfpm_show_stats() below.
1706 */
1707 #define ZFPM_SHOW_STAT(counter) \
1708 do { \
1709 vty_out(vty, "%-40s %10lu %16lu\n", #counter, \
1710 total_stats.counter, zfpm_g->last_ivl_stats.counter); \
1711 } while (0)
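/*
 * The do { ... } while (0) wrapper lets ZFPM_SHOW_STAT() be used like
 * an ordinary statement, e.g. inside an if/else without braces, while
 * still requiring a trailing semicolon.
 */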
1712
1713 /*
1714 * zfpm_show_stats
1715 */
1716 static void zfpm_show_stats(struct vty *vty)
1717 {
1718 struct zfpm_stats total_stats;
1719 time_t elapsed;
1720
1721 vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total",
1722 ZFPM_STATS_IVL_SECS);
1723
1724 /*
1725 * Compute the total stats up to this instant.
1726 */
1727 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1728 &total_stats);
1729
1730 ZFPM_SHOW_STAT(connect_calls);
1731 ZFPM_SHOW_STAT(connect_no_sock);
1732 ZFPM_SHOW_STAT(read_cb_calls);
1733 ZFPM_SHOW_STAT(write_cb_calls);
1734 ZFPM_SHOW_STAT(write_calls);
1735 ZFPM_SHOW_STAT(partial_writes);
1736 ZFPM_SHOW_STAT(max_writes_hit);
1737 ZFPM_SHOW_STAT(t_write_yields);
1738 ZFPM_SHOW_STAT(nop_deletes_skipped);
1739 ZFPM_SHOW_STAT(route_adds);
1740 ZFPM_SHOW_STAT(route_dels);
1741 ZFPM_SHOW_STAT(updates_triggered);
1742 ZFPM_SHOW_STAT(redundant_triggers);
1743 ZFPM_SHOW_STAT(dests_del_after_update);
1744 ZFPM_SHOW_STAT(t_conn_down_starts);
1745 ZFPM_SHOW_STAT(t_conn_down_dests_processed);
1746 ZFPM_SHOW_STAT(t_conn_down_yields);
1747 ZFPM_SHOW_STAT(t_conn_down_finishes);
1748 ZFPM_SHOW_STAT(t_conn_up_starts);
1749 ZFPM_SHOW_STAT(t_conn_up_dests_processed);
1750 ZFPM_SHOW_STAT(t_conn_up_yields);
1751 ZFPM_SHOW_STAT(t_conn_up_aborts);
1752 ZFPM_SHOW_STAT(t_conn_up_finishes);
1753
1754 if (!zfpm_g->last_stats_clear_time)
1755 return;
1756
1757 elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time);
1758
1759 vty_out(vty, "\nStats were cleared %lu seconds ago\n",
1760 (unsigned long)elapsed);
1761 }
1762
1763 /*
1764 * zfpm_clear_stats
1765 */
1766 static void zfpm_clear_stats(struct vty *vty)
1767 {
1768 if (!zfpm_is_enabled()) {
1769 vty_out(vty, "The FPM module is not enabled...\n");
1770 return;
1771 }
1772
1773 zfpm_stats_reset(&zfpm_g->stats);
1774 zfpm_stats_reset(&zfpm_g->last_ivl_stats);
1775 zfpm_stats_reset(&zfpm_g->cumulative_stats);
1776
1777 zfpm_stop_stats_timer();
1778 zfpm_start_stats_timer();
1779
1780 zfpm_g->last_stats_clear_time = monotime(NULL);
1781
1782 vty_out(vty, "Cleared FPM stats\n");
1783 }
1784
1785 /*
1786 * show_zebra_fpm_stats
1787 */
1788 DEFUN (show_zebra_fpm_stats,
1789 show_zebra_fpm_stats_cmd,
1790 "show zebra fpm stats",
1791 SHOW_STR
1792 ZEBRA_STR
1793 "Forwarding Path Manager information\n"
1794 "Statistics\n")
1795 {
1796 zfpm_show_stats(vty);
1797 return CMD_SUCCESS;
1798 }
1799
1800 /*
1801 * clear_zebra_fpm_stats
1802 */
1803 DEFUN (clear_zebra_fpm_stats,
1804 clear_zebra_fpm_stats_cmd,
1805 "clear zebra fpm stats",
1806 CLEAR_STR
1807 ZEBRA_STR
1808 "Clear Forwarding Path Manager information\n"
1809 "Statistics\n")
1810 {
1811 zfpm_clear_stats(vty);
1812 return CMD_SUCCESS;
1813 }
1814
1815 /*
1816 * update fpm connection information
1817 */
1818 DEFUN ( fpm_remote_ip,
1819 fpm_remote_ip_cmd,
1820 "fpm connection ip A.B.C.D port (1-65535)",
1821 "fpm connection remote ip and port\n"
1822 "Remote fpm server ip A.B.C.D\n"
1823 "Enter ip ")
1824 {
1825
1826 in_addr_t fpm_server;
1827 uint32_t port_no;
1828
1829 fpm_server = inet_addr(argv[3]->arg);
1830 if (fpm_server == INADDR_NONE)
1831 return CMD_ERR_INCOMPLETE;
1832
1833 port_no = atoi(argv[5]->arg);
1834 if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT)
1835 return CMD_ERR_INCOMPLETE;
1836
1837 zfpm_g->fpm_server = fpm_server;
1838 zfpm_g->fpm_port = port_no;
1839
1840
1841 return CMD_SUCCESS;
1842 }
1843
1844 DEFUN ( no_fpm_remote_ip,
1845 no_fpm_remote_ip_cmd,
1846 "no fpm connection ip A.B.C.D port (1-65535)",
1847 "fpm connection remote ip and port\n"
1848 "Connection\n"
1849 "Remote fpm server ip A.B.C.D\n"
1850 "Enter ip ")
1851 {
1852 if (zfpm_g->fpm_server != inet_addr(argv[4]->arg)
1853 || zfpm_g->fpm_port != atoi(argv[6]->arg))
1854 return CMD_ERR_NO_MATCH;
1855
1856 zfpm_g->fpm_server = FPM_DEFAULT_IP;
1857 zfpm_g->fpm_port = FPM_DEFAULT_PORT;
1858
1859 return CMD_SUCCESS;
1860 }
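/*
 * Example configuration accepted by the two DEFUNs above (2620 is the
 * default FPM port):
 *
 *   fpm connection ip 192.168.1.10 port 2620
 *
 * The "no" form only takes effect when both the address and the port
 * match the currently configured values.
 */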
1861
1862 /*
1863 * zfpm_init_message_format
1864 */
1865 static inline void zfpm_init_message_format(const char *format)
1866 {
1867 int have_netlink, have_protobuf;
1868
1869 #ifdef HAVE_NETLINK
1870 have_netlink = 1;
1871 #else
1872 have_netlink = 0;
1873 #endif
1874
1875 #ifdef HAVE_PROTOBUF
1876 have_protobuf = 1;
1877 #else
1878 have_protobuf = 0;
1879 #endif
1880
1881 zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE;
1882
1883 if (!format) {
1884 if (have_netlink) {
1885 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1886 } else if (have_protobuf) {
1887 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1888 }
1889 return;
1890 }
1891
1892 if (!strcmp("netlink", format)) {
1893 if (!have_netlink) {
1894 flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE,
1895 "FPM netlink message format is not available");
1896 return;
1897 }
1898 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1899 return;
1900 }
1901
1902 if (!strcmp("protobuf", format)) {
1903 if (!have_protobuf) {
1904 flog_err(
1905 EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1906 "FPM protobuf message format is not available");
1907 return;
1908 }
1909 flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1910 "FPM protobuf message format is deprecated and scheduled to be removed. Please convert to using netlink format or contact dev@lists.frrouting.org with your use case.");
1911 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1912 return;
1913 }
1914
1915 flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'",
1916 format);
1917 }
1918
1919 /**
1920 * fpm_remote_srv_write
1921 *
1922 * Writes the remote FPM connection configuration to the given vty.
1923 *
1924 * Returns ZERO on success.
1925 */
1926
1927 static int fpm_remote_srv_write(struct vty *vty)
1928 {
1929 struct in_addr in;
1930
1931 in.s_addr = zfpm_g->fpm_server;
1932
1933 if ((zfpm_g->fpm_server != FPM_DEFAULT_IP
1934 && zfpm_g->fpm_server != INADDR_ANY)
1935 || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0))
1936 vty_out(vty, "fpm connection ip %pI4 port %d\n", &in,
1937 zfpm_g->fpm_port);
1938
1939 return 0;
1940 }
1941
1942
1943 static int fpm_remote_srv_write(struct vty *vty);
1944 /* Zebra node */
1945 static struct cmd_node zebra_node = {
1946 .name = "zebra",
1947 .node = ZEBRA_NODE,
1948 .parent_node = CONFIG_NODE,
1949 .prompt = "",
1950 .config_write = fpm_remote_srv_write,
1951 };
1952
1953
1954 /**
1955 * zfpm_init
1956 *
1957 * One-time initialization of the Zebra FPM module.
1958 *
1959 * @param[in] master thread master on which the FPM module schedules its work.
1960 *
1961 * The FPM port, the enabled flag and the message format ('netlink' or
1962 * 'protobuf') are derived from built-in defaults and this module's load
1963 * arguments rather than passed in; returns 0 on success.
1964 */
1965 static int zfpm_init(struct thread_master *master)
1966 {
1967 int enable = 1;
1968 uint16_t port = 0;
1969 const char *format = THIS_MODULE->load_args;
1970
1971 memset(zfpm_g, 0, sizeof(*zfpm_g));
1972 zfpm_g->master = master;
1973 TAILQ_INIT(&zfpm_g->dest_q);
1974 TAILQ_INIT(&zfpm_g->mac_q);
1975
1976 /* Create hash table for fpm_mac_info_t entries */
1977 zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake,
1978 zfpm_mac_info_cmp,
1979 "FPM MAC info hash table");
1980
1981 zfpm_g->sock = -1;
1982 zfpm_g->state = ZFPM_STATE_IDLE;
1983
1984 zfpm_stats_init(&zfpm_g->stats);
1985 zfpm_stats_init(&zfpm_g->last_ivl_stats);
1986 zfpm_stats_init(&zfpm_g->cumulative_stats);
1987
1988 install_node(&zebra_node);
1989 install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd);
1990 install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
1991 install_element(CONFIG_NODE, &fpm_remote_ip_cmd);
1992 install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd);
1993
1994 zfpm_init_message_format(format);
1995
1996 /*
1997 * Disable FPM interface if no suitable format is available.
1998 */
1999 if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE)
2000 enable = 0;
2001
2002 zfpm_g->enabled = enable;
2003
2004 if (!zfpm_g->fpm_server)
2005 zfpm_g->fpm_server = FPM_DEFAULT_IP;
2006
2007 if (!port)
2008 port = FPM_DEFAULT_PORT;
2009
2010 zfpm_g->fpm_port = port;
2011
2012 zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE);
2013 zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE);
2014
2015 zfpm_start_stats_timer();
2016 zfpm_start_connect_timer("initialized");
2017 return 0;
2018 }
2019
2020 static int zfpm_fini(void)
2021 {
2022 zfpm_write_off();
2023 zfpm_read_off();
2024 zfpm_connect_off();
2025
2026 zfpm_stop_stats_timer();
2027
2028 hook_unregister(rib_update, zfpm_trigger_update);
2029 return 0;
2030 }
2031
2032 static int zebra_fpm_module_init(void)
2033 {
2034 hook_register(rib_update, zfpm_trigger_update);
2035 hook_register(zebra_rmac_update, zfpm_trigger_rmac_update);
2036 hook_register(frr_late_init, zfpm_init);
2037 hook_register(frr_early_fini, zfpm_fini);
2038 return 0;
2039 }
2040
2041 FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION,
2042 .description = "zebra FPM (Forwarding Plane Manager) module",
2043 .init = zebra_fpm_module_init, )