]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm.c
Merge pull request #9199 from LabNConsulting/chopps/micronet-prime
[mirror_frr.git] / zebra / zebra_fpm.c
1 /*
2 * Main implementation file for interface to Forwarding Plane Manager.
3 *
4 * Copyright (C) 2012 by Open Source Routing.
5 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
6 *
7 * This file is part of GNU Zebra.
8 *
9 * GNU Zebra is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2, or (at your option) any
12 * later version.
13 *
14 * GNU Zebra is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; see the file COPYING; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <zebra.h>
25
26 #include "log.h"
27 #include "libfrr.h"
28 #include "stream.h"
29 #include "thread.h"
30 #include "network.h"
31 #include "command.h"
32 #include "lib/version.h"
33 #include "jhash.h"
34
35 #include "zebra/rib.h"
36 #include "zebra/zserv.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/zebra_errors.h"
40
41 #include "fpm/fpm.h"
42 #include "zebra_fpm_private.h"
43 #include "zebra/zebra_router.h"
44 #include "zebra_vxlan_private.h"
45
46 DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");
47
48 /*
49 * Interval at which we attempt to connect to the FPM.
50 */
51 #define ZFPM_CONNECT_RETRY_IVL 5
52
53 /*
54 * Sizes of outgoing and incoming stream buffers for writing/reading
55 * FPM messages.
56 */
57 #define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
58 #define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)
59
60 /*
61 * The maximum number of times the FPM socket write callback can call
62 * 'write' before it yields.
63 */
64 #define ZFPM_MAX_WRITES_PER_RUN 10
65
66 /*
67 * Interval over which we collect statistics.
68 */
69 #define ZFPM_STATS_IVL_SECS 10
70 #define FPM_MAX_MAC_MSG_LEN 512
71
72 static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args);
73
74 /*
75 * Structure that holds state for iterating over all route_node
76 * structures that are candidates for being communicated to the FPM.
77 */
78 struct zfpm_rnodes_iter {
79 rib_tables_iter_t tables_iter;
80 route_table_iter_t iter;
81 };
82
/*
 * Statistics.
 *
 * Every member is an 'unsigned long' counter. zfpm_stats_compose()
 * depends on that: it walks the structure as a flat array of
 * unsigned longs, so do not add members of any other type.
 */
struct zfpm_stats {
	/* Connection attempts. */
	unsigned long connect_calls;
	unsigned long connect_no_sock;

	/* Read path. */
	unsigned long read_cb_calls;

	/* Write path. */
	unsigned long write_cb_calls;
	unsigned long write_calls;
	unsigned long partial_writes;
	unsigned long max_writes_hit;
	unsigned long t_write_yields;

	/* Messages built for the FPM. */
	unsigned long nop_deletes_skipped;
	unsigned long route_adds;
	unsigned long route_dels;

	/* Update triggers. */
	unsigned long updates_triggered;
	unsigned long redundant_triggers;

	unsigned long dests_del_after_update;

	/* Activity of the conn_down cleanup thread. */
	unsigned long t_conn_down_starts;
	unsigned long t_conn_down_dests_processed;
	unsigned long t_conn_down_yields;
	unsigned long t_conn_down_finishes;

	/* Activity of the conn_up thread. */
	unsigned long t_conn_up_starts;
	unsigned long t_conn_up_dests_processed;
	unsigned long t_conn_up_yields;
	unsigned long t_conn_up_aborts;
	unsigned long t_conn_up_finishes;
};
118
/*
 * States of the FPM connection state machine.
 */
enum zfpm_state {

	/*
	 * Not yet ready to connect to the FPM: either this module is
	 * disabled, or we are still cleaning up after a connection that
	 * went down.
	 */
	ZFPM_STATE_IDLE,

	/*
	 * Ready to talk to the FPM; connection attempts are made
	 * periodically.
	 */
	ZFPM_STATE_ACTIVE,

	/*
	 * A TCP connection is being brought up, i.e. we are waiting for
	 * a connect() call to complete asynchronously.
	 */
	ZFPM_STATE_CONNECTING,

	/*
	 * The TCP connection to the FPM is up.
	 */
	ZFPM_STATE_ESTABLISHED

};
149
/*
 * Encoding used for the messages exchanged with the FPM.
 */
enum zfpm_msg_format {
	ZFPM_MSG_FORMAT_NONE,
	ZFPM_MSG_FORMAT_NETLINK,
	ZFPM_MSG_FORMAT_PROTOBUF,
};
158
159 /*
160 * Globals.
161 */
162 struct zfpm_glob {
163
164 /*
165 * True if the FPM module has been enabled.
166 */
167 int enabled;
168
169 /*
170 * Message format to be used to communicate with the fpm.
171 */
172 enum zfpm_msg_format message_format;
173
174 struct thread_master *master;
175
176 enum zfpm_state state;
177
178 in_addr_t fpm_server;
179 /*
180 * Port on which the FPM is running.
181 */
182 int fpm_port;
183
184 /*
185 * List of rib_dest_t structures to be processed
186 */
187 TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;
188
189 /*
190 * List of fpm_mac_info structures to be processed
191 */
192 TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;
193
194 /*
195 * Hash table of fpm_mac_info_t entries
196 *
197 * While adding fpm_mac_info_t for a MAC to the mac_q,
198 * it is possible that another fpm_mac_info_t node for the this MAC
199 * is already present in the queue.
200 * This is possible in the case of consecutive add->delete operations.
201 * To avoid such duplicate insertions in the mac_q,
202 * define a hash table for fpm_mac_info_t which can be looked up
203 * to see if an fpm_mac_info_t node for a MAC is already present
204 * in the mac_q.
205 */
206 struct hash *fpm_mac_info_table;
207
208 /*
209 * Stream socket to the FPM.
210 */
211 int sock;
212
213 /*
214 * Buffers for messages to/from the FPM.
215 */
216 struct stream *obuf;
217 struct stream *ibuf;
218
219 /*
220 * Threads for I/O.
221 */
222 struct thread *t_connect;
223 struct thread *t_write;
224 struct thread *t_read;
225
226 /*
227 * Thread to clean up after the TCP connection to the FPM goes down
228 * and the state that belongs to it.
229 */
230 struct thread *t_conn_down;
231
232 struct {
233 struct zfpm_rnodes_iter iter;
234 } t_conn_down_state;
235
236 /*
237 * Thread to take actions once the TCP conn to the FPM comes up, and
238 * the state that belongs to it.
239 */
240 struct thread *t_conn_up;
241
242 struct {
243 struct zfpm_rnodes_iter iter;
244 } t_conn_up_state;
245
246 unsigned long connect_calls;
247 time_t last_connect_call_time;
248
249 /*
250 * Stats from the start of the current statistics interval up to
251 * now. These are the counters we typically update in the code.
252 */
253 struct zfpm_stats stats;
254
255 /*
256 * Statistics that were gathered in the last collection interval.
257 */
258 struct zfpm_stats last_ivl_stats;
259
260 /*
261 * Cumulative stats from the last clear to the start of the current
262 * statistics interval.
263 */
264 struct zfpm_stats cumulative_stats;
265
266 /*
267 * Stats interval timer.
268 */
269 struct thread *t_stats;
270
271 /*
272 * If non-zero, the last time when statistics were cleared.
273 */
274 time_t last_stats_clear_time;
275
276 /*
277 * Flag to track the MAC dump status to FPM
278 */
279 bool fpm_mac_dump_done;
280 };
281
282 static struct zfpm_glob zfpm_glob_space;
283 static struct zfpm_glob *zfpm_g = &zfpm_glob_space;
284
285 static int zfpm_trigger_update(struct route_node *rn, const char *reason);
286
287 static int zfpm_read_cb(struct thread *thread);
288 static int zfpm_write_cb(struct thread *thread);
289
290 static void zfpm_set_state(enum zfpm_state state, const char *reason);
291 static void zfpm_start_connect_timer(const char *reason);
292 static void zfpm_start_stats_timer(void);
293 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);
294
/*
 * zfpm_thread_should_yield
 *
 * Thin wrapper that forwards to thread_should_yield() for thread 't'
 * and returns its result unchanged.
 */
static inline int zfpm_thread_should_yield(struct thread *t)
{
	int yield_now = thread_should_yield(t);

	return yield_now;
}
302
303 /*
304 * zfpm_state_to_str
305 */
306 static const char *zfpm_state_to_str(enum zfpm_state state)
307 {
308 switch (state) {
309
310 case ZFPM_STATE_IDLE:
311 return "idle";
312
313 case ZFPM_STATE_ACTIVE:
314 return "active";
315
316 case ZFPM_STATE_CONNECTING:
317 return "connecting";
318
319 case ZFPM_STATE_ESTABLISHED:
320 return "established";
321
322 default:
323 return "unknown";
324 }
325 }
326
/*
 * zfpm_get_elapsed_time
 *
 * Returns the number of seconds between 'reference' and the current
 * monotime() reading. A reference that lies in the future trips an
 * assertion; if assertions are compiled out, 0 is returned.
 */
static time_t zfpm_get_elapsed_time(time_t reference)
{
	time_t current = monotime(NULL);

	if (current >= reference)
		return current - reference;

	assert(0);
	return 0;
}
345
346 /*
347 * zfpm_rnodes_iter_init
348 */
349 static inline void zfpm_rnodes_iter_init(struct zfpm_rnodes_iter *iter)
350 {
351 memset(iter, 0, sizeof(*iter));
352 rib_tables_iter_init(&iter->tables_iter);
353
354 /*
355 * This is a hack, but it makes implementing 'next' easier by
356 * ensuring that route_table_iter_next() will return NULL the first
357 * time we call it.
358 */
359 route_table_iter_init(&iter->iter, NULL);
360 route_table_iter_cleanup(&iter->iter);
361 }
362
363 /*
364 * zfpm_rnodes_iter_next
365 */
366 static inline struct route_node *
367 zfpm_rnodes_iter_next(struct zfpm_rnodes_iter *iter)
368 {
369 struct route_node *rn;
370 struct route_table *table;
371
372 while (1) {
373 rn = route_table_iter_next(&iter->iter);
374 if (rn)
375 return rn;
376
377 /*
378 * We've made our way through this table, go to the next one.
379 */
380 route_table_iter_cleanup(&iter->iter);
381
382 table = rib_tables_iter_next(&iter->tables_iter);
383
384 if (!table)
385 return NULL;
386
387 route_table_iter_init(&iter->iter, table);
388 }
389
390 return NULL;
391 }
392
393 /*
394 * zfpm_rnodes_iter_pause
395 */
396 static inline void zfpm_rnodes_iter_pause(struct zfpm_rnodes_iter *iter)
397 {
398 route_table_iter_pause(&iter->iter);
399 }
400
401 /*
402 * zfpm_rnodes_iter_cleanup
403 */
404 static inline void zfpm_rnodes_iter_cleanup(struct zfpm_rnodes_iter *iter)
405 {
406 route_table_iter_cleanup(&iter->iter);
407 rib_tables_iter_cleanup(&iter->tables_iter);
408 }
409
410 /*
411 * zfpm_stats_init
412 *
413 * Initialize a statistics block.
414 */
415 static inline void zfpm_stats_init(struct zfpm_stats *stats)
416 {
417 memset(stats, 0, sizeof(*stats));
418 }
419
/*
 * zfpm_stats_reset
 *
 * Resets a statistics block; identical to (re)initializing it.
 */
static inline void zfpm_stats_reset(struct zfpm_stats *stats)
{
	struct zfpm_stats *block = stats;

	zfpm_stats_init(block);
}
427
428 /*
429 * zfpm_stats_copy
430 */
431 static inline void zfpm_stats_copy(const struct zfpm_stats *src,
432 struct zfpm_stats *dest)
433 {
434 memcpy(dest, src, sizeof(*dest));
435 }
436
437 /*
438 * zfpm_stats_compose
439 *
440 * Total up the statistics in two stats structures ('s1 and 's2') and
441 * return the result in the third argument, 'result'. Note that the
442 * pointer 'result' may be the same as 's1' or 's2'.
443 *
444 * For simplicity, the implementation below assumes that the stats
445 * structure is composed entirely of counters. This can easily be
446 * changed when necessary.
447 */
448 static void zfpm_stats_compose(const struct zfpm_stats *s1,
449 const struct zfpm_stats *s2,
450 struct zfpm_stats *result)
451 {
452 const unsigned long *p1, *p2;
453 unsigned long *result_p;
454 int i, num_counters;
455
456 p1 = (const unsigned long *)s1;
457 p2 = (const unsigned long *)s2;
458 result_p = (unsigned long *)result;
459
460 num_counters = (sizeof(struct zfpm_stats) / sizeof(unsigned long));
461
462 for (i = 0; i < num_counters; i++) {
463 result_p[i] = p1[i] + p2[i];
464 }
465 }
466
467 /*
468 * zfpm_read_on
469 */
470 static inline void zfpm_read_on(void)
471 {
472 assert(!zfpm_g->t_read);
473 assert(zfpm_g->sock >= 0);
474
475 thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
476 &zfpm_g->t_read);
477 }
478
479 /*
480 * zfpm_write_on
481 */
482 static inline void zfpm_write_on(void)
483 {
484 assert(!zfpm_g->t_write);
485 assert(zfpm_g->sock >= 0);
486
487 thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
488 &zfpm_g->t_write);
489 }
490
491 /*
492 * zfpm_read_off
493 */
494 static inline void zfpm_read_off(void)
495 {
496 thread_cancel(&zfpm_g->t_read);
497 }
498
499 /*
500 * zfpm_write_off
501 */
502 static inline void zfpm_write_off(void)
503 {
504 thread_cancel(&zfpm_g->t_write);
505 }
506
507 static inline void zfpm_connect_off(void)
508 {
509 thread_cancel(&zfpm_g->t_connect);
510 }
511
512 /*
513 * zfpm_conn_up_thread_cb
514 *
515 * Callback for actions to be taken when the connection to the FPM
516 * comes up.
517 */
518 static int zfpm_conn_up_thread_cb(struct thread *thread)
519 {
520 struct route_node *rnode;
521 struct zfpm_rnodes_iter *iter;
522 rib_dest_t *dest;
523
524 iter = &zfpm_g->t_conn_up_state.iter;
525
526 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
527 zfpm_debug(
528 "Connection not up anymore, conn_up thread aborting");
529 zfpm_g->stats.t_conn_up_aborts++;
530 goto done;
531 }
532
533 if (!zfpm_g->fpm_mac_dump_done) {
534 /* Enqueue FPM updates for all the RMAC entries */
535 hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table,
536 NULL);
537 /* mark dump done so that its not repeated after yield */
538 zfpm_g->fpm_mac_dump_done = true;
539 }
540
541 while ((rnode = zfpm_rnodes_iter_next(iter))) {
542 dest = rib_dest_from_rnode(rnode);
543
544 if (dest) {
545 zfpm_g->stats.t_conn_up_dests_processed++;
546 zfpm_trigger_update(rnode, NULL);
547 }
548
549 /*
550 * Yield if need be.
551 */
552 if (!zfpm_thread_should_yield(thread))
553 continue;
554
555 zfpm_g->stats.t_conn_up_yields++;
556 zfpm_rnodes_iter_pause(iter);
557 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
558 NULL, 0, &zfpm_g->t_conn_up);
559 return 0;
560 }
561
562 zfpm_g->stats.t_conn_up_finishes++;
563
564 done:
565 zfpm_rnodes_iter_cleanup(iter);
566 return 0;
567 }
568
569 /*
570 * zfpm_connection_up
571 *
572 * Called when the connection to the FPM comes up.
573 */
574 static void zfpm_connection_up(const char *detail)
575 {
576 assert(zfpm_g->sock >= 0);
577 zfpm_read_on();
578 zfpm_write_on();
579 zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);
580
581 /*
582 * Start thread to push existing routes to the FPM.
583 */
584 thread_cancel(&zfpm_g->t_conn_up);
585
586 zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);
587 zfpm_g->fpm_mac_dump_done = false;
588
589 zfpm_debug("Starting conn_up thread");
590
591 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
592 &zfpm_g->t_conn_up);
593 zfpm_g->stats.t_conn_up_starts++;
594 }
595
596 /*
597 * zfpm_connect_check
598 *
599 * Check if an asynchronous connect() to the FPM is complete.
600 */
601 static void zfpm_connect_check(void)
602 {
603 int status;
604 socklen_t slen;
605 int ret;
606
607 zfpm_read_off();
608 zfpm_write_off();
609
610 slen = sizeof(status);
611 ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
612 &slen);
613
614 if (ret >= 0 && status == 0) {
615 zfpm_connection_up("async connect complete");
616 return;
617 }
618
619 /*
620 * getsockopt() failed or indicated an error on the socket.
621 */
622 close(zfpm_g->sock);
623 zfpm_g->sock = -1;
624
625 zfpm_start_connect_timer("getsockopt() after async connect failed");
626 return;
627 }
628
629 /*
630 * zfpm_conn_down_thread_cb
631 *
632 * Callback that is invoked to clean up state after the TCP connection
633 * to the FPM goes down.
634 */
635 static int zfpm_conn_down_thread_cb(struct thread *thread)
636 {
637 struct route_node *rnode;
638 struct zfpm_rnodes_iter *iter;
639 rib_dest_t *dest;
640 struct fpm_mac_info_t *mac = NULL;
641
642 assert(zfpm_g->state == ZFPM_STATE_IDLE);
643
644 /*
645 * Delink and free all fpm_mac_info_t nodes
646 * in the mac_q and fpm_mac_info_hash
647 */
648 while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
649 zfpm_mac_info_del(mac);
650
651 zfpm_g->t_conn_down = NULL;
652
653 iter = &zfpm_g->t_conn_down_state.iter;
654
655 while ((rnode = zfpm_rnodes_iter_next(iter))) {
656 dest = rib_dest_from_rnode(rnode);
657
658 if (dest) {
659 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
660 TAILQ_REMOVE(&zfpm_g->dest_q, dest,
661 fpm_q_entries);
662 }
663
664 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
665 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
666
667 zfpm_g->stats.t_conn_down_dests_processed++;
668
669 /*
670 * Check if the dest should be deleted.
671 */
672 rib_gc_dest(rnode);
673 }
674
675 /*
676 * Yield if need be.
677 */
678 if (!zfpm_thread_should_yield(thread))
679 continue;
680
681 zfpm_g->stats.t_conn_down_yields++;
682 zfpm_rnodes_iter_pause(iter);
683 zfpm_g->t_conn_down = NULL;
684 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
685 NULL, 0, &zfpm_g->t_conn_down);
686 return 0;
687 }
688
689 zfpm_g->stats.t_conn_down_finishes++;
690 zfpm_rnodes_iter_cleanup(iter);
691
692 /*
693 * Start the process of connecting to the FPM again.
694 */
695 zfpm_start_connect_timer("cleanup complete");
696 return 0;
697 }
698
699 /*
700 * zfpm_connection_down
701 *
702 * Called when the connection to the FPM has gone down.
703 */
704 static void zfpm_connection_down(const char *detail)
705 {
706 if (!detail)
707 detail = "unknown";
708
709 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
710
711 zlog_info("connection to the FPM has gone down: %s", detail);
712
713 zfpm_read_off();
714 zfpm_write_off();
715
716 stream_reset(zfpm_g->ibuf);
717 stream_reset(zfpm_g->obuf);
718
719 if (zfpm_g->sock >= 0) {
720 close(zfpm_g->sock);
721 zfpm_g->sock = -1;
722 }
723
724 /*
725 * Start thread to clean up state after the connection goes down.
726 */
727 assert(!zfpm_g->t_conn_down);
728 zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
729 zfpm_g->t_conn_down = NULL;
730 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
731 &zfpm_g->t_conn_down);
732 zfpm_g->stats.t_conn_down_starts++;
733
734 zfpm_set_state(ZFPM_STATE_IDLE, detail);
735 }
736
737 /*
738 * zfpm_read_cb
739 */
740 static int zfpm_read_cb(struct thread *thread)
741 {
742 size_t already;
743 struct stream *ibuf;
744 uint16_t msg_len;
745 fpm_msg_hdr_t *hdr;
746
747 zfpm_g->stats.read_cb_calls++;
748
749 /*
750 * Check if async connect is now done.
751 */
752 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
753 zfpm_connect_check();
754 return 0;
755 }
756
757 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
758 assert(zfpm_g->sock >= 0);
759
760 ibuf = zfpm_g->ibuf;
761
762 already = stream_get_endp(ibuf);
763 if (already < FPM_MSG_HDR_LEN) {
764 ssize_t nbyte;
765
766 nbyte = stream_read_try(ibuf, zfpm_g->sock,
767 FPM_MSG_HDR_LEN - already);
768 if (nbyte == 0 || nbyte == -1) {
769 if (nbyte == -1) {
770 char buffer[1024];
771
772 snprintf(buffer, sizeof(buffer),
773 "closed socket in read(%d): %s", errno,
774 safe_strerror(errno));
775 zfpm_connection_down(buffer);
776 } else
777 zfpm_connection_down("closed socket in read");
778 return 0;
779 }
780
781 if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
782 goto done;
783
784 already = FPM_MSG_HDR_LEN;
785 }
786
787 stream_set_getp(ibuf, 0);
788
789 hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);
790
791 if (!fpm_msg_hdr_ok(hdr)) {
792 zfpm_connection_down("invalid message header");
793 return 0;
794 }
795
796 msg_len = fpm_msg_len(hdr);
797
798 /*
799 * Read out the rest of the packet.
800 */
801 if (already < msg_len) {
802 ssize_t nbyte;
803
804 nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);
805
806 if (nbyte == 0 || nbyte == -1) {
807 if (nbyte == -1) {
808 char buffer[1024];
809
810 snprintf(buffer, sizeof(buffer),
811 "failed to read message(%d) %s", errno,
812 safe_strerror(errno));
813 zfpm_connection_down(buffer);
814 } else
815 zfpm_connection_down("failed to read message");
816 return 0;
817 }
818
819 if (nbyte != (ssize_t)(msg_len - already))
820 goto done;
821 }
822
823 /*
824 * Just throw it away for now.
825 */
826 stream_reset(ibuf);
827
828 done:
829 zfpm_read_on();
830 return 0;
831 }
832
833 static bool zfpm_updates_pending(void)
834 {
835 if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
836 return true;
837
838 return false;
839 }
840
841 /*
842 * zfpm_writes_pending
843 *
844 * Returns true if we may have something to write to the FPM.
845 */
846 static int zfpm_writes_pending(void)
847 {
848
849 /*
850 * Check if there is any data in the outbound buffer that has not
851 * been written to the socket yet.
852 */
853 if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
854 return 1;
855
856 /*
857 * Check if there are any updates scheduled on the outbound queues.
858 */
859 if (zfpm_updates_pending())
860 return 1;
861
862 return 0;
863 }
864
865 /*
866 * zfpm_encode_route
867 *
868 * Encode a message to the FPM with information about the given route.
869 *
870 * Returns the number of bytes written to the buffer. 0 or a negative
871 * value indicates an error.
872 */
873 static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
874 char *in_buf, size_t in_buf_len,
875 fpm_msg_type_e *msg_type)
876 {
877 size_t len;
878 #ifdef HAVE_NETLINK
879 int cmd;
880 #endif
881 len = 0;
882
883 *msg_type = FPM_MSG_TYPE_NONE;
884
885 switch (zfpm_g->message_format) {
886
887 case ZFPM_MSG_FORMAT_PROTOBUF:
888 #ifdef HAVE_PROTOBUF
889 len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
890 in_buf_len);
891 *msg_type = FPM_MSG_TYPE_PROTOBUF;
892 #endif
893 break;
894
895 case ZFPM_MSG_FORMAT_NETLINK:
896 #ifdef HAVE_NETLINK
897 *msg_type = FPM_MSG_TYPE_NETLINK;
898 cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
899 len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
900 in_buf_len);
901 assert(fpm_msg_align(len) == len);
902 *msg_type = FPM_MSG_TYPE_NETLINK;
903 #endif /* HAVE_NETLINK */
904 break;
905
906 default:
907 break;
908 }
909
910 return len;
911 }
912
913 /*
914 * zfpm_route_for_update
915 *
916 * Returns the re that is to be sent to the FPM for a given dest.
917 */
918 struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
919 {
920 return dest->selected_fib;
921 }
922
/*
 * Return codes of the queue processing functions.
 *
 * FPM_WRITE_STOP: the write buffer is full. Stop processing all the
 * queues and empty the buffer by writing its content to the socket.
 *
 * FPM_GOTO_NEXT_Q: either this queue is empty or enough updates have
 * been processed from it. Move on to the next queue.
 */
enum {
	FPM_WRITE_STOP = 0,
	FPM_GOTO_NEXT_Q = 1
};

/*
 * Maximum number of queue entries a queue processing function handles
 * before it returns FPM_GOTO_NEXT_Q.
 */
#define FPM_QUEUE_PROCESS_LIMIT 10000
940
941 /*
942 * zfpm_build_route_updates
943 *
944 * Process the dest_q queue and write FPM messages to the outbound buffer.
945 */
946 static int zfpm_build_route_updates(void)
947 {
948 struct stream *s;
949 rib_dest_t *dest;
950 unsigned char *buf, *data, *buf_end;
951 size_t msg_len;
952 size_t data_len;
953 fpm_msg_hdr_t *hdr;
954 struct route_entry *re;
955 int is_add, write_msg;
956 fpm_msg_type_e msg_type;
957 uint16_t q_limit;
958
959 if (TAILQ_EMPTY(&zfpm_g->dest_q))
960 return FPM_GOTO_NEXT_Q;
961
962 s = zfpm_g->obuf;
963 q_limit = FPM_QUEUE_PROCESS_LIMIT;
964
965 do {
966 /*
967 * Make sure there is enough space to write another message.
968 */
969 if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
970 return FPM_WRITE_STOP;
971
972 buf = STREAM_DATA(s) + stream_get_endp(s);
973 buf_end = buf + STREAM_WRITEABLE(s);
974
975 dest = TAILQ_FIRST(&zfpm_g->dest_q);
976 if (!dest)
977 return FPM_GOTO_NEXT_Q;
978
979 assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));
980
981 hdr = (fpm_msg_hdr_t *)buf;
982 hdr->version = FPM_PROTO_VERSION;
983
984 data = fpm_msg_data(hdr);
985
986 re = zfpm_route_for_update(dest);
987 is_add = re ? 1 : 0;
988
989 write_msg = 1;
990
991 /*
992 * If this is a route deletion, and we have not sent the route
993 * to
994 * the FPM previously, skip it.
995 */
996 if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
997 write_msg = 0;
998 zfpm_g->stats.nop_deletes_skipped++;
999 }
1000
1001 if (write_msg) {
1002 data_len = zfpm_encode_route(dest, re, (char *)data,
1003 buf_end - data, &msg_type);
1004
1005 if (data_len) {
1006 hdr->msg_type = msg_type;
1007 msg_len = fpm_data_len_to_msg_len(data_len);
1008 hdr->msg_len = htons(msg_len);
1009 stream_forward_endp(s, msg_len);
1010
1011 if (is_add)
1012 zfpm_g->stats.route_adds++;
1013 else
1014 zfpm_g->stats.route_dels++;
1015 } else {
1016 zlog_err("%s: Encoding Prefix: %pRN No valid nexthops",
1017 __func__, dest->rnode);
1018 }
1019 }
1020
1021 /*
1022 * Remove the dest from the queue, and reset the flag.
1023 */
1024 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1025 TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);
1026
1027 if (is_add) {
1028 SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1029 } else {
1030 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1031 }
1032
1033 /*
1034 * Delete the destination if necessary.
1035 */
1036 if (rib_gc_dest(dest->rnode))
1037 zfpm_g->stats.dests_del_after_update++;
1038
1039 q_limit--;
1040 if (q_limit == 0) {
1041 /*
1042 * We have processed enough updates in this queue.
1043 * Now yield for other queues.
1044 */
1045 return FPM_GOTO_NEXT_Q;
1046 }
1047 } while (true);
1048 }
1049
1050 /*
1051 * zfpm_encode_mac
1052 *
1053 * Encode a message to FPM with information about the given MAC.
1054 *
1055 * Returns the number of bytes written to the buffer.
1056 */
1057 static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
1058 size_t in_buf_len, fpm_msg_type_e *msg_type)
1059 {
1060 size_t len = 0;
1061
1062 *msg_type = FPM_MSG_TYPE_NONE;
1063
1064 switch (zfpm_g->message_format) {
1065
1066 case ZFPM_MSG_FORMAT_NONE:
1067 break;
1068 case ZFPM_MSG_FORMAT_NETLINK:
1069 #ifdef HAVE_NETLINK
1070 len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
1071 assert(fpm_msg_align(len) == len);
1072 *msg_type = FPM_MSG_TYPE_NETLINK;
1073 #endif /* HAVE_NETLINK */
1074 break;
1075 case ZFPM_MSG_FORMAT_PROTOBUF:
1076 break;
1077 }
1078 return len;
1079 }
1080
1081 static int zfpm_build_mac_updates(void)
1082 {
1083 struct stream *s;
1084 struct fpm_mac_info_t *mac;
1085 unsigned char *buf, *data, *buf_end;
1086 fpm_msg_hdr_t *hdr;
1087 size_t data_len, msg_len;
1088 fpm_msg_type_e msg_type;
1089 uint16_t q_limit;
1090
1091 if (TAILQ_EMPTY(&zfpm_g->mac_q))
1092 return FPM_GOTO_NEXT_Q;
1093
1094 s = zfpm_g->obuf;
1095 q_limit = FPM_QUEUE_PROCESS_LIMIT;
1096
1097 do {
1098 /* Make sure there is enough space to write another message. */
1099 if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
1100 return FPM_WRITE_STOP;
1101
1102 buf = STREAM_DATA(s) + stream_get_endp(s);
1103 buf_end = buf + STREAM_WRITEABLE(s);
1104
1105 mac = TAILQ_FIRST(&zfpm_g->mac_q);
1106 if (!mac)
1107 return FPM_GOTO_NEXT_Q;
1108
1109 /* Check for no-op */
1110 if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
1111 zfpm_g->stats.nop_deletes_skipped++;
1112 zfpm_mac_info_del(mac);
1113 continue;
1114 }
1115
1116 hdr = (fpm_msg_hdr_t *)buf;
1117 hdr->version = FPM_PROTO_VERSION;
1118
1119 data = fpm_msg_data(hdr);
1120 data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
1121 &msg_type);
1122 assert(data_len);
1123
1124 hdr->msg_type = msg_type;
1125 msg_len = fpm_data_len_to_msg_len(data_len);
1126 hdr->msg_len = htons(msg_len);
1127 stream_forward_endp(s, msg_len);
1128
1129 /* Remove the MAC from the queue, and delete it. */
1130 zfpm_mac_info_del(mac);
1131
1132 q_limit--;
1133 if (q_limit == 0) {
1134 /*
1135 * We have processed enough updates in this queue.
1136 * Now yield for other queues.
1137 */
1138 return FPM_GOTO_NEXT_Q;
1139 }
1140 } while (1);
1141 }
1142
1143 /*
1144 * zfpm_build_updates
1145 *
1146 * Process the outgoing queues and write messages to the outbound
1147 * buffer.
1148 */
1149 static void zfpm_build_updates(void)
1150 {
1151 struct stream *s;
1152
1153 s = zfpm_g->obuf;
1154 assert(stream_empty(s));
1155
1156 do {
1157 /*
1158 * Stop processing the queues if zfpm_g->obuf is full
1159 * or we do not have more updates to process
1160 */
1161 if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
1162 break;
1163 if (zfpm_build_route_updates() == FPM_WRITE_STOP)
1164 break;
1165 } while (zfpm_updates_pending());
1166 }
1167
1168 /*
1169 * zfpm_write_cb
1170 */
1171 static int zfpm_write_cb(struct thread *thread)
1172 {
1173 struct stream *s;
1174 int num_writes;
1175
1176 zfpm_g->stats.write_cb_calls++;
1177
1178 /*
1179 * Check if async connect is now done.
1180 */
1181 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
1182 zfpm_connect_check();
1183 return 0;
1184 }
1185
1186 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
1187 assert(zfpm_g->sock >= 0);
1188
1189 num_writes = 0;
1190
1191 do {
1192 int bytes_to_write, bytes_written;
1193
1194 s = zfpm_g->obuf;
1195
1196 /*
1197 * If the stream is empty, try fill it up with data.
1198 */
1199 if (stream_empty(s)) {
1200 zfpm_build_updates();
1201 }
1202
1203 bytes_to_write = stream_get_endp(s) - stream_get_getp(s);
1204 if (!bytes_to_write)
1205 break;
1206
1207 bytes_written =
1208 write(zfpm_g->sock, stream_pnt(s), bytes_to_write);
1209 zfpm_g->stats.write_calls++;
1210 num_writes++;
1211
1212 if (bytes_written < 0) {
1213 if (ERRNO_IO_RETRY(errno))
1214 break;
1215
1216 zfpm_connection_down("failed to write to socket");
1217 return 0;
1218 }
1219
1220 if (bytes_written != bytes_to_write) {
1221
1222 /*
1223 * Partial write.
1224 */
1225 stream_forward_getp(s, bytes_written);
1226 zfpm_g->stats.partial_writes++;
1227 break;
1228 }
1229
1230 /*
1231 * We've written out the entire contents of the stream.
1232 */
1233 stream_reset(s);
1234
1235 if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) {
1236 zfpm_g->stats.max_writes_hit++;
1237 break;
1238 }
1239
1240 if (zfpm_thread_should_yield(thread)) {
1241 zfpm_g->stats.t_write_yields++;
1242 break;
1243 }
1244 } while (1);
1245
1246 if (zfpm_writes_pending())
1247 zfpm_write_on();
1248
1249 return 0;
1250 }
1251
1252 /*
1253 * zfpm_connect_cb
1254 */
1255 static int zfpm_connect_cb(struct thread *t)
1256 {
1257 int sock, ret;
1258 struct sockaddr_in serv;
1259
1260 assert(zfpm_g->state == ZFPM_STATE_ACTIVE);
1261
1262 sock = socket(AF_INET, SOCK_STREAM, 0);
1263 if (sock < 0) {
1264 zlog_err("Failed to create socket for connect(): %s",
1265 strerror(errno));
1266 zfpm_g->stats.connect_no_sock++;
1267 return 0;
1268 }
1269
1270 set_nonblocking(sock);
1271
1272 /* Make server socket. */
1273 memset(&serv, 0, sizeof(serv));
1274 serv.sin_family = AF_INET;
1275 serv.sin_port = htons(zfpm_g->fpm_port);
1276 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
1277 serv.sin_len = sizeof(struct sockaddr_in);
1278 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
1279 if (!zfpm_g->fpm_server)
1280 serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1281 else
1282 serv.sin_addr.s_addr = (zfpm_g->fpm_server);
1283
1284 /*
1285 * Connect to the FPM.
1286 */
1287 zfpm_g->connect_calls++;
1288 zfpm_g->stats.connect_calls++;
1289 zfpm_g->last_connect_call_time = monotime(NULL);
1290
1291 ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv));
1292 if (ret >= 0) {
1293 zfpm_g->sock = sock;
1294 zfpm_connection_up("connect succeeded");
1295 return 1;
1296 }
1297
1298 if (errno == EINPROGRESS) {
1299 zfpm_g->sock = sock;
1300 zfpm_read_on();
1301 zfpm_write_on();
1302 zfpm_set_state(ZFPM_STATE_CONNECTING,
1303 "async connect in progress");
1304 return 0;
1305 }
1306
1307 zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno));
1308 close(sock);
1309
1310 /*
1311 * Restart timer for retrying connection.
1312 */
1313 zfpm_start_connect_timer("connect() failed");
1314 return 0;
1315 }
1316
1317 /*
1318 * zfpm_set_state
1319 *
1320 * Move state machine into the given state.
1321 */
1322 static void zfpm_set_state(enum zfpm_state state, const char *reason)
1323 {
1324 enum zfpm_state cur_state = zfpm_g->state;
1325
1326 if (!reason)
1327 reason = "Unknown";
1328
1329 if (state == cur_state)
1330 return;
1331
1332 zfpm_debug("beginning state transition %s -> %s. Reason: %s",
1333 zfpm_state_to_str(cur_state), zfpm_state_to_str(state),
1334 reason);
1335
1336 switch (state) {
1337
1338 case ZFPM_STATE_IDLE:
1339 assert(cur_state == ZFPM_STATE_ESTABLISHED);
1340 break;
1341
1342 case ZFPM_STATE_ACTIVE:
1343 assert(cur_state == ZFPM_STATE_IDLE
1344 || cur_state == ZFPM_STATE_CONNECTING);
1345 assert(zfpm_g->t_connect);
1346 break;
1347
1348 case ZFPM_STATE_CONNECTING:
1349 assert(zfpm_g->sock);
1350 assert(cur_state == ZFPM_STATE_ACTIVE);
1351 assert(zfpm_g->t_read);
1352 assert(zfpm_g->t_write);
1353 break;
1354
1355 case ZFPM_STATE_ESTABLISHED:
1356 assert(cur_state == ZFPM_STATE_ACTIVE
1357 || cur_state == ZFPM_STATE_CONNECTING);
1358 assert(zfpm_g->sock);
1359 assert(zfpm_g->t_read);
1360 assert(zfpm_g->t_write);
1361 break;
1362 }
1363
1364 zfpm_g->state = state;
1365 }
1366
1367 /*
1368 * zfpm_calc_connect_delay
1369 *
1370 * Returns the number of seconds after which we should attempt to
1371 * reconnect to the FPM.
1372 */
1373 static long zfpm_calc_connect_delay(void)
1374 {
1375 time_t elapsed;
1376
1377 /*
1378 * Return 0 if this is our first attempt to connect.
1379 */
1380 if (zfpm_g->connect_calls == 0) {
1381 return 0;
1382 }
1383
1384 elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time);
1385
1386 if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
1387 return 0;
1388 }
1389
1390 return ZFPM_CONNECT_RETRY_IVL - elapsed;
1391 }
1392
1393 /*
1394 * zfpm_start_connect_timer
1395 */
1396 static void zfpm_start_connect_timer(const char *reason)
1397 {
1398 long delay_secs;
1399
1400 assert(!zfpm_g->t_connect);
1401 assert(zfpm_g->sock < 0);
1402
1403 assert(zfpm_g->state == ZFPM_STATE_IDLE
1404 || zfpm_g->state == ZFPM_STATE_ACTIVE
1405 || zfpm_g->state == ZFPM_STATE_CONNECTING);
1406
1407 delay_secs = zfpm_calc_connect_delay();
1408 zfpm_debug("scheduling connect in %ld seconds", delay_secs);
1409
1410 thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs,
1411 &zfpm_g->t_connect);
1412 zfpm_set_state(ZFPM_STATE_ACTIVE, reason);
1413 }
1414
1415 /*
1416 * zfpm_is_enabled
1417 *
1418 * Returns true if the zebra FPM module has been enabled.
1419 */
1420 static inline int zfpm_is_enabled(void)
1421 {
1422 return zfpm_g->enabled;
1423 }
1424
1425 /*
1426 * zfpm_conn_is_up
1427 *
1428 * Returns true if the connection to the FPM is up.
1429 */
1430 static inline int zfpm_conn_is_up(void)
1431 {
1432 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
1433 return 0;
1434
1435 assert(zfpm_g->sock >= 0);
1436
1437 return 1;
1438 }
1439
1440 /*
1441 * zfpm_trigger_update
1442 *
1443 * The zebra code invokes this function to indicate that we should
1444 * send an update to the FPM about the given route_node.
1445 */
1446 static int zfpm_trigger_update(struct route_node *rn, const char *reason)
1447 {
1448 rib_dest_t *dest;
1449
1450 /*
1451 * Ignore if the connection is down. We will update the FPM about
1452 * all destinations once the connection comes up.
1453 */
1454 if (!zfpm_conn_is_up())
1455 return 0;
1456
1457 dest = rib_dest_from_rnode(rn);
1458
1459 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
1460 zfpm_g->stats.redundant_triggers++;
1461 return 0;
1462 }
1463
1464 if (reason) {
1465 zfpm_debug("%pFX triggering update to FPM - Reason: %s", &rn->p,
1466 reason);
1467 }
1468
1469 SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1470 TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries);
1471 zfpm_g->stats.updates_triggered++;
1472
1473 /*
1474 * Make sure that writes are enabled.
1475 */
1476 if (zfpm_g->t_write)
1477 return 0;
1478
1479 zfpm_write_on();
1480 return 0;
1481 }
1482
1483 /*
1484 * Generate Key for FPM MAC info hash entry
1485 */
1486 static unsigned int zfpm_mac_info_hash_keymake(const void *p)
1487 {
1488 struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p;
1489 uint32_t mac_key;
1490
1491 mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a);
1492
1493 return jhash_2words(mac_key, fpm_mac->vni, 0);
1494 }
1495
1496 /*
1497 * Compare function for FPM MAC info hash lookup
1498 */
1499 static bool zfpm_mac_info_cmp(const void *p1, const void *p2)
1500 {
1501 const struct fpm_mac_info_t *fpm_mac1 = p1;
1502 const struct fpm_mac_info_t *fpm_mac2 = p2;
1503
1504 if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN)
1505 != 0)
1506 return false;
1507 if (fpm_mac1->vni != fpm_mac2->vni)
1508 return false;
1509
1510 return true;
1511 }
1512
1513 /*
1514 * Lookup FPM MAC info hash entry.
1515 */
1516 static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key)
1517 {
1518 return hash_lookup(zfpm_g->fpm_mac_info_table, key);
1519 }
1520
1521 /*
1522 * Callback to allocate fpm_mac_info_t structure.
1523 */
1524 static void *zfpm_mac_info_alloc(void *p)
1525 {
1526 const struct fpm_mac_info_t *key = p;
1527 struct fpm_mac_info_t *fpm_mac;
1528
1529 fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t));
1530
1531 memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN);
1532 fpm_mac->vni = key->vni;
1533
1534 return (void *)fpm_mac;
1535 }
1536
1537 /*
1538 * Delink and free fpm_mac_info_t.
1539 */
1540 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac)
1541 {
1542 hash_release(zfpm_g->fpm_mac_info_table, fpm_mac);
1543 TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1544 XFREE(MTYPE_FPM_MAC_INFO, fpm_mac);
1545 }
1546
1547 /*
1548 * zfpm_trigger_rmac_update
1549 *
1550 * Zebra code invokes this function to indicate that we should
1551 * send an update to FPM for given MAC entry.
1552 *
1553 * This function checks if we already have enqueued an update for this RMAC,
1554 * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update.
1555 */
1556 static int zfpm_trigger_rmac_update(struct zebra_mac *rmac,
1557 struct zebra_l3vni *zl3vni, bool delete,
1558 const char *reason)
1559 {
1560 struct fpm_mac_info_t *fpm_mac, key;
1561 struct interface *vxlan_if, *svi_if;
1562 bool mac_found = false;
1563
1564 /*
1565 * Ignore if the connection is down. We will update the FPM about
1566 * all destinations once the connection comes up.
1567 */
1568 if (!zfpm_conn_is_up())
1569 return 0;
1570
1571 if (reason) {
1572 zfpm_debug("triggering update to FPM - Reason: %s - %pEA",
1573 reason, &rmac->macaddr);
1574 }
1575
1576 vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);
1577 svi_if = zl3vni_map_to_svi_if(zl3vni);
1578
1579 memset(&key, 0, sizeof(struct fpm_mac_info_t));
1580
1581 memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN);
1582 key.vni = zl3vni->vni;
1583
1584 /* Check if this MAC is already present in the queue. */
1585 fpm_mac = zfpm_mac_info_lookup(&key);
1586
1587 if (fpm_mac) {
1588 mac_found = true;
1589
1590 /*
1591 * If the enqueued op is "add" and current op is "delete",
1592 * this is a noop. So, Unset ZEBRA_MAC_UPDATE_FPM flag.
1593 * While processing FPM queue, we will silently delete this
1594 * MAC entry without sending any update for this MAC.
1595 */
1596 if (!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) &&
1597 delete == 1) {
1598 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1599 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1600 return 0;
1601 }
1602 } else {
1603 fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key,
1604 zfpm_mac_info_alloc);
1605 if (!fpm_mac)
1606 return 0;
1607 }
1608
1609 fpm_mac->r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr;
1610 fpm_mac->zebra_flags = rmac->flags;
1611 fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
1612 fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;
1613
1614 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1615 if (delete)
1616 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1617 else
1618 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1619
1620 if (!mac_found)
1621 TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1622
1623 zfpm_g->stats.updates_triggered++;
1624
1625 /* If writes are already enabled, return. */
1626 if (zfpm_g->t_write)
1627 return 0;
1628
1629 zfpm_write_on();
1630 return 0;
1631 }
1632
1633 /*
1634 * This function is called when the FPM connections is established.
1635 * Iterate over all the RMAC entries for the given L3VNI
1636 * and enqueue the RMAC for FPM processing.
1637 */
1638 static void zfpm_trigger_rmac_update_wrapper(struct hash_bucket *bucket,
1639 void *args)
1640 {
1641 struct zebra_mac *zrmac = (struct zebra_mac *)bucket->data;
1642 struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)args;
1643
1644 zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added");
1645 }
1646
1647 /*
1648 * This function is called when the FPM connections is established.
1649 * This function iterates over all the L3VNIs to trigger
1650 * FPM updates for RMACs currently available.
1651 */
1652 static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args)
1653 {
1654 struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)bucket->data;
1655
1656 hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper,
1657 (void *)zl3vni);
1658 }
1659
1660 /*
1661 * struct zfpm_statsimer_cb
1662 */
1663 static int zfpm_stats_timer_cb(struct thread *t)
1664 {
1665 zfpm_g->t_stats = NULL;
1666
1667 /*
1668 * Remember the stats collected in the last interval for display
1669 * purposes.
1670 */
1671 zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats);
1672
1673 /*
1674 * Add the current set of stats into the cumulative statistics.
1675 */
1676 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1677 &zfpm_g->cumulative_stats);
1678
1679 /*
1680 * Start collecting stats afresh over the next interval.
1681 */
1682 zfpm_stats_reset(&zfpm_g->stats);
1683
1684 zfpm_start_stats_timer();
1685
1686 return 0;
1687 }
1688
1689 /*
1690 * zfpm_stop_stats_timer
1691 */
1692 static void zfpm_stop_stats_timer(void)
1693 {
1694 if (!zfpm_g->t_stats)
1695 return;
1696
1697 zfpm_debug("Stopping existing stats timer");
1698 thread_cancel(&zfpm_g->t_stats);
1699 }
1700
1701 /*
1702 * zfpm_start_stats_timer
1703 */
1704 void zfpm_start_stats_timer(void)
1705 {
1706 assert(!zfpm_g->t_stats);
1707
1708 thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0,
1709 ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats);
1710 }
1711
1712 /*
1713 * Helper macro for zfpm_show_stats() below.
1714 */
1715 #define ZFPM_SHOW_STAT(counter) \
1716 do { \
1717 vty_out(vty, "%-40s %10lu %16lu\n", #counter, \
1718 total_stats.counter, zfpm_g->last_ivl_stats.counter); \
1719 } while (0)
1720
1721 /*
1722 * zfpm_show_stats
1723 */
1724 static void zfpm_show_stats(struct vty *vty)
1725 {
1726 struct zfpm_stats total_stats;
1727 time_t elapsed;
1728
1729 vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total",
1730 ZFPM_STATS_IVL_SECS);
1731
1732 /*
1733 * Compute the total stats up to this instant.
1734 */
1735 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1736 &total_stats);
1737
1738 ZFPM_SHOW_STAT(connect_calls);
1739 ZFPM_SHOW_STAT(connect_no_sock);
1740 ZFPM_SHOW_STAT(read_cb_calls);
1741 ZFPM_SHOW_STAT(write_cb_calls);
1742 ZFPM_SHOW_STAT(write_calls);
1743 ZFPM_SHOW_STAT(partial_writes);
1744 ZFPM_SHOW_STAT(max_writes_hit);
1745 ZFPM_SHOW_STAT(t_write_yields);
1746 ZFPM_SHOW_STAT(nop_deletes_skipped);
1747 ZFPM_SHOW_STAT(route_adds);
1748 ZFPM_SHOW_STAT(route_dels);
1749 ZFPM_SHOW_STAT(updates_triggered);
1750 ZFPM_SHOW_STAT(redundant_triggers);
1751 ZFPM_SHOW_STAT(dests_del_after_update);
1752 ZFPM_SHOW_STAT(t_conn_down_starts);
1753 ZFPM_SHOW_STAT(t_conn_down_dests_processed);
1754 ZFPM_SHOW_STAT(t_conn_down_yields);
1755 ZFPM_SHOW_STAT(t_conn_down_finishes);
1756 ZFPM_SHOW_STAT(t_conn_up_starts);
1757 ZFPM_SHOW_STAT(t_conn_up_dests_processed);
1758 ZFPM_SHOW_STAT(t_conn_up_yields);
1759 ZFPM_SHOW_STAT(t_conn_up_aborts);
1760 ZFPM_SHOW_STAT(t_conn_up_finishes);
1761
1762 if (!zfpm_g->last_stats_clear_time)
1763 return;
1764
1765 elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time);
1766
1767 vty_out(vty, "\nStats were cleared %lu seconds ago\n",
1768 (unsigned long)elapsed);
1769 }
1770
1771 /*
1772 * zfpm_clear_stats
1773 */
1774 static void zfpm_clear_stats(struct vty *vty)
1775 {
1776 if (!zfpm_is_enabled()) {
1777 vty_out(vty, "The FPM module is not enabled...\n");
1778 return;
1779 }
1780
1781 zfpm_stats_reset(&zfpm_g->stats);
1782 zfpm_stats_reset(&zfpm_g->last_ivl_stats);
1783 zfpm_stats_reset(&zfpm_g->cumulative_stats);
1784
1785 zfpm_stop_stats_timer();
1786 zfpm_start_stats_timer();
1787
1788 zfpm_g->last_stats_clear_time = monotime(NULL);
1789
1790 vty_out(vty, "Cleared FPM stats\n");
1791 }
1792
1793 /*
1794 * show_zebra_fpm_stats
1795 */
1796 DEFUN (show_zebra_fpm_stats,
1797 show_zebra_fpm_stats_cmd,
1798 "show zebra fpm stats",
1799 SHOW_STR
1800 ZEBRA_STR
1801 "Forwarding Path Manager information\n"
1802 "Statistics\n")
1803 {
1804 zfpm_show_stats(vty);
1805 return CMD_SUCCESS;
1806 }
1807
1808 /*
1809 * clear_zebra_fpm_stats
1810 */
1811 DEFUN (clear_zebra_fpm_stats,
1812 clear_zebra_fpm_stats_cmd,
1813 "clear zebra fpm stats",
1814 CLEAR_STR
1815 ZEBRA_STR
1816 "Clear Forwarding Path Manager information\n"
1817 "Statistics\n")
1818 {
1819 zfpm_clear_stats(vty);
1820 return CMD_SUCCESS;
1821 }
1822
1823 /*
1824 * update fpm connection information
1825 */
1826 DEFUN ( fpm_remote_ip,
1827 fpm_remote_ip_cmd,
1828 "fpm connection ip A.B.C.D port (1-65535)",
1829 "fpm connection remote ip and port\n"
1830 "Remote fpm server ip A.B.C.D\n"
1831 "Enter ip ")
1832 {
1833
1834 in_addr_t fpm_server;
1835 uint32_t port_no;
1836
1837 fpm_server = inet_addr(argv[3]->arg);
1838 if (fpm_server == INADDR_NONE)
1839 return CMD_ERR_INCOMPLETE;
1840
1841 port_no = atoi(argv[5]->arg);
1842 if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT)
1843 return CMD_ERR_INCOMPLETE;
1844
1845 zfpm_g->fpm_server = fpm_server;
1846 zfpm_g->fpm_port = port_no;
1847
1848
1849 return CMD_SUCCESS;
1850 }
1851
1852 DEFUN ( no_fpm_remote_ip,
1853 no_fpm_remote_ip_cmd,
1854 "no fpm connection ip A.B.C.D port (1-65535)",
1855 "fpm connection remote ip and port\n"
1856 "Connection\n"
1857 "Remote fpm server ip A.B.C.D\n"
1858 "Enter ip ")
1859 {
1860 if (zfpm_g->fpm_server != inet_addr(argv[4]->arg)
1861 || zfpm_g->fpm_port != atoi(argv[6]->arg))
1862 return CMD_ERR_NO_MATCH;
1863
1864 zfpm_g->fpm_server = FPM_DEFAULT_IP;
1865 zfpm_g->fpm_port = FPM_DEFAULT_PORT;
1866
1867 return CMD_SUCCESS;
1868 }
1869
1870 /*
1871 * zfpm_init_message_format
1872 */
1873 static inline void zfpm_init_message_format(const char *format)
1874 {
1875 int have_netlink, have_protobuf;
1876
1877 #ifdef HAVE_NETLINK
1878 have_netlink = 1;
1879 #else
1880 have_netlink = 0;
1881 #endif
1882
1883 #ifdef HAVE_PROTOBUF
1884 have_protobuf = 1;
1885 #else
1886 have_protobuf = 0;
1887 #endif
1888
1889 zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE;
1890
1891 if (!format) {
1892 if (have_netlink) {
1893 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1894 } else if (have_protobuf) {
1895 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1896 }
1897 return;
1898 }
1899
1900 if (!strcmp("netlink", format)) {
1901 if (!have_netlink) {
1902 flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE,
1903 "FPM netlink message format is not available");
1904 return;
1905 }
1906 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1907 return;
1908 }
1909
1910 if (!strcmp("protobuf", format)) {
1911 if (!have_protobuf) {
1912 flog_err(
1913 EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1914 "FPM protobuf message format is not available");
1915 return;
1916 }
1917 flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1918 "FPM protobuf message format is deprecated and scheduled to be removed. Please convert to using netlink format or contact dev@lists.frrouting.org with your use case.");
1919 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1920 return;
1921 }
1922
1923 flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'",
1924 format);
1925 }
1926
1927 /**
1928 * fpm_remote_srv_write
1929 *
1930 * Module to write remote fpm connection
1931 *
1932 * Returns ZERO on success.
1933 */
1934
1935 static int fpm_remote_srv_write(struct vty *vty)
1936 {
1937 struct in_addr in;
1938
1939 in.s_addr = zfpm_g->fpm_server;
1940
1941 if ((zfpm_g->fpm_server != FPM_DEFAULT_IP
1942 && zfpm_g->fpm_server != INADDR_ANY)
1943 || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0))
1944 vty_out(vty, "fpm connection ip %pI4 port %d\n", &in,
1945 zfpm_g->fpm_port);
1946
1947 return 0;
1948 }
1949
1950
1951 static int fpm_remote_srv_write(struct vty *vty);
1952 /* Zebra node */
1953 static struct cmd_node zebra_node = {
1954 .name = "zebra",
1955 .node = ZEBRA_NODE,
1956 .parent_node = CONFIG_NODE,
1957 .prompt = "",
1958 .config_write = fpm_remote_srv_write,
1959 };
1960
1961
1962 /**
1963 * zfpm_init
1964 *
1965 * One-time initialization of the Zebra FPM module.
1966 *
1967 * @param[in] port port at which FPM is running.
1968 * @param[in] enable true if the zebra FPM module should be enabled
1969 * @param[in] format to use to talk to the FPM. Can be 'netink' or 'protobuf'.
1970 *
1971 * Returns true on success.
1972 */
1973 static int zfpm_init(struct thread_master *master)
1974 {
1975 int enable = 1;
1976 uint16_t port = 0;
1977 const char *format = THIS_MODULE->load_args;
1978
1979 memset(zfpm_g, 0, sizeof(*zfpm_g));
1980 zfpm_g->master = master;
1981 TAILQ_INIT(&zfpm_g->dest_q);
1982 TAILQ_INIT(&zfpm_g->mac_q);
1983
1984 /* Create hash table for fpm_mac_info_t enties */
1985 zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake,
1986 zfpm_mac_info_cmp,
1987 "FPM MAC info hash table");
1988
1989 zfpm_g->sock = -1;
1990 zfpm_g->state = ZFPM_STATE_IDLE;
1991
1992 zfpm_stats_init(&zfpm_g->stats);
1993 zfpm_stats_init(&zfpm_g->last_ivl_stats);
1994 zfpm_stats_init(&zfpm_g->cumulative_stats);
1995
1996 install_node(&zebra_node);
1997 install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd);
1998 install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
1999 install_element(CONFIG_NODE, &fpm_remote_ip_cmd);
2000 install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd);
2001
2002 zfpm_init_message_format(format);
2003
2004 /*
2005 * Disable FPM interface if no suitable format is available.
2006 */
2007 if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE)
2008 enable = 0;
2009
2010 zfpm_g->enabled = enable;
2011
2012 if (!zfpm_g->fpm_server)
2013 zfpm_g->fpm_server = FPM_DEFAULT_IP;
2014
2015 if (!port)
2016 port = FPM_DEFAULT_PORT;
2017
2018 zfpm_g->fpm_port = port;
2019
2020 zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE);
2021 zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE);
2022
2023 zfpm_start_stats_timer();
2024 zfpm_start_connect_timer("initialized");
2025 return 0;
2026 }
2027
2028 static int zfpm_fini(void)
2029 {
2030 zfpm_write_off();
2031 zfpm_read_off();
2032 zfpm_connect_off();
2033
2034 zfpm_stop_stats_timer();
2035
2036 hook_unregister(rib_update, zfpm_trigger_update);
2037 return 0;
2038 }
2039
2040 static int zebra_fpm_module_init(void)
2041 {
2042 hook_register(rib_update, zfpm_trigger_update);
2043 hook_register(zebra_rmac_update, zfpm_trigger_rmac_update);
2044 hook_register(frr_late_init, zfpm_init);
2045 hook_register(frr_early_fini, zfpm_fini);
2046 return 0;
2047 }
2048
2049 FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION,
2050 .description = "zebra FPM (Forwarding Plane Manager) module",
2051 .init = zebra_fpm_module_init,
2052 );