]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm.c
Merge pull request #8317 from mjstapp/fix_short_printfrr_buf
[mirror_frr.git] / zebra / zebra_fpm.c
1 /*
2 * Main implementation file for interface to Forwarding Plane Manager.
3 *
4 * Copyright (C) 2012 by Open Source Routing.
5 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
6 *
7 * This file is part of GNU Zebra.
8 *
9 * GNU Zebra is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2, or (at your option) any
12 * later version.
13 *
14 * GNU Zebra is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; see the file COPYING; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <zebra.h>
25
26 #include "log.h"
27 #include "libfrr.h"
28 #include "stream.h"
29 #include "thread.h"
30 #include "network.h"
31 #include "command.h"
32 #include "version.h"
33 #include "jhash.h"
34
35 #include "zebra/rib.h"
36 #include "zebra/zserv.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/zebra_errors.h"
40
41 #include "fpm/fpm.h"
42 #include "zebra_fpm_private.h"
43 #include "zebra/zebra_router.h"
44 #include "zebra_vxlan_private.h"
45
/* Memory type declared for FPM MAC info allocations. */
DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");

/*
 * Interval at which we attempt to connect to the FPM. It is compared
 * against an elapsed time in seconds (see zfpm_calc_connect_delay()).
 */
#define ZFPM_CONNECT_RETRY_IVL 5

/*
 * Sizes of outgoing and incoming stream buffers for writing/reading
 * FPM messages. The outgoing size is twice the maximum FPM message
 * length, the incoming size is exactly one maximum-length message.
 */
#define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
#define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)

/*
 * The maximum number of times the FPM socket write callback can call
 * 'write' before it yields.
 */
#define ZFPM_MAX_WRITES_PER_RUN 10

/*
 * Interval over which we collect statistics.
 */
#define ZFPM_STATS_IVL_SECS 10

/*
 * Free space that must be available in the outbound buffer before
 * zfpm_build_mac_updates() encodes another MAC message.
 */
#define FPM_MAX_MAC_MSG_LEN 512

/*
 * Callback handed to hash_iterate() over zrouter.l3vni_table when the
 * connection to the FPM comes up.
 */
static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args);
73
/*
 * Structure that holds state for iterating over all route_node
 * structures that are candidates for being communicated to the FPM.
 *
 * The walk is two-level: 'tables_iter' yields the route tables one
 * after another and 'iter' yields the nodes of the current table.
 */
struct zfpm_rnodes_iter {
	/* Position in the sequence of RIB tables. */
	rib_tables_iter_t tables_iter;
	/* Position inside the table currently being walked. */
	route_table_iter_t iter;
};
82
/*
 * Statistics.
 *
 * Every member must remain an 'unsigned long' counter:
 * zfpm_stats_compose() treats this structure as a flat array of
 * unsigned long values.
 */
struct zfpm_stats {
	/* Connection attempts. */
	unsigned long connect_calls;
	unsigned long connect_no_sock;

	/* Invocations of the read callback. */
	unsigned long read_cb_calls;

	/* Write callback activity. */
	unsigned long write_cb_calls;
	unsigned long write_calls;
	unsigned long partial_writes;
	unsigned long max_writes_hit;
	unsigned long t_write_yields;

	/* Messages built for (or withheld from) the FPM. */
	unsigned long nop_deletes_skipped;
	unsigned long route_adds;
	unsigned long route_dels;

	unsigned long updates_triggered;
	unsigned long redundant_triggers;

	/* Destinations garbage collected after their update was built. */
	unsigned long dests_del_after_update;

	/* Progress of the cleanup thread run after the connection drops. */
	unsigned long t_conn_down_starts;
	unsigned long t_conn_down_dests_processed;
	unsigned long t_conn_down_yields;
	unsigned long t_conn_down_finishes;

	/* Progress of the thread run once the connection is established. */
	unsigned long t_conn_up_starts;
	unsigned long t_conn_up_dests_processed;
	unsigned long t_conn_up_yields;
	unsigned long t_conn_up_aborts;
	unsigned long t_conn_up_finishes;
};
118
/*
 * States for the FPM state machine. The numeric values are spelled out
 * and match the implicit numbering of the enumerators.
 */
enum zfpm_state {
	/*
	 * Not yet ready to connect to the FPM: either the module is
	 * disabled or we are still cleaning up after a connection that
	 * went down.
	 */
	ZFPM_STATE_IDLE = 0,

	/*
	 * Ready to talk to the FPM and periodically trying to connect
	 * to it.
	 */
	ZFPM_STATE_ACTIVE = 1,

	/*
	 * A TCP connection is being brought up; we are waiting for a
	 * connect() call to complete asynchronously.
	 */
	ZFPM_STATE_CONNECTING = 2,

	/*
	 * The TCP connection to the FPM is up.
	 */
	ZFPM_STATE_ESTABLISHED = 3
};
149
/*
 * Encoding used for the messages exchanged with the FPM.
 */
enum zfpm_msg_format {
	ZFPM_MSG_FORMAT_NONE = 0,
	ZFPM_MSG_FORMAT_NETLINK = 1,
	ZFPM_MSG_FORMAT_PROTOBUF = 2,
};
158
/*
 * Globals.
 *
 * All state of the FPM interface lives in a single instance of this
 * structure, reached through the 'zfpm_g' pointer.
 */
struct zfpm_glob {

	/*
	 * True if the FPM module has been enabled.
	 */
	int enabled;

	/*
	 * Message format to be used to communicate with the fpm.
	 */
	enum zfpm_msg_format message_format;

	/* Thread master on which all FPM threads and timers are scheduled. */
	struct thread_master *master;

	/* Current state of the FPM state machine. */
	enum zfpm_state state;

	/*
	 * IPv4 address of the FPM. It is copied verbatim into
	 * sin_addr.s_addr; when it is zero, zfpm_connect_cb() connects to
	 * the loopback address instead.
	 */
	in_addr_t fpm_server;
	/*
	 * Port on which the FPM is running.
	 */
	int fpm_port;

	/*
	 * List of rib_dest_t structures to be processed
	 */
	TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;

	/*
	 * List of fpm_mac_info structures to be processed
	 */
	TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;

	/*
	 * Hash table of fpm_mac_info_t entries
	 *
	 * While adding an fpm_mac_info_t for a MAC to the mac_q,
	 * it is possible that another fpm_mac_info_t node for this MAC
	 * is already present in the queue.
	 * This is possible in the case of consecutive add->delete operations.
	 * To avoid such duplicate insertions in the mac_q,
	 * define a hash table for fpm_mac_info_t which can be looked up
	 * to see if an fpm_mac_info_t node for a MAC is already present
	 * in the mac_q.
	 */
	struct hash *fpm_mac_info_table;

	/*
	 * Stream socket to the FPM. Set to -1 when there is no socket.
	 */
	int sock;

	/*
	 * Buffers for messages to/from the FPM.
	 */
	struct stream *obuf;
	struct stream *ibuf;

	/*
	 * Threads for I/O.
	 */
	struct thread *t_connect;
	struct thread *t_write;
	struct thread *t_read;

	/*
	 * Thread to clean up after the TCP connection to the FPM goes down
	 * and the state that belongs to it.
	 */
	struct thread *t_conn_down;

	struct {
		struct zfpm_rnodes_iter iter;
	} t_conn_down_state;

	/*
	 * Thread to take actions once the TCP conn to the FPM comes up, and
	 * the state that belongs to it.
	 */
	struct thread *t_conn_up;

	struct {
		struct zfpm_rnodes_iter iter;
	} t_conn_up_state;

	/*
	 * Number of connect attempts made so far and the time of the most
	 * recent one; zfpm_calc_connect_delay() uses both to space out
	 * retries.
	 */
	unsigned long connect_calls;
	time_t last_connect_call_time;

	/*
	 * Stats from the start of the current statistics interval up to
	 * now. These are the counters we typically update in the code.
	 */
	struct zfpm_stats stats;

	/*
	 * Statistics that were gathered in the last collection interval.
	 */
	struct zfpm_stats last_ivl_stats;

	/*
	 * Cumulative stats from the last clear to the start of the current
	 * statistics interval.
	 */
	struct zfpm_stats cumulative_stats;

	/*
	 * Stats interval timer.
	 */
	struct thread *t_stats;

	/*
	 * If non-zero, the last time when statistics were cleared.
	 */
	time_t last_stats_clear_time;

	/*
	 * Flag to track the MAC dump status to FPM. Cleared when the
	 * connection comes up and set once the conn_up thread has queued
	 * the RMAC entries, so that the dump is not repeated after a yield.
	 */
	bool fpm_mac_dump_done;
};
281
/*
 * The one instance of the FPM globals, and the pointer through which
 * the rest of this file accesses it.
 */
static struct zfpm_glob zfpm_glob_space;
static struct zfpm_glob *zfpm_g = &zfpm_glob_space;

/* Forward declarations for functions used before their definition. */
static int zfpm_trigger_update(struct route_node *rn, const char *reason);

/* Socket I/O callbacks. */
static int zfpm_read_cb(struct thread *thread);
static int zfpm_write_cb(struct thread *thread);

static void zfpm_set_state(enum zfpm_state state, const char *reason);
static void zfpm_start_connect_timer(const char *reason);
static void zfpm_start_stats_timer(void);
static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);
294
/*
 * zfpm_thread_should_yield
 *
 * Thin wrapper around thread_should_yield(): returns its verdict for
 * the given thread. Callers yield when the result is non-zero.
 */
static inline int zfpm_thread_should_yield(struct thread *t)
{
	const int must_yield = thread_should_yield(t);

	return must_yield;
}
302
303 /*
304 * zfpm_state_to_str
305 */
306 static const char *zfpm_state_to_str(enum zfpm_state state)
307 {
308 switch (state) {
309
310 case ZFPM_STATE_IDLE:
311 return "idle";
312
313 case ZFPM_STATE_ACTIVE:
314 return "active";
315
316 case ZFPM_STATE_CONNECTING:
317 return "connecting";
318
319 case ZFPM_STATE_ESTABLISHED:
320 return "established";
321
322 default:
323 return "unknown";
324 }
325 }
326
/*
 * zfpm_get_elapsed_time
 *
 * Returns the number of seconds that have passed between 'reference'
 * and the current value of monotime(). A reference that lies in the
 * future trips an assertion; 0 is returned if execution continues.
 */
static time_t zfpm_get_elapsed_time(time_t reference)
{
	const time_t current = monotime(NULL);

	if (current >= reference)
		return current - reference;

	/* The reference must never be ahead of the current time. */
	assert(0);
	return 0;
}
345
346 /*
347 * zfpm_rnodes_iter_init
348 */
349 static inline void zfpm_rnodes_iter_init(struct zfpm_rnodes_iter *iter)
350 {
351 memset(iter, 0, sizeof(*iter));
352 rib_tables_iter_init(&iter->tables_iter);
353
354 /*
355 * This is a hack, but it makes implementing 'next' easier by
356 * ensuring that route_table_iter_next() will return NULL the first
357 * time we call it.
358 */
359 route_table_iter_init(&iter->iter, NULL);
360 route_table_iter_cleanup(&iter->iter);
361 }
362
363 /*
364 * zfpm_rnodes_iter_next
365 */
366 static inline struct route_node *
367 zfpm_rnodes_iter_next(struct zfpm_rnodes_iter *iter)
368 {
369 struct route_node *rn;
370 struct route_table *table;
371
372 while (1) {
373 rn = route_table_iter_next(&iter->iter);
374 if (rn)
375 return rn;
376
377 /*
378 * We've made our way through this table, go to the next one.
379 */
380 route_table_iter_cleanup(&iter->iter);
381
382 table = rib_tables_iter_next(&iter->tables_iter);
383
384 if (!table)
385 return NULL;
386
387 route_table_iter_init(&iter->iter, table);
388 }
389
390 return NULL;
391 }
392
393 /*
394 * zfpm_rnodes_iter_pause
395 */
396 static inline void zfpm_rnodes_iter_pause(struct zfpm_rnodes_iter *iter)
397 {
398 route_table_iter_pause(&iter->iter);
399 }
400
401 /*
402 * zfpm_rnodes_iter_cleanup
403 */
404 static inline void zfpm_rnodes_iter_cleanup(struct zfpm_rnodes_iter *iter)
405 {
406 route_table_iter_cleanup(&iter->iter);
407 rib_tables_iter_cleanup(&iter->tables_iter);
408 }
409
410 /*
411 * zfpm_stats_init
412 *
413 * Initialize a statistics block.
414 */
415 static inline void zfpm_stats_init(struct zfpm_stats *stats)
416 {
417 memset(stats, 0, sizeof(*stats));
418 }
419
420 /*
421 * zfpm_stats_reset
422 */
423 static inline void zfpm_stats_reset(struct zfpm_stats *stats)
424 {
425 zfpm_stats_init(stats);
426 }
427
428 /*
429 * zfpm_stats_copy
430 */
431 static inline void zfpm_stats_copy(const struct zfpm_stats *src,
432 struct zfpm_stats *dest)
433 {
434 memcpy(dest, src, sizeof(*dest));
435 }
436
437 /*
438 * zfpm_stats_compose
439 *
440 * Total up the statistics in two stats structures ('s1 and 's2') and
441 * return the result in the third argument, 'result'. Note that the
442 * pointer 'result' may be the same as 's1' or 's2'.
443 *
444 * For simplicity, the implementation below assumes that the stats
445 * structure is composed entirely of counters. This can easily be
446 * changed when necessary.
447 */
448 static void zfpm_stats_compose(const struct zfpm_stats *s1,
449 const struct zfpm_stats *s2,
450 struct zfpm_stats *result)
451 {
452 const unsigned long *p1, *p2;
453 unsigned long *result_p;
454 int i, num_counters;
455
456 p1 = (const unsigned long *)s1;
457 p2 = (const unsigned long *)s2;
458 result_p = (unsigned long *)result;
459
460 num_counters = (sizeof(struct zfpm_stats) / sizeof(unsigned long));
461
462 for (i = 0; i < num_counters; i++) {
463 result_p[i] = p1[i] + p2[i];
464 }
465 }
466
467 /*
468 * zfpm_read_on
469 */
470 static inline void zfpm_read_on(void)
471 {
472 assert(!zfpm_g->t_read);
473 assert(zfpm_g->sock >= 0);
474
475 thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
476 &zfpm_g->t_read);
477 }
478
479 /*
480 * zfpm_write_on
481 */
482 static inline void zfpm_write_on(void)
483 {
484 assert(!zfpm_g->t_write);
485 assert(zfpm_g->sock >= 0);
486
487 thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
488 &zfpm_g->t_write);
489 }
490
491 /*
492 * zfpm_read_off
493 */
494 static inline void zfpm_read_off(void)
495 {
496 thread_cancel(&zfpm_g->t_read);
497 }
498
499 /*
500 * zfpm_write_off
501 */
502 static inline void zfpm_write_off(void)
503 {
504 thread_cancel(&zfpm_g->t_write);
505 }
506
507 static inline void zfpm_connect_off(void)
508 {
509 thread_cancel(&zfpm_g->t_connect);
510 }
511
512 /*
513 * zfpm_conn_up_thread_cb
514 *
515 * Callback for actions to be taken when the connection to the FPM
516 * comes up.
517 */
518 static int zfpm_conn_up_thread_cb(struct thread *thread)
519 {
520 struct route_node *rnode;
521 struct zfpm_rnodes_iter *iter;
522 rib_dest_t *dest;
523
524 iter = &zfpm_g->t_conn_up_state.iter;
525
526 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
527 zfpm_debug(
528 "Connection not up anymore, conn_up thread aborting");
529 zfpm_g->stats.t_conn_up_aborts++;
530 goto done;
531 }
532
533 if (!zfpm_g->fpm_mac_dump_done) {
534 /* Enqueue FPM updates for all the RMAC entries */
535 hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table,
536 NULL);
537 /* mark dump done so that its not repeated after yield */
538 zfpm_g->fpm_mac_dump_done = true;
539 }
540
541 while ((rnode = zfpm_rnodes_iter_next(iter))) {
542 dest = rib_dest_from_rnode(rnode);
543
544 if (dest) {
545 zfpm_g->stats.t_conn_up_dests_processed++;
546 zfpm_trigger_update(rnode, NULL);
547 }
548
549 /*
550 * Yield if need be.
551 */
552 if (!zfpm_thread_should_yield(thread))
553 continue;
554
555 zfpm_g->stats.t_conn_up_yields++;
556 zfpm_rnodes_iter_pause(iter);
557 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
558 NULL, 0, &zfpm_g->t_conn_up);
559 return 0;
560 }
561
562 zfpm_g->stats.t_conn_up_finishes++;
563
564 done:
565 zfpm_rnodes_iter_cleanup(iter);
566 return 0;
567 }
568
569 /*
570 * zfpm_connection_up
571 *
572 * Called when the connection to the FPM comes up.
573 */
574 static void zfpm_connection_up(const char *detail)
575 {
576 assert(zfpm_g->sock >= 0);
577 zfpm_read_on();
578 zfpm_write_on();
579 zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);
580
581 /*
582 * Start thread to push existing routes to the FPM.
583 */
584 thread_cancel(&zfpm_g->t_conn_up);
585
586 zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);
587 zfpm_g->fpm_mac_dump_done = false;
588
589 zfpm_debug("Starting conn_up thread");
590
591 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
592 &zfpm_g->t_conn_up);
593 zfpm_g->stats.t_conn_up_starts++;
594 }
595
596 /*
597 * zfpm_connect_check
598 *
599 * Check if an asynchronous connect() to the FPM is complete.
600 */
601 static void zfpm_connect_check(void)
602 {
603 int status;
604 socklen_t slen;
605 int ret;
606
607 zfpm_read_off();
608 zfpm_write_off();
609
610 slen = sizeof(status);
611 ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
612 &slen);
613
614 if (ret >= 0 && status == 0) {
615 zfpm_connection_up("async connect complete");
616 return;
617 }
618
619 /*
620 * getsockopt() failed or indicated an error on the socket.
621 */
622 close(zfpm_g->sock);
623 zfpm_g->sock = -1;
624
625 zfpm_start_connect_timer("getsockopt() after async connect failed");
626 return;
627 }
628
629 /*
630 * zfpm_conn_down_thread_cb
631 *
632 * Callback that is invoked to clean up state after the TCP connection
633 * to the FPM goes down.
634 */
635 static int zfpm_conn_down_thread_cb(struct thread *thread)
636 {
637 struct route_node *rnode;
638 struct zfpm_rnodes_iter *iter;
639 rib_dest_t *dest;
640 struct fpm_mac_info_t *mac = NULL;
641
642 assert(zfpm_g->state == ZFPM_STATE_IDLE);
643
644 /*
645 * Delink and free all fpm_mac_info_t nodes
646 * in the mac_q and fpm_mac_info_hash
647 */
648 while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
649 zfpm_mac_info_del(mac);
650
651 zfpm_g->t_conn_down = NULL;
652
653 iter = &zfpm_g->t_conn_down_state.iter;
654
655 while ((rnode = zfpm_rnodes_iter_next(iter))) {
656 dest = rib_dest_from_rnode(rnode);
657
658 if (dest) {
659 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
660 TAILQ_REMOVE(&zfpm_g->dest_q, dest,
661 fpm_q_entries);
662 }
663
664 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
665 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
666
667 zfpm_g->stats.t_conn_down_dests_processed++;
668
669 /*
670 * Check if the dest should be deleted.
671 */
672 rib_gc_dest(rnode);
673 }
674
675 /*
676 * Yield if need be.
677 */
678 if (!zfpm_thread_should_yield(thread))
679 continue;
680
681 zfpm_g->stats.t_conn_down_yields++;
682 zfpm_rnodes_iter_pause(iter);
683 zfpm_g->t_conn_down = NULL;
684 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
685 NULL, 0, &zfpm_g->t_conn_down);
686 return 0;
687 }
688
689 zfpm_g->stats.t_conn_down_finishes++;
690 zfpm_rnodes_iter_cleanup(iter);
691
692 /*
693 * Start the process of connecting to the FPM again.
694 */
695 zfpm_start_connect_timer("cleanup complete");
696 return 0;
697 }
698
699 /*
700 * zfpm_connection_down
701 *
702 * Called when the connection to the FPM has gone down.
703 */
704 static void zfpm_connection_down(const char *detail)
705 {
706 if (!detail)
707 detail = "unknown";
708
709 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
710
711 zlog_info("connection to the FPM has gone down: %s", detail);
712
713 zfpm_read_off();
714 zfpm_write_off();
715
716 stream_reset(zfpm_g->ibuf);
717 stream_reset(zfpm_g->obuf);
718
719 if (zfpm_g->sock >= 0) {
720 close(zfpm_g->sock);
721 zfpm_g->sock = -1;
722 }
723
724 /*
725 * Start thread to clean up state after the connection goes down.
726 */
727 assert(!zfpm_g->t_conn_down);
728 zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
729 zfpm_g->t_conn_down = NULL;
730 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
731 &zfpm_g->t_conn_down);
732 zfpm_g->stats.t_conn_down_starts++;
733
734 zfpm_set_state(ZFPM_STATE_IDLE, detail);
735 }
736
737 /*
738 * zfpm_read_cb
739 */
740 static int zfpm_read_cb(struct thread *thread)
741 {
742 size_t already;
743 struct stream *ibuf;
744 uint16_t msg_len;
745 fpm_msg_hdr_t *hdr;
746
747 zfpm_g->stats.read_cb_calls++;
748
749 /*
750 * Check if async connect is now done.
751 */
752 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
753 zfpm_connect_check();
754 return 0;
755 }
756
757 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
758 assert(zfpm_g->sock >= 0);
759
760 ibuf = zfpm_g->ibuf;
761
762 already = stream_get_endp(ibuf);
763 if (already < FPM_MSG_HDR_LEN) {
764 ssize_t nbyte;
765
766 nbyte = stream_read_try(ibuf, zfpm_g->sock,
767 FPM_MSG_HDR_LEN - already);
768 if (nbyte == 0 || nbyte == -1) {
769 if (nbyte == -1) {
770 char buffer[1024];
771
772 snprintf(buffer, sizeof(buffer),
773 "closed socket in read(%d): %s", errno,
774 safe_strerror(errno));
775 zfpm_connection_down(buffer);
776 } else
777 zfpm_connection_down("closed socket in read");
778 return 0;
779 }
780
781 if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
782 goto done;
783
784 already = FPM_MSG_HDR_LEN;
785 }
786
787 stream_set_getp(ibuf, 0);
788
789 hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);
790
791 if (!fpm_msg_hdr_ok(hdr)) {
792 zfpm_connection_down("invalid message header");
793 return 0;
794 }
795
796 msg_len = fpm_msg_len(hdr);
797
798 /*
799 * Read out the rest of the packet.
800 */
801 if (already < msg_len) {
802 ssize_t nbyte;
803
804 nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);
805
806 if (nbyte == 0 || nbyte == -1) {
807 if (nbyte == -1) {
808 char buffer[1024];
809
810 snprintf(buffer, sizeof(buffer),
811 "failed to read message(%d) %s", errno,
812 safe_strerror(errno));
813 zfpm_connection_down(buffer);
814 } else
815 zfpm_connection_down("failed to read message");
816 return 0;
817 }
818
819 if (nbyte != (ssize_t)(msg_len - already))
820 goto done;
821 }
822
823 /*
824 * Just throw it away for now.
825 */
826 stream_reset(ibuf);
827
828 done:
829 zfpm_read_on();
830 return 0;
831 }
832
833 static bool zfpm_updates_pending(void)
834 {
835 if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
836 return true;
837
838 return false;
839 }
840
841 /*
842 * zfpm_writes_pending
843 *
844 * Returns true if we may have something to write to the FPM.
845 */
846 static int zfpm_writes_pending(void)
847 {
848
849 /*
850 * Check if there is any data in the outbound buffer that has not
851 * been written to the socket yet.
852 */
853 if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
854 return 1;
855
856 /*
857 * Check if there are any updates scheduled on the outbound queues.
858 */
859 if (zfpm_updates_pending())
860 return 1;
861
862 return 0;
863 }
864
865 /*
866 * zfpm_encode_route
867 *
868 * Encode a message to the FPM with information about the given route.
869 *
870 * Returns the number of bytes written to the buffer. 0 or a negative
871 * value indicates an error.
872 */
873 static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
874 char *in_buf, size_t in_buf_len,
875 fpm_msg_type_e *msg_type)
876 {
877 size_t len;
878 #ifdef HAVE_NETLINK
879 int cmd;
880 #endif
881 len = 0;
882
883 *msg_type = FPM_MSG_TYPE_NONE;
884
885 switch (zfpm_g->message_format) {
886
887 case ZFPM_MSG_FORMAT_PROTOBUF:
888 #ifdef HAVE_PROTOBUF
889 len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
890 in_buf_len);
891 *msg_type = FPM_MSG_TYPE_PROTOBUF;
892 #endif
893 break;
894
895 case ZFPM_MSG_FORMAT_NETLINK:
896 #ifdef HAVE_NETLINK
897 *msg_type = FPM_MSG_TYPE_NETLINK;
898 cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
899 len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
900 in_buf_len);
901 assert(fpm_msg_align(len) == len);
902 *msg_type = FPM_MSG_TYPE_NETLINK;
903 #endif /* HAVE_NETLINK */
904 break;
905
906 default:
907 break;
908 }
909
910 return len;
911 }
912
913 /*
914 * zfpm_route_for_update
915 *
916 * Returns the re that is to be sent to the FPM for a given dest.
917 */
918 struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
919 {
920 return dest->selected_fib;
921 }
922
/*
 * Return codes of the queue processing functions
 * (zfpm_build_route_updates() and zfpm_build_mac_updates()).
 *
 * FPM_WRITE_STOP: This return code indicates that the write buffer is full.
 * Stop processing all the queues and empty the buffer by writing its content
 * to the socket.
 *
 * FPM_GOTO_NEXT_Q: This return code indicates that either this queue is
 * empty or we have processed enough updates from this queue.
 * So, move on to the next queue.
 */
enum {
	FPM_WRITE_STOP = 0,
	FPM_GOTO_NEXT_Q = 1
};

/*
 * Maximum number of entries taken from one queue before the other
 * queue gets a turn. The queue processing functions count it down in a
 * uint16_t, so the value has to fit into that type.
 */
#define FPM_QUEUE_PROCESS_LIMIT 10000
940
941 /*
942 * zfpm_build_route_updates
943 *
944 * Process the dest_q queue and write FPM messages to the outbound buffer.
945 */
946 static int zfpm_build_route_updates(void)
947 {
948 struct stream *s;
949 rib_dest_t *dest;
950 unsigned char *buf, *data, *buf_end;
951 size_t msg_len;
952 size_t data_len;
953 fpm_msg_hdr_t *hdr;
954 struct route_entry *re;
955 int is_add, write_msg;
956 fpm_msg_type_e msg_type;
957 uint16_t q_limit;
958
959 if (TAILQ_EMPTY(&zfpm_g->dest_q))
960 return FPM_GOTO_NEXT_Q;
961
962 s = zfpm_g->obuf;
963 q_limit = FPM_QUEUE_PROCESS_LIMIT;
964
965 do {
966 /*
967 * Make sure there is enough space to write another message.
968 */
969 if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
970 return FPM_WRITE_STOP;
971
972 buf = STREAM_DATA(s) + stream_get_endp(s);
973 buf_end = buf + STREAM_WRITEABLE(s);
974
975 dest = TAILQ_FIRST(&zfpm_g->dest_q);
976 if (!dest)
977 return FPM_GOTO_NEXT_Q;
978
979 assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));
980
981 hdr = (fpm_msg_hdr_t *)buf;
982 hdr->version = FPM_PROTO_VERSION;
983
984 data = fpm_msg_data(hdr);
985
986 re = zfpm_route_for_update(dest);
987 is_add = re ? 1 : 0;
988
989 write_msg = 1;
990
991 /*
992 * If this is a route deletion, and we have not sent the route
993 * to
994 * the FPM previously, skip it.
995 */
996 if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
997 write_msg = 0;
998 zfpm_g->stats.nop_deletes_skipped++;
999 }
1000
1001 if (write_msg) {
1002 data_len = zfpm_encode_route(dest, re, (char *)data,
1003 buf_end - data, &msg_type);
1004
1005 assert(data_len);
1006 if (data_len) {
1007 hdr->msg_type = msg_type;
1008 msg_len = fpm_data_len_to_msg_len(data_len);
1009 hdr->msg_len = htons(msg_len);
1010 stream_forward_endp(s, msg_len);
1011
1012 if (is_add)
1013 zfpm_g->stats.route_adds++;
1014 else
1015 zfpm_g->stats.route_dels++;
1016 }
1017 }
1018
1019 /*
1020 * Remove the dest from the queue, and reset the flag.
1021 */
1022 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1023 TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);
1024
1025 if (is_add) {
1026 SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1027 } else {
1028 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1029 }
1030
1031 /*
1032 * Delete the destination if necessary.
1033 */
1034 if (rib_gc_dest(dest->rnode))
1035 zfpm_g->stats.dests_del_after_update++;
1036
1037 q_limit--;
1038 if (q_limit == 0) {
1039 /*
1040 * We have processed enough updates in this queue.
1041 * Now yield for other queues.
1042 */
1043 return FPM_GOTO_NEXT_Q;
1044 }
1045 } while (true);
1046 }
1047
1048 /*
1049 * zfpm_encode_mac
1050 *
1051 * Encode a message to FPM with information about the given MAC.
1052 *
1053 * Returns the number of bytes written to the buffer.
1054 */
1055 static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
1056 size_t in_buf_len, fpm_msg_type_e *msg_type)
1057 {
1058 size_t len = 0;
1059
1060 *msg_type = FPM_MSG_TYPE_NONE;
1061
1062 switch (zfpm_g->message_format) {
1063
1064 case ZFPM_MSG_FORMAT_NONE:
1065 break;
1066 case ZFPM_MSG_FORMAT_NETLINK:
1067 #ifdef HAVE_NETLINK
1068 len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
1069 assert(fpm_msg_align(len) == len);
1070 *msg_type = FPM_MSG_TYPE_NETLINK;
1071 #endif /* HAVE_NETLINK */
1072 break;
1073 case ZFPM_MSG_FORMAT_PROTOBUF:
1074 break;
1075 }
1076 return len;
1077 }
1078
1079 static int zfpm_build_mac_updates(void)
1080 {
1081 struct stream *s;
1082 struct fpm_mac_info_t *mac;
1083 unsigned char *buf, *data, *buf_end;
1084 fpm_msg_hdr_t *hdr;
1085 size_t data_len, msg_len;
1086 fpm_msg_type_e msg_type;
1087 uint16_t q_limit;
1088
1089 if (TAILQ_EMPTY(&zfpm_g->mac_q))
1090 return FPM_GOTO_NEXT_Q;
1091
1092 s = zfpm_g->obuf;
1093 q_limit = FPM_QUEUE_PROCESS_LIMIT;
1094
1095 do {
1096 /* Make sure there is enough space to write another message. */
1097 if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
1098 return FPM_WRITE_STOP;
1099
1100 buf = STREAM_DATA(s) + stream_get_endp(s);
1101 buf_end = buf + STREAM_WRITEABLE(s);
1102
1103 mac = TAILQ_FIRST(&zfpm_g->mac_q);
1104 if (!mac)
1105 return FPM_GOTO_NEXT_Q;
1106
1107 /* Check for no-op */
1108 if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
1109 zfpm_g->stats.nop_deletes_skipped++;
1110 zfpm_mac_info_del(mac);
1111 continue;
1112 }
1113
1114 hdr = (fpm_msg_hdr_t *)buf;
1115 hdr->version = FPM_PROTO_VERSION;
1116
1117 data = fpm_msg_data(hdr);
1118 data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
1119 &msg_type);
1120 assert(data_len);
1121
1122 hdr->msg_type = msg_type;
1123 msg_len = fpm_data_len_to_msg_len(data_len);
1124 hdr->msg_len = htons(msg_len);
1125 stream_forward_endp(s, msg_len);
1126
1127 /* Remove the MAC from the queue, and delete it. */
1128 zfpm_mac_info_del(mac);
1129
1130 q_limit--;
1131 if (q_limit == 0) {
1132 /*
1133 * We have processed enough updates in this queue.
1134 * Now yield for other queues.
1135 */
1136 return FPM_GOTO_NEXT_Q;
1137 }
1138 } while (1);
1139 }
1140
1141 /*
1142 * zfpm_build_updates
1143 *
1144 * Process the outgoing queues and write messages to the outbound
1145 * buffer.
1146 */
1147 static void zfpm_build_updates(void)
1148 {
1149 struct stream *s;
1150
1151 s = zfpm_g->obuf;
1152 assert(stream_empty(s));
1153
1154 do {
1155 /*
1156 * Stop processing the queues if zfpm_g->obuf is full
1157 * or we do not have more updates to process
1158 */
1159 if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
1160 break;
1161 if (zfpm_build_route_updates() == FPM_WRITE_STOP)
1162 break;
1163 } while (zfpm_updates_pending());
1164 }
1165
1166 /*
1167 * zfpm_write_cb
1168 */
1169 static int zfpm_write_cb(struct thread *thread)
1170 {
1171 struct stream *s;
1172 int num_writes;
1173
1174 zfpm_g->stats.write_cb_calls++;
1175
1176 /*
1177 * Check if async connect is now done.
1178 */
1179 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
1180 zfpm_connect_check();
1181 return 0;
1182 }
1183
1184 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
1185 assert(zfpm_g->sock >= 0);
1186
1187 num_writes = 0;
1188
1189 do {
1190 int bytes_to_write, bytes_written;
1191
1192 s = zfpm_g->obuf;
1193
1194 /*
1195 * If the stream is empty, try fill it up with data.
1196 */
1197 if (stream_empty(s)) {
1198 zfpm_build_updates();
1199 }
1200
1201 bytes_to_write = stream_get_endp(s) - stream_get_getp(s);
1202 if (!bytes_to_write)
1203 break;
1204
1205 bytes_written =
1206 write(zfpm_g->sock, stream_pnt(s), bytes_to_write);
1207 zfpm_g->stats.write_calls++;
1208 num_writes++;
1209
1210 if (bytes_written < 0) {
1211 if (ERRNO_IO_RETRY(errno))
1212 break;
1213
1214 zfpm_connection_down("failed to write to socket");
1215 return 0;
1216 }
1217
1218 if (bytes_written != bytes_to_write) {
1219
1220 /*
1221 * Partial write.
1222 */
1223 stream_forward_getp(s, bytes_written);
1224 zfpm_g->stats.partial_writes++;
1225 break;
1226 }
1227
1228 /*
1229 * We've written out the entire contents of the stream.
1230 */
1231 stream_reset(s);
1232
1233 if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) {
1234 zfpm_g->stats.max_writes_hit++;
1235 break;
1236 }
1237
1238 if (zfpm_thread_should_yield(thread)) {
1239 zfpm_g->stats.t_write_yields++;
1240 break;
1241 }
1242 } while (1);
1243
1244 if (zfpm_writes_pending())
1245 zfpm_write_on();
1246
1247 return 0;
1248 }
1249
1250 /*
1251 * zfpm_connect_cb
1252 */
1253 static int zfpm_connect_cb(struct thread *t)
1254 {
1255 int sock, ret;
1256 struct sockaddr_in serv;
1257
1258 assert(zfpm_g->state == ZFPM_STATE_ACTIVE);
1259
1260 sock = socket(AF_INET, SOCK_STREAM, 0);
1261 if (sock < 0) {
1262 zlog_err("Failed to create socket for connect(): %s",
1263 strerror(errno));
1264 zfpm_g->stats.connect_no_sock++;
1265 return 0;
1266 }
1267
1268 set_nonblocking(sock);
1269
1270 /* Make server socket. */
1271 memset(&serv, 0, sizeof(serv));
1272 serv.sin_family = AF_INET;
1273 serv.sin_port = htons(zfpm_g->fpm_port);
1274 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
1275 serv.sin_len = sizeof(struct sockaddr_in);
1276 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
1277 if (!zfpm_g->fpm_server)
1278 serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1279 else
1280 serv.sin_addr.s_addr = (zfpm_g->fpm_server);
1281
1282 /*
1283 * Connect to the FPM.
1284 */
1285 zfpm_g->connect_calls++;
1286 zfpm_g->stats.connect_calls++;
1287 zfpm_g->last_connect_call_time = monotime(NULL);
1288
1289 ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv));
1290 if (ret >= 0) {
1291 zfpm_g->sock = sock;
1292 zfpm_connection_up("connect succeeded");
1293 return 1;
1294 }
1295
1296 if (errno == EINPROGRESS) {
1297 zfpm_g->sock = sock;
1298 zfpm_read_on();
1299 zfpm_write_on();
1300 zfpm_set_state(ZFPM_STATE_CONNECTING,
1301 "async connect in progress");
1302 return 0;
1303 }
1304
1305 zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno));
1306 close(sock);
1307
1308 /*
1309 * Restart timer for retrying connection.
1310 */
1311 zfpm_start_connect_timer("connect() failed");
1312 return 0;
1313 }
1314
1315 /*
1316 * zfpm_set_state
1317 *
1318 * Move state machine into the given state.
1319 */
1320 static void zfpm_set_state(enum zfpm_state state, const char *reason)
1321 {
1322 enum zfpm_state cur_state = zfpm_g->state;
1323
1324 if (!reason)
1325 reason = "Unknown";
1326
1327 if (state == cur_state)
1328 return;
1329
1330 zfpm_debug("beginning state transition %s -> %s. Reason: %s",
1331 zfpm_state_to_str(cur_state), zfpm_state_to_str(state),
1332 reason);
1333
1334 switch (state) {
1335
1336 case ZFPM_STATE_IDLE:
1337 assert(cur_state == ZFPM_STATE_ESTABLISHED);
1338 break;
1339
1340 case ZFPM_STATE_ACTIVE:
1341 assert(cur_state == ZFPM_STATE_IDLE
1342 || cur_state == ZFPM_STATE_CONNECTING);
1343 assert(zfpm_g->t_connect);
1344 break;
1345
1346 case ZFPM_STATE_CONNECTING:
1347 assert(zfpm_g->sock);
1348 assert(cur_state == ZFPM_STATE_ACTIVE);
1349 assert(zfpm_g->t_read);
1350 assert(zfpm_g->t_write);
1351 break;
1352
1353 case ZFPM_STATE_ESTABLISHED:
1354 assert(cur_state == ZFPM_STATE_ACTIVE
1355 || cur_state == ZFPM_STATE_CONNECTING);
1356 assert(zfpm_g->sock);
1357 assert(zfpm_g->t_read);
1358 assert(zfpm_g->t_write);
1359 break;
1360 }
1361
1362 zfpm_g->state = state;
1363 }
1364
1365 /*
1366 * zfpm_calc_connect_delay
1367 *
1368 * Returns the number of seconds after which we should attempt to
1369 * reconnect to the FPM.
1370 */
1371 static long zfpm_calc_connect_delay(void)
1372 {
1373 time_t elapsed;
1374
1375 /*
1376 * Return 0 if this is our first attempt to connect.
1377 */
1378 if (zfpm_g->connect_calls == 0) {
1379 return 0;
1380 }
1381
1382 elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time);
1383
1384 if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
1385 return 0;
1386 }
1387
1388 return ZFPM_CONNECT_RETRY_IVL - elapsed;
1389 }
1390
1391 /*
1392 * zfpm_start_connect_timer
1393 */
1394 static void zfpm_start_connect_timer(const char *reason)
1395 {
1396 long delay_secs;
1397
1398 assert(!zfpm_g->t_connect);
1399 assert(zfpm_g->sock < 0);
1400
1401 assert(zfpm_g->state == ZFPM_STATE_IDLE
1402 || zfpm_g->state == ZFPM_STATE_ACTIVE
1403 || zfpm_g->state == ZFPM_STATE_CONNECTING);
1404
1405 delay_secs = zfpm_calc_connect_delay();
1406 zfpm_debug("scheduling connect in %ld seconds", delay_secs);
1407
1408 thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs,
1409 &zfpm_g->t_connect);
1410 zfpm_set_state(ZFPM_STATE_ACTIVE, reason);
1411 }
1412
1413 /*
1414 * zfpm_is_enabled
1415 *
1416 * Returns true if the zebra FPM module has been enabled.
1417 */
1418 static inline int zfpm_is_enabled(void)
1419 {
1420 return zfpm_g->enabled;
1421 }
1422
1423 /*
1424 * zfpm_conn_is_up
1425 *
1426 * Returns true if the connection to the FPM is up.
1427 */
1428 static inline int zfpm_conn_is_up(void)
1429 {
1430 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
1431 return 0;
1432
1433 assert(zfpm_g->sock >= 0);
1434
1435 return 1;
1436 }
1437
1438 /*
1439 * zfpm_trigger_update
1440 *
1441 * The zebra code invokes this function to indicate that we should
1442 * send an update to the FPM about the given route_node.
1443 */
1444 static int zfpm_trigger_update(struct route_node *rn, const char *reason)
1445 {
1446 rib_dest_t *dest;
1447
1448 /*
1449 * Ignore if the connection is down. We will update the FPM about
1450 * all destinations once the connection comes up.
1451 */
1452 if (!zfpm_conn_is_up())
1453 return 0;
1454
1455 dest = rib_dest_from_rnode(rn);
1456
1457 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
1458 zfpm_g->stats.redundant_triggers++;
1459 return 0;
1460 }
1461
1462 if (reason) {
1463 zfpm_debug("%pFX triggering update to FPM - Reason: %s", &rn->p,
1464 reason);
1465 }
1466
1467 SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1468 TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries);
1469 zfpm_g->stats.updates_triggered++;
1470
1471 /*
1472 * Make sure that writes are enabled.
1473 */
1474 if (zfpm_g->t_write)
1475 return 0;
1476
1477 zfpm_write_on();
1478 return 0;
1479 }
1480
1481 /*
1482 * Generate Key for FPM MAC info hash entry
1483 */
1484 static unsigned int zfpm_mac_info_hash_keymake(const void *p)
1485 {
1486 struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p;
1487 uint32_t mac_key;
1488
1489 mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a);
1490
1491 return jhash_2words(mac_key, fpm_mac->vni, 0);
1492 }
1493
1494 /*
1495 * Compare function for FPM MAC info hash lookup
1496 */
1497 static bool zfpm_mac_info_cmp(const void *p1, const void *p2)
1498 {
1499 const struct fpm_mac_info_t *fpm_mac1 = p1;
1500 const struct fpm_mac_info_t *fpm_mac2 = p2;
1501
1502 if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN)
1503 != 0)
1504 return false;
1505 if (fpm_mac1->vni != fpm_mac2->vni)
1506 return false;
1507
1508 return true;
1509 }
1510
1511 /*
1512 * Lookup FPM MAC info hash entry.
1513 */
1514 static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key)
1515 {
1516 return hash_lookup(zfpm_g->fpm_mac_info_table, key);
1517 }
1518
1519 /*
1520 * Callback to allocate fpm_mac_info_t structure.
1521 */
1522 static void *zfpm_mac_info_alloc(void *p)
1523 {
1524 const struct fpm_mac_info_t *key = p;
1525 struct fpm_mac_info_t *fpm_mac;
1526
1527 fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t));
1528
1529 memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN);
1530 fpm_mac->vni = key->vni;
1531
1532 return (void *)fpm_mac;
1533 }
1534
1535 /*
1536 * Delink and free fpm_mac_info_t.
1537 */
1538 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac)
1539 {
1540 hash_release(zfpm_g->fpm_mac_info_table, fpm_mac);
1541 TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1542 XFREE(MTYPE_FPM_MAC_INFO, fpm_mac);
1543 }
1544
1545 /*
1546 * zfpm_trigger_rmac_update
1547 *
1548 * Zebra code invokes this function to indicate that we should
1549 * send an update to FPM for given MAC entry.
1550 *
1551 * This function checks if we already have enqueued an update for this RMAC,
1552 * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update.
1553 */
1554 static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni,
1555 bool delete, const char *reason)
1556 {
1557 struct fpm_mac_info_t *fpm_mac, key;
1558 struct interface *vxlan_if, *svi_if;
1559 bool mac_found = false;
1560
1561 /*
1562 * Ignore if the connection is down. We will update the FPM about
1563 * all destinations once the connection comes up.
1564 */
1565 if (!zfpm_conn_is_up())
1566 return 0;
1567
1568 if (reason) {
1569 zfpm_debug("triggering update to FPM - Reason: %s - %pEA",
1570 reason, &rmac->macaddr);
1571 }
1572
1573 vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);
1574 svi_if = zl3vni_map_to_svi_if(zl3vni);
1575
1576 memset(&key, 0, sizeof(struct fpm_mac_info_t));
1577
1578 memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN);
1579 key.vni = zl3vni->vni;
1580
1581 /* Check if this MAC is already present in the queue. */
1582 fpm_mac = zfpm_mac_info_lookup(&key);
1583
1584 if (fpm_mac) {
1585 mac_found = true;
1586
1587 /*
1588 * If the enqueued op is "add" and current op is "delete",
1589 * this is a noop. So, Unset ZEBRA_MAC_UPDATE_FPM flag.
1590 * While processing FPM queue, we will silently delete this
1591 * MAC entry without sending any update for this MAC.
1592 */
1593 if (!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) &&
1594 delete == 1) {
1595 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1596 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1597 return 0;
1598 }
1599 } else {
1600 fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key,
1601 zfpm_mac_info_alloc);
1602 if (!fpm_mac)
1603 return 0;
1604 }
1605
1606 fpm_mac->r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr;
1607 fpm_mac->zebra_flags = rmac->flags;
1608 fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
1609 fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;
1610
1611 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1612 if (delete)
1613 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1614 else
1615 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1616
1617 if (!mac_found)
1618 TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1619
1620 zfpm_g->stats.updates_triggered++;
1621
1622 /* If writes are already enabled, return. */
1623 if (zfpm_g->t_write)
1624 return 0;
1625
1626 zfpm_write_on();
1627 return 0;
1628 }
1629
1630 /*
1631 * This function is called when the FPM connections is established.
1632 * Iterate over all the RMAC entries for the given L3VNI
1633 * and enqueue the RMAC for FPM processing.
1634 */
1635 static void zfpm_trigger_rmac_update_wrapper(struct hash_bucket *bucket,
1636 void *args)
1637 {
1638 zebra_mac_t *zrmac = (zebra_mac_t *)bucket->data;
1639 zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)args;
1640
1641 zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added");
1642 }
1643
1644 /*
1645 * This function is called when the FPM connections is established.
1646 * This function iterates over all the L3VNIs to trigger
1647 * FPM updates for RMACs currently available.
1648 */
1649 static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args)
1650 {
1651 zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)bucket->data;
1652
1653 hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper,
1654 (void *)zl3vni);
1655 }
1656
1657 /*
1658 * struct zfpm_statsimer_cb
1659 */
1660 static int zfpm_stats_timer_cb(struct thread *t)
1661 {
1662 zfpm_g->t_stats = NULL;
1663
1664 /*
1665 * Remember the stats collected in the last interval for display
1666 * purposes.
1667 */
1668 zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats);
1669
1670 /*
1671 * Add the current set of stats into the cumulative statistics.
1672 */
1673 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1674 &zfpm_g->cumulative_stats);
1675
1676 /*
1677 * Start collecting stats afresh over the next interval.
1678 */
1679 zfpm_stats_reset(&zfpm_g->stats);
1680
1681 zfpm_start_stats_timer();
1682
1683 return 0;
1684 }
1685
1686 /*
1687 * zfpm_stop_stats_timer
1688 */
1689 static void zfpm_stop_stats_timer(void)
1690 {
1691 if (!zfpm_g->t_stats)
1692 return;
1693
1694 zfpm_debug("Stopping existing stats timer");
1695 thread_cancel(&zfpm_g->t_stats);
1696 }
1697
1698 /*
1699 * zfpm_start_stats_timer
1700 */
1701 void zfpm_start_stats_timer(void)
1702 {
1703 assert(!zfpm_g->t_stats);
1704
1705 thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0,
1706 ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats);
1707 }
1708
/*
 * Helper macro for zfpm_show_stats() below.
 *
 * Prints one row: the counter's name, its value in the local variable
 * 'total_stats' and its value in the last-interval stats. It expects
 * 'vty' and 'total_stats' to be in scope at the point of use.
 */
#define ZFPM_SHOW_STAT(counter)                                                \
	do {                                                                   \
		vty_out(vty, "%-40s %10lu %16lu\n", #counter,                  \
			total_stats.counter,                                   \
			zfpm_g->last_ivl_stats.counter);                       \
	} while (0)
1717
1718 /*
1719 * zfpm_show_stats
1720 */
1721 static void zfpm_show_stats(struct vty *vty)
1722 {
1723 struct zfpm_stats total_stats;
1724 time_t elapsed;
1725
1726 vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total",
1727 ZFPM_STATS_IVL_SECS);
1728
1729 /*
1730 * Compute the total stats up to this instant.
1731 */
1732 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1733 &total_stats);
1734
1735 ZFPM_SHOW_STAT(connect_calls);
1736 ZFPM_SHOW_STAT(connect_no_sock);
1737 ZFPM_SHOW_STAT(read_cb_calls);
1738 ZFPM_SHOW_STAT(write_cb_calls);
1739 ZFPM_SHOW_STAT(write_calls);
1740 ZFPM_SHOW_STAT(partial_writes);
1741 ZFPM_SHOW_STAT(max_writes_hit);
1742 ZFPM_SHOW_STAT(t_write_yields);
1743 ZFPM_SHOW_STAT(nop_deletes_skipped);
1744 ZFPM_SHOW_STAT(route_adds);
1745 ZFPM_SHOW_STAT(route_dels);
1746 ZFPM_SHOW_STAT(updates_triggered);
1747 ZFPM_SHOW_STAT(redundant_triggers);
1748 ZFPM_SHOW_STAT(dests_del_after_update);
1749 ZFPM_SHOW_STAT(t_conn_down_starts);
1750 ZFPM_SHOW_STAT(t_conn_down_dests_processed);
1751 ZFPM_SHOW_STAT(t_conn_down_yields);
1752 ZFPM_SHOW_STAT(t_conn_down_finishes);
1753 ZFPM_SHOW_STAT(t_conn_up_starts);
1754 ZFPM_SHOW_STAT(t_conn_up_dests_processed);
1755 ZFPM_SHOW_STAT(t_conn_up_yields);
1756 ZFPM_SHOW_STAT(t_conn_up_aborts);
1757 ZFPM_SHOW_STAT(t_conn_up_finishes);
1758
1759 if (!zfpm_g->last_stats_clear_time)
1760 return;
1761
1762 elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time);
1763
1764 vty_out(vty, "\nStats were cleared %lu seconds ago\n",
1765 (unsigned long)elapsed);
1766 }
1767
1768 /*
1769 * zfpm_clear_stats
1770 */
1771 static void zfpm_clear_stats(struct vty *vty)
1772 {
1773 if (!zfpm_is_enabled()) {
1774 vty_out(vty, "The FPM module is not enabled...\n");
1775 return;
1776 }
1777
1778 zfpm_stats_reset(&zfpm_g->stats);
1779 zfpm_stats_reset(&zfpm_g->last_ivl_stats);
1780 zfpm_stats_reset(&zfpm_g->cumulative_stats);
1781
1782 zfpm_stop_stats_timer();
1783 zfpm_start_stats_timer();
1784
1785 zfpm_g->last_stats_clear_time = monotime(NULL);
1786
1787 vty_out(vty, "Cleared FPM stats\n");
1788 }
1789
1790 /*
1791 * show_zebra_fpm_stats
1792 */
1793 DEFUN (show_zebra_fpm_stats,
1794 show_zebra_fpm_stats_cmd,
1795 "show zebra fpm stats",
1796 SHOW_STR
1797 ZEBRA_STR
1798 "Forwarding Path Manager information\n"
1799 "Statistics\n")
1800 {
1801 zfpm_show_stats(vty);
1802 return CMD_SUCCESS;
1803 }
1804
1805 /*
1806 * clear_zebra_fpm_stats
1807 */
1808 DEFUN (clear_zebra_fpm_stats,
1809 clear_zebra_fpm_stats_cmd,
1810 "clear zebra fpm stats",
1811 CLEAR_STR
1812 ZEBRA_STR
1813 "Clear Forwarding Path Manager information\n"
1814 "Statistics\n")
1815 {
1816 zfpm_clear_stats(vty);
1817 return CMD_SUCCESS;
1818 }
1819
1820 /*
1821 * update fpm connection information
1822 */
1823 DEFUN ( fpm_remote_ip,
1824 fpm_remote_ip_cmd,
1825 "fpm connection ip A.B.C.D port (1-65535)",
1826 "fpm connection remote ip and port\n"
1827 "Remote fpm server ip A.B.C.D\n"
1828 "Enter ip ")
1829 {
1830
1831 in_addr_t fpm_server;
1832 uint32_t port_no;
1833
1834 fpm_server = inet_addr(argv[3]->arg);
1835 if (fpm_server == INADDR_NONE)
1836 return CMD_ERR_INCOMPLETE;
1837
1838 port_no = atoi(argv[5]->arg);
1839 if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT)
1840 return CMD_ERR_INCOMPLETE;
1841
1842 zfpm_g->fpm_server = fpm_server;
1843 zfpm_g->fpm_port = port_no;
1844
1845
1846 return CMD_SUCCESS;
1847 }
1848
1849 DEFUN ( no_fpm_remote_ip,
1850 no_fpm_remote_ip_cmd,
1851 "no fpm connection ip A.B.C.D port (1-65535)",
1852 "fpm connection remote ip and port\n"
1853 "Connection\n"
1854 "Remote fpm server ip A.B.C.D\n"
1855 "Enter ip ")
1856 {
1857 if (zfpm_g->fpm_server != inet_addr(argv[4]->arg)
1858 || zfpm_g->fpm_port != atoi(argv[6]->arg))
1859 return CMD_ERR_NO_MATCH;
1860
1861 zfpm_g->fpm_server = FPM_DEFAULT_IP;
1862 zfpm_g->fpm_port = FPM_DEFAULT_PORT;
1863
1864 return CMD_SUCCESS;
1865 }
1866
1867 /*
1868 * zfpm_init_message_format
1869 */
1870 static inline void zfpm_init_message_format(const char *format)
1871 {
1872 int have_netlink, have_protobuf;
1873
1874 #ifdef HAVE_NETLINK
1875 have_netlink = 1;
1876 #else
1877 have_netlink = 0;
1878 #endif
1879
1880 #ifdef HAVE_PROTOBUF
1881 have_protobuf = 1;
1882 #else
1883 have_protobuf = 0;
1884 #endif
1885
1886 zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE;
1887
1888 if (!format) {
1889 if (have_netlink) {
1890 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1891 } else if (have_protobuf) {
1892 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1893 }
1894 return;
1895 }
1896
1897 if (!strcmp("netlink", format)) {
1898 if (!have_netlink) {
1899 flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE,
1900 "FPM netlink message format is not available");
1901 return;
1902 }
1903 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1904 return;
1905 }
1906
1907 if (!strcmp("protobuf", format)) {
1908 if (!have_protobuf) {
1909 flog_err(
1910 EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1911 "FPM protobuf message format is not available");
1912 return;
1913 }
1914 flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1915 "FPM protobuf message format is deprecated and scheduled to be removed. Please convert to using netlink format or contact dev@lists.frrouting.org with your use case.");
1916 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1917 return;
1918 }
1919
1920 flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'",
1921 format);
1922 }
1923
1924 /**
1925 * fpm_remote_srv_write
1926 *
1927 * Module to write remote fpm connection
1928 *
1929 * Returns ZERO on success.
1930 */
1931
1932 static int fpm_remote_srv_write(struct vty *vty)
1933 {
1934 struct in_addr in;
1935
1936 in.s_addr = zfpm_g->fpm_server;
1937
1938 if ((zfpm_g->fpm_server != FPM_DEFAULT_IP
1939 && zfpm_g->fpm_server != INADDR_ANY)
1940 || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0))
1941 vty_out(vty, "fpm connection ip %pI4 port %d\n", &in,
1942 zfpm_g->fpm_port);
1943
1944 return 0;
1945 }
1946
1947
1948 static int fpm_remote_srv_write(struct vty *vty);
1949 /* Zebra node */
1950 static struct cmd_node zebra_node = {
1951 .name = "zebra",
1952 .node = ZEBRA_NODE,
1953 .parent_node = CONFIG_NODE,
1954 .prompt = "",
1955 .config_write = fpm_remote_srv_write,
1956 };
1957
1958
1959 /**
1960 * zfpm_init
1961 *
1962 * One-time initialization of the Zebra FPM module.
1963 *
1964 * @param[in] port port at which FPM is running.
1965 * @param[in] enable true if the zebra FPM module should be enabled
1966 * @param[in] format to use to talk to the FPM. Can be 'netink' or 'protobuf'.
1967 *
1968 * Returns true on success.
1969 */
1970 static int zfpm_init(struct thread_master *master)
1971 {
1972 int enable = 1;
1973 uint16_t port = 0;
1974 const char *format = THIS_MODULE->load_args;
1975
1976 memset(zfpm_g, 0, sizeof(*zfpm_g));
1977 zfpm_g->master = master;
1978 TAILQ_INIT(&zfpm_g->dest_q);
1979 TAILQ_INIT(&zfpm_g->mac_q);
1980
1981 /* Create hash table for fpm_mac_info_t enties */
1982 zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake,
1983 zfpm_mac_info_cmp,
1984 "FPM MAC info hash table");
1985
1986 zfpm_g->sock = -1;
1987 zfpm_g->state = ZFPM_STATE_IDLE;
1988
1989 zfpm_stats_init(&zfpm_g->stats);
1990 zfpm_stats_init(&zfpm_g->last_ivl_stats);
1991 zfpm_stats_init(&zfpm_g->cumulative_stats);
1992
1993 install_node(&zebra_node);
1994 install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd);
1995 install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
1996 install_element(CONFIG_NODE, &fpm_remote_ip_cmd);
1997 install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd);
1998
1999 zfpm_init_message_format(format);
2000
2001 /*
2002 * Disable FPM interface if no suitable format is available.
2003 */
2004 if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE)
2005 enable = 0;
2006
2007 zfpm_g->enabled = enable;
2008
2009 if (!zfpm_g->fpm_server)
2010 zfpm_g->fpm_server = FPM_DEFAULT_IP;
2011
2012 if (!port)
2013 port = FPM_DEFAULT_PORT;
2014
2015 zfpm_g->fpm_port = port;
2016
2017 zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE);
2018 zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE);
2019
2020 zfpm_start_stats_timer();
2021 zfpm_start_connect_timer("initialized");
2022 return 0;
2023 }
2024
2025 static int zfpm_fini(void)
2026 {
2027 zfpm_write_off();
2028 zfpm_read_off();
2029 zfpm_connect_off();
2030
2031 zfpm_stop_stats_timer();
2032
2033 hook_unregister(rib_update, zfpm_trigger_update);
2034 return 0;
2035 }
2036
2037 static int zebra_fpm_module_init(void)
2038 {
2039 hook_register(rib_update, zfpm_trigger_update);
2040 hook_register(zebra_rmac_update, zfpm_trigger_rmac_update);
2041 hook_register(frr_late_init, zfpm_init);
2042 hook_register(frr_early_fini, zfpm_fini);
2043 return 0;
2044 }
2045
2046 FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION,
2047 .description = "zebra FPM (Forwarding Plane Manager) module",
2048 .init = zebra_fpm_module_init,
2049 );