]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm.c
Merge pull request #4529 from donaldsharp/vrf_conversions
[mirror_frr.git] / zebra / zebra_fpm.c
1 /*
2 * Main implementation file for interface to Forwarding Plane Manager.
3 *
4 * Copyright (C) 2012 by Open Source Routing.
5 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
6 *
7 * This file is part of GNU Zebra.
8 *
9 * GNU Zebra is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2, or (at your option) any
12 * later version.
13 *
14 * GNU Zebra is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; see the file COPYING; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <zebra.h>
25
26 #include "log.h"
27 #include "libfrr.h"
28 #include "stream.h"
29 #include "thread.h"
30 #include "network.h"
31 #include "command.h"
32 #include "version.h"
33 #include "jhash.h"
34
35 #include "zebra/rib.h"
36 #include "zebra/zserv.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/zebra_errors.h"
40 #include "zebra/zebra_memory.h"
41
42 #include "fpm/fpm.h"
43 #include "zebra_fpm_private.h"
44 #include "zebra/zebra_router.h"
45 #include "zebra_vxlan_private.h"
46
47 DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");
48
49 /*
50 * Interval at which we attempt to connect to the FPM.
51 */
52 #define ZFPM_CONNECT_RETRY_IVL 5
53
54 /*
55 * Sizes of outgoing and incoming stream buffers for writing/reading
56 * FPM messages.
57 */
58 #define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
59 #define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)
60
61 /*
62 * The maximum number of times the FPM socket write callback can call
63 * 'write' before it yields.
64 */
65 #define ZFPM_MAX_WRITES_PER_RUN 10
66
67 /*
68 * Interval over which we collect statistics.
69 */
70 #define ZFPM_STATS_IVL_SECS 10
71 #define FPM_MAX_MAC_MSG_LEN 512
72
73 static void zfpm_iterate_rmac_table(struct hash_backet *backet, void *args);
74
75 /*
76 * Structure that holds state for iterating over all route_node
77 * structures that are candidates for being communicated to the FPM.
78 */
typedef struct zfpm_rnodes_iter_t_ {
	/* Outer iterator: walks all RIB tables. */
	rib_tables_iter_t tables_iter;
	/* Inner iterator: walks route_nodes within the current table. */
	route_table_iter_t iter;
} zfpm_rnodes_iter_t;
83
84 /*
85 * Statistics.
86 */
typedef struct zfpm_stats_t_ {
	/* connect() attempts (bumped in zfpm_connect_cb). */
	unsigned long connect_calls;
	/* socket() failures when trying to connect. */
	unsigned long connect_no_sock;

	/* Invocations of zfpm_read_cb. */
	unsigned long read_cb_calls;

	/* Invocations of zfpm_write_cb / individual write() calls. */
	unsigned long write_cb_calls;
	unsigned long write_calls;
	/* write() wrote fewer bytes than requested. */
	unsigned long partial_writes;
	/* Hit ZFPM_MAX_WRITES_PER_RUN in one callback run. */
	unsigned long max_writes_hit;
	/* Write callback yielded due to time-slice expiry. */
	unsigned long t_write_yields;

	/* Deletes skipped because nothing was ever sent to the FPM. */
	unsigned long nop_deletes_skipped;
	unsigned long route_adds;
	unsigned long route_dels;

	/* Maintained by zfpm_trigger_update (defined elsewhere). */
	unsigned long updates_triggered;
	unsigned long redundant_triggers;

	/* Dests garbage-collected right after an update was emitted. */
	unsigned long dests_del_after_update;

	/* Counters for the connection-down cleanup thread. */
	unsigned long t_conn_down_starts;
	unsigned long t_conn_down_dests_processed;
	unsigned long t_conn_down_yields;
	unsigned long t_conn_down_finishes;

	/* Counters for the connection-up (full dump) thread. */
	unsigned long t_conn_up_starts;
	unsigned long t_conn_up_dests_processed;
	unsigned long t_conn_up_yields;
	unsigned long t_conn_up_aborts;
	unsigned long t_conn_up_finishes;

} zfpm_stats_t;
120
121 /*
122 * States for the FPM state machine.
123 */
typedef enum {

	/*
	 * In this state we are not yet ready to connect to the FPM. This
	 * can happen when this module is disabled, or if we're cleaning up
	 * after a connection has gone down.
	 */
	ZFPM_STATE_IDLE,

	/*
	 * Ready to talk to the FPM and periodically trying to connect to
	 * it.
	 */
	ZFPM_STATE_ACTIVE,

	/*
	 * In the middle of bringing up a TCP connection. Specifically,
	 * waiting for a connect() call to complete asynchronously.
	 */
	ZFPM_STATE_CONNECTING,

	/*
	 * TCP connection to the FPM is up.
	 */
	ZFPM_STATE_ESTABLISHED

	/* Legal transitions between these states are asserted in
	 * zfpm_set_state(). */
} zfpm_state_t;
151
152 /*
153 * Message format to be used to communicate with the FPM.
154 */
typedef enum {
	/* No encoding selected; nothing will be sent. */
	ZFPM_MSG_FORMAT_NONE,
	/* Encode updates as netlink messages (requires HAVE_NETLINK). */
	ZFPM_MSG_FORMAT_NETLINK,
	/* Encode updates as protobuf messages (requires HAVE_PROTOBUF). */
	ZFPM_MSG_FORMAT_PROTOBUF,
} zfpm_msg_format_e;
160 /*
161 * Globals.
162 */
typedef struct zfpm_glob_t_ {

	/*
	 * True if the FPM module has been enabled.
	 */
	int enabled;

	/*
	 * Message format to be used to communicate with the fpm.
	 */
	zfpm_msg_format_e message_format;

	/* Thread master used to schedule all FPM timers and I/O threads. */
	struct thread_master *master;

	/* Current state of the FPM state machine. */
	zfpm_state_t state;

	/* Address of the FPM server; 0 means connect to loopback. Stored
	 * as assigned directly to sin_addr.s_addr — presumably network
	 * byte order (NOTE(review): confirm against the CLI setter). */
	in_addr_t fpm_server;
	/*
	 * Port on which the FPM is running.
	 */
	int fpm_port;

	/*
	 * List of rib_dest_t structures to be processed
	 */
	TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;

	/*
	 * List of fpm_mac_info structures to be processed
	 */
	TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;

	/*
	 * Hash table of fpm_mac_info_t entries
	 *
	 * While adding fpm_mac_info_t for a MAC to the mac_q,
	 * it is possible that another fpm_mac_info_t node for the this MAC
	 * is already present in the queue.
	 * This is possible in the case of consecutive add->delete operations.
	 * To avoid such duplicate insertions in the mac_q,
	 * define a hash table for fpm_mac_info_t which can be looked up
	 * to see if an fpm_mac_info_t node for a MAC is already present
	 * in the mac_q.
	 */
	struct hash *fpm_mac_info_table;

	/*
	 * Stream socket to the FPM.
	 */
	int sock;

	/*
	 * Buffers for messages to/from the FPM.
	 */
	struct stream *obuf;
	struct stream *ibuf;

	/*
	 * Threads for I/O.
	 */
	struct thread *t_connect;
	struct thread *t_write;
	struct thread *t_read;

	/*
	 * Thread to clean up after the TCP connection to the FPM goes down
	 * and the state that belongs to it.
	 */
	struct thread *t_conn_down;

	struct {
		zfpm_rnodes_iter_t iter;
	} t_conn_down_state;

	/*
	 * Thread to take actions once the TCP conn to the FPM comes up, and
	 * the state that belongs to it.
	 */
	struct thread *t_conn_up;

	struct {
		zfpm_rnodes_iter_t iter;
	} t_conn_up_state;

	/* Lifetime connect() attempt count and timestamp of the last
	 * attempt; used to pace reconnects (zfpm_calc_connect_delay). */
	unsigned long connect_calls;
	time_t last_connect_call_time;

	/*
	 * Stats from the start of the current statistics interval up to
	 * now. These are the counters we typically update in the code.
	 */
	zfpm_stats_t stats;

	/*
	 * Statistics that were gathered in the last collection interval.
	 */
	zfpm_stats_t last_ivl_stats;

	/*
	 * Cumulative stats from the last clear to the start of the current
	 * statistics interval.
	 */
	zfpm_stats_t cumulative_stats;

	/*
	 * Stats interval timer.
	 */
	struct thread *t_stats;

	/*
	 * If non-zero, the last time when statistics were cleared.
	 */
	time_t last_stats_clear_time;

} zfpm_glob_t;
278
279 static zfpm_glob_t zfpm_glob_space;
280 static zfpm_glob_t *zfpm_g = &zfpm_glob_space;
281
282 static int zfpm_trigger_update(struct route_node *rn, const char *reason);
283
284 static int zfpm_read_cb(struct thread *thread);
285 static int zfpm_write_cb(struct thread *thread);
286
287 static void zfpm_set_state(zfpm_state_t state, const char *reason);
288 static void zfpm_start_connect_timer(const char *reason);
289 static void zfpm_start_stats_timer(void);
290 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);
291
/*
 * zfpm_thread_should_yield
 *
 * Thin wrapper over the lib thread API; returns non-zero when the
 * running callback has exhausted its time slice and should reschedule
 * itself instead of continuing.
 */
static inline int zfpm_thread_should_yield(struct thread *t)
{
	return thread_should_yield(t);
}
299
300 /*
301 * zfpm_state_to_str
302 */
303 static const char *zfpm_state_to_str(zfpm_state_t state)
304 {
305 switch (state) {
306
307 case ZFPM_STATE_IDLE:
308 return "idle";
309
310 case ZFPM_STATE_ACTIVE:
311 return "active";
312
313 case ZFPM_STATE_CONNECTING:
314 return "connecting";
315
316 case ZFPM_STATE_ESTABLISHED:
317 return "established";
318
319 default:
320 return "unknown";
321 }
322 }
323
/*
 * zfpm_get_elapsed_time
 *
 * Returns the time elapsed (in seconds) since the given time.
 * The monotonic clock should never run backwards; if it appears to,
 * we assert and (in non-assert builds) report zero elapsed time.
 */
static time_t zfpm_get_elapsed_time(time_t reference)
{
	time_t now = monotime(NULL);

	if (now < reference) {
		assert(0);
		return 0;
	}

	return now - reference;
}
342
/*
 * zfpm_rnodes_iter_init
 *
 * Prepare an iterator that will walk every route_node in every RIB
 * table.
 */
static inline void zfpm_rnodes_iter_init(zfpm_rnodes_iter_t *iter)
{
	memset(iter, 0, sizeof(*iter));
	rib_tables_iter_init(&iter->tables_iter);

	/*
	 * This is a hack, but it makes implementing 'next' easier by
	 * ensuring that route_table_iter_next() will return NULL the first
	 * time we call it.
	 */
	route_table_iter_init(&iter->iter, NULL);
	route_table_iter_cleanup(&iter->iter);
}
359
360 /*
361 * zfpm_rnodes_iter_next
362 */
363 static inline struct route_node *zfpm_rnodes_iter_next(zfpm_rnodes_iter_t *iter)
364 {
365 struct route_node *rn;
366 struct route_table *table;
367
368 while (1) {
369 rn = route_table_iter_next(&iter->iter);
370 if (rn)
371 return rn;
372
373 /*
374 * We've made our way through this table, go to the next one.
375 */
376 route_table_iter_cleanup(&iter->iter);
377
378 table = rib_tables_iter_next(&iter->tables_iter);
379
380 if (!table)
381 return NULL;
382
383 route_table_iter_init(&iter->iter, table);
384 }
385
386 return NULL;
387 }
388
/*
 * zfpm_rnodes_iter_pause
 *
 * Pause the inner table iterator so the walk can be safely resumed
 * later (used when a thread callback yields).
 */
static inline void zfpm_rnodes_iter_pause(zfpm_rnodes_iter_t *iter)
{
	route_table_iter_pause(&iter->iter);
}
396
/*
 * zfpm_rnodes_iter_cleanup
 *
 * Release both the inner (per-table) and outer (tables) iterators.
 */
static inline void zfpm_rnodes_iter_cleanup(zfpm_rnodes_iter_t *iter)
{
	route_table_iter_cleanup(&iter->iter);
	rib_tables_iter_cleanup(&iter->tables_iter);
}
405
/*
 * zfpm_stats_init
 *
 * Initialize a statistics block (all counters zeroed).
 */
static inline void zfpm_stats_init(zfpm_stats_t *stats)
{
	memset(stats, 0, sizeof(*stats));
}
415
/*
 * zfpm_stats_reset
 *
 * Reset all counters in a statistics block back to zero.
 */
static inline void zfpm_stats_reset(zfpm_stats_t *stats)
{
	zfpm_stats_init(stats);
}
423
424 /*
425 * zfpm_stats_copy
426 */
427 static inline void zfpm_stats_copy(const zfpm_stats_t *src, zfpm_stats_t *dest)
428 {
429 memcpy(dest, src, sizeof(*dest));
430 }
431
432 /*
433 * zfpm_stats_compose
434 *
435 * Total up the statistics in two stats structures ('s1 and 's2') and
436 * return the result in the third argument, 'result'. Note that the
437 * pointer 'result' may be the same as 's1' or 's2'.
438 *
439 * For simplicity, the implementation below assumes that the stats
440 * structure is composed entirely of counters. This can easily be
441 * changed when necessary.
442 */
443 static void zfpm_stats_compose(const zfpm_stats_t *s1, const zfpm_stats_t *s2,
444 zfpm_stats_t *result)
445 {
446 const unsigned long *p1, *p2;
447 unsigned long *result_p;
448 int i, num_counters;
449
450 p1 = (const unsigned long *)s1;
451 p2 = (const unsigned long *)s2;
452 result_p = (unsigned long *)result;
453
454 num_counters = (sizeof(zfpm_stats_t) / sizeof(unsigned long));
455
456 for (i = 0; i < num_counters; i++) {
457 result_p[i] = p1[i] + p2[i];
458 }
459 }
460
/*
 * zfpm_read_on
 *
 * Arm the read thread on the FPM socket. Must not already be armed,
 * and the socket must be valid.
 */
static inline void zfpm_read_on(void)
{
	assert(!zfpm_g->t_read);
	assert(zfpm_g->sock >= 0);

	thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
			&zfpm_g->t_read);
}
472
/*
 * zfpm_write_on
 *
 * Arm the write thread on the FPM socket. Must not already be armed,
 * and the socket must be valid.
 */
static inline void zfpm_write_on(void)
{
	assert(!zfpm_g->t_write);
	assert(zfpm_g->sock >= 0);

	thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
			 &zfpm_g->t_write);
}
484
/*
 * zfpm_read_off
 *
 * Disarm the read thread, if armed.
 */
static inline void zfpm_read_off(void)
{
	THREAD_READ_OFF(zfpm_g->t_read);
}
492
/*
 * zfpm_write_off
 *
 * Disarm the write thread, if armed.
 */
static inline void zfpm_write_off(void)
{
	THREAD_WRITE_OFF(zfpm_g->t_write);
}
500
/*
 * zfpm_conn_up_thread_cb
 *
 * Callback for actions to be taken when the connection to the FPM
 * comes up: enqueues FPM updates for all RMAC entries and all
 * route_nodes, rescheduling itself whenever its time slice runs out.
 */
static int zfpm_conn_up_thread_cb(struct thread *thread)
{
	struct route_node *rnode;
	zfpm_rnodes_iter_t *iter;
	rib_dest_t *dest;

	zfpm_g->t_conn_up = NULL;

	iter = &zfpm_g->t_conn_up_state.iter;

	/* Bail out if the connection went down while we were scheduled. */
	if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
		zfpm_debug(
			"Connection not up anymore, conn_up thread aborting");
		zfpm_g->stats.t_conn_up_aborts++;
		goto done;
	}

	/* Enqueue FPM updates for all the RMAC entries */
	hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table, NULL);

	while ((rnode = zfpm_rnodes_iter_next(iter))) {
		dest = rib_dest_from_rnode(rnode);

		if (dest) {
			zfpm_g->stats.t_conn_up_dests_processed++;
			zfpm_trigger_update(rnode, NULL);
		}

		/*
		 * Yield if need be: pause the iterator and reschedule
		 * ourselves with a zero-msec timer.
		 */
		if (!zfpm_thread_should_yield(thread))
			continue;

		zfpm_g->stats.t_conn_up_yields++;
		zfpm_rnodes_iter_pause(iter);
		zfpm_g->t_conn_up = NULL;
		thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
				      NULL, 0, &zfpm_g->t_conn_up);
		return 0;
	}

	zfpm_g->stats.t_conn_up_finishes++;

done:
	zfpm_rnodes_iter_cleanup(iter);
	return 0;
}
555
/*
 * zfpm_connection_up
 *
 * Called when the connection to the FPM comes up: arms read/write
 * threads, moves the state machine to ESTABLISHED, and kicks off the
 * thread that pushes all existing routes to the FPM.
 */
static void zfpm_connection_up(const char *detail)
{
	assert(zfpm_g->sock >= 0);
	zfpm_read_on();
	zfpm_write_on();
	zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);

	/*
	 * Start thread to push existing routes to the FPM.
	 */
	assert(!zfpm_g->t_conn_up);

	zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);

	zfpm_debug("Starting conn_up thread");
	zfpm_g->t_conn_up = NULL;
	thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
			      &zfpm_g->t_conn_up);
	zfpm_g->stats.t_conn_up_starts++;
}
581
/*
 * zfpm_connect_check
 *
 * Check if an asynchronous connect() to the FPM is complete. On
 * success transitions to ESTABLISHED; on failure closes the socket
 * and restarts the connect timer.
 */
static void zfpm_connect_check(void)
{
	int status;
	socklen_t slen;
	int ret;

	/* Disarm I/O threads; they are re-armed as appropriate below. */
	zfpm_read_off();
	zfpm_write_off();

	/* SO_ERROR reports the outcome of the pending connect(). */
	slen = sizeof(status);
	ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
			 &slen);

	if (ret >= 0 && status == 0) {
		zfpm_connection_up("async connect complete");
		return;
	}

	/*
	 * getsockopt() failed or indicated an error on the socket.
	 */
	close(zfpm_g->sock);
	zfpm_g->sock = -1;

	zfpm_start_connect_timer("getsockopt() after async connect failed");
	return;
}
614
/*
 * zfpm_conn_down_thread_cb
 *
 * Callback that is invoked to clean up state after the TCP connection
 * to the FPM goes down: frees all pending MAC updates, clears FPM
 * flags on every dest, and finally restarts the connect timer. Yields
 * and reschedules itself when its time slice runs out.
 */
static int zfpm_conn_down_thread_cb(struct thread *thread)
{
	struct route_node *rnode;
	zfpm_rnodes_iter_t *iter;
	rib_dest_t *dest;
	struct fpm_mac_info_t *mac = NULL;

	assert(zfpm_g->state == ZFPM_STATE_IDLE);

	/*
	 * Delink and free all fpm_mac_info_t nodes
	 * in the mac_q and fpm_mac_info_hash
	 */
	while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
		zfpm_mac_info_del(mac);

	zfpm_g->t_conn_down = NULL;

	iter = &zfpm_g->t_conn_down_state.iter;

	while ((rnode = zfpm_rnodes_iter_next(iter))) {
		dest = rib_dest_from_rnode(rnode);

		if (dest) {
			/* Drop the dest from the update queue and clear
			 * both FPM bookkeeping flags. */
			if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
				TAILQ_REMOVE(&zfpm_g->dest_q, dest,
					     fpm_q_entries);
			}

			UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
			UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);

			zfpm_g->stats.t_conn_down_dests_processed++;

			/*
			 * Check if the dest should be deleted.
			 */
			rib_gc_dest(rnode);
		}

		/*
		 * Yield if need be.
		 */
		if (!zfpm_thread_should_yield(thread))
			continue;

		zfpm_g->stats.t_conn_down_yields++;
		zfpm_rnodes_iter_pause(iter);
		zfpm_g->t_conn_down = NULL;
		thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
				      NULL, 0, &zfpm_g->t_conn_down);
		return 0;
	}

	zfpm_g->stats.t_conn_down_finishes++;
	zfpm_rnodes_iter_cleanup(iter);

	/*
	 * Start the process of connecting to the FPM again.
	 */
	zfpm_start_connect_timer("cleanup complete");
	return 0;
}
684
/*
 * zfpm_connection_down
 *
 * Called when the connection to the FPM has gone down: tears down the
 * socket and I/O threads, starts the cleanup thread, and moves the
 * state machine to IDLE.
 */
static void zfpm_connection_down(const char *detail)
{
	if (!detail)
		detail = "unknown";

	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);

	zlog_info("connection to the FPM has gone down: %s", detail);

	zfpm_read_off();
	zfpm_write_off();

	/* Discard any partially-read and partially-written data. */
	stream_reset(zfpm_g->ibuf);
	stream_reset(zfpm_g->obuf);

	if (zfpm_g->sock >= 0) {
		close(zfpm_g->sock);
		zfpm_g->sock = -1;
	}

	/*
	 * Start thread to clean up state after the connection goes down.
	 */
	assert(!zfpm_g->t_conn_down);
	zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
	zfpm_g->t_conn_down = NULL;
	thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
			      &zfpm_g->t_conn_down);
	zfpm_g->stats.t_conn_down_starts++;

	zfpm_set_state(ZFPM_STATE_IDLE, detail);
}
722
723 /*
724 * zfpm_read_cb
725 */
726 static int zfpm_read_cb(struct thread *thread)
727 {
728 size_t already;
729 struct stream *ibuf;
730 uint16_t msg_len;
731 fpm_msg_hdr_t *hdr;
732
733 zfpm_g->stats.read_cb_calls++;
734 zfpm_g->t_read = NULL;
735
736 /*
737 * Check if async connect is now done.
738 */
739 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
740 zfpm_connect_check();
741 return 0;
742 }
743
744 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
745 assert(zfpm_g->sock >= 0);
746
747 ibuf = zfpm_g->ibuf;
748
749 already = stream_get_endp(ibuf);
750 if (already < FPM_MSG_HDR_LEN) {
751 ssize_t nbyte;
752
753 nbyte = stream_read_try(ibuf, zfpm_g->sock,
754 FPM_MSG_HDR_LEN - already);
755 if (nbyte == 0 || nbyte == -1) {
756 if (nbyte == -1) {
757 char buffer[1024];
758
759 sprintf(buffer, "closed socket in read(%d): %s",
760 errno, safe_strerror(errno));
761 zfpm_connection_down(buffer);
762 } else
763 zfpm_connection_down("closed socket in read");
764 return 0;
765 }
766
767 if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
768 goto done;
769
770 already = FPM_MSG_HDR_LEN;
771 }
772
773 stream_set_getp(ibuf, 0);
774
775 hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);
776
777 if (!fpm_msg_hdr_ok(hdr)) {
778 zfpm_connection_down("invalid message header");
779 return 0;
780 }
781
782 msg_len = fpm_msg_len(hdr);
783
784 /*
785 * Read out the rest of the packet.
786 */
787 if (already < msg_len) {
788 ssize_t nbyte;
789
790 nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);
791
792 if (nbyte == 0 || nbyte == -1) {
793 if (nbyte == -1) {
794 char buffer[1024];
795
796 sprintf(buffer, "failed to read message(%d) %s",
797 errno, safe_strerror(errno));
798 zfpm_connection_down(buffer);
799 } else
800 zfpm_connection_down("failed to read message");
801 return 0;
802 }
803
804 if (nbyte != (ssize_t)(msg_len - already))
805 goto done;
806 }
807
808 /*
809 * Just throw it away for now.
810 */
811 stream_reset(ibuf);
812
813 done:
814 zfpm_read_on();
815 return 0;
816 }
817
818 static bool zfpm_updates_pending(void)
819 {
820 if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
821 return true;
822
823 return false;
824 }
825
826 /*
827 * zfpm_writes_pending
828 *
829 * Returns true if we may have something to write to the FPM.
830 */
831 static int zfpm_writes_pending(void)
832 {
833
834 /*
835 * Check if there is any data in the outbound buffer that has not
836 * been written to the socket yet.
837 */
838 if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
839 return 1;
840
841 /*
842 * Check if there are any updates scheduled on the outbound queues.
843 */
844 if (zfpm_updates_pending())
845 return 1;
846
847 return 0;
848 }
849
850 /*
851 * zfpm_encode_route
852 *
853 * Encode a message to the FPM with information about the given route.
854 *
855 * Returns the number of bytes written to the buffer. 0 or a negative
856 * value indicates an error.
857 */
858 static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
859 char *in_buf, size_t in_buf_len,
860 fpm_msg_type_e *msg_type)
861 {
862 size_t len;
863 #ifdef HAVE_NETLINK
864 int cmd;
865 #endif
866 len = 0;
867
868 *msg_type = FPM_MSG_TYPE_NONE;
869
870 switch (zfpm_g->message_format) {
871
872 case ZFPM_MSG_FORMAT_PROTOBUF:
873 #ifdef HAVE_PROTOBUF
874 len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
875 in_buf_len);
876 *msg_type = FPM_MSG_TYPE_PROTOBUF;
877 #endif
878 break;
879
880 case ZFPM_MSG_FORMAT_NETLINK:
881 #ifdef HAVE_NETLINK
882 *msg_type = FPM_MSG_TYPE_NETLINK;
883 cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
884 len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
885 in_buf_len);
886 assert(fpm_msg_align(len) == len);
887 *msg_type = FPM_MSG_TYPE_NETLINK;
888 #endif /* HAVE_NETLINK */
889 break;
890
891 default:
892 break;
893 }
894
895 return len;
896 }
897
/*
 * zfpm_route_for_update
 *
 * Returns the re that is to be sent to the FPM for a given dest:
 * the currently selected FIB entry, or NULL if there is none (in
 * which case the route is to be deleted from the FPM).
 */
struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
{
	return dest->selected_fib;
}
907
/*
 * Return codes for the queue-processing functions below.
 *
 * FPM_WRITE_STOP: the write buffer is full. Stop processing all the
 * queues and empty the buffer by writing its content to the socket.
 *
 * FPM_GOTO_NEXT_Q: either this queue is empty or we have processed
 * enough updates from this queue. Move on to the next queue.
 */
enum {
	FPM_WRITE_STOP = 0,
	FPM_GOTO_NEXT_Q = 1
};

/* Max updates drained from one queue before yielding to the next. */
#define FPM_QUEUE_PROCESS_LIMIT 10000
925
/*
 * zfpm_build_route_updates
 *
 * Process the dest_q queue and write FPM messages to the outbound
 * buffer. Returns FPM_WRITE_STOP when the buffer is full, or
 * FPM_GOTO_NEXT_Q when the queue is drained or the per-queue limit
 * has been reached.
 */
static int zfpm_build_route_updates(void)
{
	struct stream *s;
	rib_dest_t *dest;
	unsigned char *buf, *data, *buf_end;
	size_t msg_len;
	size_t data_len;
	fpm_msg_hdr_t *hdr;
	struct route_entry *re;
	int is_add, write_msg;
	fpm_msg_type_e msg_type;
	uint16_t q_limit;

	if (TAILQ_EMPTY(&zfpm_g->dest_q))
		return FPM_GOTO_NEXT_Q;

	s = zfpm_g->obuf;
	q_limit = FPM_QUEUE_PROCESS_LIMIT;

	do {
		/*
		 * Make sure there is enough space to write another message.
		 */
		if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
			return FPM_WRITE_STOP;

		buf = STREAM_DATA(s) + stream_get_endp(s);
		buf_end = buf + STREAM_WRITEABLE(s);

		dest = TAILQ_FIRST(&zfpm_g->dest_q);
		if (!dest)
			return FPM_GOTO_NEXT_Q;

		assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));

		hdr = (fpm_msg_hdr_t *)buf;
		hdr->version = FPM_PROTO_VERSION;

		data = fpm_msg_data(hdr);

		/* A NULL re means this update is a deletion. */
		re = zfpm_route_for_update(dest);
		is_add = re ? 1 : 0;

		write_msg = 1;

		/*
		 * If this is a route deletion, and we have not sent the
		 * route to the FPM previously, skip it.
		 */
		if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
			write_msg = 0;
			zfpm_g->stats.nop_deletes_skipped++;
		}

		if (write_msg) {
			data_len = zfpm_encode_route(dest, re, (char *)data,
						     buf_end - data, &msg_type);

			assert(data_len);
			if (data_len) {
				hdr->msg_type = msg_type;
				msg_len = fpm_data_len_to_msg_len(data_len);
				hdr->msg_len = htons(msg_len);
				stream_forward_endp(s, msg_len);

				if (is_add)
					zfpm_g->stats.route_adds++;
				else
					zfpm_g->stats.route_dels++;
			}
		}

		/*
		 * Remove the dest from the queue, and reset the flag.
		 */
		UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
		TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);

		if (is_add) {
			SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
		} else {
			UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
		}

		/*
		 * Delete the destination if necessary.
		 */
		if (rib_gc_dest(dest->rnode))
			zfpm_g->stats.dests_del_after_update++;

		q_limit--;
		if (q_limit == 0) {
			/*
			 * We have processed enough updates in this queue.
			 * Now yield for other queues.
			 */
			return FPM_GOTO_NEXT_Q;
		}
	} while (true);
}
1032
/*
 * zfpm_encode_mac
 *
 * Encode a message to FPM with information about the given MAC.
 *
 * Returns the number of bytes written to the buffer; 0 if the
 * configured message format does not support MAC updates (only
 * netlink encoding is implemented).
 */
static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
				  size_t in_buf_len, fpm_msg_type_e *msg_type)
{
	size_t len = 0;

	*msg_type = FPM_MSG_TYPE_NONE;

	switch (zfpm_g->message_format) {

	case ZFPM_MSG_FORMAT_NONE:
		break;
	case ZFPM_MSG_FORMAT_NETLINK:
#ifdef HAVE_NETLINK
		len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
		assert(fpm_msg_align(len) == len);
		*msg_type = FPM_MSG_TYPE_NETLINK;
#endif /* HAVE_NETLINK */
		break;
	case ZFPM_MSG_FORMAT_PROTOBUF:
		/* MAC updates are not implemented for protobuf. */
		break;
	}
	return len;
}
1063
/*
 * zfpm_build_mac_updates
 *
 * Process the mac_q queue and write FPM MAC messages to the outbound
 * buffer. Returns FPM_WRITE_STOP when the buffer is full, or
 * FPM_GOTO_NEXT_Q when the queue is drained or the per-queue limit
 * has been reached.
 */
static int zfpm_build_mac_updates(void)
{
	struct stream *s;
	struct fpm_mac_info_t *mac;
	unsigned char *buf, *data, *buf_end;
	fpm_msg_hdr_t *hdr;
	size_t data_len, msg_len;
	fpm_msg_type_e msg_type;
	uint16_t q_limit;

	if (TAILQ_EMPTY(&zfpm_g->mac_q))
		return FPM_GOTO_NEXT_Q;

	s = zfpm_g->obuf;
	q_limit = FPM_QUEUE_PROCESS_LIMIT;

	do {
		/* Make sure there is enough space to write another message. */
		if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
			return FPM_WRITE_STOP;

		buf = STREAM_DATA(s) + stream_get_endp(s);
		buf_end = buf + STREAM_WRITEABLE(s);

		mac = TAILQ_FIRST(&zfpm_g->mac_q);
		if (!mac)
			return FPM_GOTO_NEXT_Q;

		/* Check for no-op: entry no longer needs an FPM update. */
		if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
			zfpm_g->stats.nop_deletes_skipped++;
			zfpm_mac_info_del(mac);
			continue;
		}

		hdr = (fpm_msg_hdr_t *)buf;
		hdr->version = FPM_PROTO_VERSION;

		data = fpm_msg_data(hdr);
		data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
					   &msg_type);
		assert(data_len);

		hdr->msg_type = msg_type;
		msg_len = fpm_data_len_to_msg_len(data_len);
		hdr->msg_len = htons(msg_len);
		stream_forward_endp(s, msg_len);

		/* Remove the MAC from the queue, and delete it. */
		zfpm_mac_info_del(mac);

		q_limit--;
		if (q_limit == 0) {
			/*
			 * We have processed enough updates in this queue.
			 * Now yield for other queues.
			 */
			return FPM_GOTO_NEXT_Q;
		}
	} while (1);
}
1125
/*
 * zfpm_build_updates
 *
 * Process the outgoing queues and write messages to the outbound
 * buffer. MAC updates are drained before route updates; processing
 * stops when the buffer fills or no updates remain.
 */
static void zfpm_build_updates(void)
{
	struct stream *s;

	s = zfpm_g->obuf;
	/* Caller (zfpm_write_cb) only calls us with an empty buffer. */
	assert(stream_empty(s));

	do {
		/*
		 * Stop processing the queues if zfpm_g->obuf is full
		 * or we do not have more updates to process
		 */
		if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
			break;
		if (zfpm_build_route_updates() == FPM_WRITE_STOP)
			break;
	} while (zfpm_updates_pending());
}
1150
/*
 * zfpm_write_cb
 *
 * Thread callback invoked when the FPM socket is writable. Fills the
 * output buffer from the update queues and writes it to the socket,
 * handling partial writes, bounding work per run, and re-arming
 * itself while anything remains to be written.
 */
static int zfpm_write_cb(struct thread *thread)
{
	struct stream *s;
	int num_writes;

	zfpm_g->stats.write_cb_calls++;
	zfpm_g->t_write = NULL;

	/*
	 * Check if async connect is now done.
	 */
	if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
		zfpm_connect_check();
		return 0;
	}

	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
	assert(zfpm_g->sock >= 0);

	num_writes = 0;

	do {
		int bytes_to_write, bytes_written;

		s = zfpm_g->obuf;

		/*
		 * If the stream is empty, try fill it up with data.
		 */
		if (stream_empty(s)) {
			zfpm_build_updates();
		}

		bytes_to_write = stream_get_endp(s) - stream_get_getp(s);
		if (!bytes_to_write)
			break;

		bytes_written =
			write(zfpm_g->sock, stream_pnt(s), bytes_to_write);
		zfpm_g->stats.write_calls++;
		num_writes++;

		if (bytes_written < 0) {
			/* Transient errors (EAGAIN etc.) just end this
			 * run; real errors tear down the connection. */
			if (ERRNO_IO_RETRY(errno))
				break;

			zfpm_connection_down("failed to write to socket");
			return 0;
		}

		if (bytes_written != bytes_to_write) {

			/*
			 * Partial write: advance getp past what was
			 * written and retry the rest next time.
			 */
			stream_forward_getp(s, bytes_written);
			zfpm_g->stats.partial_writes++;
			break;
		}

		/*
		 * We've written out the entire contents of the stream.
		 */
		stream_reset(s);

		/* Bound the work done in a single callback run. */
		if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) {
			zfpm_g->stats.max_writes_hit++;
			break;
		}

		if (zfpm_thread_should_yield(thread)) {
			zfpm_g->stats.t_write_yields++;
			break;
		}
	} while (1);

	if (zfpm_writes_pending())
		zfpm_write_on();

	return 0;
}
1235
1236 /*
1237 * zfpm_connect_cb
1238 */
1239 static int zfpm_connect_cb(struct thread *t)
1240 {
1241 int sock, ret;
1242 struct sockaddr_in serv;
1243
1244 zfpm_g->t_connect = NULL;
1245 assert(zfpm_g->state == ZFPM_STATE_ACTIVE);
1246
1247 sock = socket(AF_INET, SOCK_STREAM, 0);
1248 if (sock < 0) {
1249 zlog_err("Failed to create socket for connect(): %s",
1250 strerror(errno));
1251 zfpm_g->stats.connect_no_sock++;
1252 return 0;
1253 }
1254
1255 set_nonblocking(sock);
1256
1257 /* Make server socket. */
1258 memset(&serv, 0, sizeof(serv));
1259 serv.sin_family = AF_INET;
1260 serv.sin_port = htons(zfpm_g->fpm_port);
1261 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
1262 serv.sin_len = sizeof(struct sockaddr_in);
1263 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
1264 if (!zfpm_g->fpm_server)
1265 serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1266 else
1267 serv.sin_addr.s_addr = (zfpm_g->fpm_server);
1268
1269 /*
1270 * Connect to the FPM.
1271 */
1272 zfpm_g->connect_calls++;
1273 zfpm_g->stats.connect_calls++;
1274 zfpm_g->last_connect_call_time = monotime(NULL);
1275
1276 ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv));
1277 if (ret >= 0) {
1278 zfpm_g->sock = sock;
1279 zfpm_connection_up("connect succeeded");
1280 return 1;
1281 }
1282
1283 if (errno == EINPROGRESS) {
1284 zfpm_g->sock = sock;
1285 zfpm_read_on();
1286 zfpm_write_on();
1287 zfpm_set_state(ZFPM_STATE_CONNECTING,
1288 "async connect in progress");
1289 return 0;
1290 }
1291
1292 zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno));
1293 close(sock);
1294
1295 /*
1296 * Restart timer for retrying connection.
1297 */
1298 zfpm_start_connect_timer("connect() failed");
1299 return 0;
1300 }
1301
/*
 * zfpm_set_state
 *
 * Move state machine into the given state, asserting that the
 * transition (and any state the new state depends on, such as armed
 * threads or a valid socket) is legal.
 */
static void zfpm_set_state(zfpm_state_t state, const char *reason)
{
	zfpm_state_t cur_state = zfpm_g->state;

	if (!reason)
		reason = "Unknown";

	/* No-op transitions are ignored. */
	if (state == cur_state)
		return;

	zfpm_debug("beginning state transition %s -> %s. Reason: %s",
		   zfpm_state_to_str(cur_state), zfpm_state_to_str(state),
		   reason);

	switch (state) {

	case ZFPM_STATE_IDLE:
		assert(cur_state == ZFPM_STATE_ESTABLISHED);
		break;

	case ZFPM_STATE_ACTIVE:
		assert(cur_state == ZFPM_STATE_IDLE
		       || cur_state == ZFPM_STATE_CONNECTING);
		assert(zfpm_g->t_connect);
		break;

	case ZFPM_STATE_CONNECTING:
		assert(zfpm_g->sock);
		assert(cur_state == ZFPM_STATE_ACTIVE);
		assert(zfpm_g->t_read);
		assert(zfpm_g->t_write);
		break;

	case ZFPM_STATE_ESTABLISHED:
		assert(cur_state == ZFPM_STATE_ACTIVE
		       || cur_state == ZFPM_STATE_CONNECTING);
		assert(zfpm_g->sock);
		assert(zfpm_g->t_read);
		assert(zfpm_g->t_write);
		break;
	}

	zfpm_g->state = state;
}
1351
1352 /*
1353 * zfpm_calc_connect_delay
1354 *
1355 * Returns the number of seconds after which we should attempt to
1356 * reconnect to the FPM.
1357 */
1358 static long zfpm_calc_connect_delay(void)
1359 {
1360 time_t elapsed;
1361
1362 /*
1363 * Return 0 if this is our first attempt to connect.
1364 */
1365 if (zfpm_g->connect_calls == 0) {
1366 return 0;
1367 }
1368
1369 elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time);
1370
1371 if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
1372 return 0;
1373 }
1374
1375 return ZFPM_CONNECT_RETRY_IVL - elapsed;
1376 }
1377
1378 /*
1379 * zfpm_start_connect_timer
1380 */
1381 static void zfpm_start_connect_timer(const char *reason)
1382 {
1383 long delay_secs;
1384
1385 assert(!zfpm_g->t_connect);
1386 assert(zfpm_g->sock < 0);
1387
1388 assert(zfpm_g->state == ZFPM_STATE_IDLE
1389 || zfpm_g->state == ZFPM_STATE_ACTIVE
1390 || zfpm_g->state == ZFPM_STATE_CONNECTING);
1391
1392 delay_secs = zfpm_calc_connect_delay();
1393 zfpm_debug("scheduling connect in %ld seconds", delay_secs);
1394
1395 thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs,
1396 &zfpm_g->t_connect);
1397 zfpm_set_state(ZFPM_STATE_ACTIVE, reason);
1398 }
1399
/*
 * zfpm_is_enabled
 *
 * Returns true (non-zero) if the zebra FPM module has been enabled.
 * The flag is set once at init time and never changes afterwards.
 */
static inline int zfpm_is_enabled(void)
{
	return zfpm_g->enabled;
}
1409
1410 /*
1411 * zfpm_conn_is_up
1412 *
1413 * Returns true if the connection to the FPM is up.
1414 */
1415 static inline int zfpm_conn_is_up(void)
1416 {
1417 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
1418 return 0;
1419
1420 assert(zfpm_g->sock >= 0);
1421
1422 return 1;
1423 }
1424
1425 /*
1426 * zfpm_trigger_update
1427 *
1428 * The zebra code invokes this function to indicate that we should
1429 * send an update to the FPM about the given route_node.
1430 */
1431 static int zfpm_trigger_update(struct route_node *rn, const char *reason)
1432 {
1433 rib_dest_t *dest;
1434 char buf[PREFIX_STRLEN];
1435
1436 /*
1437 * Ignore if the connection is down. We will update the FPM about
1438 * all destinations once the connection comes up.
1439 */
1440 if (!zfpm_conn_is_up())
1441 return 0;
1442
1443 dest = rib_dest_from_rnode(rn);
1444
1445 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
1446 zfpm_g->stats.redundant_triggers++;
1447 return 0;
1448 }
1449
1450 if (reason) {
1451 zfpm_debug("%s triggering update to FPM - Reason: %s",
1452 prefix2str(&rn->p, buf, sizeof(buf)), reason);
1453 }
1454
1455 SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1456 TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries);
1457 zfpm_g->stats.updates_triggered++;
1458
1459 /*
1460 * Make sure that writes are enabled.
1461 */
1462 if (zfpm_g->t_write)
1463 return 0;
1464
1465 zfpm_write_on();
1466 return 0;
1467 }
1468
1469 /*
1470 * Generate Key for FPM MAC info hash entry
1471 * Key is generated using MAC address and VNI id which should be sufficient
1472 * to provide uniqueness
1473 */
1474 static unsigned int zfpm_mac_info_hash_keymake(const void *p)
1475 {
1476 struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p;
1477 uint32_t mac_key;
1478
1479 mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a);
1480
1481 return jhash_2words(mac_key, fpm_mac->vni, 0);
1482 }
1483
1484 /*
1485 * Compare function for FPM MAC info hash lookup
1486 */
1487 static bool zfpm_mac_info_cmp(const void *p1, const void *p2)
1488 {
1489 const struct fpm_mac_info_t *fpm_mac1 = p1;
1490 const struct fpm_mac_info_t *fpm_mac2 = p2;
1491
1492 if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN)
1493 != 0)
1494 return false;
1495 if (fpm_mac1->r_vtep_ip.s_addr != fpm_mac2->r_vtep_ip.s_addr)
1496 return false;
1497 if (fpm_mac1->vni != fpm_mac2->vni)
1498 return false;
1499
1500 return true;
1501 }
1502
/*
 * Lookup FPM MAC info hash entry.
 *
 * Returns the queued fpm_mac_info_t matching 'key' (MAC/VTEP-IP/VNI),
 * or NULL if no update for that RMAC is currently enqueued.
 */
static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key)
{
	return hash_lookup(zfpm_g->fpm_mac_info_table, key);
}
1510
1511 /*
1512 * Callback to allocate fpm_mac_info_t structure.
1513 */
1514 static void *zfpm_mac_info_alloc(void *p)
1515 {
1516 const struct fpm_mac_info_t *key = p;
1517 struct fpm_mac_info_t *fpm_mac;
1518
1519 fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t));
1520
1521 memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN);
1522 memcpy(&fpm_mac->r_vtep_ip, &key->r_vtep_ip, sizeof(struct in_addr));
1523 fpm_mac->vni = key->vni;
1524
1525 return (void *)fpm_mac;
1526 }
1527
/*
 * Delink and free fpm_mac_info_t.
 *
 * Removes the entry from the hash table and from the pending-update
 * queue before releasing its memory; both containers hold the entry
 * simultaneously, so both must be unlinked prior to the free.
 */
static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac)
{
	hash_release(zfpm_g->fpm_mac_info_table, fpm_mac);
	TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
	XFREE(MTYPE_FPM_MAC_INFO, fpm_mac);
}
1537
/*
 * zfpm_trigger_rmac_update
 *
 * Zebra code invokes this function to indicate that we should
 * send an update to FPM for given MAC entry.
 *
 * This function checks if we already have enqueued an update for this RMAC,
 * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update.
 *
 * 'delete' selects the operation: false queues an add/update, true a
 * delete. Always returns 0 (the hook return value is unused).
 */
static int zfpm_trigger_rmac_update(zebra_mac_t *rmac, zebra_l3vni_t *zl3vni,
				    bool delete, const char *reason)
{
	char buf[ETHER_ADDR_STRLEN];
	struct fpm_mac_info_t *fpm_mac, key;
	struct interface *vxlan_if, *svi_if;

	/*
	 * Ignore if the connection is down. We will update the FPM about
	 * all destinations once the connection comes up.
	 */
	if (!zfpm_conn_is_up())
		return 0;

	if (reason) {
		zfpm_debug("triggering update to FPM - Reason: %s - %s",
			   reason,
			   prefix_mac2str(&rmac->macaddr, buf, sizeof(buf)));
	}

	vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);
	svi_if = zl3vni_map_to_svi_if(zl3vni);

	/* Build the lookup key: MAC + remote VTEP IP + VNI. */
	memset(&key, 0, sizeof(struct fpm_mac_info_t));

	memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN);
	key.r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr;
	key.vni = zl3vni->vni;

	/* Check if this MAC is already present in the queue. */
	fpm_mac = zfpm_mac_info_lookup(&key);

	if (fpm_mac) {
		if (!!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM)
		    == delete) {
			/*
			 * MAC is already present in the queue
			 * with the same op as this one. Do nothing
			 */
			zfpm_g->stats.redundant_triggers++;
			return 0;
		}

		/*
		 * A new op for an already existing fpm_mac_info_t node.
		 * Update the existing node for the new op.
		 */
		if (!delete) {
			/*
			 * New op is "add". Previous op is "delete".
			 * Update the fpm_mac_info_t for the new add.
			 */
			fpm_mac->zebra_flags = rmac->flags;

			fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
			fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;

			UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
			SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
		} else {
			/*
			 * New op is "delete". Previous op is "add".
			 * Thus, no-op. Unset ZEBRA_MAC_UPDATE_FPM flag.
			 */
			SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
			UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
		}

		return 0;
	}

	/* No queued entry yet: allocate one via the hash alloc callback
	 * and enqueue it for the write path. */
	fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key,
			   zfpm_mac_info_alloc);
	if (!fpm_mac)
		return 0;

	fpm_mac->zebra_flags = rmac->flags;
	fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
	fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;

	SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
	if (delete)
		SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);

	TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);

	zfpm_g->stats.updates_triggered++;

	/* If writes are already enabled, return. */
	if (zfpm_g->t_write)
		return 0;

	zfpm_write_on();
	return 0;
}
1642
1643 /*
1644 * This function is called when the FPM connections is established.
1645 * Iterate over all the RMAC entries for the given L3VNI
1646 * and enqueue the RMAC for FPM processing.
1647 */
1648 static void zfpm_trigger_rmac_update_wrapper(struct hash_backet *backet,
1649 void *args)
1650 {
1651 zebra_mac_t *zrmac = (zebra_mac_t *)backet->data;
1652 zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)args;
1653
1654 zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added");
1655 }
1656
1657 /*
1658 * This function is called when the FPM connections is established.
1659 * This function iterates over all the L3VNIs to trigger
1660 * FPM updates for RMACs currently available.
1661 */
1662 static void zfpm_iterate_rmac_table(struct hash_backet *backet, void *args)
1663 {
1664 zebra_l3vni_t *zl3vni = (zebra_l3vni_t *)backet->data;
1665
1666 hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper,
1667 (void *)zl3vni);
1668 }
1669
1670 /*
1671 * zfpm_stats_timer_cb
1672 */
1673 static int zfpm_stats_timer_cb(struct thread *t)
1674 {
1675 zfpm_g->t_stats = NULL;
1676
1677 /*
1678 * Remember the stats collected in the last interval for display
1679 * purposes.
1680 */
1681 zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats);
1682
1683 /*
1684 * Add the current set of stats into the cumulative statistics.
1685 */
1686 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1687 &zfpm_g->cumulative_stats);
1688
1689 /*
1690 * Start collecting stats afresh over the next interval.
1691 */
1692 zfpm_stats_reset(&zfpm_g->stats);
1693
1694 zfpm_start_stats_timer();
1695
1696 return 0;
1697 }
1698
1699 /*
1700 * zfpm_stop_stats_timer
1701 */
1702 static void zfpm_stop_stats_timer(void)
1703 {
1704 if (!zfpm_g->t_stats)
1705 return;
1706
1707 zfpm_debug("Stopping existing stats timer");
1708 THREAD_TIMER_OFF(zfpm_g->t_stats);
1709 }
1710
/*
 * zfpm_start_stats_timer
 *
 * Schedule zfpm_stats_timer_cb() to fire after ZFPM_STATS_IVL_SECS.
 * Must not be called while a stats timer is already pending.
 */
void zfpm_start_stats_timer(void)
{
	assert(!zfpm_g->t_stats);

	thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0,
			 ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats);
}
1721
/*
 * Helper macro for zfpm_show_stats() below.
 *
 * Prints one counter's cumulative total alongside its value over the
 * last stats interval. Expects 'vty' and 'total_stats' to be in scope
 * at the expansion site.
 */
#define ZFPM_SHOW_STAT(counter)                                                \
	do {                                                                   \
		vty_out(vty, "%-40s %10lu %16lu\n", #counter,                  \
			total_stats.counter, zfpm_g->last_ivl_stats.counter);  \
	} while (0)
1730
/*
 * zfpm_show_stats
 *
 * Emit all FPM counters to the given vty: the cumulative total in one
 * column and the value over the last stats interval in the other.
 */
static void zfpm_show_stats(struct vty *vty)
{
	zfpm_stats_t total_stats;
	time_t elapsed;

	vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total",
		ZFPM_STATS_IVL_SECS);

	/*
	 * Compute the total stats up to this instant.
	 */
	zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
			   &total_stats);

	ZFPM_SHOW_STAT(connect_calls);
	ZFPM_SHOW_STAT(connect_no_sock);
	ZFPM_SHOW_STAT(read_cb_calls);
	ZFPM_SHOW_STAT(write_cb_calls);
	ZFPM_SHOW_STAT(write_calls);
	ZFPM_SHOW_STAT(partial_writes);
	ZFPM_SHOW_STAT(max_writes_hit);
	ZFPM_SHOW_STAT(t_write_yields);
	ZFPM_SHOW_STAT(nop_deletes_skipped);
	ZFPM_SHOW_STAT(route_adds);
	ZFPM_SHOW_STAT(route_dels);
	ZFPM_SHOW_STAT(updates_triggered);
	ZFPM_SHOW_STAT(redundant_triggers);
	ZFPM_SHOW_STAT(dests_del_after_update);
	ZFPM_SHOW_STAT(t_conn_down_starts);
	ZFPM_SHOW_STAT(t_conn_down_dests_processed);
	ZFPM_SHOW_STAT(t_conn_down_yields);
	ZFPM_SHOW_STAT(t_conn_down_finishes);
	ZFPM_SHOW_STAT(t_conn_up_starts);
	ZFPM_SHOW_STAT(t_conn_up_dests_processed);
	ZFPM_SHOW_STAT(t_conn_up_yields);
	ZFPM_SHOW_STAT(t_conn_up_aborts);
	ZFPM_SHOW_STAT(t_conn_up_finishes);

	/* Only report the clear time if stats were ever cleared. */
	if (!zfpm_g->last_stats_clear_time)
		return;

	elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time);

	vty_out(vty, "\nStats were cleared %lu seconds ago\n",
		(unsigned long)elapsed);
}
1780
/*
 * zfpm_clear_stats
 *
 * Reset all FPM statistics (live, last-interval and cumulative) and
 * restart the stats interval timer so the next window begins now.
 */
static void zfpm_clear_stats(struct vty *vty)
{
	if (!zfpm_is_enabled()) {
		vty_out(vty, "The FPM module is not enabled...\n");
		return;
	}

	zfpm_stats_reset(&zfpm_g->stats);
	zfpm_stats_reset(&zfpm_g->last_ivl_stats);
	zfpm_stats_reset(&zfpm_g->cumulative_stats);

	/* Restart the timer so the interval is aligned with the clear. */
	zfpm_stop_stats_timer();
	zfpm_start_stats_timer();

	/* Remember when stats were last cleared for display purposes. */
	zfpm_g->last_stats_clear_time = monotime(NULL);

	vty_out(vty, "Cleared FPM stats\n");
}
1802
/*
 * show_zebra_fpm_stats
 *
 * CLI handler for "show zebra fpm stats": dumps all FPM counters.
 */
DEFUN (show_zebra_fpm_stats,
       show_zebra_fpm_stats_cmd,
       "show zebra fpm stats",
       SHOW_STR
       ZEBRA_STR
       "Forwarding Path Manager information\n"
       "Statistics\n")
{
	zfpm_show_stats(vty);
	return CMD_SUCCESS;
}
1817
/*
 * clear_zebra_fpm_stats
 *
 * CLI handler for "clear zebra fpm stats": resets all FPM counters.
 */
DEFUN (clear_zebra_fpm_stats,
       clear_zebra_fpm_stats_cmd,
       "clear zebra fpm stats",
       CLEAR_STR
       ZEBRA_STR
       "Clear Forwarding Path Manager information\n"
       "Statistics\n")
{
	zfpm_clear_stats(vty);
	return CMD_SUCCESS;
}
1832
1833 /*
1834 * update fpm connection information
1835 */
1836 DEFUN ( fpm_remote_ip,
1837 fpm_remote_ip_cmd,
1838 "fpm connection ip A.B.C.D port (1-65535)",
1839 "fpm connection remote ip and port\n"
1840 "Remote fpm server ip A.B.C.D\n"
1841 "Enter ip ")
1842 {
1843
1844 in_addr_t fpm_server;
1845 uint32_t port_no;
1846
1847 fpm_server = inet_addr(argv[3]->arg);
1848 if (fpm_server == INADDR_NONE)
1849 return CMD_ERR_INCOMPLETE;
1850
1851 port_no = atoi(argv[5]->arg);
1852 if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT)
1853 return CMD_ERR_INCOMPLETE;
1854
1855 zfpm_g->fpm_server = fpm_server;
1856 zfpm_g->fpm_port = port_no;
1857
1858
1859 return CMD_SUCCESS;
1860 }
1861
1862 DEFUN ( no_fpm_remote_ip,
1863 no_fpm_remote_ip_cmd,
1864 "no fpm connection ip A.B.C.D port (1-65535)",
1865 "fpm connection remote ip and port\n"
1866 "Connection\n"
1867 "Remote fpm server ip A.B.C.D\n"
1868 "Enter ip ")
1869 {
1870 if (zfpm_g->fpm_server != inet_addr(argv[4]->arg)
1871 || zfpm_g->fpm_port != atoi(argv[6]->arg))
1872 return CMD_ERR_NO_MATCH;
1873
1874 zfpm_g->fpm_server = FPM_DEFAULT_IP;
1875 zfpm_g->fpm_port = FPM_DEFAULT_PORT;
1876
1877 return CMD_SUCCESS;
1878 }
1879
1880 /*
1881 * zfpm_init_message_format
1882 */
1883 static inline void zfpm_init_message_format(const char *format)
1884 {
1885 int have_netlink, have_protobuf;
1886
1887 #ifdef HAVE_NETLINK
1888 have_netlink = 1;
1889 #else
1890 have_netlink = 0;
1891 #endif
1892
1893 #ifdef HAVE_PROTOBUF
1894 have_protobuf = 1;
1895 #else
1896 have_protobuf = 0;
1897 #endif
1898
1899 zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE;
1900
1901 if (!format) {
1902 if (have_netlink) {
1903 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1904 } else if (have_protobuf) {
1905 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1906 }
1907 return;
1908 }
1909
1910 if (!strcmp("netlink", format)) {
1911 if (!have_netlink) {
1912 flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE,
1913 "FPM netlink message format is not available");
1914 return;
1915 }
1916 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1917 return;
1918 }
1919
1920 if (!strcmp("protobuf", format)) {
1921 if (!have_protobuf) {
1922 flog_err(
1923 EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1924 "FPM protobuf message format is not available");
1925 return;
1926 }
1927 flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1928 "FPM protobuf message format is deprecated and scheduled to be removed. "
1929 "Please convert to using netlink format or contact dev@lists.frrouting.org with your use case.");
1930 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1931 return;
1932 }
1933
1934 flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'",
1935 format);
1936 }
1937
1938 /**
1939 * fpm_remote_srv_write
1940 *
1941 * Module to write remote fpm connection
1942 *
1943 * Returns ZERO on success.
1944 */
1945
1946 static int fpm_remote_srv_write(struct vty *vty)
1947 {
1948 struct in_addr in;
1949
1950 in.s_addr = zfpm_g->fpm_server;
1951
1952 if ((zfpm_g->fpm_server != FPM_DEFAULT_IP
1953 && zfpm_g->fpm_server != INADDR_ANY)
1954 || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0))
1955 vty_out(vty, "fpm connection ip %s port %d\n", inet_ntoa(in),
1956 zfpm_g->fpm_port);
1957
1958 return 0;
1959 }
1960
1961
/* Zebra node: registered (via install_node) so fpm_remote_srv_write
 * can emit the FPM connection line into the running config. */
static struct cmd_node zebra_node = {ZEBRA_NODE, "", 1};
1964
1965
/**
 * zfpm_init
 *
 * One-time initialization of the Zebra FPM module, invoked through the
 * frr_late_init hook.
 *
 * The message format ('netlink' or 'protobuf') is taken from the
 * module load arguments; the module is disabled when no suitable
 * format is available.
 *
 * @param[in] master thread master used for all FPM timers and I/O.
 *
 * Returns 0 (hook return value).
 */
static int zfpm_init(struct thread_master *master)
{
	int enable = 1;
	uint16_t port = 0;
	const char *format = THIS_MODULE->load_args;

	memset(zfpm_g, 0, sizeof(*zfpm_g));
	zfpm_g->master = master;
	TAILQ_INIT(&zfpm_g->dest_q);
	TAILQ_INIT(&zfpm_g->mac_q);

	/* Create hash table for fpm_mac_info_t entries */
	zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake,
						 zfpm_mac_info_cmp,
						 "FPM MAC info hash table");

	/* No connection yet: start in IDLE with an invalid socket. */
	zfpm_g->sock = -1;
	zfpm_g->state = ZFPM_STATE_IDLE;

	zfpm_stats_init(&zfpm_g->stats);
	zfpm_stats_init(&zfpm_g->last_ivl_stats);
	zfpm_stats_init(&zfpm_g->cumulative_stats);

	install_node(&zebra_node, fpm_remote_srv_write);
	install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd);
	install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
	install_element(CONFIG_NODE, &fpm_remote_ip_cmd);
	install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd);

	zfpm_init_message_format(format);

	/*
	 * Disable FPM interface if no suitable format is available.
	 */
	if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE)
		enable = 0;

	zfpm_g->enabled = enable;

	/* Fall back to the default server address and port. */
	if (!zfpm_g->fpm_server)
		zfpm_g->fpm_server = FPM_DEFAULT_IP;

	if (!port)
		port = FPM_DEFAULT_PORT;

	zfpm_g->fpm_port = port;

	zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE);
	zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE);

	zfpm_start_stats_timer();
	zfpm_start_connect_timer("initialized");
	return 0;
}
2031
/* Module entry point: wire the FPM trigger functions into zebra's
 * route and RMAC update hooks, and defer full initialization to
 * zfpm_init() via frr_late_init. */
static int zebra_fpm_module_init(void)
{
	hook_register(rib_update, zfpm_trigger_update);
	hook_register(zebra_rmac_update, zfpm_trigger_rmac_update);
	hook_register(frr_late_init, zfpm_init);
	return 0;
}
2039
/* Register this file as a loadable FRR module. */
FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION,
		 .description = "zebra FPM (Forwarding Plane Manager) module",
		 .init = zebra_fpm_module_init, )