]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm.c
Merge pull request #12069 from opensourcerouting/fix/local-as_reset
[mirror_frr.git] / zebra / zebra_fpm.c
1 /*
2 * Main implementation file for interface to Forwarding Plane Manager.
3 *
4 * Copyright (C) 2012 by Open Source Routing.
5 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
6 *
7 * This file is part of GNU Zebra.
8 *
9 * GNU Zebra is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2, or (at your option) any
12 * later version.
13 *
14 * GNU Zebra is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; see the file COPYING; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <zebra.h>
25
26 #include "log.h"
27 #include "libfrr.h"
28 #include "stream.h"
29 #include "thread.h"
30 #include "network.h"
31 #include "command.h"
32 #include "lib/version.h"
33 #include "jhash.h"
34
35 #include "zebra/rib.h"
36 #include "zebra/zserv.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/zebra_errors.h"
40
41 #include "fpm/fpm.h"
42 #include "zebra_fpm_private.h"
43 #include "zebra/zebra_router.h"
44 #include "zebra_vxlan_private.h"
45
46 DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");
47
48 /*
49 * Interval at which we attempt to connect to the FPM.
50 */
51 #define ZFPM_CONNECT_RETRY_IVL 5
52
53 /*
54 * Sizes of outgoing and incoming stream buffers for writing/reading
55 * FPM messages.
56 */
57 #define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
58 #define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)
59
60 /*
61 * The maximum number of times the FPM socket write callback can call
62 * 'write' before it yields.
63 */
64 #define ZFPM_MAX_WRITES_PER_RUN 10
65
66 /*
67 * Interval over which we collect statistics.
68 */
69 #define ZFPM_STATS_IVL_SECS 10
70 #define FPM_MAX_MAC_MSG_LEN 512
71
72 static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args);
73
74 /*
75 * Structure that holds state for iterating over all route_node
76 * structures that are candidates for being communicated to the FPM.
77 */
78 struct zfpm_rnodes_iter {
79 rib_tables_iter_t tables_iter;
80 route_table_iter_t iter;
81 };
82
/*
 * Statistics block.
 *
 * Every member must remain an 'unsigned long' counter:
 * zfpm_stats_compose() treats this structure as a flat array of
 * unsigned longs.
 */
struct zfpm_stats {
	/* Connection attempts. */
	unsigned long connect_calls;
	unsigned long connect_no_sock;

	/* Read callback. */
	unsigned long read_cb_calls;

	/* Write callback. */
	unsigned long write_cb_calls;
	unsigned long write_calls;
	unsigned long partial_writes;
	unsigned long max_writes_hit;
	unsigned long t_write_yields;

	/* Messages built for the FPM. */
	unsigned long nop_deletes_skipped;
	unsigned long route_adds;
	unsigned long route_dels;

	/* Update triggers. */
	unsigned long updates_triggered;
	unsigned long redundant_triggers;

	unsigned long dests_del_after_update;

	/* Connection-down cleanup thread. */
	unsigned long t_conn_down_starts;
	unsigned long t_conn_down_dests_processed;
	unsigned long t_conn_down_yields;
	unsigned long t_conn_down_finishes;

	/* Connection-up thread. */
	unsigned long t_conn_up_starts;
	unsigned long t_conn_up_dests_processed;
	unsigned long t_conn_up_yields;
	unsigned long t_conn_up_aborts;
	unsigned long t_conn_up_finishes;
};
118
/*
 * States of the FPM connection state machine.
 */
enum zfpm_state {

	/*
	 * Not yet ready to connect to the FPM: either the module is
	 * disabled or we are still cleaning up after a connection that
	 * went down.
	 */
	ZFPM_STATE_IDLE,

	/*
	 * Ready to talk to the FPM; connection attempts are made
	 * periodically.
	 */
	ZFPM_STATE_ACTIVE,

	/*
	 * A TCP connection is being brought up, i.e. we are waiting for
	 * an asynchronous connect() to complete.
	 */
	ZFPM_STATE_CONNECTING,

	/*
	 * The TCP connection to the FPM is up.
	 */
	ZFPM_STATE_ESTABLISHED

};
149
/*
 * Encodings that can be used for messages sent to the FPM.
 */
enum zfpm_msg_format {
	ZFPM_MSG_FORMAT_NONE,
	ZFPM_MSG_FORMAT_NETLINK,
	ZFPM_MSG_FORMAT_PROTOBUF,
};
158
159 /*
160 * Globals.
161 */
162 struct zfpm_glob {
163
164 /*
165 * True if the FPM module has been enabled.
166 */
167 int enabled;
168
169 /*
170 * Message format to be used to communicate with the fpm.
171 */
172 enum zfpm_msg_format message_format;
173
174 struct thread_master *master;
175
176 enum zfpm_state state;
177
178 in_addr_t fpm_server;
179 /*
180 * Port on which the FPM is running.
181 */
182 int fpm_port;
183
184 /*
185 * List of rib_dest_t structures to be processed
186 */
187 TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;
188
189 /*
190 * List of fpm_mac_info structures to be processed
191 */
192 TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;
193
194 /*
195 * Hash table of fpm_mac_info_t entries
196 *
197 * While adding fpm_mac_info_t for a MAC to the mac_q,
198 * it is possible that another fpm_mac_info_t node for the this MAC
199 * is already present in the queue.
200 * This is possible in the case of consecutive add->delete operations.
201 * To avoid such duplicate insertions in the mac_q,
202 * define a hash table for fpm_mac_info_t which can be looked up
203 * to see if an fpm_mac_info_t node for a MAC is already present
204 * in the mac_q.
205 */
206 struct hash *fpm_mac_info_table;
207
208 /*
209 * Stream socket to the FPM.
210 */
211 int sock;
212
213 /*
214 * Buffers for messages to/from the FPM.
215 */
216 struct stream *obuf;
217 struct stream *ibuf;
218
219 /*
220 * Threads for I/O.
221 */
222 struct thread *t_connect;
223 struct thread *t_write;
224 struct thread *t_read;
225
226 /*
227 * Thread to clean up after the TCP connection to the FPM goes down
228 * and the state that belongs to it.
229 */
230 struct thread *t_conn_down;
231
232 struct {
233 struct zfpm_rnodes_iter iter;
234 } t_conn_down_state;
235
236 /*
237 * Thread to take actions once the TCP conn to the FPM comes up, and
238 * the state that belongs to it.
239 */
240 struct thread *t_conn_up;
241
242 struct {
243 struct zfpm_rnodes_iter iter;
244 } t_conn_up_state;
245
246 unsigned long connect_calls;
247 time_t last_connect_call_time;
248
249 /*
250 * Stats from the start of the current statistics interval up to
251 * now. These are the counters we typically update in the code.
252 */
253 struct zfpm_stats stats;
254
255 /*
256 * Statistics that were gathered in the last collection interval.
257 */
258 struct zfpm_stats last_ivl_stats;
259
260 /*
261 * Cumulative stats from the last clear to the start of the current
262 * statistics interval.
263 */
264 struct zfpm_stats cumulative_stats;
265
266 /*
267 * Stats interval timer.
268 */
269 struct thread *t_stats;
270
271 /*
272 * If non-zero, the last time when statistics were cleared.
273 */
274 time_t last_stats_clear_time;
275
276 /*
277 * Flag to track the MAC dump status to FPM
278 */
279 bool fpm_mac_dump_done;
280 };
281
282 static struct zfpm_glob zfpm_glob_space;
283 static struct zfpm_glob *zfpm_g = &zfpm_glob_space;
284
285 static int zfpm_trigger_update(struct route_node *rn, const char *reason);
286
287 static void zfpm_read_cb(struct thread *thread);
288 static void zfpm_write_cb(struct thread *thread);
289
290 static void zfpm_set_state(enum zfpm_state state, const char *reason);
291 static void zfpm_start_connect_timer(const char *reason);
292 static void zfpm_start_stats_timer(void);
293 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);
294
295 static const char ipv4_ll_buf[16] = "169.254.0.1";
296 union g_addr ipv4ll_gateway;
297
/*
 * zfpm_thread_should_yield
 *
 * Thin wrapper that forwards to thread_should_yield() for thread 't'
 * and returns its result unchanged.
 */
static inline int zfpm_thread_should_yield(struct thread *t)
{
	int yield_now = thread_should_yield(t);

	return yield_now;
}
305
306 /*
307 * zfpm_state_to_str
308 */
309 static const char *zfpm_state_to_str(enum zfpm_state state)
310 {
311 switch (state) {
312
313 case ZFPM_STATE_IDLE:
314 return "idle";
315
316 case ZFPM_STATE_ACTIVE:
317 return "active";
318
319 case ZFPM_STATE_CONNECTING:
320 return "connecting";
321
322 case ZFPM_STATE_ESTABLISHED:
323 return "established";
324
325 default:
326 return "unknown";
327 }
328 }
329
/*
 * zfpm_get_elapsed_time
 *
 * Returns how many seconds have passed since 'reference', measured
 * against monotime(). A reference that lies after the current time
 * trips an assertion; 0 is returned if execution continues past it.
 */
static time_t zfpm_get_elapsed_time(time_t reference)
{
	time_t current = monotime(NULL);

	if (current >= reference)
		return current - reference;

	/* The reference is ahead of the clock: this should never happen. */
	assert(0);
	return 0;
}
348
349 /*
350 * zfpm_rnodes_iter_init
351 */
352 static inline void zfpm_rnodes_iter_init(struct zfpm_rnodes_iter *iter)
353 {
354 memset(iter, 0, sizeof(*iter));
355 rib_tables_iter_init(&iter->tables_iter);
356
357 /*
358 * This is a hack, but it makes implementing 'next' easier by
359 * ensuring that route_table_iter_next() will return NULL the first
360 * time we call it.
361 */
362 route_table_iter_init(&iter->iter, NULL);
363 route_table_iter_cleanup(&iter->iter);
364 }
365
366 /*
367 * zfpm_rnodes_iter_next
368 */
369 static inline struct route_node *
370 zfpm_rnodes_iter_next(struct zfpm_rnodes_iter *iter)
371 {
372 struct route_node *rn;
373 struct route_table *table;
374
375 while (1) {
376 rn = route_table_iter_next(&iter->iter);
377 if (rn)
378 return rn;
379
380 /*
381 * We've made our way through this table, go to the next one.
382 */
383 route_table_iter_cleanup(&iter->iter);
384
385 table = rib_tables_iter_next(&iter->tables_iter);
386
387 if (!table)
388 return NULL;
389
390 route_table_iter_init(&iter->iter, table);
391 }
392
393 return NULL;
394 }
395
396 /*
397 * zfpm_rnodes_iter_pause
398 */
399 static inline void zfpm_rnodes_iter_pause(struct zfpm_rnodes_iter *iter)
400 {
401 route_table_iter_pause(&iter->iter);
402 }
403
404 /*
405 * zfpm_rnodes_iter_cleanup
406 */
407 static inline void zfpm_rnodes_iter_cleanup(struct zfpm_rnodes_iter *iter)
408 {
409 route_table_iter_cleanup(&iter->iter);
410 rib_tables_iter_cleanup(&iter->tables_iter);
411 }
412
413 /*
414 * zfpm_stats_init
415 *
416 * Initialize a statistics block.
417 */
418 static inline void zfpm_stats_init(struct zfpm_stats *stats)
419 {
420 memset(stats, 0, sizeof(*stats));
421 }
422
/*
 * zfpm_stats_reset
 *
 * Resets a statistics block; identical to initializing it.
 */
static inline void zfpm_stats_reset(struct zfpm_stats *stats)
{
	zfpm_stats_init(stats);
}
430
431 /*
432 * zfpm_stats_copy
433 */
434 static inline void zfpm_stats_copy(const struct zfpm_stats *src,
435 struct zfpm_stats *dest)
436 {
437 memcpy(dest, src, sizeof(*dest));
438 }
439
440 /*
441 * zfpm_stats_compose
442 *
443 * Total up the statistics in two stats structures ('s1 and 's2') and
444 * return the result in the third argument, 'result'. Note that the
445 * pointer 'result' may be the same as 's1' or 's2'.
446 *
447 * For simplicity, the implementation below assumes that the stats
448 * structure is composed entirely of counters. This can easily be
449 * changed when necessary.
450 */
451 static void zfpm_stats_compose(const struct zfpm_stats *s1,
452 const struct zfpm_stats *s2,
453 struct zfpm_stats *result)
454 {
455 const unsigned long *p1, *p2;
456 unsigned long *result_p;
457 int i, num_counters;
458
459 p1 = (const unsigned long *)s1;
460 p2 = (const unsigned long *)s2;
461 result_p = (unsigned long *)result;
462
463 num_counters = (sizeof(struct zfpm_stats) / sizeof(unsigned long));
464
465 for (i = 0; i < num_counters; i++) {
466 result_p[i] = p1[i] + p2[i];
467 }
468 }
469
470 /*
471 * zfpm_read_on
472 */
473 static inline void zfpm_read_on(void)
474 {
475 assert(!zfpm_g->t_read);
476 assert(zfpm_g->sock >= 0);
477
478 thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
479 &zfpm_g->t_read);
480 }
481
482 /*
483 * zfpm_write_on
484 */
485 static inline void zfpm_write_on(void)
486 {
487 assert(!zfpm_g->t_write);
488 assert(zfpm_g->sock >= 0);
489
490 thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
491 &zfpm_g->t_write);
492 }
493
494 /*
495 * zfpm_read_off
496 */
497 static inline void zfpm_read_off(void)
498 {
499 THREAD_OFF(zfpm_g->t_read);
500 }
501
502 /*
503 * zfpm_write_off
504 */
505 static inline void zfpm_write_off(void)
506 {
507 THREAD_OFF(zfpm_g->t_write);
508 }
509
510 static inline void zfpm_connect_off(void)
511 {
512 THREAD_OFF(zfpm_g->t_connect);
513 }
514
515 /*
516 * zfpm_conn_up_thread_cb
517 *
518 * Callback for actions to be taken when the connection to the FPM
519 * comes up.
520 */
521 static void zfpm_conn_up_thread_cb(struct thread *thread)
522 {
523 struct route_node *rnode;
524 struct zfpm_rnodes_iter *iter;
525 rib_dest_t *dest;
526
527 iter = &zfpm_g->t_conn_up_state.iter;
528
529 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
530 zfpm_debug(
531 "Connection not up anymore, conn_up thread aborting");
532 zfpm_g->stats.t_conn_up_aborts++;
533 goto done;
534 }
535
536 if (!zfpm_g->fpm_mac_dump_done) {
537 /* Enqueue FPM updates for all the RMAC entries */
538 hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table,
539 NULL);
540 /* mark dump done so that its not repeated after yield */
541 zfpm_g->fpm_mac_dump_done = true;
542 }
543
544 while ((rnode = zfpm_rnodes_iter_next(iter))) {
545 dest = rib_dest_from_rnode(rnode);
546
547 if (dest) {
548 zfpm_g->stats.t_conn_up_dests_processed++;
549 zfpm_trigger_update(rnode, NULL);
550 }
551
552 /*
553 * Yield if need be.
554 */
555 if (!zfpm_thread_should_yield(thread))
556 continue;
557
558 zfpm_g->stats.t_conn_up_yields++;
559 zfpm_rnodes_iter_pause(iter);
560 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
561 NULL, 0, &zfpm_g->t_conn_up);
562 return;
563 }
564
565 zfpm_g->stats.t_conn_up_finishes++;
566
567 done:
568 zfpm_rnodes_iter_cleanup(iter);
569 }
570
571 /*
572 * zfpm_connection_up
573 *
574 * Called when the connection to the FPM comes up.
575 */
576 static void zfpm_connection_up(const char *detail)
577 {
578 assert(zfpm_g->sock >= 0);
579 zfpm_read_on();
580 zfpm_write_on();
581 zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);
582
583 /*
584 * Start thread to push existing routes to the FPM.
585 */
586 THREAD_OFF(zfpm_g->t_conn_up);
587
588 zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);
589 zfpm_g->fpm_mac_dump_done = false;
590
591 zfpm_debug("Starting conn_up thread");
592
593 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
594 &zfpm_g->t_conn_up);
595 zfpm_g->stats.t_conn_up_starts++;
596 }
597
598 /*
599 * zfpm_connect_check
600 *
601 * Check if an asynchronous connect() to the FPM is complete.
602 */
603 static void zfpm_connect_check(void)
604 {
605 int status;
606 socklen_t slen;
607 int ret;
608
609 zfpm_read_off();
610 zfpm_write_off();
611
612 slen = sizeof(status);
613 ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
614 &slen);
615
616 if (ret >= 0 && status == 0) {
617 zfpm_connection_up("async connect complete");
618 return;
619 }
620
621 /*
622 * getsockopt() failed or indicated an error on the socket.
623 */
624 close(zfpm_g->sock);
625 zfpm_g->sock = -1;
626
627 zfpm_start_connect_timer("getsockopt() after async connect failed");
628 return;
629 }
630
631 /*
632 * zfpm_conn_down_thread_cb
633 *
634 * Callback that is invoked to clean up state after the TCP connection
635 * to the FPM goes down.
636 */
637 static void zfpm_conn_down_thread_cb(struct thread *thread)
638 {
639 struct route_node *rnode;
640 struct zfpm_rnodes_iter *iter;
641 rib_dest_t *dest;
642 struct fpm_mac_info_t *mac = NULL;
643
644 assert(zfpm_g->state == ZFPM_STATE_IDLE);
645
646 /*
647 * Delink and free all fpm_mac_info_t nodes
648 * in the mac_q and fpm_mac_info_hash
649 */
650 while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
651 zfpm_mac_info_del(mac);
652
653 zfpm_g->t_conn_down = NULL;
654
655 iter = &zfpm_g->t_conn_down_state.iter;
656
657 while ((rnode = zfpm_rnodes_iter_next(iter))) {
658 dest = rib_dest_from_rnode(rnode);
659
660 if (dest) {
661 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
662 TAILQ_REMOVE(&zfpm_g->dest_q, dest,
663 fpm_q_entries);
664 }
665
666 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
667 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
668
669 zfpm_g->stats.t_conn_down_dests_processed++;
670
671 /*
672 * Check if the dest should be deleted.
673 */
674 rib_gc_dest(rnode);
675 }
676
677 /*
678 * Yield if need be.
679 */
680 if (!zfpm_thread_should_yield(thread))
681 continue;
682
683 zfpm_g->stats.t_conn_down_yields++;
684 zfpm_rnodes_iter_pause(iter);
685 zfpm_g->t_conn_down = NULL;
686 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
687 NULL, 0, &zfpm_g->t_conn_down);
688 return;
689 }
690
691 zfpm_g->stats.t_conn_down_finishes++;
692 zfpm_rnodes_iter_cleanup(iter);
693
694 /*
695 * Start the process of connecting to the FPM again.
696 */
697 zfpm_start_connect_timer("cleanup complete");
698 }
699
700 /*
701 * zfpm_connection_down
702 *
703 * Called when the connection to the FPM has gone down.
704 */
705 static void zfpm_connection_down(const char *detail)
706 {
707 if (!detail)
708 detail = "unknown";
709
710 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
711
712 zlog_info("connection to the FPM has gone down: %s", detail);
713
714 zfpm_read_off();
715 zfpm_write_off();
716
717 stream_reset(zfpm_g->ibuf);
718 stream_reset(zfpm_g->obuf);
719
720 if (zfpm_g->sock >= 0) {
721 close(zfpm_g->sock);
722 zfpm_g->sock = -1;
723 }
724
725 /*
726 * Start thread to clean up state after the connection goes down.
727 */
728 assert(!zfpm_g->t_conn_down);
729 zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
730 zfpm_g->t_conn_down = NULL;
731 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
732 &zfpm_g->t_conn_down);
733 zfpm_g->stats.t_conn_down_starts++;
734
735 zfpm_set_state(ZFPM_STATE_IDLE, detail);
736 }
737
738 /*
739 * zfpm_read_cb
740 */
741 static void zfpm_read_cb(struct thread *thread)
742 {
743 size_t already;
744 struct stream *ibuf;
745 uint16_t msg_len;
746 fpm_msg_hdr_t *hdr;
747
748 zfpm_g->stats.read_cb_calls++;
749
750 /*
751 * Check if async connect is now done.
752 */
753 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
754 zfpm_connect_check();
755 return;
756 }
757
758 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
759 assert(zfpm_g->sock >= 0);
760
761 ibuf = zfpm_g->ibuf;
762
763 already = stream_get_endp(ibuf);
764 if (already < FPM_MSG_HDR_LEN) {
765 ssize_t nbyte;
766
767 nbyte = stream_read_try(ibuf, zfpm_g->sock,
768 FPM_MSG_HDR_LEN - already);
769 if (nbyte == 0 || nbyte == -1) {
770 if (nbyte == -1) {
771 char buffer[1024];
772
773 snprintf(buffer, sizeof(buffer),
774 "closed socket in read(%d): %s", errno,
775 safe_strerror(errno));
776 zfpm_connection_down(buffer);
777 } else
778 zfpm_connection_down("closed socket in read");
779 return;
780 }
781
782 if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
783 goto done;
784
785 already = FPM_MSG_HDR_LEN;
786 }
787
788 stream_set_getp(ibuf, 0);
789
790 hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);
791
792 if (!fpm_msg_hdr_ok(hdr)) {
793 zfpm_connection_down("invalid message header");
794 return;
795 }
796
797 msg_len = fpm_msg_len(hdr);
798
799 /*
800 * Read out the rest of the packet.
801 */
802 if (already < msg_len) {
803 ssize_t nbyte;
804
805 nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);
806
807 if (nbyte == 0 || nbyte == -1) {
808 if (nbyte == -1) {
809 char buffer[1024];
810
811 snprintf(buffer, sizeof(buffer),
812 "failed to read message(%d) %s", errno,
813 safe_strerror(errno));
814 zfpm_connection_down(buffer);
815 } else
816 zfpm_connection_down("failed to read message");
817 return;
818 }
819
820 if (nbyte != (ssize_t)(msg_len - already))
821 goto done;
822 }
823
824 /*
825 * Just throw it away for now.
826 */
827 stream_reset(ibuf);
828
829 done:
830 zfpm_read_on();
831 }
832
833 static bool zfpm_updates_pending(void)
834 {
835 if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
836 return true;
837
838 return false;
839 }
840
841 /*
842 * zfpm_writes_pending
843 *
844 * Returns true if we may have something to write to the FPM.
845 */
846 static int zfpm_writes_pending(void)
847 {
848
849 /*
850 * Check if there is any data in the outbound buffer that has not
851 * been written to the socket yet.
852 */
853 if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
854 return 1;
855
856 /*
857 * Check if there are any updates scheduled on the outbound queues.
858 */
859 if (zfpm_updates_pending())
860 return 1;
861
862 return 0;
863 }
864
865 /*
866 * zfpm_encode_route
867 *
868 * Encode a message to the FPM with information about the given route.
869 *
870 * Returns the number of bytes written to the buffer. 0 or a negative
871 * value indicates an error.
872 */
873 static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
874 char *in_buf, size_t in_buf_len,
875 fpm_msg_type_e *msg_type)
876 {
877 size_t len;
878 #ifdef HAVE_NETLINK
879 int cmd;
880 #endif
881 len = 0;
882
883 *msg_type = FPM_MSG_TYPE_NONE;
884
885 switch (zfpm_g->message_format) {
886
887 case ZFPM_MSG_FORMAT_PROTOBUF:
888 #ifdef HAVE_PROTOBUF
889 len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
890 in_buf_len);
891 *msg_type = FPM_MSG_TYPE_PROTOBUF;
892 #endif
893 break;
894
895 case ZFPM_MSG_FORMAT_NETLINK:
896 #ifdef HAVE_NETLINK
897 *msg_type = FPM_MSG_TYPE_NETLINK;
898 cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
899 len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
900 in_buf_len);
901 assert(fpm_msg_align(len) == len);
902 *msg_type = FPM_MSG_TYPE_NETLINK;
903 #endif /* HAVE_NETLINK */
904 break;
905
906 default:
907 break;
908 }
909
910 return len;
911 }
912
913 /*
914 * zfpm_route_for_update
915 *
916 * Returns the re that is to be sent to the FPM for a given dest.
917 */
918 struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
919 {
920 return dest->selected_fib;
921 }
922
/*
 * Return codes of the queue processing functions.
 *
 * FPM_WRITE_STOP: the write buffer is full. Stop processing all queues
 * and drain the buffer by writing its contents to the socket.
 *
 * FPM_GOTO_NEXT_Q: this queue is empty, or enough updates have been
 * taken from it. Continue with the next queue.
 */
enum {
	FPM_WRITE_STOP = 0,
	FPM_GOTO_NEXT_Q = 1
};

/* Maximum number of entries taken from one queue in a single pass. */
#define FPM_QUEUE_PROCESS_LIMIT 10000
940
941 /*
942 * zfpm_build_route_updates
943 *
944 * Process the dest_q queue and write FPM messages to the outbound buffer.
945 */
946 static int zfpm_build_route_updates(void)
947 {
948 struct stream *s;
949 rib_dest_t *dest;
950 unsigned char *buf, *data, *buf_end;
951 size_t msg_len;
952 size_t data_len;
953 fpm_msg_hdr_t *hdr;
954 struct route_entry *re;
955 int is_add, write_msg;
956 fpm_msg_type_e msg_type;
957 uint16_t q_limit;
958
959 if (TAILQ_EMPTY(&zfpm_g->dest_q))
960 return FPM_GOTO_NEXT_Q;
961
962 s = zfpm_g->obuf;
963 q_limit = FPM_QUEUE_PROCESS_LIMIT;
964
965 do {
966 /*
967 * Make sure there is enough space to write another message.
968 */
969 if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
970 return FPM_WRITE_STOP;
971
972 buf = STREAM_DATA(s) + stream_get_endp(s);
973 buf_end = buf + STREAM_WRITEABLE(s);
974
975 dest = TAILQ_FIRST(&zfpm_g->dest_q);
976 if (!dest)
977 return FPM_GOTO_NEXT_Q;
978
979 assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));
980
981 hdr = (fpm_msg_hdr_t *)buf;
982 hdr->version = FPM_PROTO_VERSION;
983
984 data = fpm_msg_data(hdr);
985
986 re = zfpm_route_for_update(dest);
987 is_add = re ? 1 : 0;
988
989 write_msg = 1;
990
991 /*
992 * If this is a route deletion, and we have not sent the route
993 * to
994 * the FPM previously, skip it.
995 */
996 if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
997 write_msg = 0;
998 zfpm_g->stats.nop_deletes_skipped++;
999 }
1000
1001 if (write_msg) {
1002 data_len = zfpm_encode_route(dest, re, (char *)data,
1003 buf_end - data, &msg_type);
1004
1005 if (data_len) {
1006 hdr->msg_type = msg_type;
1007 msg_len = fpm_data_len_to_msg_len(data_len);
1008 hdr->msg_len = htons(msg_len);
1009 stream_forward_endp(s, msg_len);
1010
1011 if (is_add)
1012 zfpm_g->stats.route_adds++;
1013 else
1014 zfpm_g->stats.route_dels++;
1015 } else {
1016 zlog_err("%s: Encoding Prefix: %pRN No valid nexthops",
1017 __func__, dest->rnode);
1018 }
1019 }
1020
1021 /*
1022 * Remove the dest from the queue, and reset the flag.
1023 */
1024 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1025 TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);
1026
1027 if (is_add) {
1028 SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1029 } else {
1030 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
1031 }
1032
1033 /*
1034 * Delete the destination if necessary.
1035 */
1036 if (rib_gc_dest(dest->rnode))
1037 zfpm_g->stats.dests_del_after_update++;
1038
1039 q_limit--;
1040 if (q_limit == 0) {
1041 /*
1042 * We have processed enough updates in this queue.
1043 * Now yield for other queues.
1044 */
1045 return FPM_GOTO_NEXT_Q;
1046 }
1047 } while (true);
1048 }
1049
1050 /*
1051 * zfpm_encode_mac
1052 *
1053 * Encode a message to FPM with information about the given MAC.
1054 *
1055 * Returns the number of bytes written to the buffer.
1056 */
1057 static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
1058 size_t in_buf_len, fpm_msg_type_e *msg_type)
1059 {
1060 size_t len = 0;
1061
1062 *msg_type = FPM_MSG_TYPE_NONE;
1063
1064 switch (zfpm_g->message_format) {
1065
1066 case ZFPM_MSG_FORMAT_NONE:
1067 break;
1068 case ZFPM_MSG_FORMAT_NETLINK:
1069 #ifdef HAVE_NETLINK
1070 len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
1071 assert(fpm_msg_align(len) == len);
1072 *msg_type = FPM_MSG_TYPE_NETLINK;
1073 #endif /* HAVE_NETLINK */
1074 break;
1075 case ZFPM_MSG_FORMAT_PROTOBUF:
1076 break;
1077 }
1078 return len;
1079 }
1080
1081 static int zfpm_build_mac_updates(void)
1082 {
1083 struct stream *s;
1084 struct fpm_mac_info_t *mac;
1085 unsigned char *buf, *data, *buf_end;
1086 fpm_msg_hdr_t *hdr;
1087 size_t data_len, msg_len;
1088 fpm_msg_type_e msg_type;
1089 uint16_t q_limit;
1090
1091 if (TAILQ_EMPTY(&zfpm_g->mac_q))
1092 return FPM_GOTO_NEXT_Q;
1093
1094 s = zfpm_g->obuf;
1095 q_limit = FPM_QUEUE_PROCESS_LIMIT;
1096
1097 do {
1098 /* Make sure there is enough space to write another message. */
1099 if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
1100 return FPM_WRITE_STOP;
1101
1102 buf = STREAM_DATA(s) + stream_get_endp(s);
1103 buf_end = buf + STREAM_WRITEABLE(s);
1104
1105 mac = TAILQ_FIRST(&zfpm_g->mac_q);
1106 if (!mac)
1107 return FPM_GOTO_NEXT_Q;
1108
1109 /* Check for no-op */
1110 if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
1111 zfpm_g->stats.nop_deletes_skipped++;
1112 zfpm_mac_info_del(mac);
1113 continue;
1114 }
1115
1116 hdr = (fpm_msg_hdr_t *)buf;
1117 hdr->version = FPM_PROTO_VERSION;
1118
1119 data = fpm_msg_data(hdr);
1120 data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
1121 &msg_type);
1122 assert(data_len);
1123
1124 hdr->msg_type = msg_type;
1125 msg_len = fpm_data_len_to_msg_len(data_len);
1126 hdr->msg_len = htons(msg_len);
1127 stream_forward_endp(s, msg_len);
1128
1129 /* Remove the MAC from the queue, and delete it. */
1130 zfpm_mac_info_del(mac);
1131
1132 q_limit--;
1133 if (q_limit == 0) {
1134 /*
1135 * We have processed enough updates in this queue.
1136 * Now yield for other queues.
1137 */
1138 return FPM_GOTO_NEXT_Q;
1139 }
1140 } while (1);
1141 }
1142
1143 /*
1144 * zfpm_build_updates
1145 *
1146 * Process the outgoing queues and write messages to the outbound
1147 * buffer.
1148 */
1149 static void zfpm_build_updates(void)
1150 {
1151 struct stream *s;
1152
1153 s = zfpm_g->obuf;
1154 assert(stream_empty(s));
1155
1156 do {
1157 /*
1158 * Stop processing the queues if zfpm_g->obuf is full
1159 * or we do not have more updates to process
1160 */
1161 if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
1162 break;
1163 if (zfpm_build_route_updates() == FPM_WRITE_STOP)
1164 break;
1165 } while (zfpm_updates_pending());
1166 }
1167
1168 /*
1169 * zfpm_write_cb
1170 */
1171 static void zfpm_write_cb(struct thread *thread)
1172 {
1173 struct stream *s;
1174 int num_writes;
1175
1176 zfpm_g->stats.write_cb_calls++;
1177
1178 /*
1179 * Check if async connect is now done.
1180 */
1181 if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
1182 zfpm_connect_check();
1183 return;
1184 }
1185
1186 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
1187 assert(zfpm_g->sock >= 0);
1188
1189 num_writes = 0;
1190
1191 do {
1192 int bytes_to_write, bytes_written;
1193
1194 s = zfpm_g->obuf;
1195
1196 /*
1197 * If the stream is empty, try fill it up with data.
1198 */
1199 if (stream_empty(s)) {
1200 zfpm_build_updates();
1201 }
1202
1203 bytes_to_write = stream_get_endp(s) - stream_get_getp(s);
1204 if (!bytes_to_write)
1205 break;
1206
1207 bytes_written =
1208 write(zfpm_g->sock, stream_pnt(s), bytes_to_write);
1209 zfpm_g->stats.write_calls++;
1210 num_writes++;
1211
1212 if (bytes_written < 0) {
1213 if (ERRNO_IO_RETRY(errno))
1214 break;
1215
1216 zfpm_connection_down("failed to write to socket");
1217 return;
1218 }
1219
1220 if (bytes_written != bytes_to_write) {
1221
1222 /*
1223 * Partial write.
1224 */
1225 stream_forward_getp(s, bytes_written);
1226 zfpm_g->stats.partial_writes++;
1227 break;
1228 }
1229
1230 /*
1231 * We've written out the entire contents of the stream.
1232 */
1233 stream_reset(s);
1234
1235 if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) {
1236 zfpm_g->stats.max_writes_hit++;
1237 break;
1238 }
1239
1240 if (zfpm_thread_should_yield(thread)) {
1241 zfpm_g->stats.t_write_yields++;
1242 break;
1243 }
1244 } while (1);
1245
1246 if (zfpm_writes_pending())
1247 zfpm_write_on();
1248 }
1249
1250 /*
1251 * zfpm_connect_cb
1252 */
1253 static void zfpm_connect_cb(struct thread *t)
1254 {
1255 int sock, ret;
1256 struct sockaddr_in serv;
1257
1258 assert(zfpm_g->state == ZFPM_STATE_ACTIVE);
1259
1260 sock = socket(AF_INET, SOCK_STREAM, 0);
1261 if (sock < 0) {
1262 zlog_err("Failed to create socket for connect(): %s",
1263 strerror(errno));
1264 zfpm_g->stats.connect_no_sock++;
1265 return;
1266 }
1267
1268 set_nonblocking(sock);
1269
1270 /* Make server socket. */
1271 memset(&serv, 0, sizeof(serv));
1272 serv.sin_family = AF_INET;
1273 serv.sin_port = htons(zfpm_g->fpm_port);
1274 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
1275 serv.sin_len = sizeof(struct sockaddr_in);
1276 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
1277 if (!zfpm_g->fpm_server)
1278 serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1279 else
1280 serv.sin_addr.s_addr = (zfpm_g->fpm_server);
1281
1282 /*
1283 * Connect to the FPM.
1284 */
1285 zfpm_g->connect_calls++;
1286 zfpm_g->stats.connect_calls++;
1287 zfpm_g->last_connect_call_time = monotime(NULL);
1288
1289 ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv));
1290 if (ret >= 0) {
1291 zfpm_g->sock = sock;
1292 zfpm_connection_up("connect succeeded");
1293 return;
1294 }
1295
1296 if (errno == EINPROGRESS) {
1297 zfpm_g->sock = sock;
1298 zfpm_read_on();
1299 zfpm_write_on();
1300 zfpm_set_state(ZFPM_STATE_CONNECTING,
1301 "async connect in progress");
1302 return;
1303 }
1304
1305 zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno));
1306 close(sock);
1307
1308 /*
1309 * Restart timer for retrying connection.
1310 */
1311 zfpm_start_connect_timer("connect() failed");
1312 }
1313
1314 /*
1315 * zfpm_set_state
1316 *
1317 * Move state machine into the given state.
1318 */
1319 static void zfpm_set_state(enum zfpm_state state, const char *reason)
1320 {
1321 enum zfpm_state cur_state = zfpm_g->state;
1322
1323 if (!reason)
1324 reason = "Unknown";
1325
1326 if (state == cur_state)
1327 return;
1328
1329 zfpm_debug("beginning state transition %s -> %s. Reason: %s",
1330 zfpm_state_to_str(cur_state), zfpm_state_to_str(state),
1331 reason);
1332
1333 switch (state) {
1334
1335 case ZFPM_STATE_IDLE:
1336 assert(cur_state == ZFPM_STATE_ESTABLISHED);
1337 break;
1338
1339 case ZFPM_STATE_ACTIVE:
1340 assert(cur_state == ZFPM_STATE_IDLE
1341 || cur_state == ZFPM_STATE_CONNECTING);
1342 assert(zfpm_g->t_connect);
1343 break;
1344
1345 case ZFPM_STATE_CONNECTING:
1346 assert(zfpm_g->sock);
1347 assert(cur_state == ZFPM_STATE_ACTIVE);
1348 assert(zfpm_g->t_read);
1349 assert(zfpm_g->t_write);
1350 break;
1351
1352 case ZFPM_STATE_ESTABLISHED:
1353 assert(cur_state == ZFPM_STATE_ACTIVE
1354 || cur_state == ZFPM_STATE_CONNECTING);
1355 assert(zfpm_g->sock);
1356 assert(zfpm_g->t_read);
1357 assert(zfpm_g->t_write);
1358 break;
1359 }
1360
1361 zfpm_g->state = state;
1362 }
1363
1364 /*
1365 * zfpm_calc_connect_delay
1366 *
1367 * Returns the number of seconds after which we should attempt to
1368 * reconnect to the FPM.
1369 */
1370 static long zfpm_calc_connect_delay(void)
1371 {
1372 time_t elapsed;
1373
1374 /*
1375 * Return 0 if this is our first attempt to connect.
1376 */
1377 if (zfpm_g->connect_calls == 0) {
1378 return 0;
1379 }
1380
1381 elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time);
1382
1383 if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
1384 return 0;
1385 }
1386
1387 return ZFPM_CONNECT_RETRY_IVL - elapsed;
1388 }
1389
1390 /*
1391 * zfpm_start_connect_timer
1392 */
1393 static void zfpm_start_connect_timer(const char *reason)
1394 {
1395 long delay_secs;
1396
1397 assert(!zfpm_g->t_connect);
1398 assert(zfpm_g->sock < 0);
1399
1400 assert(zfpm_g->state == ZFPM_STATE_IDLE
1401 || zfpm_g->state == ZFPM_STATE_ACTIVE
1402 || zfpm_g->state == ZFPM_STATE_CONNECTING);
1403
1404 delay_secs = zfpm_calc_connect_delay();
1405 zfpm_debug("scheduling connect in %ld seconds", delay_secs);
1406
1407 thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs,
1408 &zfpm_g->t_connect);
1409 zfpm_set_state(ZFPM_STATE_ACTIVE, reason);
1410 }
1411
1412 /*
1413 * zfpm_is_enabled
1414 *
1415 * Returns true if the zebra FPM module has been enabled.
1416 */
1417 static inline int zfpm_is_enabled(void)
1418 {
1419 return zfpm_g->enabled;
1420 }
1421
1422 /*
1423 * zfpm_conn_is_up
1424 *
1425 * Returns true if the connection to the FPM is up.
1426 */
1427 static inline int zfpm_conn_is_up(void)
1428 {
1429 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
1430 return 0;
1431
1432 assert(zfpm_g->sock >= 0);
1433
1434 return 1;
1435 }
1436
1437 /*
1438 * zfpm_trigger_update
1439 *
1440 * The zebra code invokes this function to indicate that we should
1441 * send an update to the FPM about the given route_node.
1442 */
1443 static int zfpm_trigger_update(struct route_node *rn, const char *reason)
1444 {
1445 rib_dest_t *dest;
1446
1447 /*
1448 * Ignore if the connection is down. We will update the FPM about
1449 * all destinations once the connection comes up.
1450 */
1451 if (!zfpm_conn_is_up())
1452 return 0;
1453
1454 dest = rib_dest_from_rnode(rn);
1455
1456 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
1457 zfpm_g->stats.redundant_triggers++;
1458 return 0;
1459 }
1460
1461 if (reason) {
1462 zfpm_debug("%pFX triggering update to FPM - Reason: %s", &rn->p,
1463 reason);
1464 }
1465
1466 SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1467 TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries);
1468 zfpm_g->stats.updates_triggered++;
1469
1470 /*
1471 * Make sure that writes are enabled.
1472 */
1473 if (zfpm_g->t_write)
1474 return 0;
1475
1476 zfpm_write_on();
1477 return 0;
1478 }
1479
1480 /*
1481 * zfpm_trigger_remove
1482 *
1483 * The zebra code invokes this function to indicate that we should
1484 * send an remove to the FPM about the given route_node.
1485 */
1486
1487 static int zfpm_trigger_remove(struct route_node *rn)
1488 {
1489 rib_dest_t *dest;
1490
1491 if (!zfpm_conn_is_up())
1492 return 0;
1493
1494 dest = rib_dest_from_rnode(rn);
1495 if (!CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM))
1496 return 0;
1497
1498 zfpm_debug("%pRN Removing from update queue shutting down", rn);
1499
1500 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1501 TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);
1502
1503 return 0;
1504 }
1505
1506 /*
1507 * Generate Key for FPM MAC info hash entry
1508 */
1509 static unsigned int zfpm_mac_info_hash_keymake(const void *p)
1510 {
1511 struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p;
1512 uint32_t mac_key;
1513
1514 mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a);
1515
1516 return jhash_2words(mac_key, fpm_mac->vni, 0);
1517 }
1518
1519 /*
1520 * Compare function for FPM MAC info hash lookup
1521 */
1522 static bool zfpm_mac_info_cmp(const void *p1, const void *p2)
1523 {
1524 const struct fpm_mac_info_t *fpm_mac1 = p1;
1525 const struct fpm_mac_info_t *fpm_mac2 = p2;
1526
1527 if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN)
1528 != 0)
1529 return false;
1530 if (fpm_mac1->vni != fpm_mac2->vni)
1531 return false;
1532
1533 return true;
1534 }
1535
1536 /*
1537 * Lookup FPM MAC info hash entry.
1538 */
1539 static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key)
1540 {
1541 return hash_lookup(zfpm_g->fpm_mac_info_table, key);
1542 }
1543
1544 /*
1545 * Callback to allocate fpm_mac_info_t structure.
1546 */
1547 static void *zfpm_mac_info_alloc(void *p)
1548 {
1549 const struct fpm_mac_info_t *key = p;
1550 struct fpm_mac_info_t *fpm_mac;
1551
1552 fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t));
1553
1554 memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN);
1555 fpm_mac->vni = key->vni;
1556
1557 return (void *)fpm_mac;
1558 }
1559
1560 /*
1561 * Delink and free fpm_mac_info_t.
1562 */
1563 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac)
1564 {
1565 hash_release(zfpm_g->fpm_mac_info_table, fpm_mac);
1566 TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1567 XFREE(MTYPE_FPM_MAC_INFO, fpm_mac);
1568 }
1569
1570 /*
1571 * zfpm_trigger_rmac_update
1572 *
1573 * Zebra code invokes this function to indicate that we should
1574 * send an update to FPM for given MAC entry.
1575 *
1576 * This function checks if we already have enqueued an update for this RMAC,
1577 * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update.
1578 */
1579 static int zfpm_trigger_rmac_update(struct zebra_mac *rmac,
1580 struct zebra_l3vni *zl3vni, bool delete,
1581 const char *reason)
1582 {
1583 struct fpm_mac_info_t *fpm_mac, key;
1584 struct interface *vxlan_if, *svi_if;
1585 bool mac_found = false;
1586
1587 /*
1588 * Ignore if the connection is down. We will update the FPM about
1589 * all destinations once the connection comes up.
1590 */
1591 if (!zfpm_conn_is_up())
1592 return 0;
1593
1594 if (reason) {
1595 zfpm_debug("triggering update to FPM - Reason: %s - %pEA",
1596 reason, &rmac->macaddr);
1597 }
1598
1599 vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);
1600 svi_if = zl3vni_map_to_svi_if(zl3vni);
1601
1602 memset(&key, 0, sizeof(key));
1603
1604 memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN);
1605 key.vni = zl3vni->vni;
1606
1607 /* Check if this MAC is already present in the queue. */
1608 fpm_mac = zfpm_mac_info_lookup(&key);
1609
1610 if (fpm_mac) {
1611 mac_found = true;
1612
1613 /*
1614 * If the enqueued op is "add" and current op is "delete",
1615 * this is a noop. So, Unset ZEBRA_MAC_UPDATE_FPM flag.
1616 * While processing FPM queue, we will silently delete this
1617 * MAC entry without sending any update for this MAC.
1618 */
1619 if (!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) &&
1620 delete == 1) {
1621 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1622 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1623 return 0;
1624 }
1625 } else
1626 fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key,
1627 zfpm_mac_info_alloc);
1628
1629 fpm_mac->r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr;
1630 fpm_mac->zebra_flags = rmac->flags;
1631 fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
1632 fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;
1633
1634 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1635 if (delete)
1636 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1637 else
1638 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1639
1640 if (!mac_found)
1641 TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1642
1643 zfpm_g->stats.updates_triggered++;
1644
1645 /* If writes are already enabled, return. */
1646 if (zfpm_g->t_write)
1647 return 0;
1648
1649 zfpm_write_on();
1650 return 0;
1651 }
1652
1653 /*
1654 * This function is called when the FPM connections is established.
1655 * Iterate over all the RMAC entries for the given L3VNI
1656 * and enqueue the RMAC for FPM processing.
1657 */
1658 static void zfpm_trigger_rmac_update_wrapper(struct hash_bucket *bucket,
1659 void *args)
1660 {
1661 struct zebra_mac *zrmac = (struct zebra_mac *)bucket->data;
1662 struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)args;
1663
1664 zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added");
1665 }
1666
1667 /*
1668 * This function is called when the FPM connections is established.
1669 * This function iterates over all the L3VNIs to trigger
1670 * FPM updates for RMACs currently available.
1671 */
1672 static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args)
1673 {
1674 struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)bucket->data;
1675
1676 hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper,
1677 (void *)zl3vni);
1678 }
1679
1680 /*
1681 * struct zfpm_statsimer_cb
1682 */
1683 static void zfpm_stats_timer_cb(struct thread *t)
1684 {
1685 zfpm_g->t_stats = NULL;
1686
1687 /*
1688 * Remember the stats collected in the last interval for display
1689 * purposes.
1690 */
1691 zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats);
1692
1693 /*
1694 * Add the current set of stats into the cumulative statistics.
1695 */
1696 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1697 &zfpm_g->cumulative_stats);
1698
1699 /*
1700 * Start collecting stats afresh over the next interval.
1701 */
1702 zfpm_stats_reset(&zfpm_g->stats);
1703
1704 zfpm_start_stats_timer();
1705 }
1706
1707 /*
1708 * zfpm_stop_stats_timer
1709 */
1710 static void zfpm_stop_stats_timer(void)
1711 {
1712 if (!zfpm_g->t_stats)
1713 return;
1714
1715 zfpm_debug("Stopping existing stats timer");
1716 THREAD_OFF(zfpm_g->t_stats);
1717 }
1718
1719 /*
1720 * zfpm_start_stats_timer
1721 */
1722 void zfpm_start_stats_timer(void)
1723 {
1724 assert(!zfpm_g->t_stats);
1725
1726 thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0,
1727 ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats);
1728 }
1729
1730 /*
1731 * Helper macro for zfpm_show_stats() below.
1732 */
1733 #define ZFPM_SHOW_STAT(counter) \
1734 do { \
1735 vty_out(vty, "%-40s %10lu %16lu\n", #counter, \
1736 total_stats.counter, zfpm_g->last_ivl_stats.counter); \
1737 } while (0)
1738
1739 /*
1740 * zfpm_show_stats
1741 */
1742 static void zfpm_show_stats(struct vty *vty)
1743 {
1744 struct zfpm_stats total_stats;
1745 time_t elapsed;
1746
1747 vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total",
1748 ZFPM_STATS_IVL_SECS);
1749
1750 /*
1751 * Compute the total stats up to this instant.
1752 */
1753 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1754 &total_stats);
1755
1756 ZFPM_SHOW_STAT(connect_calls);
1757 ZFPM_SHOW_STAT(connect_no_sock);
1758 ZFPM_SHOW_STAT(read_cb_calls);
1759 ZFPM_SHOW_STAT(write_cb_calls);
1760 ZFPM_SHOW_STAT(write_calls);
1761 ZFPM_SHOW_STAT(partial_writes);
1762 ZFPM_SHOW_STAT(max_writes_hit);
1763 ZFPM_SHOW_STAT(t_write_yields);
1764 ZFPM_SHOW_STAT(nop_deletes_skipped);
1765 ZFPM_SHOW_STAT(route_adds);
1766 ZFPM_SHOW_STAT(route_dels);
1767 ZFPM_SHOW_STAT(updates_triggered);
1768 ZFPM_SHOW_STAT(redundant_triggers);
1769 ZFPM_SHOW_STAT(dests_del_after_update);
1770 ZFPM_SHOW_STAT(t_conn_down_starts);
1771 ZFPM_SHOW_STAT(t_conn_down_dests_processed);
1772 ZFPM_SHOW_STAT(t_conn_down_yields);
1773 ZFPM_SHOW_STAT(t_conn_down_finishes);
1774 ZFPM_SHOW_STAT(t_conn_up_starts);
1775 ZFPM_SHOW_STAT(t_conn_up_dests_processed);
1776 ZFPM_SHOW_STAT(t_conn_up_yields);
1777 ZFPM_SHOW_STAT(t_conn_up_aborts);
1778 ZFPM_SHOW_STAT(t_conn_up_finishes);
1779
1780 if (!zfpm_g->last_stats_clear_time)
1781 return;
1782
1783 elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time);
1784
1785 vty_out(vty, "\nStats were cleared %lu seconds ago\n",
1786 (unsigned long)elapsed);
1787 }
1788
1789 /*
1790 * zfpm_clear_stats
1791 */
1792 static void zfpm_clear_stats(struct vty *vty)
1793 {
1794 if (!zfpm_is_enabled()) {
1795 vty_out(vty, "The FPM module is not enabled...\n");
1796 return;
1797 }
1798
1799 zfpm_stats_reset(&zfpm_g->stats);
1800 zfpm_stats_reset(&zfpm_g->last_ivl_stats);
1801 zfpm_stats_reset(&zfpm_g->cumulative_stats);
1802
1803 zfpm_stop_stats_timer();
1804 zfpm_start_stats_timer();
1805
1806 zfpm_g->last_stats_clear_time = monotime(NULL);
1807
1808 vty_out(vty, "Cleared FPM stats\n");
1809 }
1810
1811 /*
1812 * show_zebra_fpm_stats
1813 */
1814 DEFUN (show_zebra_fpm_stats,
1815 show_zebra_fpm_stats_cmd,
1816 "show zebra fpm stats",
1817 SHOW_STR
1818 ZEBRA_STR
1819 "Forwarding Path Manager information\n"
1820 "Statistics\n")
1821 {
1822 zfpm_show_stats(vty);
1823 return CMD_SUCCESS;
1824 }
1825
1826 /*
1827 * clear_zebra_fpm_stats
1828 */
1829 DEFUN (clear_zebra_fpm_stats,
1830 clear_zebra_fpm_stats_cmd,
1831 "clear zebra fpm stats",
1832 CLEAR_STR
1833 ZEBRA_STR
1834 "Clear Forwarding Path Manager information\n"
1835 "Statistics\n")
1836 {
1837 zfpm_clear_stats(vty);
1838 return CMD_SUCCESS;
1839 }
1840
1841 /*
1842 * update fpm connection information
1843 */
1844 DEFUN (fpm_remote_ip,
1845 fpm_remote_ip_cmd,
1846 "fpm connection ip A.B.C.D port (1-65535)",
1847 "Forwarding Path Manager\n"
1848 "Configure FPM connection\n"
1849 "Connect to IPv4 address\n"
1850 "Connect to IPv4 address\n"
1851 "TCP port number\n"
1852 "TCP port number\n")
1853 {
1854
1855 in_addr_t fpm_server;
1856 uint32_t port_no;
1857
1858 fpm_server = inet_addr(argv[3]->arg);
1859 if (fpm_server == INADDR_NONE)
1860 return CMD_ERR_INCOMPLETE;
1861
1862 port_no = atoi(argv[5]->arg);
1863 if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT)
1864 return CMD_ERR_INCOMPLETE;
1865
1866 zfpm_g->fpm_server = fpm_server;
1867 zfpm_g->fpm_port = port_no;
1868
1869
1870 return CMD_SUCCESS;
1871 }
1872
1873 DEFUN (no_fpm_remote_ip,
1874 no_fpm_remote_ip_cmd,
1875 "no fpm connection ip A.B.C.D port (1-65535)",
1876 NO_STR
1877 "Forwarding Path Manager\n"
1878 "Remove configured FPM connection\n"
1879 "Connect to IPv4 address\n"
1880 "Connect to IPv4 address\n"
1881 "TCP port number\n"
1882 "TCP port number\n")
1883 {
1884 if (zfpm_g->fpm_server != inet_addr(argv[4]->arg)
1885 || zfpm_g->fpm_port != atoi(argv[6]->arg))
1886 return CMD_ERR_NO_MATCH;
1887
1888 zfpm_g->fpm_server = FPM_DEFAULT_IP;
1889 zfpm_g->fpm_port = FPM_DEFAULT_PORT;
1890
1891 return CMD_SUCCESS;
1892 }
1893
1894 /*
1895 * zfpm_init_message_format
1896 */
1897 static inline void zfpm_init_message_format(const char *format)
1898 {
1899 int have_netlink, have_protobuf;
1900
1901 #ifdef HAVE_NETLINK
1902 have_netlink = 1;
1903 #else
1904 have_netlink = 0;
1905 #endif
1906
1907 #ifdef HAVE_PROTOBUF
1908 have_protobuf = 1;
1909 #else
1910 have_protobuf = 0;
1911 #endif
1912
1913 zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE;
1914
1915 if (!format) {
1916 if (have_netlink) {
1917 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1918 } else if (have_protobuf) {
1919 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1920 }
1921 return;
1922 }
1923
1924 if (!strcmp("netlink", format)) {
1925 if (!have_netlink) {
1926 flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE,
1927 "FPM netlink message format is not available");
1928 return;
1929 }
1930 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1931 return;
1932 }
1933
1934 if (!strcmp("protobuf", format)) {
1935 if (!have_protobuf) {
1936 flog_err(
1937 EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1938 "FPM protobuf message format is not available");
1939 return;
1940 }
1941 flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1942 "FPM protobuf message format is deprecated and scheduled to be removed. Please convert to using netlink format or contact dev@lists.frrouting.org with your use case.");
1943 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1944 return;
1945 }
1946
1947 flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'",
1948 format);
1949 }
1950
1951 /**
1952 * fpm_remote_srv_write
1953 *
1954 * Module to write remote fpm connection
1955 *
1956 * Returns ZERO on success.
1957 */
1958
1959 static int fpm_remote_srv_write(struct vty *vty)
1960 {
1961 struct in_addr in;
1962
1963 in.s_addr = zfpm_g->fpm_server;
1964
1965 if ((zfpm_g->fpm_server != FPM_DEFAULT_IP
1966 && zfpm_g->fpm_server != INADDR_ANY)
1967 || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0))
1968 vty_out(vty, "fpm connection ip %pI4 port %d\n", &in,
1969 zfpm_g->fpm_port);
1970
1971 return 0;
1972 }
1973
1974
1975 static int fpm_remote_srv_write(struct vty *vty);
1976 /* Zebra node */
1977 static struct cmd_node zebra_node = {
1978 .name = "zebra",
1979 .node = ZEBRA_NODE,
1980 .parent_node = CONFIG_NODE,
1981 .prompt = "",
1982 .config_write = fpm_remote_srv_write,
1983 };
1984
1985
1986 /**
1987 * zfpm_init
1988 *
1989 * One-time initialization of the Zebra FPM module.
1990 *
1991 * @param[in] port port at which FPM is running.
1992 * @param[in] enable true if the zebra FPM module should be enabled
1993 * @param[in] format to use to talk to the FPM. Can be 'netink' or 'protobuf'.
1994 *
1995 * Returns true on success.
1996 */
1997 static int zfpm_init(struct thread_master *master)
1998 {
1999 int enable = 1;
2000 uint16_t port = 0;
2001 const char *format = THIS_MODULE->load_args;
2002
2003 memset(zfpm_g, 0, sizeof(*zfpm_g));
2004 zfpm_g->master = master;
2005 TAILQ_INIT(&zfpm_g->dest_q);
2006 TAILQ_INIT(&zfpm_g->mac_q);
2007
2008 /* Create hash table for fpm_mac_info_t enties */
2009 zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake,
2010 zfpm_mac_info_cmp,
2011 "FPM MAC info hash table");
2012
2013 zfpm_g->sock = -1;
2014 zfpm_g->state = ZFPM_STATE_IDLE;
2015
2016 zfpm_stats_init(&zfpm_g->stats);
2017 zfpm_stats_init(&zfpm_g->last_ivl_stats);
2018 zfpm_stats_init(&zfpm_g->cumulative_stats);
2019
2020 memset(&ipv4ll_gateway, 0, sizeof(ipv4ll_gateway));
2021 if (inet_pton(AF_INET, ipv4_ll_buf, &ipv4ll_gateway.ipv4) != 1)
2022 zlog_warn("inet_pton failed for %s", ipv4_ll_buf);
2023
2024 install_node(&zebra_node);
2025 install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd);
2026 install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
2027 install_element(CONFIG_NODE, &fpm_remote_ip_cmd);
2028 install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd);
2029
2030 zfpm_init_message_format(format);
2031
2032 /*
2033 * Disable FPM interface if no suitable format is available.
2034 */
2035 if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE)
2036 enable = 0;
2037
2038 zfpm_g->enabled = enable;
2039
2040 if (!zfpm_g->fpm_server)
2041 zfpm_g->fpm_server = FPM_DEFAULT_IP;
2042
2043 if (!port)
2044 port = FPM_DEFAULT_PORT;
2045
2046 zfpm_g->fpm_port = port;
2047
2048 zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE);
2049 zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE);
2050
2051 zfpm_start_stats_timer();
2052 zfpm_start_connect_timer("initialized");
2053 return 0;
2054 }
2055
2056 static int zfpm_fini(void)
2057 {
2058 zfpm_write_off();
2059 zfpm_read_off();
2060 zfpm_connect_off();
2061
2062 zfpm_stop_stats_timer();
2063
2064 hook_unregister(rib_update, zfpm_trigger_update);
2065 return 0;
2066 }
2067
2068 static int zebra_fpm_module_init(void)
2069 {
2070 hook_register(rib_update, zfpm_trigger_update);
2071 hook_register(rib_shutdown, zfpm_trigger_remove);
2072 hook_register(zebra_rmac_update, zfpm_trigger_rmac_update);
2073 hook_register(frr_late_init, zfpm_init);
2074 hook_register(frr_early_fini, zfpm_fini);
2075 return 0;
2076 }
2077
2078 FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION,
2079 .description = "zebra FPM (Forwarding Plane Manager) module",
2080 .init = zebra_fpm_module_init,
2081 );