]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm.c
doc: Add `show ipv6 rpf X:X::X:X` command to docs
[mirror_frr.git] / zebra / zebra_fpm.c
1 /*
2 * Main implementation file for interface to Forwarding Plane Manager.
3 *
4 * Copyright (C) 2012 by Open Source Routing.
5 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
6 *
7 * This file is part of GNU Zebra.
8 *
9 * GNU Zebra is free software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the
11 * Free Software Foundation; either version 2, or (at your option) any
12 * later version.
13 *
14 * GNU Zebra is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; see the file COPYING; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include <zebra.h>
25
26 #include "log.h"
27 #include "libfrr.h"
28 #include "stream.h"
29 #include "thread.h"
30 #include "network.h"
31 #include "command.h"
32 #include "lib/version.h"
33 #include "jhash.h"
34
35 #include "zebra/rib.h"
36 #include "zebra/zserv.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/zebra_errors.h"
40
41 #include "fpm/fpm.h"
42 #include "zebra_fpm_private.h"
43 #include "zebra/zebra_router.h"
44 #include "zebra_vxlan_private.h"
45
/*
 * Memory type tag for this file.
 * NOTE(review): presumably used for fpm_mac_info_t allocations; the
 * allocation sites are not in this part of the file -- confirm.
 */
DEFINE_MTYPE_STATIC(ZEBRA, FPM_MAC_INFO, "FPM_MAC_INFO");

/*
 * Interval, in seconds, at which we attempt to connect to the FPM.
 */
#define ZFPM_CONNECT_RETRY_IVL 5

/*
 * Sizes of outgoing and incoming stream buffers for writing/reading
 * FPM messages.
 */
#define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
#define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)

/*
 * The maximum number of times the FPM socket write callback can call
 * 'write' before it yields.
 */
#define ZFPM_MAX_WRITES_PER_RUN 10

/*
 * Interval, in seconds, over which we collect statistics.
 */
#define ZFPM_STATS_IVL_SECS 10

/*
 * Free space, in bytes, that must be available in the outbound buffer
 * before zfpm_build_mac_updates() encodes another MAC message.
 */
#define FPM_MAX_MAC_MSG_LEN 512

/*
 * Hash-walk callback, defined later in the file, that is run over the
 * L3VNI table when the connection to the FPM comes up.
 */
static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args);
73
/*
 * Structure that holds state for iterating over all route_node
 * structures that are candidates for being communicated to the FPM.
 *
 * The walk is two-level: 'tables_iter' steps from one RIB table to the
 * next, and 'iter' steps through the route nodes of the table that is
 * currently being visited (see zfpm_rnodes_iter_next()).
 */
struct zfpm_rnodes_iter {
	/* Position in the sequence of RIB tables. */
	rib_tables_iter_t tables_iter;
	/* Position within the current table; can be paused and resumed. */
	route_table_iter_t iter;
};
82
/*
 * Statistics.
 *
 * Every member must be an 'unsigned long' counter:
 * zfpm_stats_compose() adds two of these structures by treating them
 * as flat arrays of unsigned long.
 */
struct zfpm_stats {
	/* Connection attempts: connect() calls and socket() failures. */
	unsigned long connect_calls;
	unsigned long connect_no_sock;

	/* Invocations of the socket read callback. */
	unsigned long read_cb_calls;

	/*
	 * Write path: callback invocations, write() calls, short writes,
	 * runs that hit ZFPM_MAX_WRITES_PER_RUN, and runs cut short
	 * because the thread had to yield.
	 */
	unsigned long write_cb_calls;
	unsigned long write_calls;
	unsigned long partial_writes;
	unsigned long max_writes_hit;
	unsigned long t_write_yields;

	/* Updates skipped as no-ops, and route add/delete messages built. */
	unsigned long nop_deletes_skipped;
	unsigned long route_adds;
	unsigned long route_dels;

	/* Maintained by the update-trigger code later in the file. */
	unsigned long updates_triggered;
	unsigned long redundant_triggers;

	/* Dests garbage-collected right after their update was processed. */
	unsigned long dests_del_after_update;

	/* Progress of the connection-down cleanup thread. */
	unsigned long t_conn_down_starts;
	unsigned long t_conn_down_dests_processed;
	unsigned long t_conn_down_yields;
	unsigned long t_conn_down_finishes;

	/* Progress of the connection-up (full dump) thread. */
	unsigned long t_conn_up_starts;
	unsigned long t_conn_up_dests_processed;
	unsigned long t_conn_up_yields;
	unsigned long t_conn_up_aborts;
	unsigned long t_conn_up_finishes;
};
118
/*
 * States for the FPM state machine.
 *
 * Transitions are made through zfpm_set_state(), which asserts that
 * the move from the current state to the requested one is legal.
 */
enum zfpm_state {

	/*
	 * In this state we are not yet ready to connect to the FPM. This
	 * can happen when this module is disabled, or if we're cleaning up
	 * after a connection has gone down.
	 */
	ZFPM_STATE_IDLE,

	/*
	 * Ready to talk to the FPM and periodically trying to connect to
	 * it.
	 */
	ZFPM_STATE_ACTIVE,

	/*
	 * In the middle of bringing up a TCP connection. Specifically,
	 * waiting for a connect() call to complete asynchronously.
	 */
	ZFPM_STATE_CONNECTING,

	/*
	 * TCP connection to the FPM is up.
	 */
	ZFPM_STATE_ESTABLISHED

};
149
/*
 * Message format to be used to communicate with the FPM.
 */
enum zfpm_msg_format {
	/* No encoder selected; the encode helpers produce no data. */
	ZFPM_MSG_FORMAT_NONE,
	/* Netlink encoding; only compiled in when HAVE_NETLINK is defined. */
	ZFPM_MSG_FORMAT_NETLINK,
	/*
	 * Protobuf encoding of routes; only compiled in when HAVE_PROTOBUF
	 * is defined. zfpm_encode_mac() produces no data for this format.
	 */
	ZFPM_MSG_FORMAT_PROTOBUF,
};
158
/*
 * Globals.
 *
 * All state of the FPM interface is kept in one instance of this
 * structure, reached through the zfpm_g pointer.
 */
struct zfpm_glob {

	/*
	 * True if the FPM module has been enabled.
	 */
	int enabled;

	/*
	 * Message format to be used to communicate with the fpm.
	 */
	enum zfpm_msg_format message_format;

	/*
	 * Event loop on which all threads (I/O callbacks and timers) of
	 * this module are scheduled.
	 */
	struct thread_master *master;

	/*
	 * Current state of the connection state machine, updated by
	 * zfpm_set_state().
	 */
	enum zfpm_state state;

	/*
	 * IPv4 address of the FPM. It is copied unchanged into
	 * sin_addr.s_addr when connecting; zero means "use the loopback
	 * address".
	 */
	in_addr_t fpm_server;
	/*
	 * Port on which the FPM is running.
	 */
	int fpm_port;

	/*
	 * List of rib_dest_t structures to be processed
	 */
	TAILQ_HEAD(zfpm_dest_q, rib_dest_t_) dest_q;

	/*
	 * List of fpm_mac_info structures to be processed
	 */
	TAILQ_HEAD(zfpm_mac_q, fpm_mac_info_t) mac_q;

	/*
	 * Hash table of fpm_mac_info_t entries
	 *
	 * While adding an fpm_mac_info_t for a MAC to the mac_q,
	 * it is possible that another fpm_mac_info_t node for the same MAC
	 * is already present in the queue.
	 * This is possible in the case of consecutive add->delete operations.
	 * To avoid such duplicate insertions in the mac_q,
	 * define a hash table for fpm_mac_info_t which can be looked up
	 * to see if an fpm_mac_info_t node for a MAC is already present
	 * in the mac_q.
	 */
	struct hash *fpm_mac_info_table;

	/*
	 * Stream socket to the FPM. Negative when there is no socket.
	 */
	int sock;

	/*
	 * Buffers for messages to/from the FPM.
	 */
	struct stream *obuf;
	struct stream *ibuf;

	/*
	 * Threads for I/O.
	 */
	struct thread *t_connect;
	struct thread *t_write;
	struct thread *t_read;

	/*
	 * Thread to clean up after the TCP connection to the FPM goes down
	 * and the state that belongs to it.
	 */
	struct thread *t_conn_down;

	struct {
		struct zfpm_rnodes_iter iter;
	} t_conn_down_state;

	/*
	 * Thread to take actions once the TCP conn to the FPM comes up, and
	 * the state that belongs to it.
	 */
	struct thread *t_conn_up;

	struct {
		struct zfpm_rnodes_iter iter;
	} t_conn_up_state;

	/*
	 * Number of connect() calls made, and the monotonic time of the
	 * most recent one. Both feed zfpm_calc_connect_delay().
	 */
	unsigned long connect_calls;
	time_t last_connect_call_time;

	/*
	 * Stats from the start of the current statistics interval up to
	 * now. These are the counters we typically update in the code.
	 */
	struct zfpm_stats stats;

	/*
	 * Statistics that were gathered in the last collection interval.
	 */
	struct zfpm_stats last_ivl_stats;

	/*
	 * Cumulative stats from the last clear to the start of the current
	 * statistics interval.
	 */
	struct zfpm_stats cumulative_stats;

	/*
	 * Stats interval timer.
	 */
	struct thread *t_stats;

	/*
	 * If non-zero, the last time when statistics were cleared.
	 */
	time_t last_stats_clear_time;

	/*
	 * Flag to track the MAC dump status to FPM. Cleared when a
	 * connection comes up and set once the RMAC entries have been
	 * enqueued, so the dump is not repeated when the conn_up thread
	 * resumes after a yield.
	 */
	bool fpm_mac_dump_done;
};
281
/*
 * Storage for the module state, and the pointer through which the rest
 * of the file accesses it.
 */
static struct zfpm_glob zfpm_glob_space;
static struct zfpm_glob *zfpm_g = &zfpm_glob_space;

/*
 * Forward declarations for functions that are used before they are
 * defined.
 */
static int zfpm_trigger_update(struct route_node *rn, const char *reason);

static void zfpm_read_cb(struct thread *thread);
static void zfpm_write_cb(struct thread *thread);

static void zfpm_set_state(enum zfpm_state state, const char *reason);
static void zfpm_start_connect_timer(const char *reason);
static void zfpm_start_stats_timer(void);
static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac);

/*
 * An IPv4 link-local address in text form, and a (non-static) gateway
 * address object.
 * NOTE(review): neither is referenced in this part of the file;
 * presumably ipv4ll_gateway is initialised from ipv4_ll_buf elsewhere
 * -- confirm.
 */
static const char ipv4_ll_buf[16] = "169.254.0.1";
union g_addr ipv4ll_gateway;
297
/*
 * zfpm_thread_should_yield
 *
 * Returns the result of thread_should_yield() for the given thread.
 * The long-running callbacks in this file use it to decide when to
 * pause and reschedule themselves.
 */
static inline int zfpm_thread_should_yield(struct thread *t)
{
	return thread_should_yield(t);
}
305
306 /*
307 * zfpm_state_to_str
308 */
309 static const char *zfpm_state_to_str(enum zfpm_state state)
310 {
311 switch (state) {
312
313 case ZFPM_STATE_IDLE:
314 return "idle";
315
316 case ZFPM_STATE_ACTIVE:
317 return "active";
318
319 case ZFPM_STATE_CONNECTING:
320 return "connecting";
321
322 case ZFPM_STATE_ESTABLISHED:
323 return "established";
324
325 default:
326 return "unknown";
327 }
328 }
329
/*
 * zfpm_get_elapsed_time
 *
 * Returns the number of seconds that have passed between 'reference'
 * and the current monotonic time.
 *
 * A reference that lies in the future trips an assertion; 0 is
 * returned in that case if assertions do not abort.
 */
static time_t zfpm_get_elapsed_time(time_t reference)
{
	const time_t current = monotime(NULL);

	if (current >= reference)
		return current - reference;

	/* The reference is ahead of the clock. */
	assert(0);
	return 0;
}
348
349 /*
350 * zfpm_rnodes_iter_init
351 */
352 static inline void zfpm_rnodes_iter_init(struct zfpm_rnodes_iter *iter)
353 {
354 memset(iter, 0, sizeof(*iter));
355 rib_tables_iter_init(&iter->tables_iter);
356
357 /*
358 * This is a hack, but it makes implementing 'next' easier by
359 * ensuring that route_table_iter_next() will return NULL the first
360 * time we call it.
361 */
362 route_table_iter_init(&iter->iter, NULL);
363 route_table_iter_cleanup(&iter->iter);
364 }
365
366 /*
367 * zfpm_rnodes_iter_next
368 */
369 static inline struct route_node *
370 zfpm_rnodes_iter_next(struct zfpm_rnodes_iter *iter)
371 {
372 struct route_node *rn;
373 struct route_table *table;
374
375 while (1) {
376 rn = route_table_iter_next(&iter->iter);
377 if (rn)
378 return rn;
379
380 /*
381 * We've made our way through this table, go to the next one.
382 */
383 route_table_iter_cleanup(&iter->iter);
384
385 table = rib_tables_iter_next(&iter->tables_iter);
386
387 if (!table)
388 return NULL;
389
390 route_table_iter_init(&iter->iter, table);
391 }
392
393 return NULL;
394 }
395
/*
 * zfpm_rnodes_iter_pause
 *
 * Pause the walk of the current table. Called by the conn_up and
 * conn_down threads just before they yield, so that the walk can be
 * picked up again when they are rescheduled.
 */
static inline void zfpm_rnodes_iter_pause(struct zfpm_rnodes_iter *iter)
{
	route_table_iter_pause(&iter->iter);
}
403
/*
 * zfpm_rnodes_iter_cleanup
 *
 * Release both levels of the iterator: first the per-table node
 * iterator, then the iterator over the RIB tables.
 */
static inline void zfpm_rnodes_iter_cleanup(struct zfpm_rnodes_iter *iter)
{
	route_table_iter_cleanup(&iter->iter);
	rib_tables_iter_cleanup(&iter->tables_iter);
}
412
413 /*
414 * zfpm_stats_init
415 *
416 * Initialize a statistics block.
417 */
418 static inline void zfpm_stats_init(struct zfpm_stats *stats)
419 {
420 memset(stats, 0, sizeof(*stats));
421 }
422
/*
 * zfpm_stats_reset
 *
 * Clear a statistics block. Currently identical to zfpm_stats_init().
 */
static inline void zfpm_stats_reset(struct zfpm_stats *stats)
{
	zfpm_stats_init(stats);
}
430
431 /*
432 * zfpm_stats_copy
433 */
434 static inline void zfpm_stats_copy(const struct zfpm_stats *src,
435 struct zfpm_stats *dest)
436 {
437 memcpy(dest, src, sizeof(*dest));
438 }
439
440 /*
441 * zfpm_stats_compose
442 *
443 * Total up the statistics in two stats structures ('s1 and 's2') and
444 * return the result in the third argument, 'result'. Note that the
445 * pointer 'result' may be the same as 's1' or 's2'.
446 *
447 * For simplicity, the implementation below assumes that the stats
448 * structure is composed entirely of counters. This can easily be
449 * changed when necessary.
450 */
451 static void zfpm_stats_compose(const struct zfpm_stats *s1,
452 const struct zfpm_stats *s2,
453 struct zfpm_stats *result)
454 {
455 const unsigned long *p1, *p2;
456 unsigned long *result_p;
457 int i, num_counters;
458
459 p1 = (const unsigned long *)s1;
460 p2 = (const unsigned long *)s2;
461 result_p = (unsigned long *)result;
462
463 num_counters = (sizeof(struct zfpm_stats) / sizeof(unsigned long));
464
465 for (i = 0; i < num_counters; i++) {
466 result_p[i] = p1[i] + p2[i];
467 }
468 }
469
/*
 * zfpm_read_on
 *
 * Schedule zfpm_read_cb() to run when the FPM socket becomes readable.
 * The read thread must not already be scheduled and the socket must
 * be valid.
 */
static inline void zfpm_read_on(void)
{
	assert(!zfpm_g->t_read);
	assert(zfpm_g->sock >= 0);

	thread_add_read(zfpm_g->master, zfpm_read_cb, 0, zfpm_g->sock,
			&zfpm_g->t_read);
}
481
/*
 * zfpm_write_on
 *
 * Schedule zfpm_write_cb() to run when the FPM socket becomes
 * writable. The write thread must not already be scheduled and the
 * socket must be valid.
 */
static inline void zfpm_write_on(void)
{
	assert(!zfpm_g->t_write);
	assert(zfpm_g->sock >= 0);

	thread_add_write(zfpm_g->master, zfpm_write_cb, 0, zfpm_g->sock,
			 &zfpm_g->t_write);
}
493
/*
 * zfpm_read_off
 *
 * Turn off the read thread (zfpm_g->t_read) via THREAD_OFF.
 */
static inline void zfpm_read_off(void)
{
	THREAD_OFF(zfpm_g->t_read);
}
501
/*
 * zfpm_write_off
 *
 * Turn off the write thread (zfpm_g->t_write) via THREAD_OFF.
 */
static inline void zfpm_write_off(void)
{
	THREAD_OFF(zfpm_g->t_write);
}
509
/*
 * zfpm_connect_off
 *
 * Turn off the connect timer thread (zfpm_g->t_connect) via THREAD_OFF.
 */
static inline void zfpm_connect_off(void)
{
	THREAD_OFF(zfpm_g->t_connect);
}
514
515 /*
516 * zfpm_conn_up_thread_cb
517 *
518 * Callback for actions to be taken when the connection to the FPM
519 * comes up.
520 */
521 static void zfpm_conn_up_thread_cb(struct thread *thread)
522 {
523 struct route_node *rnode;
524 struct zfpm_rnodes_iter *iter;
525 rib_dest_t *dest;
526
527 iter = &zfpm_g->t_conn_up_state.iter;
528
529 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED) {
530 zfpm_debug(
531 "Connection not up anymore, conn_up thread aborting");
532 zfpm_g->stats.t_conn_up_aborts++;
533 goto done;
534 }
535
536 if (!zfpm_g->fpm_mac_dump_done) {
537 /* Enqueue FPM updates for all the RMAC entries */
538 hash_iterate(zrouter.l3vni_table, zfpm_iterate_rmac_table,
539 NULL);
540 /* mark dump done so that its not repeated after yield */
541 zfpm_g->fpm_mac_dump_done = true;
542 }
543
544 while ((rnode = zfpm_rnodes_iter_next(iter))) {
545 dest = rib_dest_from_rnode(rnode);
546
547 if (dest) {
548 zfpm_g->stats.t_conn_up_dests_processed++;
549 zfpm_trigger_update(rnode, NULL);
550 }
551
552 /*
553 * Yield if need be.
554 */
555 if (!zfpm_thread_should_yield(thread))
556 continue;
557
558 zfpm_g->stats.t_conn_up_yields++;
559 zfpm_rnodes_iter_pause(iter);
560 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb,
561 NULL, 0, &zfpm_g->t_conn_up);
562 return;
563 }
564
565 zfpm_g->stats.t_conn_up_finishes++;
566
567 done:
568 zfpm_rnodes_iter_cleanup(iter);
569 }
570
571 /*
572 * zfpm_connection_up
573 *
574 * Called when the connection to the FPM comes up.
575 */
576 static void zfpm_connection_up(const char *detail)
577 {
578 assert(zfpm_g->sock >= 0);
579 zfpm_read_on();
580 zfpm_write_on();
581 zfpm_set_state(ZFPM_STATE_ESTABLISHED, detail);
582
583 /*
584 * Start thread to push existing routes to the FPM.
585 */
586 THREAD_OFF(zfpm_g->t_conn_up);
587
588 zfpm_rnodes_iter_init(&zfpm_g->t_conn_up_state.iter);
589 zfpm_g->fpm_mac_dump_done = false;
590
591 zfpm_debug("Starting conn_up thread");
592
593 thread_add_timer_msec(zfpm_g->master, zfpm_conn_up_thread_cb, NULL, 0,
594 &zfpm_g->t_conn_up);
595 zfpm_g->stats.t_conn_up_starts++;
596 }
597
598 /*
599 * zfpm_connect_check
600 *
601 * Check if an asynchronous connect() to the FPM is complete.
602 */
603 static void zfpm_connect_check(void)
604 {
605 int status;
606 socklen_t slen;
607 int ret;
608
609 zfpm_read_off();
610 zfpm_write_off();
611
612 slen = sizeof(status);
613 ret = getsockopt(zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *)&status,
614 &slen);
615
616 if (ret >= 0 && status == 0) {
617 zfpm_connection_up("async connect complete");
618 return;
619 }
620
621 /*
622 * getsockopt() failed or indicated an error on the socket.
623 */
624 close(zfpm_g->sock);
625 zfpm_g->sock = -1;
626
627 zfpm_start_connect_timer("getsockopt() after async connect failed");
628 return;
629 }
630
631 /*
632 * zfpm_conn_down_thread_cb
633 *
634 * Callback that is invoked to clean up state after the TCP connection
635 * to the FPM goes down.
636 */
637 static void zfpm_conn_down_thread_cb(struct thread *thread)
638 {
639 struct route_node *rnode;
640 struct zfpm_rnodes_iter *iter;
641 rib_dest_t *dest;
642 struct fpm_mac_info_t *mac = NULL;
643
644 assert(zfpm_g->state == ZFPM_STATE_IDLE);
645
646 /*
647 * Delink and free all fpm_mac_info_t nodes
648 * in the mac_q and fpm_mac_info_hash
649 */
650 while ((mac = TAILQ_FIRST(&zfpm_g->mac_q)) != NULL)
651 zfpm_mac_info_del(mac);
652
653 zfpm_g->t_conn_down = NULL;
654
655 iter = &zfpm_g->t_conn_down_state.iter;
656
657 while ((rnode = zfpm_rnodes_iter_next(iter))) {
658 dest = rib_dest_from_rnode(rnode);
659
660 if (dest) {
661 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
662 TAILQ_REMOVE(&zfpm_g->dest_q, dest,
663 fpm_q_entries);
664 }
665
666 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
667 UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
668
669 zfpm_g->stats.t_conn_down_dests_processed++;
670
671 /*
672 * Check if the dest should be deleted.
673 */
674 rib_gc_dest(rnode);
675 }
676
677 /*
678 * Yield if need be.
679 */
680 if (!zfpm_thread_should_yield(thread))
681 continue;
682
683 zfpm_g->stats.t_conn_down_yields++;
684 zfpm_rnodes_iter_pause(iter);
685 zfpm_g->t_conn_down = NULL;
686 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb,
687 NULL, 0, &zfpm_g->t_conn_down);
688 return;
689 }
690
691 zfpm_g->stats.t_conn_down_finishes++;
692 zfpm_rnodes_iter_cleanup(iter);
693
694 /*
695 * Start the process of connecting to the FPM again.
696 */
697 zfpm_start_connect_timer("cleanup complete");
698 }
699
700 /*
701 * zfpm_connection_down
702 *
703 * Called when the connection to the FPM has gone down.
704 */
705 static void zfpm_connection_down(const char *detail)
706 {
707 if (!detail)
708 detail = "unknown";
709
710 assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
711
712 zlog_info("connection to the FPM has gone down: %s", detail);
713
714 zfpm_read_off();
715 zfpm_write_off();
716
717 stream_reset(zfpm_g->ibuf);
718 stream_reset(zfpm_g->obuf);
719
720 if (zfpm_g->sock >= 0) {
721 close(zfpm_g->sock);
722 zfpm_g->sock = -1;
723 }
724
725 /*
726 * Start thread to clean up state after the connection goes down.
727 */
728 assert(!zfpm_g->t_conn_down);
729 zfpm_rnodes_iter_init(&zfpm_g->t_conn_down_state.iter);
730 zfpm_g->t_conn_down = NULL;
731 thread_add_timer_msec(zfpm_g->master, zfpm_conn_down_thread_cb, NULL, 0,
732 &zfpm_g->t_conn_down);
733 zfpm_g->stats.t_conn_down_starts++;
734
735 zfpm_set_state(ZFPM_STATE_IDLE, detail);
736 }
737
/*
 * zfpm_read_cb
 *
 * Callback that runs when the FPM socket is readable.
 *
 * While an asynchronous connect is in progress this just checks
 * whether the connect has completed. Otherwise it reads one FPM
 * message into the input buffer, in two steps (header first, then the
 * rest), resuming across invocations when only part of the data is
 * available. A completed message is currently discarded.
 *
 * The read thread is re-armed on every path except those on which the
 * connection is taken down.
 */
static void zfpm_read_cb(struct thread *thread)
{
	size_t already;
	struct stream *ibuf;
	uint16_t msg_len;
	fpm_msg_hdr_t *hdr;

	zfpm_g->stats.read_cb_calls++;

	/*
	 * Check if async connect is now done.
	 */
	if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
		zfpm_connect_check();
		return;
	}

	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
	assert(zfpm_g->sock >= 0);

	ibuf = zfpm_g->ibuf;

	/*
	 * 'already' is the number of bytes of the current message that
	 * earlier invocations have put into the buffer.
	 */
	already = stream_get_endp(ibuf);
	if (already < FPM_MSG_HDR_LEN) {
		ssize_t nbyte;

		nbyte = stream_read_try(ibuf, zfpm_g->sock,
					FPM_MSG_HDR_LEN - already);
		/* A result of 0 or -1 is treated as loss of the connection. */
		if (nbyte == 0 || nbyte == -1) {
			if (nbyte == -1) {
				char buffer[1024];

				snprintf(buffer, sizeof(buffer),
					 "closed socket in read(%d): %s", errno,
					 safe_strerror(errno));
				zfpm_connection_down(buffer);
			} else
				zfpm_connection_down("closed socket in read");
			return;
		}

		/*
		 * Anything other than the full amount asked for: wait for
		 * the socket to become readable again.
		 */
		if (nbyte != (ssize_t)(FPM_MSG_HDR_LEN - already))
			goto done;

		already = FPM_MSG_HDR_LEN;
	}

	/* The complete header is in the buffer; parse it from the start. */
	stream_set_getp(ibuf, 0);

	hdr = (fpm_msg_hdr_t *)stream_pnt(ibuf);

	if (!fpm_msg_hdr_ok(hdr)) {
		zfpm_connection_down("invalid message header");
		return;
	}

	/*
	 * NOTE(review): msg_len is not compared against the size of ibuf
	 * here; whether fpm_msg_hdr_ok() bounds it is not visible in this
	 * file -- confirm.
	 */
	msg_len = fpm_msg_len(hdr);

	/*
	 * Read out the rest of the packet.
	 */
	if (already < msg_len) {
		ssize_t nbyte;

		nbyte = stream_read_try(ibuf, zfpm_g->sock, msg_len - already);

		if (nbyte == 0 || nbyte == -1) {
			if (nbyte == -1) {
				char buffer[1024];

				snprintf(buffer, sizeof(buffer),
					 "failed to read message(%d) %s", errno,
					 safe_strerror(errno));
				zfpm_connection_down(buffer);
			} else
				zfpm_connection_down("failed to read message");
			return;
		}

		/* Message still incomplete; keep what we have and wait. */
		if (nbyte != (ssize_t)(msg_len - already))
			goto done;
	}

	/*
	 * Just throw it away for now.
	 */
	stream_reset(ibuf);

done:
	zfpm_read_on();
}
832
833 static bool zfpm_updates_pending(void)
834 {
835 if (!(TAILQ_EMPTY(&zfpm_g->dest_q)) || !(TAILQ_EMPTY(&zfpm_g->mac_q)))
836 return true;
837
838 return false;
839 }
840
841 /*
842 * zfpm_writes_pending
843 *
844 * Returns true if we may have something to write to the FPM.
845 */
846 static int zfpm_writes_pending(void)
847 {
848
849 /*
850 * Check if there is any data in the outbound buffer that has not
851 * been written to the socket yet.
852 */
853 if (stream_get_endp(zfpm_g->obuf) - stream_get_getp(zfpm_g->obuf))
854 return 1;
855
856 /*
857 * Check if there are any updates scheduled on the outbound queues.
858 */
859 if (zfpm_updates_pending())
860 return 1;
861
862 return 0;
863 }
864
865 /*
866 * zfpm_encode_route
867 *
868 * Encode a message to the FPM with information about the given route.
869 *
870 * Returns the number of bytes written to the buffer. 0 or a negative
871 * value indicates an error.
872 */
873 static inline int zfpm_encode_route(rib_dest_t *dest, struct route_entry *re,
874 char *in_buf, size_t in_buf_len,
875 fpm_msg_type_e *msg_type)
876 {
877 size_t len;
878 #ifdef HAVE_NETLINK
879 int cmd;
880 #endif
881 len = 0;
882
883 *msg_type = FPM_MSG_TYPE_NONE;
884
885 switch (zfpm_g->message_format) {
886
887 case ZFPM_MSG_FORMAT_PROTOBUF:
888 #ifdef HAVE_PROTOBUF
889 len = zfpm_protobuf_encode_route(dest, re, (uint8_t *)in_buf,
890 in_buf_len);
891 *msg_type = FPM_MSG_TYPE_PROTOBUF;
892 #endif
893 break;
894
895 case ZFPM_MSG_FORMAT_NETLINK:
896 #ifdef HAVE_NETLINK
897 *msg_type = FPM_MSG_TYPE_NETLINK;
898 cmd = re ? RTM_NEWROUTE : RTM_DELROUTE;
899 len = zfpm_netlink_encode_route(cmd, dest, re, in_buf,
900 in_buf_len);
901 assert(fpm_msg_align(len) == len);
902 #endif /* HAVE_NETLINK */
903 break;
904
905 case ZFPM_MSG_FORMAT_NONE:
906 break;
907 }
908
909 return len;
910 }
911
/*
 * zfpm_route_for_update
 *
 * Returns the re that is to be sent to the FPM for a given dest. This
 * is the dest's selected_fib entry. The callers in this file treat a
 * NULL result as "the route is to be deleted".
 */
struct route_entry *zfpm_route_for_update(rib_dest_t *dest)
{
	return dest->selected_fib;
}
921
/*
 * Define an enum for return codes for queue processing functions
 *
 * FPM_WRITE_STOP: This return code indicates that the write buffer is full.
 * Stop processing all the queues and empty the buffer by writing its content
 * to the socket.
 *
 * FPM_GOTO_NEXT_Q: This return code indicates that either this queue is
 * empty or we have processed enough updates from this queue.
 * So, move on to the next queue.
 */
enum {
	FPM_WRITE_STOP = 0,
	FPM_GOTO_NEXT_Q = 1
};

/*
 * Maximum number of entries a queue processing function takes from its
 * queue in one call before it returns FPM_GOTO_NEXT_Q. The functions
 * keep the running count in a uint16_t, so the value must fit in one.
 */
#define FPM_QUEUE_PROCESS_LIMIT 10000
939
/*
 * zfpm_build_route_updates
 *
 * Process the dest_q queue and write FPM messages to the outbound buffer.
 *
 * Returns FPM_WRITE_STOP when the outbound buffer does not have room
 * for another maximum-sized message, and FPM_GOTO_NEXT_Q when the
 * queue is empty or FPM_QUEUE_PROCESS_LIMIT dests have been processed.
 */
static int zfpm_build_route_updates(void)
{
	struct stream *s;
	rib_dest_t *dest;
	unsigned char *buf, *data, *buf_end;
	size_t msg_len;
	size_t data_len;
	fpm_msg_hdr_t *hdr;
	struct route_entry *re;
	int is_add, write_msg;
	fpm_msg_type_e msg_type;
	uint16_t q_limit;

	if (TAILQ_EMPTY(&zfpm_g->dest_q))
		return FPM_GOTO_NEXT_Q;

	s = zfpm_g->obuf;
	q_limit = FPM_QUEUE_PROCESS_LIMIT;

	do {
		/*
		 * Make sure there is enough space to write another message.
		 */
		if (STREAM_WRITEABLE(s) < FPM_MAX_MSG_LEN)
			return FPM_WRITE_STOP;

		/*
		 * The message is built in place, directly in the unused
		 * tail of the stream.
		 */
		buf = STREAM_DATA(s) + stream_get_endp(s);
		buf_end = buf + STREAM_WRITEABLE(s);

		dest = TAILQ_FIRST(&zfpm_g->dest_q);
		if (!dest)
			return FPM_GOTO_NEXT_Q;

		assert(CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM));

		hdr = (fpm_msg_hdr_t *)buf;
		hdr->version = FPM_PROTO_VERSION;

		data = fpm_msg_data(hdr);

		/* No route to send means that this is a deletion. */
		re = zfpm_route_for_update(dest);
		is_add = re ? 1 : 0;

		write_msg = 1;

		/*
		 * If this is a route deletion, and we have not sent the
		 * route to the FPM previously, skip it.
		 */
		if (!is_add && !CHECK_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM)) {
			write_msg = 0;
			zfpm_g->stats.nop_deletes_skipped++;
		}

		if (write_msg) {
			data_len = zfpm_encode_route(dest, re, (char *)data,
						     buf_end - data, &msg_type);

			if (data_len) {
				/*
				 * Complete the header and commit the
				 * message by advancing the stream's end.
				 */
				hdr->msg_type = msg_type;
				msg_len = fpm_data_len_to_msg_len(data_len);
				hdr->msg_len = htons(msg_len);
				stream_forward_endp(s, msg_len);

				if (is_add)
					zfpm_g->stats.route_adds++;
				else
					zfpm_g->stats.route_dels++;
			} else {
				/*
				 * Nothing was encoded; the stream is left
				 * untouched and the dest is still dequeued
				 * below.
				 */
				zlog_err("%s: Encoding Prefix: %pRN No valid nexthops",
					 __func__, dest->rnode);
			}
		}

		/*
		 * Remove the dest from the queue, and reset the flag.
		 */
		UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
		TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);

		/*
		 * Note that the SENT flag follows is_add even when no
		 * message was written above.
		 */
		if (is_add) {
			SET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
		} else {
			UNSET_FLAG(dest->flags, RIB_DEST_SENT_TO_FPM);
		}

		/*
		 * Delete the destination if necessary.
		 */
		if (rib_gc_dest(dest->rnode))
			zfpm_g->stats.dests_del_after_update++;

		q_limit--;
		if (q_limit == 0) {
			/*
			 * We have processed enough updates in this queue.
			 * Now yield for other queues.
			 */
			return FPM_GOTO_NEXT_Q;
		}
	} while (true);
}
1048
1049 /*
1050 * zfpm_encode_mac
1051 *
1052 * Encode a message to FPM with information about the given MAC.
1053 *
1054 * Returns the number of bytes written to the buffer.
1055 */
1056 static inline int zfpm_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
1057 size_t in_buf_len, fpm_msg_type_e *msg_type)
1058 {
1059 size_t len = 0;
1060
1061 *msg_type = FPM_MSG_TYPE_NONE;
1062
1063 switch (zfpm_g->message_format) {
1064
1065 case ZFPM_MSG_FORMAT_NONE:
1066 break;
1067 case ZFPM_MSG_FORMAT_NETLINK:
1068 #ifdef HAVE_NETLINK
1069 len = zfpm_netlink_encode_mac(mac, in_buf, in_buf_len);
1070 assert(fpm_msg_align(len) == len);
1071 *msg_type = FPM_MSG_TYPE_NETLINK;
1072 #endif /* HAVE_NETLINK */
1073 break;
1074 case ZFPM_MSG_FORMAT_PROTOBUF:
1075 break;
1076 }
1077 return len;
1078 }
1079
/*
 * zfpm_build_mac_updates
 *
 * Process the mac_q queue and write FPM messages to the outbound buffer.
 *
 * Returns FPM_WRITE_STOP when the outbound buffer has less than
 * FPM_MAX_MAC_MSG_LEN bytes of room left, and FPM_GOTO_NEXT_Q when the
 * queue is empty or FPM_QUEUE_PROCESS_LIMIT messages have been built.
 * Every entry that is processed is removed from the queue and deleted.
 */
static int zfpm_build_mac_updates(void)
{
	struct stream *s;
	struct fpm_mac_info_t *mac;
	unsigned char *buf, *data, *buf_end;
	fpm_msg_hdr_t *hdr;
	size_t data_len, msg_len;
	fpm_msg_type_e msg_type;
	uint16_t q_limit;

	if (TAILQ_EMPTY(&zfpm_g->mac_q))
		return FPM_GOTO_NEXT_Q;

	s = zfpm_g->obuf;
	q_limit = FPM_QUEUE_PROCESS_LIMIT;

	do {
		/* Make sure there is enough space to write another message. */
		if (STREAM_WRITEABLE(s) < FPM_MAX_MAC_MSG_LEN)
			return FPM_WRITE_STOP;

		/* The message is built in place in the tail of the stream. */
		buf = STREAM_DATA(s) + stream_get_endp(s);
		buf_end = buf + STREAM_WRITEABLE(s);

		mac = TAILQ_FIRST(&zfpm_g->mac_q);
		if (!mac)
			return FPM_GOTO_NEXT_Q;

		/*
		 * Check for no-op: an entry without the UPDATE flag is
		 * dropped without sending anything. Such entries do not
		 * count against q_limit.
		 */
		if (!CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM)) {
			zfpm_g->stats.nop_deletes_skipped++;
			zfpm_mac_info_del(mac);
			continue;
		}

		hdr = (fpm_msg_hdr_t *)buf;
		hdr->version = FPM_PROTO_VERSION;

		data = fpm_msg_data(hdr);
		data_len = zfpm_encode_mac(mac, (char *)data, buf_end - data,
					   &msg_type);
		/*
		 * NOTE(review): zfpm_encode_mac() returns 0 for the NONE and
		 * PROTOBUF formats (and for NETLINK without HAVE_NETLINK),
		 * which would trip this assert. Presumably MAC entries are
		 * only queued when the netlink encoder is in use -- confirm.
		 */
		assert(data_len);

		/* Complete the header and commit the message to the stream. */
		hdr->msg_type = msg_type;
		msg_len = fpm_data_len_to_msg_len(data_len);
		hdr->msg_len = htons(msg_len);
		stream_forward_endp(s, msg_len);

		/* Remove the MAC from the queue, and delete it. */
		zfpm_mac_info_del(mac);

		q_limit--;
		if (q_limit == 0) {
			/*
			 * We have processed enough updates in this queue.
			 * Now yield for other queues.
			 */
			return FPM_GOTO_NEXT_Q;
		}
	} while (1);
}
1141
1142 /*
1143 * zfpm_build_updates
1144 *
1145 * Process the outgoing queues and write messages to the outbound
1146 * buffer.
1147 */
1148 static void zfpm_build_updates(void)
1149 {
1150 struct stream *s;
1151
1152 s = zfpm_g->obuf;
1153 assert(stream_empty(s));
1154
1155 do {
1156 /*
1157 * Stop processing the queues if zfpm_g->obuf is full
1158 * or we do not have more updates to process
1159 */
1160 if (zfpm_build_mac_updates() == FPM_WRITE_STOP)
1161 break;
1162 if (zfpm_build_route_updates() == FPM_WRITE_STOP)
1163 break;
1164 } while (zfpm_updates_pending());
1165 }
1166
/*
 * zfpm_write_cb
 *
 * Callback that runs when the FPM socket is writable.
 *
 * While an asynchronous connect is in progress this just checks
 * whether the connect has completed. Otherwise it repeatedly fills the
 * outbound buffer from the update queues and writes it to the socket,
 * until there is nothing left to send, the socket would block, a write
 * is partial, ZFPM_MAX_WRITES_PER_RUN writes have been made, or the
 * thread should yield. The write thread is re-armed at the end if
 * anything is still pending.
 */
static void zfpm_write_cb(struct thread *thread)
{
	struct stream *s;
	int num_writes;

	zfpm_g->stats.write_cb_calls++;

	/*
	 * Check if async connect is now done.
	 */
	if (zfpm_g->state == ZFPM_STATE_CONNECTING) {
		zfpm_connect_check();
		return;
	}

	assert(zfpm_g->state == ZFPM_STATE_ESTABLISHED);
	assert(zfpm_g->sock >= 0);

	num_writes = 0;

	do {
		int bytes_to_write, bytes_written;

		s = zfpm_g->obuf;

		/*
		 * If the stream is empty, try fill it up with data.
		 */
		if (stream_empty(s)) {
			zfpm_build_updates();
		}

		/* Unsent data lies between the get and end pointers. */
		bytes_to_write = stream_get_endp(s) - stream_get_getp(s);
		if (!bytes_to_write)
			break;

		bytes_written =
			write(zfpm_g->sock, stream_pnt(s), bytes_to_write);
		zfpm_g->stats.write_calls++;
		num_writes++;

		if (bytes_written < 0) {
			/* Retryable error: keep the data and try again later. */
			if (ERRNO_IO_RETRY(errno))
				break;

			zfpm_connection_down("failed to write to socket");
			return;
		}

		if (bytes_written != bytes_to_write) {

			/*
			 * Partial write. Skip over what was sent; the rest
			 * stays in the stream for the next invocation.
			 */
			stream_forward_getp(s, bytes_written);
			zfpm_g->stats.partial_writes++;
			break;
		}

		/*
		 * We've written out the entire contents of the stream.
		 */
		stream_reset(s);

		if (num_writes >= ZFPM_MAX_WRITES_PER_RUN) {
			zfpm_g->stats.max_writes_hit++;
			break;
		}

		if (zfpm_thread_should_yield(thread)) {
			zfpm_g->stats.t_write_yields++;
			break;
		}
	} while (1);

	/* Ask to be called again if data or updates are still waiting. */
	if (zfpm_writes_pending())
		zfpm_write_on();
}
1248
1249 /*
1250 * zfpm_connect_cb
1251 */
1252 static void zfpm_connect_cb(struct thread *t)
1253 {
1254 int sock, ret;
1255 struct sockaddr_in serv;
1256
1257 assert(zfpm_g->state == ZFPM_STATE_ACTIVE);
1258
1259 sock = socket(AF_INET, SOCK_STREAM, 0);
1260 if (sock < 0) {
1261 zlog_err("Failed to create socket for connect(): %s",
1262 strerror(errno));
1263 zfpm_g->stats.connect_no_sock++;
1264 return;
1265 }
1266
1267 set_nonblocking(sock);
1268
1269 /* Make server socket. */
1270 memset(&serv, 0, sizeof(serv));
1271 serv.sin_family = AF_INET;
1272 serv.sin_port = htons(zfpm_g->fpm_port);
1273 #ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
1274 serv.sin_len = sizeof(struct sockaddr_in);
1275 #endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
1276 if (!zfpm_g->fpm_server)
1277 serv.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1278 else
1279 serv.sin_addr.s_addr = (zfpm_g->fpm_server);
1280
1281 /*
1282 * Connect to the FPM.
1283 */
1284 zfpm_g->connect_calls++;
1285 zfpm_g->stats.connect_calls++;
1286 zfpm_g->last_connect_call_time = monotime(NULL);
1287
1288 ret = connect(sock, (struct sockaddr *)&serv, sizeof(serv));
1289 if (ret >= 0) {
1290 zfpm_g->sock = sock;
1291 zfpm_connection_up("connect succeeded");
1292 return;
1293 }
1294
1295 if (errno == EINPROGRESS) {
1296 zfpm_g->sock = sock;
1297 zfpm_read_on();
1298 zfpm_write_on();
1299 zfpm_set_state(ZFPM_STATE_CONNECTING,
1300 "async connect in progress");
1301 return;
1302 }
1303
1304 zlog_info("can't connect to FPM %d: %s", sock, safe_strerror(errno));
1305 close(sock);
1306
1307 /*
1308 * Restart timer for retrying connection.
1309 */
1310 zfpm_start_connect_timer("connect() failed");
1311 }
1312
1313 /*
1314 * zfpm_set_state
1315 *
1316 * Move state machine into the given state.
1317 */
1318 static void zfpm_set_state(enum zfpm_state state, const char *reason)
1319 {
1320 enum zfpm_state cur_state = zfpm_g->state;
1321
1322 if (!reason)
1323 reason = "Unknown";
1324
1325 if (state == cur_state)
1326 return;
1327
1328 zfpm_debug("beginning state transition %s -> %s. Reason: %s",
1329 zfpm_state_to_str(cur_state), zfpm_state_to_str(state),
1330 reason);
1331
1332 switch (state) {
1333
1334 case ZFPM_STATE_IDLE:
1335 assert(cur_state == ZFPM_STATE_ESTABLISHED);
1336 break;
1337
1338 case ZFPM_STATE_ACTIVE:
1339 assert(cur_state == ZFPM_STATE_IDLE
1340 || cur_state == ZFPM_STATE_CONNECTING);
1341 assert(zfpm_g->t_connect);
1342 break;
1343
1344 case ZFPM_STATE_CONNECTING:
1345 assert(zfpm_g->sock);
1346 assert(cur_state == ZFPM_STATE_ACTIVE);
1347 assert(zfpm_g->t_read);
1348 assert(zfpm_g->t_write);
1349 break;
1350
1351 case ZFPM_STATE_ESTABLISHED:
1352 assert(cur_state == ZFPM_STATE_ACTIVE
1353 || cur_state == ZFPM_STATE_CONNECTING);
1354 assert(zfpm_g->sock);
1355 assert(zfpm_g->t_read);
1356 assert(zfpm_g->t_write);
1357 break;
1358 }
1359
1360 zfpm_g->state = state;
1361 }
1362
1363 /*
1364 * zfpm_calc_connect_delay
1365 *
1366 * Returns the number of seconds after which we should attempt to
1367 * reconnect to the FPM.
1368 */
1369 static long zfpm_calc_connect_delay(void)
1370 {
1371 time_t elapsed;
1372
1373 /*
1374 * Return 0 if this is our first attempt to connect.
1375 */
1376 if (zfpm_g->connect_calls == 0) {
1377 return 0;
1378 }
1379
1380 elapsed = zfpm_get_elapsed_time(zfpm_g->last_connect_call_time);
1381
1382 if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
1383 return 0;
1384 }
1385
1386 return ZFPM_CONNECT_RETRY_IVL - elapsed;
1387 }
1388
1389 /*
1390 * zfpm_start_connect_timer
1391 */
1392 static void zfpm_start_connect_timer(const char *reason)
1393 {
1394 long delay_secs;
1395
1396 assert(!zfpm_g->t_connect);
1397 assert(zfpm_g->sock < 0);
1398
1399 assert(zfpm_g->state == ZFPM_STATE_IDLE
1400 || zfpm_g->state == ZFPM_STATE_ACTIVE
1401 || zfpm_g->state == ZFPM_STATE_CONNECTING);
1402
1403 delay_secs = zfpm_calc_connect_delay();
1404 zfpm_debug("scheduling connect in %ld seconds", delay_secs);
1405
1406 thread_add_timer(zfpm_g->master, zfpm_connect_cb, 0, delay_secs,
1407 &zfpm_g->t_connect);
1408 zfpm_set_state(ZFPM_STATE_ACTIVE, reason);
1409 }
1410
1411 /*
1412 * zfpm_is_enabled
1413 *
1414 * Returns true if the zebra FPM module has been enabled.
1415 */
1416 static inline int zfpm_is_enabled(void)
1417 {
1418 return zfpm_g->enabled;
1419 }
1420
1421 /*
1422 * zfpm_conn_is_up
1423 *
1424 * Returns true if the connection to the FPM is up.
1425 */
1426 static inline int zfpm_conn_is_up(void)
1427 {
1428 if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
1429 return 0;
1430
1431 assert(zfpm_g->sock >= 0);
1432
1433 return 1;
1434 }
1435
1436 /*
1437 * zfpm_trigger_update
1438 *
1439 * The zebra code invokes this function to indicate that we should
1440 * send an update to the FPM about the given route_node.
1441 */
1442 static int zfpm_trigger_update(struct route_node *rn, const char *reason)
1443 {
1444 rib_dest_t *dest;
1445
1446 /*
1447 * Ignore if the connection is down. We will update the FPM about
1448 * all destinations once the connection comes up.
1449 */
1450 if (!zfpm_conn_is_up())
1451 return 0;
1452
1453 dest = rib_dest_from_rnode(rn);
1454
1455 if (CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM)) {
1456 zfpm_g->stats.redundant_triggers++;
1457 return 0;
1458 }
1459
1460 if (reason) {
1461 zfpm_debug("%pFX triggering update to FPM - Reason: %s", &rn->p,
1462 reason);
1463 }
1464
1465 SET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1466 TAILQ_INSERT_TAIL(&zfpm_g->dest_q, dest, fpm_q_entries);
1467 zfpm_g->stats.updates_triggered++;
1468
1469 /*
1470 * Make sure that writes are enabled.
1471 */
1472 if (zfpm_g->t_write)
1473 return 0;
1474
1475 zfpm_write_on();
1476 return 0;
1477 }
1478
1479 /*
1480 * zfpm_trigger_remove
1481 *
1482 * The zebra code invokes this function to indicate that we should
1483 * send an remove to the FPM about the given route_node.
1484 */
1485
1486 static int zfpm_trigger_remove(struct route_node *rn)
1487 {
1488 rib_dest_t *dest;
1489
1490 if (!zfpm_conn_is_up())
1491 return 0;
1492
1493 dest = rib_dest_from_rnode(rn);
1494 if (!CHECK_FLAG(dest->flags, RIB_DEST_UPDATE_FPM))
1495 return 0;
1496
1497 zfpm_debug("%pRN Removing from update queue shutting down", rn);
1498
1499 UNSET_FLAG(dest->flags, RIB_DEST_UPDATE_FPM);
1500 TAILQ_REMOVE(&zfpm_g->dest_q, dest, fpm_q_entries);
1501
1502 return 0;
1503 }
1504
1505 /*
1506 * Generate Key for FPM MAC info hash entry
1507 */
1508 static unsigned int zfpm_mac_info_hash_keymake(const void *p)
1509 {
1510 struct fpm_mac_info_t *fpm_mac = (struct fpm_mac_info_t *)p;
1511 uint32_t mac_key;
1512
1513 mac_key = jhash(fpm_mac->macaddr.octet, ETH_ALEN, 0xa5a5a55a);
1514
1515 return jhash_2words(mac_key, fpm_mac->vni, 0);
1516 }
1517
1518 /*
1519 * Compare function for FPM MAC info hash lookup
1520 */
1521 static bool zfpm_mac_info_cmp(const void *p1, const void *p2)
1522 {
1523 const struct fpm_mac_info_t *fpm_mac1 = p1;
1524 const struct fpm_mac_info_t *fpm_mac2 = p2;
1525
1526 if (memcmp(fpm_mac1->macaddr.octet, fpm_mac2->macaddr.octet, ETH_ALEN)
1527 != 0)
1528 return false;
1529 if (fpm_mac1->vni != fpm_mac2->vni)
1530 return false;
1531
1532 return true;
1533 }
1534
1535 /*
1536 * Lookup FPM MAC info hash entry.
1537 */
1538 static struct fpm_mac_info_t *zfpm_mac_info_lookup(struct fpm_mac_info_t *key)
1539 {
1540 return hash_lookup(zfpm_g->fpm_mac_info_table, key);
1541 }
1542
1543 /*
1544 * Callback to allocate fpm_mac_info_t structure.
1545 */
1546 static void *zfpm_mac_info_alloc(void *p)
1547 {
1548 const struct fpm_mac_info_t *key = p;
1549 struct fpm_mac_info_t *fpm_mac;
1550
1551 fpm_mac = XCALLOC(MTYPE_FPM_MAC_INFO, sizeof(struct fpm_mac_info_t));
1552
1553 memcpy(&fpm_mac->macaddr, &key->macaddr, ETH_ALEN);
1554 fpm_mac->vni = key->vni;
1555
1556 return (void *)fpm_mac;
1557 }
1558
1559 /*
1560 * Delink and free fpm_mac_info_t.
1561 */
1562 static void zfpm_mac_info_del(struct fpm_mac_info_t *fpm_mac)
1563 {
1564 hash_release(zfpm_g->fpm_mac_info_table, fpm_mac);
1565 TAILQ_REMOVE(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1566 XFREE(MTYPE_FPM_MAC_INFO, fpm_mac);
1567 }
1568
1569 /*
1570 * zfpm_trigger_rmac_update
1571 *
1572 * Zebra code invokes this function to indicate that we should
1573 * send an update to FPM for given MAC entry.
1574 *
1575 * This function checks if we already have enqueued an update for this RMAC,
1576 * If yes, update the same fpm_mac_info_t. Else, create and enqueue an update.
1577 */
1578 static int zfpm_trigger_rmac_update(struct zebra_mac *rmac,
1579 struct zebra_l3vni *zl3vni, bool delete,
1580 const char *reason)
1581 {
1582 struct fpm_mac_info_t *fpm_mac, key;
1583 struct interface *vxlan_if, *svi_if;
1584 bool mac_found = false;
1585
1586 /*
1587 * Ignore if the connection is down. We will update the FPM about
1588 * all destinations once the connection comes up.
1589 */
1590 if (!zfpm_conn_is_up())
1591 return 0;
1592
1593 if (reason) {
1594 zfpm_debug("triggering update to FPM - Reason: %s - %pEA",
1595 reason, &rmac->macaddr);
1596 }
1597
1598 vxlan_if = zl3vni_map_to_vxlan_if(zl3vni);
1599 svi_if = zl3vni_map_to_svi_if(zl3vni);
1600
1601 memset(&key, 0, sizeof(key));
1602
1603 memcpy(&key.macaddr, &rmac->macaddr, ETH_ALEN);
1604 key.vni = zl3vni->vni;
1605
1606 /* Check if this MAC is already present in the queue. */
1607 fpm_mac = zfpm_mac_info_lookup(&key);
1608
1609 if (fpm_mac) {
1610 mac_found = true;
1611
1612 /*
1613 * If the enqueued op is "add" and current op is "delete",
1614 * this is a noop. So, Unset ZEBRA_MAC_UPDATE_FPM flag.
1615 * While processing FPM queue, we will silently delete this
1616 * MAC entry without sending any update for this MAC.
1617 */
1618 if (!CHECK_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) &&
1619 delete == 1) {
1620 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1621 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1622 return 0;
1623 }
1624 } else
1625 fpm_mac = hash_get(zfpm_g->fpm_mac_info_table, &key,
1626 zfpm_mac_info_alloc);
1627
1628 fpm_mac->r_vtep_ip.s_addr = rmac->fwd_info.r_vtep_ip.s_addr;
1629 fpm_mac->zebra_flags = rmac->flags;
1630 fpm_mac->vxlan_if = vxlan_if ? vxlan_if->ifindex : 0;
1631 fpm_mac->svi_if = svi_if ? svi_if->ifindex : 0;
1632
1633 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_UPDATE_FPM);
1634 if (delete)
1635 SET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1636 else
1637 UNSET_FLAG(fpm_mac->fpm_flags, ZEBRA_MAC_DELETE_FPM);
1638
1639 if (!mac_found)
1640 TAILQ_INSERT_TAIL(&zfpm_g->mac_q, fpm_mac, fpm_mac_q_entries);
1641
1642 zfpm_g->stats.updates_triggered++;
1643
1644 /* If writes are already enabled, return. */
1645 if (zfpm_g->t_write)
1646 return 0;
1647
1648 zfpm_write_on();
1649 return 0;
1650 }
1651
1652 /*
1653 * This function is called when the FPM connections is established.
1654 * Iterate over all the RMAC entries for the given L3VNI
1655 * and enqueue the RMAC for FPM processing.
1656 */
1657 static void zfpm_trigger_rmac_update_wrapper(struct hash_bucket *bucket,
1658 void *args)
1659 {
1660 struct zebra_mac *zrmac = (struct zebra_mac *)bucket->data;
1661 struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)args;
1662
1663 zfpm_trigger_rmac_update(zrmac, zl3vni, false, "RMAC added");
1664 }
1665
1666 /*
1667 * This function is called when the FPM connections is established.
1668 * This function iterates over all the L3VNIs to trigger
1669 * FPM updates for RMACs currently available.
1670 */
1671 static void zfpm_iterate_rmac_table(struct hash_bucket *bucket, void *args)
1672 {
1673 struct zebra_l3vni *zl3vni = (struct zebra_l3vni *)bucket->data;
1674
1675 hash_iterate(zl3vni->rmac_table, zfpm_trigger_rmac_update_wrapper,
1676 (void *)zl3vni);
1677 }
1678
1679 /*
1680 * struct zfpm_statsimer_cb
1681 */
1682 static void zfpm_stats_timer_cb(struct thread *t)
1683 {
1684 zfpm_g->t_stats = NULL;
1685
1686 /*
1687 * Remember the stats collected in the last interval for display
1688 * purposes.
1689 */
1690 zfpm_stats_copy(&zfpm_g->stats, &zfpm_g->last_ivl_stats);
1691
1692 /*
1693 * Add the current set of stats into the cumulative statistics.
1694 */
1695 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1696 &zfpm_g->cumulative_stats);
1697
1698 /*
1699 * Start collecting stats afresh over the next interval.
1700 */
1701 zfpm_stats_reset(&zfpm_g->stats);
1702
1703 zfpm_start_stats_timer();
1704 }
1705
1706 /*
1707 * zfpm_stop_stats_timer
1708 */
1709 static void zfpm_stop_stats_timer(void)
1710 {
1711 if (!zfpm_g->t_stats)
1712 return;
1713
1714 zfpm_debug("Stopping existing stats timer");
1715 THREAD_OFF(zfpm_g->t_stats);
1716 }
1717
1718 /*
1719 * zfpm_start_stats_timer
1720 */
1721 void zfpm_start_stats_timer(void)
1722 {
1723 assert(!zfpm_g->t_stats);
1724
1725 thread_add_timer(zfpm_g->master, zfpm_stats_timer_cb, 0,
1726 ZFPM_STATS_IVL_SECS, &zfpm_g->t_stats);
1727 }
1728
/*
 * Helper macro for zfpm_show_stats() below: prints one row holding the
 * counter's name, its total value and its value over the last interval.
 * Relies on 'vty' and 'total_stats' being in scope at the point of use.
 */
#define ZFPM_SHOW_STAT(counter)                                                \
	do {                                                                   \
		vty_out(vty, "%-40s %10lu %16lu\n", #counter,                  \
			total_stats.counter,                                   \
			zfpm_g->last_ivl_stats.counter);                       \
	} while (0)
1737
1738 /*
1739 * zfpm_show_stats
1740 */
1741 static void zfpm_show_stats(struct vty *vty)
1742 {
1743 struct zfpm_stats total_stats;
1744 time_t elapsed;
1745
1746 vty_out(vty, "\n%-40s %10s Last %2d secs\n\n", "Counter", "Total",
1747 ZFPM_STATS_IVL_SECS);
1748
1749 /*
1750 * Compute the total stats up to this instant.
1751 */
1752 zfpm_stats_compose(&zfpm_g->cumulative_stats, &zfpm_g->stats,
1753 &total_stats);
1754
1755 ZFPM_SHOW_STAT(connect_calls);
1756 ZFPM_SHOW_STAT(connect_no_sock);
1757 ZFPM_SHOW_STAT(read_cb_calls);
1758 ZFPM_SHOW_STAT(write_cb_calls);
1759 ZFPM_SHOW_STAT(write_calls);
1760 ZFPM_SHOW_STAT(partial_writes);
1761 ZFPM_SHOW_STAT(max_writes_hit);
1762 ZFPM_SHOW_STAT(t_write_yields);
1763 ZFPM_SHOW_STAT(nop_deletes_skipped);
1764 ZFPM_SHOW_STAT(route_adds);
1765 ZFPM_SHOW_STAT(route_dels);
1766 ZFPM_SHOW_STAT(updates_triggered);
1767 ZFPM_SHOW_STAT(redundant_triggers);
1768 ZFPM_SHOW_STAT(dests_del_after_update);
1769 ZFPM_SHOW_STAT(t_conn_down_starts);
1770 ZFPM_SHOW_STAT(t_conn_down_dests_processed);
1771 ZFPM_SHOW_STAT(t_conn_down_yields);
1772 ZFPM_SHOW_STAT(t_conn_down_finishes);
1773 ZFPM_SHOW_STAT(t_conn_up_starts);
1774 ZFPM_SHOW_STAT(t_conn_up_dests_processed);
1775 ZFPM_SHOW_STAT(t_conn_up_yields);
1776 ZFPM_SHOW_STAT(t_conn_up_aborts);
1777 ZFPM_SHOW_STAT(t_conn_up_finishes);
1778
1779 if (!zfpm_g->last_stats_clear_time)
1780 return;
1781
1782 elapsed = zfpm_get_elapsed_time(zfpm_g->last_stats_clear_time);
1783
1784 vty_out(vty, "\nStats were cleared %lu seconds ago\n",
1785 (unsigned long)elapsed);
1786 }
1787
1788 /*
1789 * zfpm_clear_stats
1790 */
1791 static void zfpm_clear_stats(struct vty *vty)
1792 {
1793 if (!zfpm_is_enabled()) {
1794 vty_out(vty, "The FPM module is not enabled...\n");
1795 return;
1796 }
1797
1798 zfpm_stats_reset(&zfpm_g->stats);
1799 zfpm_stats_reset(&zfpm_g->last_ivl_stats);
1800 zfpm_stats_reset(&zfpm_g->cumulative_stats);
1801
1802 zfpm_stop_stats_timer();
1803 zfpm_start_stats_timer();
1804
1805 zfpm_g->last_stats_clear_time = monotime(NULL);
1806
1807 vty_out(vty, "Cleared FPM stats\n");
1808 }
1809
1810 /*
1811 * show_zebra_fpm_stats
1812 */
1813 DEFUN (show_zebra_fpm_stats,
1814 show_zebra_fpm_stats_cmd,
1815 "show zebra fpm stats",
1816 SHOW_STR
1817 ZEBRA_STR
1818 "Forwarding Path Manager information\n"
1819 "Statistics\n")
1820 {
1821 zfpm_show_stats(vty);
1822 return CMD_SUCCESS;
1823 }
1824
1825 /*
1826 * clear_zebra_fpm_stats
1827 */
1828 DEFUN (clear_zebra_fpm_stats,
1829 clear_zebra_fpm_stats_cmd,
1830 "clear zebra fpm stats",
1831 CLEAR_STR
1832 ZEBRA_STR
1833 "Clear Forwarding Path Manager information\n"
1834 "Statistics\n")
1835 {
1836 zfpm_clear_stats(vty);
1837 return CMD_SUCCESS;
1838 }
1839
1840 /*
1841 * update fpm connection information
1842 */
1843 DEFUN (fpm_remote_ip,
1844 fpm_remote_ip_cmd,
1845 "fpm connection ip A.B.C.D port (1-65535)",
1846 "Forwarding Path Manager\n"
1847 "Configure FPM connection\n"
1848 "Connect to IPv4 address\n"
1849 "Connect to IPv4 address\n"
1850 "TCP port number\n"
1851 "TCP port number\n")
1852 {
1853
1854 in_addr_t fpm_server;
1855 uint32_t port_no;
1856
1857 fpm_server = inet_addr(argv[3]->arg);
1858 if (fpm_server == INADDR_NONE)
1859 return CMD_ERR_INCOMPLETE;
1860
1861 port_no = atoi(argv[5]->arg);
1862 if (port_no < TCP_MIN_PORT || port_no > TCP_MAX_PORT)
1863 return CMD_ERR_INCOMPLETE;
1864
1865 zfpm_g->fpm_server = fpm_server;
1866 zfpm_g->fpm_port = port_no;
1867
1868
1869 return CMD_SUCCESS;
1870 }
1871
1872 DEFUN (no_fpm_remote_ip,
1873 no_fpm_remote_ip_cmd,
1874 "no fpm connection ip A.B.C.D port (1-65535)",
1875 NO_STR
1876 "Forwarding Path Manager\n"
1877 "Remove configured FPM connection\n"
1878 "Connect to IPv4 address\n"
1879 "Connect to IPv4 address\n"
1880 "TCP port number\n"
1881 "TCP port number\n")
1882 {
1883 if (zfpm_g->fpm_server != inet_addr(argv[4]->arg)
1884 || zfpm_g->fpm_port != atoi(argv[6]->arg))
1885 return CMD_ERR_NO_MATCH;
1886
1887 zfpm_g->fpm_server = FPM_DEFAULT_IP;
1888 zfpm_g->fpm_port = FPM_DEFAULT_PORT;
1889
1890 return CMD_SUCCESS;
1891 }
1892
1893 /*
1894 * zfpm_init_message_format
1895 */
1896 static inline void zfpm_init_message_format(const char *format)
1897 {
1898 int have_netlink, have_protobuf;
1899
1900 #ifdef HAVE_NETLINK
1901 have_netlink = 1;
1902 #else
1903 have_netlink = 0;
1904 #endif
1905
1906 #ifdef HAVE_PROTOBUF
1907 have_protobuf = 1;
1908 #else
1909 have_protobuf = 0;
1910 #endif
1911
1912 zfpm_g->message_format = ZFPM_MSG_FORMAT_NONE;
1913
1914 if (!format) {
1915 if (have_netlink) {
1916 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1917 } else if (have_protobuf) {
1918 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1919 }
1920 return;
1921 }
1922
1923 if (!strcmp("netlink", format)) {
1924 if (!have_netlink) {
1925 flog_err(EC_ZEBRA_NETLINK_NOT_AVAILABLE,
1926 "FPM netlink message format is not available");
1927 return;
1928 }
1929 zfpm_g->message_format = ZFPM_MSG_FORMAT_NETLINK;
1930 return;
1931 }
1932
1933 if (!strcmp("protobuf", format)) {
1934 if (!have_protobuf) {
1935 flog_err(
1936 EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1937 "FPM protobuf message format is not available");
1938 return;
1939 }
1940 flog_warn(EC_ZEBRA_PROTOBUF_NOT_AVAILABLE,
1941 "FPM protobuf message format is deprecated and scheduled to be removed. Please convert to using netlink format or contact dev@lists.frrouting.org with your use case.");
1942 zfpm_g->message_format = ZFPM_MSG_FORMAT_PROTOBUF;
1943 return;
1944 }
1945
1946 flog_warn(EC_ZEBRA_FPM_FORMAT_UNKNOWN, "Unknown fpm format '%s'",
1947 format);
1948 }
1949
1950 /**
1951 * fpm_remote_srv_write
1952 *
1953 * Module to write remote fpm connection
1954 *
1955 * Returns ZERO on success.
1956 */
1957
1958 static int fpm_remote_srv_write(struct vty *vty)
1959 {
1960 struct in_addr in;
1961
1962 in.s_addr = zfpm_g->fpm_server;
1963
1964 if ((zfpm_g->fpm_server != FPM_DEFAULT_IP
1965 && zfpm_g->fpm_server != INADDR_ANY)
1966 || (zfpm_g->fpm_port != FPM_DEFAULT_PORT && zfpm_g->fpm_port != 0))
1967 vty_out(vty, "fpm connection ip %pI4 port %d\n", &in,
1968 zfpm_g->fpm_port);
1969
1970 return 0;
1971 }
1972
1973
1974 static int fpm_remote_srv_write(struct vty *vty);
1975 /* Zebra node */
1976 static struct cmd_node zebra_node = {
1977 .name = "zebra",
1978 .node = ZEBRA_NODE,
1979 .parent_node = CONFIG_NODE,
1980 .prompt = "",
1981 .config_write = fpm_remote_srv_write,
1982 };
1983
1984
1985 /**
1986 * zfpm_init
1987 *
1988 * One-time initialization of the Zebra FPM module.
1989 *
1990 * @param[in] port port at which FPM is running.
1991 * @param[in] enable true if the zebra FPM module should be enabled
1992 * @param[in] format to use to talk to the FPM. Can be 'netink' or 'protobuf'.
1993 *
1994 * Returns true on success.
1995 */
1996 static int zfpm_init(struct thread_master *master)
1997 {
1998 int enable = 1;
1999 uint16_t port = 0;
2000 const char *format = THIS_MODULE->load_args;
2001
2002 memset(zfpm_g, 0, sizeof(*zfpm_g));
2003 zfpm_g->master = master;
2004 TAILQ_INIT(&zfpm_g->dest_q);
2005 TAILQ_INIT(&zfpm_g->mac_q);
2006
2007 /* Create hash table for fpm_mac_info_t enties */
2008 zfpm_g->fpm_mac_info_table = hash_create(zfpm_mac_info_hash_keymake,
2009 zfpm_mac_info_cmp,
2010 "FPM MAC info hash table");
2011
2012 zfpm_g->sock = -1;
2013 zfpm_g->state = ZFPM_STATE_IDLE;
2014
2015 zfpm_stats_init(&zfpm_g->stats);
2016 zfpm_stats_init(&zfpm_g->last_ivl_stats);
2017 zfpm_stats_init(&zfpm_g->cumulative_stats);
2018
2019 memset(&ipv4ll_gateway, 0, sizeof(ipv4ll_gateway));
2020 if (inet_pton(AF_INET, ipv4_ll_buf, &ipv4ll_gateway.ipv4) != 1)
2021 zlog_warn("inet_pton failed for %s", ipv4_ll_buf);
2022
2023 install_node(&zebra_node);
2024 install_element(ENABLE_NODE, &show_zebra_fpm_stats_cmd);
2025 install_element(ENABLE_NODE, &clear_zebra_fpm_stats_cmd);
2026 install_element(CONFIG_NODE, &fpm_remote_ip_cmd);
2027 install_element(CONFIG_NODE, &no_fpm_remote_ip_cmd);
2028
2029 zfpm_init_message_format(format);
2030
2031 /*
2032 * Disable FPM interface if no suitable format is available.
2033 */
2034 if (zfpm_g->message_format == ZFPM_MSG_FORMAT_NONE)
2035 enable = 0;
2036
2037 zfpm_g->enabled = enable;
2038
2039 if (!zfpm_g->fpm_server)
2040 zfpm_g->fpm_server = FPM_DEFAULT_IP;
2041
2042 if (!port)
2043 port = FPM_DEFAULT_PORT;
2044
2045 zfpm_g->fpm_port = port;
2046
2047 zfpm_g->obuf = stream_new(ZFPM_OBUF_SIZE);
2048 zfpm_g->ibuf = stream_new(ZFPM_IBUF_SIZE);
2049
2050 zfpm_start_stats_timer();
2051 zfpm_start_connect_timer("initialized");
2052 return 0;
2053 }
2054
2055 static int zfpm_fini(void)
2056 {
2057 zfpm_write_off();
2058 zfpm_read_off();
2059 zfpm_connect_off();
2060
2061 zfpm_stop_stats_timer();
2062
2063 hook_unregister(rib_update, zfpm_trigger_update);
2064 return 0;
2065 }
2066
2067 static int zebra_fpm_module_init(void)
2068 {
2069 hook_register(rib_update, zfpm_trigger_update);
2070 hook_register(rib_shutdown, zfpm_trigger_remove);
2071 hook_register(zebra_rmac_update, zfpm_trigger_rmac_update);
2072 hook_register(frr_late_init, zfpm_init);
2073 hook_register(frr_early_fini, zfpm_fini);
2074 return 0;
2075 }
2076
2077 FRR_MODULE_SETUP(.name = "zebra_fpm", .version = FRR_VERSION,
2078 .description = "zebra FPM (Forwarding Plane Manager) module",
2079 .init = zebra_fpm_module_init,
2080 );