/*
 * Main implementation file for interface to Forwarding Plane Manager.
 *
 * Copyright (C) 2012 by Open Source Routing.
 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
 *
 * This file is part of GNU Zebra.
 *
 * GNU Zebra is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * GNU Zebra is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Zebra; see the file COPYING. If not, write to the Free
 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 * 02111-1307, USA.
 */

#include <zebra.h>

#include "log.h"
#include "stream.h"
#include "thread.h"
#include "network.h"
#include "command.h"

#include "zebra/rib.h"
#include "zebra/zserv.h"
#include "zebra/zebra_ns.h"
#include "zebra/zebra_vrf.h"

#include "fpm/fpm.h"
#include "zebra_fpm.h"
#include "zebra_fpm_private.h"

/*
 * Interval at which we attempt to connect to the FPM.
 */
#define ZFPM_CONNECT_RETRY_IVL 5

/*
 * Sizes of outgoing and incoming stream buffers for writing/reading
 * FPM messages.
 */
#define ZFPM_OBUF_SIZE (2 * FPM_MAX_MSG_LEN)
#define ZFPM_IBUF_SIZE (FPM_MAX_MSG_LEN)

/*
 * The maximum number of times the FPM socket write callback can call
 * 'write' before it yields.
 */
#define ZFPM_MAX_WRITES_PER_RUN 10

/*
 * Interval over which we collect statistics.
 */
#define ZFPM_STATS_IVL_SECS 10

/*
 * Structure that holds state for iterating over all route_node
 * structures that are candidates for being communicated to the FPM.
 */
typedef struct zfpm_rnodes_iter_t_
{
  rib_tables_iter_t tables_iter;
  route_table_iter_t iter;
} zfpm_rnodes_iter_t;

/*
 * Statistics.
 */
typedef struct zfpm_stats_t_ {
  unsigned long connect_calls;
  unsigned long connect_no_sock;

  unsigned long read_cb_calls;

  unsigned long write_cb_calls;
  unsigned long write_calls;
  unsigned long partial_writes;
  unsigned long max_writes_hit;
  unsigned long t_write_yields;

  unsigned long nop_deletes_skipped;
  unsigned long route_adds;
  unsigned long route_dels;

  unsigned long updates_triggered;
  unsigned long redundant_triggers;
  unsigned long non_fpm_table_triggers;

  unsigned long dests_del_after_update;

  unsigned long t_conn_down_starts;
  unsigned long t_conn_down_dests_processed;
  unsigned long t_conn_down_yields;
  unsigned long t_conn_down_finishes;

  unsigned long t_conn_up_starts;
  unsigned long t_conn_up_dests_processed;
  unsigned long t_conn_up_yields;
  unsigned long t_conn_up_aborts;
  unsigned long t_conn_up_finishes;

} zfpm_stats_t;

/*
 * States for the FPM state machine.
 */
typedef enum {

  /*
   * In this state we are not yet ready to connect to the FPM. This
   * can happen when this module is disabled, or if we're cleaning up
   * after a connection has gone down.
   */
  ZFPM_STATE_IDLE,

  /*
   * Ready to talk to the FPM and periodically trying to connect to
   * it.
   */
  ZFPM_STATE_ACTIVE,

  /*
   * In the middle of bringing up a TCP connection. Specifically,
   * waiting for a connect() call to complete asynchronously.
   */
  ZFPM_STATE_CONNECTING,

  /*
   * TCP connection to the FPM is up.
   */
  ZFPM_STATE_ESTABLISHED

} zfpm_state_t;
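
/*
 * Valid state transitions (summary added for readability; derived from
 * the asserts in zfpm_set_state() below, not from upstream docs):
 *
 *   IDLE        -> ACTIVE       connect retry timer armed
 *   ACTIVE      -> CONNECTING   async connect() in progress
 *   ACTIVE      -> ESTABLISHED  connect() succeeded synchronously
 *   CONNECTING  -> ESTABLISHED  async connect() completed
 *   CONNECTING  -> ACTIVE       async connect() failed, retrying
 *   ESTABLISHED -> IDLE         connection to the FPM went down
 */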

/*
 * Globals.
 */
typedef struct zfpm_glob_t_
{

  /*
   * True if the FPM module has been enabled.
   */
  int enabled;

  struct thread_master *master;

  zfpm_state_t state;

  /*
   * Port on which the FPM is running.
   */
  int fpm_port;

  /*
   * List of rib_dest_t structures to be processed
   */
  TAILQ_HEAD (zfpm_dest_q, rib_dest_t_) dest_q;

  /*
   * Stream socket to the FPM.
   */
  int sock;

  /*
   * Buffers for messages to/from the FPM.
   */
  struct stream *obuf;
  struct stream *ibuf;

  /*
   * Threads for I/O.
   */
  struct thread *t_connect;
  struct thread *t_write;
  struct thread *t_read;

  /*
   * Thread to clean up after the TCP connection to the FPM goes down,
   * and the state that belongs to it.
   */
  struct thread *t_conn_down;

  struct {
    zfpm_rnodes_iter_t iter;
  } t_conn_down_state;

  /*
   * Thread to take actions once the TCP conn to the FPM comes up, and
   * the state that belongs to it.
   */
  struct thread *t_conn_up;

  struct {
    zfpm_rnodes_iter_t iter;
  } t_conn_up_state;

  unsigned long connect_calls;
  time_t last_connect_call_time;

  /*
   * Stats from the start of the current statistics interval up to
   * now. These are the counters we typically update in the code.
   */
  zfpm_stats_t stats;

  /*
   * Statistics that were gathered in the last collection interval.
   */
  zfpm_stats_t last_ivl_stats;

  /*
   * Cumulative stats from the last clear to the start of the current
   * statistics interval.
   */
  zfpm_stats_t cumulative_stats;

  /*
   * Stats interval timer.
   */
  struct thread *t_stats;

  /*
   * If non-zero, the last time when statistics were cleared.
   */
  time_t last_stats_clear_time;

} zfpm_glob_t;

static zfpm_glob_t zfpm_glob_space;
static zfpm_glob_t *zfpm_g = &zfpm_glob_space;

static int zfpm_read_cb (struct thread *thread);
static int zfpm_write_cb (struct thread *thread);

static void zfpm_set_state (zfpm_state_t state, const char *reason);
static void zfpm_start_connect_timer (const char *reason);
static void zfpm_start_stats_timer (void);

/*
 * zfpm_thread_should_yield
 */
static inline int
zfpm_thread_should_yield (struct thread *t)
{
  return thread_should_yield (t);
}

/*
 * zfpm_state_to_str
 */
static const char *
zfpm_state_to_str (zfpm_state_t state)
{
  switch (state)
    {

    case ZFPM_STATE_IDLE:
      return "idle";

    case ZFPM_STATE_ACTIVE:
      return "active";

    case ZFPM_STATE_CONNECTING:
      return "connecting";

    case ZFPM_STATE_ESTABLISHED:
      return "established";

    default:
      return "unknown";
    }
}

/*
 * zfpm_get_time
 */
static time_t
zfpm_get_time (void)
{
  struct timeval tv;

  if (quagga_gettime (QUAGGA_CLK_MONOTONIC, &tv) < 0)
    zlog_warn ("FPM: quagga_gettime failed!!");

  return tv.tv_sec;
}

/*
 * zfpm_get_elapsed_time
 *
 * Returns the time elapsed (in seconds) since the given time.
 */
static time_t
zfpm_get_elapsed_time (time_t reference)
{
  time_t now;

  now = zfpm_get_time ();

  if (now < reference)
    {
      assert (0);
      return 0;
    }

  return now - reference;
}

/*
 * zfpm_is_table_for_fpm
 *
 * Returns TRUE if the given table is to be communicated to the
 * FPM.
 */
static inline int
zfpm_is_table_for_fpm (struct route_table *table)
{
  rib_table_info_t *info;

  info = rib_table_info (table);

  /*
   * We only send the unicast tables in the main instance to the FPM
   * at this point.
   */
  if (info->zvrf->vrf_id != 0)
    return 0;

  if (info->safi != SAFI_UNICAST)
    return 0;

  return 1;
}

/*
 * zfpm_rnodes_iter_init
 */
static inline void
zfpm_rnodes_iter_init (zfpm_rnodes_iter_t *iter)
{
  memset (iter, 0, sizeof (*iter));
  rib_tables_iter_init (&iter->tables_iter);

  /*
   * This is a hack, but it makes implementing 'next' easier by
   * ensuring that route_table_iter_next() will return NULL the first
   * time we call it.
   */
  route_table_iter_init (&iter->iter, NULL);
  route_table_iter_cleanup (&iter->iter);
}

/*
 * zfpm_rnodes_iter_next
 */
static inline struct route_node *
zfpm_rnodes_iter_next (zfpm_rnodes_iter_t *iter)
{
  struct route_node *rn;
  struct route_table *table;

  while (1)
    {
      rn = route_table_iter_next (&iter->iter);
      if (rn)
        return rn;

      /*
       * We've made our way through this table, go to the next one.
       */
      route_table_iter_cleanup (&iter->iter);

      while ((table = rib_tables_iter_next (&iter->tables_iter)))
        {
          if (zfpm_is_table_for_fpm (table))
            break;
        }

      if (!table)
        return NULL;

      route_table_iter_init (&iter->iter, table);
    }

  return NULL;
}

/*
 * zfpm_rnodes_iter_pause
 */
static inline void
zfpm_rnodes_iter_pause (zfpm_rnodes_iter_t *iter)
{
  route_table_iter_pause (&iter->iter);
}

/*
 * zfpm_rnodes_iter_cleanup
 */
static inline void
zfpm_rnodes_iter_cleanup (zfpm_rnodes_iter_t *iter)
{
  route_table_iter_cleanup (&iter->iter);
  rib_tables_iter_cleanup (&iter->tables_iter);
}
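
/*
 * Typical use of the iterator above (illustrative sketch only; the
 * 'process_rnode' callback is a placeholder, not a function in this
 * file):
 *
 *   zfpm_rnodes_iter_t iter;
 *   struct route_node *rn;
 *
 *   zfpm_rnodes_iter_init (&iter);
 *   while ((rn = zfpm_rnodes_iter_next (&iter)))
 *     process_rnode (rn);
 *   zfpm_rnodes_iter_cleanup (&iter);
 *
 * The conn_up and conn_down threads below follow this pattern, using
 * zfpm_rnodes_iter_pause() when they yield.
 */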

/*
 * zfpm_stats_init
 *
 * Initialize a statistics block.
 */
static inline void
zfpm_stats_init (zfpm_stats_t *stats)
{
  memset (stats, 0, sizeof (*stats));
}

/*
 * zfpm_stats_reset
 */
static inline void
zfpm_stats_reset (zfpm_stats_t *stats)
{
  zfpm_stats_init (stats);
}

/*
 * zfpm_stats_copy
 */
static inline void
zfpm_stats_copy (const zfpm_stats_t *src, zfpm_stats_t *dest)
{
  memcpy (dest, src, sizeof (*dest));
}

/*
 * zfpm_stats_compose
 *
 * Total up the statistics in two stats structures ('s1' and 's2') and
 * return the result in the third argument, 'result'. Note that the
 * pointer 'result' may be the same as 's1' or 's2'.
 *
 * For simplicity, the implementation below assumes that the stats
 * structure is composed entirely of counters. This can easily be
 * changed when necessary.
 */
static void
zfpm_stats_compose (const zfpm_stats_t *s1, const zfpm_stats_t *s2,
                    zfpm_stats_t *result)
{
  const unsigned long *p1, *p2;
  unsigned long *result_p;
  int i, num_counters;

  p1 = (const unsigned long *) s1;
  p2 = (const unsigned long *) s2;
  result_p = (unsigned long *) result;

  num_counters = (sizeof (zfpm_stats_t) / sizeof (unsigned long));

  for (i = 0; i < num_counters; i++)
    {
      result_p[i] = p1[i] + p2[i];
    }
}
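
/*
 * The counter-array walk above is only safe while zfpm_stats_t
 * contains nothing but 'unsigned long' members. A compile-time guard
 * along these lines could make that assumption explicit (sketch only,
 * not present upstream):
 *
 *   typedef char zfpm_stats_all_ulong_check
 *     [(sizeof (zfpm_stats_t) % sizeof (unsigned long)) == 0 ? 1 : -1];
 */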

/*
 * zfpm_read_on
 */
static inline void
zfpm_read_on (void)
{
  assert (!zfpm_g->t_read);
  assert (zfpm_g->sock >= 0);

  THREAD_READ_ON (zfpm_g->master, zfpm_g->t_read, zfpm_read_cb, 0,
                  zfpm_g->sock);
}

/*
 * zfpm_write_on
 */
static inline void
zfpm_write_on (void)
{
  assert (!zfpm_g->t_write);
  assert (zfpm_g->sock >= 0);

  THREAD_WRITE_ON (zfpm_g->master, zfpm_g->t_write, zfpm_write_cb, 0,
                   zfpm_g->sock);
}

/*
 * zfpm_read_off
 */
static inline void
zfpm_read_off (void)
{
  THREAD_READ_OFF (zfpm_g->t_read);
}

/*
 * zfpm_write_off
 */
static inline void
zfpm_write_off (void)
{
  THREAD_WRITE_OFF (zfpm_g->t_write);
}

/*
 * zfpm_conn_up_thread_cb
 *
 * Callback for actions to be taken when the connection to the FPM
 * comes up.
 */
static int
zfpm_conn_up_thread_cb (struct thread *thread)
{
  struct route_node *rnode;
  zfpm_rnodes_iter_t *iter;
  rib_dest_t *dest;

  assert (zfpm_g->t_conn_up);
  zfpm_g->t_conn_up = NULL;

  iter = &zfpm_g->t_conn_up_state.iter;

  if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
    {
      zfpm_debug ("Connection not up anymore, conn_up thread aborting");
      zfpm_g->stats.t_conn_up_aborts++;
      goto done;
    }

  while ((rnode = zfpm_rnodes_iter_next (iter)))
    {
      dest = rib_dest_from_rnode (rnode);

      if (dest)
        {
          zfpm_g->stats.t_conn_up_dests_processed++;
          zfpm_trigger_update (rnode, NULL);
        }

      /*
       * Yield if need be.
       */
      if (!zfpm_thread_should_yield (thread))
        continue;

      zfpm_g->stats.t_conn_up_yields++;
      zfpm_rnodes_iter_pause (iter);
      zfpm_g->t_conn_up = thread_add_background (zfpm_g->master,
                                                 zfpm_conn_up_thread_cb,
                                                 0, 0);
      return 0;
    }

  zfpm_g->stats.t_conn_up_finishes++;

 done:
  zfpm_rnodes_iter_cleanup (iter);
  return 0;
}

/*
 * zfpm_connection_up
 *
 * Called when the connection to the FPM comes up.
 */
static void
zfpm_connection_up (const char *detail)
{
  assert (zfpm_g->sock >= 0);
  zfpm_read_on ();
  zfpm_write_on ();
  zfpm_set_state (ZFPM_STATE_ESTABLISHED, detail);

  /*
   * Start thread to push existing routes to the FPM.
   */
  assert (!zfpm_g->t_conn_up);

  zfpm_rnodes_iter_init (&zfpm_g->t_conn_up_state.iter);

  zfpm_debug ("Starting conn_up thread");
  zfpm_g->t_conn_up = thread_add_background (zfpm_g->master,
                                             zfpm_conn_up_thread_cb, 0, 0);
  zfpm_g->stats.t_conn_up_starts++;
}

/*
 * zfpm_connect_check
 *
 * Check if an asynchronous connect() to the FPM is complete.
 */
static void
zfpm_connect_check (void)
{
  int status;
  socklen_t slen;
  int ret;

  zfpm_read_off ();
  zfpm_write_off ();

  slen = sizeof (status);
  ret = getsockopt (zfpm_g->sock, SOL_SOCKET, SO_ERROR, (void *) &status,
                    &slen);

  if (ret >= 0 && status == 0)
    {
      zfpm_connection_up ("async connect complete");
      return;
    }

  /*
   * getsockopt() failed or indicated an error on the socket.
   */
  close (zfpm_g->sock);
  zfpm_g->sock = -1;

  zfpm_start_connect_timer ("getsockopt() after async connect failed");
  return;
}

/*
 * zfpm_conn_down_thread_cb
 *
 * Callback that is invoked to clean up state after the TCP connection
 * to the FPM goes down.
 */
static int
zfpm_conn_down_thread_cb (struct thread *thread)
{
  struct route_node *rnode;
  zfpm_rnodes_iter_t *iter;
  rib_dest_t *dest;

  assert (zfpm_g->state == ZFPM_STATE_IDLE);

  assert (zfpm_g->t_conn_down);
  zfpm_g->t_conn_down = NULL;

  iter = &zfpm_g->t_conn_down_state.iter;

  while ((rnode = zfpm_rnodes_iter_next (iter)))
    {
      dest = rib_dest_from_rnode (rnode);

      if (dest)
        {
          if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM))
            {
              TAILQ_REMOVE (&zfpm_g->dest_q, dest, fpm_q_entries);
            }

          UNSET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM);
          UNSET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM);

          zfpm_g->stats.t_conn_down_dests_processed++;

          /*
           * Check if the dest should be deleted.
           */
          rib_gc_dest(rnode);
        }

      /*
       * Yield if need be.
       */
      if (!zfpm_thread_should_yield (thread))
        continue;

      zfpm_g->stats.t_conn_down_yields++;
      zfpm_rnodes_iter_pause (iter);
      zfpm_g->t_conn_down = thread_add_background (zfpm_g->master,
                                                   zfpm_conn_down_thread_cb,
                                                   0, 0);
      return 0;
    }

  zfpm_g->stats.t_conn_down_finishes++;
  zfpm_rnodes_iter_cleanup (iter);

  /*
   * Start the process of connecting to the FPM again.
   */
  zfpm_start_connect_timer ("cleanup complete");
  return 0;
}

/*
 * zfpm_connection_down
 *
 * Called when the connection to the FPM has gone down.
 */
static void
zfpm_connection_down (const char *detail)
{
  if (!detail)
    detail = "unknown";

  assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED);

  zlog_info ("connection to the FPM has gone down: %s", detail);

  zfpm_read_off ();
  zfpm_write_off ();

  stream_reset (zfpm_g->ibuf);
  stream_reset (zfpm_g->obuf);

  if (zfpm_g->sock >= 0) {
    close (zfpm_g->sock);
    zfpm_g->sock = -1;
  }

  /*
   * Start thread to clean up state after the connection goes down.
   */
  assert (!zfpm_g->t_conn_down);
  zfpm_debug ("Starting conn_down thread");
  zfpm_rnodes_iter_init (&zfpm_g->t_conn_down_state.iter);
  zfpm_g->t_conn_down = thread_add_background (zfpm_g->master,
                                               zfpm_conn_down_thread_cb, 0, 0);
  zfpm_g->stats.t_conn_down_starts++;

  zfpm_set_state (ZFPM_STATE_IDLE, detail);
}

/*
 * zfpm_read_cb
 */
static int
zfpm_read_cb (struct thread *thread)
{
  size_t already;
  struct stream *ibuf;
  uint16_t msg_len;
  fpm_msg_hdr_t *hdr;

  zfpm_g->stats.read_cb_calls++;
  assert (zfpm_g->t_read);
  zfpm_g->t_read = NULL;

  /*
   * Check if async connect is now done.
   */
  if (zfpm_g->state == ZFPM_STATE_CONNECTING)
    {
      zfpm_connect_check();
      return 0;
    }

  assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED);
  assert (zfpm_g->sock >= 0);

  ibuf = zfpm_g->ibuf;

  already = stream_get_endp (ibuf);
  if (already < FPM_MSG_HDR_LEN)
    {
      ssize_t nbyte;

      nbyte = stream_read_try (ibuf, zfpm_g->sock, FPM_MSG_HDR_LEN - already);
      if (nbyte == 0 || nbyte == -1)
        {
          zfpm_connection_down ("closed socket in read");
          return 0;
        }

      if (nbyte != (ssize_t) (FPM_MSG_HDR_LEN - already))
        goto done;

      already = FPM_MSG_HDR_LEN;
    }

  stream_set_getp (ibuf, 0);

  hdr = (fpm_msg_hdr_t *) stream_pnt (ibuf);

  if (!fpm_msg_hdr_ok (hdr))
    {
      zfpm_connection_down ("invalid message header");
      return 0;
    }

  msg_len = fpm_msg_len (hdr);

  /*
   * Read out the rest of the packet.
   */
  if (already < msg_len)
    {
      ssize_t nbyte;

      nbyte = stream_read_try (ibuf, zfpm_g->sock, msg_len - already);

      if (nbyte == 0 || nbyte == -1)
        {
          zfpm_connection_down ("failed to read message");
          return 0;
        }

      if (nbyte != (ssize_t) (msg_len - already))
        goto done;
    }

  zfpm_debug ("Read out a full fpm message");

  /*
   * Just throw it away for now.
   */
  stream_reset (ibuf);

 done:
  zfpm_read_on ();
  return 0;
}
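
/*
 * Note on the read path above: stream_read_try() may return fewer
 * bytes than requested on a non-blocking socket. The callback copes by
 * leaving the partial data in 'ibuf' (tracked via its endp) and
 * returning; the next invocation resumes where the previous one left
 * off, first completing the fixed-size header and then the remainder
 * of the message.
 */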

/*
 * zfpm_writes_pending
 *
 * Returns TRUE if we may have something to write to the FPM.
 */
static int
zfpm_writes_pending (void)
{

  /*
   * Check if there is any data in the outbound buffer that has not
   * been written to the socket yet.
   */
  if (stream_get_endp (zfpm_g->obuf) - stream_get_getp (zfpm_g->obuf))
    return 1;

  /*
   * Check if there are any prefixes on the outbound queue.
   */
  if (!TAILQ_EMPTY (&zfpm_g->dest_q))
    return 1;

  return 0;
}

/*
 * zfpm_encode_route
 *
 * Encode a message to the FPM with information about the given route.
 *
 * Returns the number of bytes written to the buffer. 0 or a negative
 * value indicates an error.
 */
static inline int
zfpm_encode_route (rib_dest_t *dest, struct rib *rib, char *in_buf,
                   size_t in_buf_len)
{
#ifndef HAVE_NETLINK
  return 0;
#else

  int cmd;

  cmd = rib ? RTM_NEWROUTE : RTM_DELROUTE;

  return zfpm_netlink_encode_route (cmd, dest, rib, in_buf, in_buf_len);

#endif /* HAVE_NETLINK */
}

/*
 * zfpm_route_for_update
 *
 * Returns the rib that is to be sent to the FPM for a given dest.
 */
static struct rib *
zfpm_route_for_update (rib_dest_t *dest)
{
  struct rib *rib;

  RIB_DEST_FOREACH_ROUTE (dest, rib)
    {
      if (!CHECK_FLAG (rib->flags, ZEBRA_FLAG_SELECTED))
        continue;

      return rib;
    }

  /*
   * We have no route for this destination.
   */
  return NULL;
}
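
/*
 * Wire format note: each message sent to the FPM is an fpm_msg_hdr_t
 * followed by the payload (layout inferred from how the header is
 * filled in below; see fpm.h for the authoritative definition):
 *
 *   version   (1 octet)  - FPM_PROTO_VERSION
 *   msg_type  (1 octet)  - FPM_MSG_TYPE_NETLINK
 *   msg_len   (2 octets) - total message length, network byte order
 *
 * The payload is the netlink message produced by zfpm_encode_route().
 */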

/*
 * zfpm_build_updates
 *
 * Process the outgoing queue and write messages to the outbound
 * buffer.
 */
static void
zfpm_build_updates (void)
{
  struct stream *s;
  rib_dest_t *dest;
  unsigned char *buf, *data, *buf_end;
  size_t msg_len;
  size_t data_len;
  fpm_msg_hdr_t *hdr;
  struct rib *rib;
  int is_add, write_msg;

  s = zfpm_g->obuf;

  assert (stream_empty (s));

  do {

    /*
     * Make sure there is enough space to write another message.
     */
    if (STREAM_WRITEABLE (s) < FPM_MAX_MSG_LEN)
      break;

    buf = STREAM_DATA (s) + stream_get_endp (s);
    buf_end = buf + STREAM_WRITEABLE (s);

    dest = TAILQ_FIRST (&zfpm_g->dest_q);
    if (!dest)
      break;

    assert (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM));

    hdr = (fpm_msg_hdr_t *) buf;
    hdr->version = FPM_PROTO_VERSION;
    hdr->msg_type = FPM_MSG_TYPE_NETLINK;

    data = fpm_msg_data (hdr);

    rib = zfpm_route_for_update (dest);
    is_add = rib ? 1 : 0;

    write_msg = 1;

    /*
     * If this is a route deletion, and we have not sent the route to
     * the FPM previously, skip it.
     */
    if (!is_add && !CHECK_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM))
      {
        write_msg = 0;
        zfpm_g->stats.nop_deletes_skipped++;
      }

    if (write_msg) {
      data_len = zfpm_encode_route (dest, rib, (char *) data, buf_end - data);

      assert (data_len);
      if (data_len)
        {
          msg_len = fpm_data_len_to_msg_len (data_len);
          hdr->msg_len = htons (msg_len);
          stream_forward_endp (s, msg_len);

          if (is_add)
            zfpm_g->stats.route_adds++;
          else
            zfpm_g->stats.route_dels++;
        }
    }

    /*
     * Remove the dest from the queue, and reset the flag.
     */
    UNSET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM);
    TAILQ_REMOVE (&zfpm_g->dest_q, dest, fpm_q_entries);

    if (is_add)
      {
        SET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM);
      }
    else
      {
        UNSET_FLAG (dest->flags, RIB_DEST_SENT_TO_FPM);
      }

    /*
     * Delete the destination if necessary.
     */
    if (rib_gc_dest (dest->rnode))
      zfpm_g->stats.dests_del_after_update++;

  } while (1);

}

/*
 * zfpm_write_cb
 */
static int
zfpm_write_cb (struct thread *thread)
{
  struct stream *s;
  int num_writes;

  zfpm_g->stats.write_cb_calls++;
  assert (zfpm_g->t_write);
  zfpm_g->t_write = NULL;

  /*
   * Check if async connect is now done.
   */
  if (zfpm_g->state == ZFPM_STATE_CONNECTING)
    {
      zfpm_connect_check ();
      return 0;
    }

  assert (zfpm_g->state == ZFPM_STATE_ESTABLISHED);
  assert (zfpm_g->sock >= 0);

  num_writes = 0;

  do
    {
      int bytes_to_write, bytes_written;

      s = zfpm_g->obuf;

      /*
       * If the stream is empty, try to fill it up with data.
       */
      if (stream_empty (s))
        {
          zfpm_build_updates ();
        }

      bytes_to_write = stream_get_endp (s) - stream_get_getp (s);
      if (!bytes_to_write)
        break;

      bytes_written = write (zfpm_g->sock, STREAM_PNT (s), bytes_to_write);
      zfpm_g->stats.write_calls++;
      num_writes++;

      if (bytes_written < 0)
        {
          if (ERRNO_IO_RETRY (errno))
            break;

          zfpm_connection_down ("failed to write to socket");
          return 0;
        }

      if (bytes_written != bytes_to_write)
        {

          /*
           * Partial write.
           */
          stream_forward_getp (s, bytes_written);
          zfpm_g->stats.partial_writes++;
          break;
        }

      /*
       * We've written out the entire contents of the stream.
       */
      stream_reset (s);

      if (num_writes >= ZFPM_MAX_WRITES_PER_RUN)
        {
          zfpm_g->stats.max_writes_hit++;
          break;
        }

      if (zfpm_thread_should_yield (thread))
        {
          zfpm_g->stats.t_write_yields++;
          break;
        }
    } while (1);

  if (zfpm_writes_pending ())
    zfpm_write_on ();

  return 0;
}

/*
 * zfpm_connect_cb
 */
static int
zfpm_connect_cb (struct thread *t)
{
  int sock, ret;
  struct sockaddr_in serv;

  assert (zfpm_g->t_connect);
  zfpm_g->t_connect = NULL;
  assert (zfpm_g->state == ZFPM_STATE_ACTIVE);

  sock = socket (AF_INET, SOCK_STREAM, 0);
  if (sock < 0)
    {
      zfpm_debug ("Failed to create socket for connect(): %s", strerror(errno));
      zfpm_g->stats.connect_no_sock++;
      return 0;
    }

  set_nonblocking(sock);

  /* Make server socket. */
  memset (&serv, 0, sizeof (serv));
  serv.sin_family = AF_INET;
  serv.sin_port = htons (zfpm_g->fpm_port);
#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
  serv.sin_len = sizeof (struct sockaddr_in);
#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
  serv.sin_addr.s_addr = htonl (INADDR_LOOPBACK);

  /*
   * Connect to the FPM.
   */
  zfpm_g->connect_calls++;
  zfpm_g->stats.connect_calls++;
  zfpm_g->last_connect_call_time = zfpm_get_time ();

  ret = connect (sock, (struct sockaddr *) &serv, sizeof (serv));
  if (ret >= 0)
    {
      zfpm_g->sock = sock;
      zfpm_connection_up ("connect succeeded");
      return 1;
    }

  if (errno == EINPROGRESS)
    {
      zfpm_g->sock = sock;
      zfpm_read_on ();
      zfpm_write_on ();
      zfpm_set_state (ZFPM_STATE_CONNECTING, "async connect in progress");
      return 0;
    }

  zlog_info ("can't connect to FPM %d: %s", sock, safe_strerror (errno));
  close (sock);

  /*
   * Restart timer for retrying connection.
   */
  zfpm_start_connect_timer ("connect() failed");
  return 0;
}

/*
 * zfpm_set_state
 *
 * Move state machine into the given state.
 */
static void
zfpm_set_state (zfpm_state_t state, const char *reason)
{
  zfpm_state_t cur_state = zfpm_g->state;

  if (!reason)
    reason = "Unknown";

  if (state == cur_state)
    return;

  zfpm_debug("beginning state transition %s -> %s. Reason: %s",
             zfpm_state_to_str (cur_state), zfpm_state_to_str (state),
             reason);

  switch (state) {

  case ZFPM_STATE_IDLE:
    assert (cur_state == ZFPM_STATE_ESTABLISHED);
    break;

  case ZFPM_STATE_ACTIVE:
    assert (cur_state == ZFPM_STATE_IDLE ||
            cur_state == ZFPM_STATE_CONNECTING);
    assert (zfpm_g->t_connect);
    break;

  case ZFPM_STATE_CONNECTING:
    assert (zfpm_g->sock);
    assert (cur_state == ZFPM_STATE_ACTIVE);
    assert (zfpm_g->t_read);
    assert (zfpm_g->t_write);
    break;

  case ZFPM_STATE_ESTABLISHED:
    assert (cur_state == ZFPM_STATE_ACTIVE ||
            cur_state == ZFPM_STATE_CONNECTING);
    assert (zfpm_g->sock);
    assert (zfpm_g->t_read);
    assert (zfpm_g->t_write);
    break;
  }

  zfpm_g->state = state;
}

/*
 * zfpm_calc_connect_delay
 *
 * Returns the number of seconds after which we should attempt to
 * reconnect to the FPM.
 */
static long
zfpm_calc_connect_delay (void)
{
  time_t elapsed;

  /*
   * Return 0 if this is our first attempt to connect.
   */
  if (zfpm_g->connect_calls == 0)
    {
      return 0;
    }

  elapsed = zfpm_get_elapsed_time (zfpm_g->last_connect_call_time);

  if (elapsed > ZFPM_CONNECT_RETRY_IVL) {
    return 0;
  }

  return ZFPM_CONNECT_RETRY_IVL - elapsed;
}
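
/*
 * Worked example: if the last connect() attempt was made 2 seconds ago
 * and ZFPM_CONNECT_RETRY_IVL is 5, zfpm_calc_connect_delay() returns
 * 3, so the next attempt is scheduled 3 seconds out; once 5 or more
 * seconds have elapsed it returns 0 and we retry immediately.
 */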

/*
 * zfpm_start_connect_timer
 */
static void
zfpm_start_connect_timer (const char *reason)
{
  long delay_secs;

  assert (!zfpm_g->t_connect);
  assert (zfpm_g->sock < 0);

  assert(zfpm_g->state == ZFPM_STATE_IDLE ||
         zfpm_g->state == ZFPM_STATE_ACTIVE ||
         zfpm_g->state == ZFPM_STATE_CONNECTING);

  delay_secs = zfpm_calc_connect_delay();
  zfpm_debug ("scheduling connect in %ld seconds", delay_secs);

  THREAD_TIMER_ON (zfpm_g->master, zfpm_g->t_connect, zfpm_connect_cb, 0,
                   delay_secs);
  zfpm_set_state (ZFPM_STATE_ACTIVE, reason);
}

/*
 * zfpm_is_enabled
 *
 * Returns TRUE if the zebra FPM module has been enabled.
 */
static inline int
zfpm_is_enabled (void)
{
  return zfpm_g->enabled;
}

/*
 * zfpm_conn_is_up
 *
 * Returns TRUE if the connection to the FPM is up.
 */
static inline int
zfpm_conn_is_up (void)
{
  if (zfpm_g->state != ZFPM_STATE_ESTABLISHED)
    return 0;

  assert (zfpm_g->sock >= 0);

  return 1;
}

/*
 * zfpm_trigger_update
 *
 * The zebra code invokes this function to indicate that we should
 * send an update to the FPM about the given route_node.
 */
void
zfpm_trigger_update (struct route_node *rn, const char *reason)
{
  rib_dest_t *dest;
  char buf[PREFIX_STRLEN];

  /*
   * Ignore if the connection is down. We will update the FPM about
   * all destinations once the connection comes up.
   */
  if (!zfpm_conn_is_up ())
    return;

  dest = rib_dest_from_rnode (rn);

  /*
   * Ignore the trigger if the dest is not in a table that we would
   * send to the FPM.
   */
  if (!zfpm_is_table_for_fpm (rib_dest_table (dest)))
    {
      zfpm_g->stats.non_fpm_table_triggers++;
      return;
    }

  if (CHECK_FLAG (dest->flags, RIB_DEST_UPDATE_FPM)) {
    zfpm_g->stats.redundant_triggers++;
    return;
  }

  if (reason)
    {
      zfpm_debug ("%s triggering update to FPM - Reason: %s",
                  prefix2str (&rn->p, buf, sizeof(buf)), reason);
    }

  SET_FLAG (dest->flags, RIB_DEST_UPDATE_FPM);
  TAILQ_INSERT_TAIL (&zfpm_g->dest_q, dest, fpm_q_entries);
  zfpm_g->stats.updates_triggered++;

  /*
   * Make sure that writes are enabled.
   */
  if (zfpm_g->t_write)
    return;

  zfpm_write_on ();
}
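
/*
 * Note that a dest is queued at most once: a second trigger that
 * arrives while RIB_DEST_UPDATE_FPM is still set only bumps the
 * 'redundant_triggers' counter. The update sent to the FPM therefore
 * reflects the state of the dest at the time zfpm_build_updates()
 * runs, not at the time of each individual trigger.
 */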

/*
 * zfpm_stats_timer_cb
 */
static int
zfpm_stats_timer_cb (struct thread *t)
{
  assert (zfpm_g->t_stats);
  zfpm_g->t_stats = NULL;

  /*
   * Remember the stats collected in the last interval for display
   * purposes.
   */
  zfpm_stats_copy (&zfpm_g->stats, &zfpm_g->last_ivl_stats);

  /*
   * Add the current set of stats into the cumulative statistics.
   */
  zfpm_stats_compose (&zfpm_g->cumulative_stats, &zfpm_g->stats,
                      &zfpm_g->cumulative_stats);

  /*
   * Start collecting stats afresh over the next interval.
   */
  zfpm_stats_reset (&zfpm_g->stats);

  zfpm_start_stats_timer ();

  return 0;
}

/*
 * zfpm_stop_stats_timer
 */
static void
zfpm_stop_stats_timer (void)
{
  if (!zfpm_g->t_stats)
    return;

  zfpm_debug ("Stopping existing stats timer");
  THREAD_TIMER_OFF (zfpm_g->t_stats);
}

/*
 * zfpm_start_stats_timer
 */
static void
zfpm_start_stats_timer (void)
{
  assert (!zfpm_g->t_stats);

  THREAD_TIMER_ON (zfpm_g->master, zfpm_g->t_stats, zfpm_stats_timer_cb, 0,
                   ZFPM_STATS_IVL_SECS);
}

/*
 * Helper macro for zfpm_show_stats() below.
 */
#define ZFPM_SHOW_STAT(counter) \
  do { \
    vty_out (vty, "%-40s %10lu %16lu%s", #counter, total_stats.counter, \
             zfpm_g->last_ivl_stats.counter, VTY_NEWLINE); \
  } while (0)
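
/*
 * For instance, ZFPM_SHOW_STAT (route_adds) prints the counter name
 * left-justified in a 40-column field, followed by the cumulative and
 * last-interval values (sample line, with made-up numbers):
 *
 *   route_adds                                    12345               17
 */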

/*
 * zfpm_show_stats
 */
static void
zfpm_show_stats (struct vty *vty)
{
  zfpm_stats_t total_stats;
  time_t elapsed;

  vty_out (vty, "%s%-40s %10s Last %2d secs%s%s", VTY_NEWLINE, "Counter",
           "Total", ZFPM_STATS_IVL_SECS, VTY_NEWLINE, VTY_NEWLINE);

  /*
   * Compute the total stats up to this instant.
   */
  zfpm_stats_compose (&zfpm_g->cumulative_stats, &zfpm_g->stats,
                      &total_stats);

  ZFPM_SHOW_STAT (connect_calls);
  ZFPM_SHOW_STAT (connect_no_sock);
  ZFPM_SHOW_STAT (read_cb_calls);
  ZFPM_SHOW_STAT (write_cb_calls);
  ZFPM_SHOW_STAT (write_calls);
  ZFPM_SHOW_STAT (partial_writes);
  ZFPM_SHOW_STAT (max_writes_hit);
  ZFPM_SHOW_STAT (t_write_yields);
  ZFPM_SHOW_STAT (nop_deletes_skipped);
  ZFPM_SHOW_STAT (route_adds);
  ZFPM_SHOW_STAT (route_dels);
  ZFPM_SHOW_STAT (updates_triggered);
  ZFPM_SHOW_STAT (non_fpm_table_triggers);
  ZFPM_SHOW_STAT (redundant_triggers);
  ZFPM_SHOW_STAT (dests_del_after_update);
  ZFPM_SHOW_STAT (t_conn_down_starts);
  ZFPM_SHOW_STAT (t_conn_down_dests_processed);
  ZFPM_SHOW_STAT (t_conn_down_yields);
  ZFPM_SHOW_STAT (t_conn_down_finishes);
  ZFPM_SHOW_STAT (t_conn_up_starts);
  ZFPM_SHOW_STAT (t_conn_up_dests_processed);
  ZFPM_SHOW_STAT (t_conn_up_yields);
  ZFPM_SHOW_STAT (t_conn_up_aborts);
  ZFPM_SHOW_STAT (t_conn_up_finishes);

  if (!zfpm_g->last_stats_clear_time)
    return;

  elapsed = zfpm_get_elapsed_time (zfpm_g->last_stats_clear_time);

  vty_out (vty, "%sStats were cleared %lu seconds ago%s", VTY_NEWLINE,
           (unsigned long) elapsed, VTY_NEWLINE);
}

/*
 * zfpm_clear_stats
 */
static void
zfpm_clear_stats (struct vty *vty)
{
  if (!zfpm_is_enabled ())
    {
      vty_out (vty, "The FPM module is not enabled...%s", VTY_NEWLINE);
      return;
    }

  zfpm_stats_reset (&zfpm_g->stats);
  zfpm_stats_reset (&zfpm_g->last_ivl_stats);
  zfpm_stats_reset (&zfpm_g->cumulative_stats);

  zfpm_stop_stats_timer ();
  zfpm_start_stats_timer ();

  zfpm_g->last_stats_clear_time = zfpm_get_time();

  vty_out (vty, "Cleared FPM stats%s", VTY_NEWLINE);
}

/*
 * show_zebra_fpm_stats
 */
DEFUN (show_zebra_fpm_stats,
       show_zebra_fpm_stats_cmd,
       "show zebra fpm stats",
       SHOW_STR
       "Zebra information\n"
       "Forwarding Path Manager information\n"
       "Statistics\n")
{
  zfpm_show_stats (vty);
  return CMD_SUCCESS;
}

/*
 * clear_zebra_fpm_stats
 */
DEFUN (clear_zebra_fpm_stats,
       clear_zebra_fpm_stats_cmd,
       "clear zebra fpm stats",
       CLEAR_STR
       "Zebra information\n"
       "Clear Forwarding Path Manager information\n"
       "Statistics\n")
{
  zfpm_clear_stats (vty);
  return CMD_SUCCESS;
}

/**
 * zfpm_init
 *
 * One-time initialization of the Zebra FPM module.
 *
 * @param[in] port port at which FPM is running.
 * @param[in] enable TRUE if the zebra FPM module should be enabled
 *
 * Returns TRUE on success.
 */
int
zfpm_init (struct thread_master *master, int enable, uint16_t port)
{
  static int initialized = 0;

  if (initialized) {
    return 1;
  }

  initialized = 1;

  memset (zfpm_g, 0, sizeof (*zfpm_g));
  zfpm_g->master = master;
  TAILQ_INIT(&zfpm_g->dest_q);
  zfpm_g->sock = -1;
  zfpm_g->state = ZFPM_STATE_IDLE;

  /*
   * Netlink must currently be available for the Zebra-FPM interface
   * to be enabled.
   */
#ifndef HAVE_NETLINK
  enable = 0;
#endif

  zfpm_g->enabled = enable;

  zfpm_stats_init (&zfpm_g->stats);
  zfpm_stats_init (&zfpm_g->last_ivl_stats);
  zfpm_stats_init (&zfpm_g->cumulative_stats);

  install_element (ENABLE_NODE, &show_zebra_fpm_stats_cmd);
  install_element (ENABLE_NODE, &clear_zebra_fpm_stats_cmd);

  if (!enable) {
    return 1;
  }

  if (!port)
    port = FPM_DEFAULT_PORT;

  zfpm_g->fpm_port = port;

  zfpm_g->obuf = stream_new (ZFPM_OBUF_SIZE);
  zfpm_g->ibuf = stream_new (ZFPM_IBUF_SIZE);

  zfpm_start_stats_timer ();
  zfpm_start_connect_timer ("initialized");

  return 1;
}
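
/*
 * Example invocation (illustrative only; the actual call site lives in
 * zebra's startup code, and 'zebrad.master' here is an assumption
 * about that context):
 *
 *   zfpm_init (zebrad.master, 1, 0);
 *
 * Passing 0 for the port selects FPM_DEFAULT_PORT, per the check
 * above.
 */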