2 * Code for encoding/decoding FPM messages that are in netlink format.
4 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
5 * Copyright (C) 2012 by Open Source Routing.
6 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
8 * This file is part of GNU Zebra.
10 * GNU Zebra is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the
12 * Free Software Foundation; either version 2, or (at your option) any
15 * GNU Zebra is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
20 * You should have received a copy of the GNU General Public License along
21 * with this program; see the file COPYING; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
34 #include "zebra/zserv.h"
35 #include "zebra/zebra_router.h"
36 #include "zebra/zebra_dplane.h"
37 #include "zebra/zebra_ns.h"
38 #include "zebra/zebra_vrf.h"
39 #include "zebra/kernel_netlink.h"
40 #include "zebra/rt_netlink.h"
43 #include "zebra/zebra_fpm_private.h"
44 #include "zebra/zebra_vxlan_private.h"
49 * Returns string representation of an address of the given AF.
51 static inline const char *addr_to_a(uint8_t af
, void *addr
)
54 return "<No address>";
59 return inet_ntoa(*((struct in_addr
*)addr
));
62 return inet6_ntoa(*((struct in6_addr
*)addr
));
65 return "<Addr in unknown AF>";
73 * Convenience wrapper that returns a human-readable string for the
74 * address in a prefix.
76 static const char *prefix_addr_to_a(struct prefix
*prefix
)
79 return "<No address>";
81 return addr_to_a(prefix
->family
, &prefix
->u
.prefix
);
87 * The size of an address in a given address family.
89 static size_t af_addr_size(uint8_t af
)
106 * We plan to use RTA_ENCAP_TYPE attribute for VxLAN encap as well.
107 * Currently, values 0 to 8 for this attribute are used by lwtunnel_encap_types.
108 * So, we cannot use these values for VxLAN encap.
110 enum fpm_nh_encap_type_t
{
111 FPM_NH_ENCAP_NONE
= 0,
112 FPM_NH_ENCAP_VXLAN
= 100,
117 * fpm_nh_encap_type_to_str
119 static const char *fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type
)
121 switch (encap_type
) {
122 case FPM_NH_ENCAP_NONE
:
125 case FPM_NH_ENCAP_VXLAN
:
128 case FPM_NH_ENCAP_MAX
:
135 struct vxlan_encap_info_t
{
139 enum vxlan_encap_info_type_t
{
143 struct fpm_nh_encap_info_t
{
144 enum fpm_nh_encap_type_t encap_type
;
146 struct vxlan_encap_info_t vxlan_encap
;
153 * Holds information about a single nexthop for netlink. These info
154 * structures are transient and may contain pointers into rib
155 * data structures for convenience.
157 typedef struct netlink_nh_info_t_
{
159 union g_addr
*gateway
;
162 * Information from the struct nexthop from which this nh was
163 * derived. For debug purposes only.
166 enum nexthop_types_t type
;
167 struct fpm_nh_encap_info_t encap_info
;
171 * netlink_route_info_t
173 * A structure for holding information for a netlink route message.
175 typedef struct netlink_route_info_t_
{
179 uint8_t rtm_protocol
;
181 struct prefix
*prefix
;
183 unsigned int num_nhs
;
188 netlink_nh_info_t nhs
[MULTIPATH_NUM
];
189 union g_addr
*pref_src
;
190 } netlink_route_info_t
;
193 * netlink_route_info_add_nh
195 * Add information about the given nexthop to the given route info
198 * Returns TRUE if a nexthop was added, FALSE otherwise.
200 static int netlink_route_info_add_nh(netlink_route_info_t
*ri
,
201 struct nexthop
*nexthop
,
202 struct route_entry
*re
)
204 netlink_nh_info_t nhi
;
206 zebra_l3vni_t
*zl3vni
= NULL
;
208 memset(&nhi
, 0, sizeof(nhi
));
211 if (ri
->num_nhs
>= (int)array_size(ri
->nhs
))
214 nhi
.recursive
= nexthop
->rparent
? 1 : 0;
215 nhi
.type
= nexthop
->type
;
216 nhi
.if_index
= nexthop
->ifindex
;
218 if (nexthop
->type
== NEXTHOP_TYPE_IPV4
219 || nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
) {
220 nhi
.gateway
= &nexthop
->gate
;
221 if (nexthop
->src
.ipv4
.s_addr
)
225 if (nexthop
->type
== NEXTHOP_TYPE_IPV6
226 || nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
227 nhi
.gateway
= &nexthop
->gate
;
230 if (nexthop
->type
== NEXTHOP_TYPE_IFINDEX
) {
231 if (nexthop
->src
.ipv4
.s_addr
)
235 if (!nhi
.gateway
&& nhi
.if_index
== 0)
238 if (re
&& CHECK_FLAG(re
->flags
, ZEBRA_FLAG_EVPN_ROUTE
)) {
239 nhi
.encap_info
.encap_type
= FPM_NH_ENCAP_VXLAN
;
241 zl3vni
= zl3vni_from_vrf(ri
->rtm_table
);
242 if (zl3vni
&& is_l3vni_oper_up(zl3vni
)) {
244 /* Add VNI to VxLAN encap info */
245 nhi
.encap_info
.vxlan_encap
.vni
= zl3vni
->vni
;
250 * We have a valid nhi. Copy the structure over to the route_info.
252 ri
->nhs
[ri
->num_nhs
] = nhi
;
255 if (src
&& !ri
->pref_src
)
262 * netlink_proto_from_route_type
264 static uint8_t netlink_proto_from_route_type(int type
)
267 case ZEBRA_ROUTE_KERNEL
:
268 case ZEBRA_ROUTE_CONNECT
:
269 return RTPROT_KERNEL
;
277 * netlink_route_info_fill
279 * Fill out the route information object from the given route.
281 * Returns TRUE on success and FALSE on failure.
283 static int netlink_route_info_fill(netlink_route_info_t
*ri
, int cmd
,
284 rib_dest_t
*dest
, struct route_entry
*re
)
286 struct nexthop
*nexthop
;
287 struct zebra_vrf
*zvrf
;
289 memset(ri
, 0, sizeof(*ri
));
291 ri
->prefix
= rib_dest_prefix(dest
);
292 ri
->af
= rib_dest_af(dest
);
294 ri
->nlmsg_type
= cmd
;
295 zvrf
= rib_dest_vrf(dest
);
297 ri
->rtm_table
= zvrf
->table_id
;
298 ri
->rtm_protocol
= RTPROT_UNSPEC
;
301 * An RTM_DELROUTE need not be accompanied by any nexthops,
302 * particularly in our communication with the FPM.
304 if (cmd
== RTM_DELROUTE
&& !re
)
308 zfpm_debug("%s: Expected non-NULL re pointer",
309 __PRETTY_FUNCTION__
);
313 ri
->rtm_protocol
= netlink_proto_from_route_type(re
->type
);
314 ri
->rtm_type
= RTN_UNICAST
;
315 ri
->metric
= &re
->metric
;
317 for (ALL_NEXTHOPS(re
->ng
, nexthop
)) {
318 if (ri
->num_nhs
>= zrouter
.multipath_num
)
321 if (CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_RECURSIVE
))
324 if (nexthop
->type
== NEXTHOP_TYPE_BLACKHOLE
) {
325 switch (nexthop
->bh_type
) {
326 case BLACKHOLE_ADMINPROHIB
:
327 ri
->rtm_type
= RTN_PROHIBIT
;
329 case BLACKHOLE_REJECT
:
330 ri
->rtm_type
= RTN_UNREACHABLE
;
334 ri
->rtm_type
= RTN_BLACKHOLE
;
339 if ((cmd
== RTM_NEWROUTE
340 && CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_ACTIVE
))
341 || (cmd
== RTM_DELROUTE
342 && CHECK_FLAG(re
->status
, ROUTE_ENTRY_INSTALLED
))) {
343 netlink_route_info_add_nh(ri
, nexthop
, re
);
347 /* If there is no useful nexthop then return. */
348 if (ri
->num_nhs
== 0) {
349 zfpm_debug("netlink_encode_route(): No useful nexthop.");
357 * netlink_route_info_encode
359 * Returns the number of bytes written to the buffer. 0 or a negative
360 * value indicates an error.
362 static int netlink_route_info_encode(netlink_route_info_t
*ri
, char *in_buf
,
366 unsigned int nexthop_num
= 0;
368 netlink_nh_info_t
*nhi
;
369 enum fpm_nh_encap_type_t encap
;
371 struct vxlan_encap_info_t
*vxlan
;
380 req
= (void *)in_buf
;
382 buf_offset
= ((char *)req
->buf
) - ((char *)req
);
384 if (in_buf_len
< buf_offset
) {
389 memset(req
, 0, buf_offset
);
391 bytelen
= af_addr_size(ri
->af
);
393 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
394 req
->n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
395 req
->n
.nlmsg_type
= ri
->nlmsg_type
;
396 req
->r
.rtm_family
= ri
->af
;
399 * rtm_table field is a uchar field which can accommodate table_id less than 256.
401 * To support table id greater than 255, if the table_id is greater than
402 * 255, set rtm_table to RT_TABLE_UNSPEC and add RTA_TABLE attribute
403 * with 32 bit value as the table_id.
405 if (ri
->rtm_table
< 256)
406 req
->r
.rtm_table
= ri
->rtm_table
;
408 req
->r
.rtm_table
= RT_TABLE_UNSPEC
;
409 addattr32(&req
->n
, in_buf_len
, RTA_TABLE
, ri
->rtm_table
);
412 req
->r
.rtm_dst_len
= ri
->prefix
->prefixlen
;
413 req
->r
.rtm_protocol
= ri
->rtm_protocol
;
414 req
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
416 addattr_l(&req
->n
, in_buf_len
, RTA_DST
, &ri
->prefix
->u
.prefix
, bytelen
);
418 req
->r
.rtm_type
= ri
->rtm_type
;
422 addattr32(&req
->n
, in_buf_len
, RTA_PRIORITY
, *ri
->metric
);
424 if (ri
->num_nhs
== 0)
427 if (ri
->num_nhs
== 1) {
431 addattr_l(&req
->n
, in_buf_len
, RTA_GATEWAY
,
432 nhi
->gateway
, bytelen
);
436 addattr32(&req
->n
, in_buf_len
, RTA_OIF
, nhi
->if_index
);
439 encap
= nhi
->encap_info
.encap_type
;
440 if (encap
> FPM_NH_ENCAP_NONE
) {
441 addattr_l(&req
->n
, in_buf_len
, RTA_ENCAP_TYPE
, &encap
,
444 case FPM_NH_ENCAP_NONE
:
446 case FPM_NH_ENCAP_VXLAN
:
447 vxlan
= &nhi
->encap_info
.vxlan_encap
;
448 nest
= addattr_nest(&req
->n
, in_buf_len
,
450 addattr32(&req
->n
, in_buf_len
, VXLAN_VNI
,
452 addattr_nest_end(&req
->n
, nest
);
454 case FPM_NH_ENCAP_MAX
:
465 char buf
[NL_PKT_BUF_SIZE
];
466 struct rtattr
*rta
= (void *)buf
;
467 struct rtnexthop
*rtnh
;
469 rta
->rta_type
= RTA_MULTIPATH
;
470 rta
->rta_len
= RTA_LENGTH(0);
471 rtnh
= RTA_DATA(rta
);
473 for (nexthop_num
= 0; nexthop_num
< ri
->num_nhs
; nexthop_num
++) {
474 nhi
= &ri
->nhs
[nexthop_num
];
476 rtnh
->rtnh_len
= sizeof(*rtnh
);
477 rtnh
->rtnh_flags
= 0;
479 rtnh
->rtnh_ifindex
= 0;
480 rta
->rta_len
+= rtnh
->rtnh_len
;
483 rta_addattr_l(rta
, sizeof(buf
), RTA_GATEWAY
,
484 nhi
->gateway
, bytelen
);
485 rtnh
->rtnh_len
+= sizeof(struct rtattr
) + bytelen
;
489 rtnh
->rtnh_ifindex
= nhi
->if_index
;
492 encap
= nhi
->encap_info
.encap_type
;
493 if (encap
> FPM_NH_ENCAP_NONE
) {
494 rta_addattr_l(rta
, sizeof(buf
), RTA_ENCAP_TYPE
,
495 &encap
, sizeof(uint16_t));
496 rtnh
->rtnh_len
+= sizeof(struct rtattr
) +
499 case FPM_NH_ENCAP_NONE
:
501 case FPM_NH_ENCAP_VXLAN
:
502 vxlan
= &nhi
->encap_info
.vxlan_encap
;
503 nest
= rta_nest(rta
, sizeof(buf
), RTA_ENCAP
);
504 rta_addattr_l(rta
, sizeof(buf
), VXLAN_VNI
,
505 &vxlan
->vni
, sizeof(uint32_t));
506 nest_len
= rta_nest_end(rta
, nest
);
507 rtnh
->rtnh_len
+= nest_len
;
509 case FPM_NH_ENCAP_MAX
:
514 rtnh
= RTNH_NEXT(rtnh
);
517 assert(rta
->rta_len
> RTA_LENGTH(0));
518 addattr_l(&req
->n
, in_buf_len
, RTA_MULTIPATH
, RTA_DATA(rta
),
/*
 * pref_src is already a pointer to the address bytes; pass it directly.
 * Taking its address would copy the pointer value itself into the
 * RTA_PREFSRC attribute instead of the preferred source address.
 */
524 addattr_l(&req
->n
, in_buf_len
, RTA_PREFSRC
, ri
->pref_src
,
528 assert(req
->n
.nlmsg_len
< in_buf_len
);
529 return req
->n
.nlmsg_len
;
533 * zfpm_log_route_info
535 * Helper function to log the information in a route_info structure.
537 static void zfpm_log_route_info(netlink_route_info_t
*ri
, const char *label
)
539 netlink_nh_info_t
*nhi
;
542 zfpm_debug("%s : %s %s/%d, Proto: %s, Metric: %u", label
,
543 nl_msg_type_to_str(ri
->nlmsg_type
),
544 prefix_addr_to_a(ri
->prefix
), ri
->prefix
->prefixlen
,
545 nl_rtproto_to_str(ri
->rtm_protocol
),
546 ri
->metric
? *ri
->metric
: 0);
548 for (i
= 0; i
< ri
->num_nhs
; i
++) {
550 zfpm_debug(" Intf: %u, Gateway: %s, Recursive: %s, Type: %s, Encap type: %s",
551 nhi
->if_index
, addr_to_a(ri
->af
, nhi
->gateway
),
552 nhi
->recursive
? "yes" : "no",
553 nexthop_type_to_str(nhi
->type
),
554 fpm_nh_encap_type_to_str(nhi
->encap_info
.encap_type
)
560 * zfpm_netlink_encode_route
562 * Create a netlink message corresponding to the given route in the
563 * given buffer space.
565 * Returns the number of bytes written to the buffer. 0 or a negative
566 * value indicates an error.
568 int zfpm_netlink_encode_route(int cmd
, rib_dest_t
*dest
, struct route_entry
*re
,
569 char *in_buf
, size_t in_buf_len
)
571 netlink_route_info_t ri_space
, *ri
;
575 if (!netlink_route_info_fill(ri
, cmd
, dest
, re
))
578 zfpm_log_route_info(ri
, __FUNCTION__
);
580 return netlink_route_info_encode(ri
, in_buf
, in_buf_len
);
583 #endif /* HAVE_NETLINK */