1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Code for encoding/decoding FPM messages that are in netlink format.
5 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
6 * Copyright (C) 2012 by Open Source Routing.
7 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
19 #include "zebra/zserv.h"
20 #include "zebra/zebra_router.h"
21 #include "zebra/zebra_dplane.h"
22 #include "zebra/zebra_ns.h"
23 #include "zebra/zebra_vrf.h"
24 #include "zebra/kernel_netlink.h"
25 #include "zebra/rt_netlink.h"
28 #include "zebra/zebra_fpm_private.h"
29 #include "zebra/zebra_vxlan_private.h"
30 #include "zebra/interface.h"
35 * The size of an address in a given address family.
37 static size_t af_addr_size(uint8_t af
)
52 * We plan to use RTA_ENCAP_TYPE attribute for VxLAN encap as well.
53 * Currently, values 0 to 8 for this attribute are used by lwtunnel_encap_types
54 * So, we cannot use these values for VxLAN encap.
56 enum fpm_nh_encap_type_t
{
57 FPM_NH_ENCAP_NONE
= 0,
58 FPM_NH_ENCAP_VXLAN
= 100,
63 * fpm_nh_encap_type_to_str
65 static const char *fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type
)
68 case FPM_NH_ENCAP_NONE
:
71 case FPM_NH_ENCAP_VXLAN
:
74 case FPM_NH_ENCAP_MAX
:
81 struct vxlan_encap_info_t
{
85 enum vxlan_encap_info_type_t
{
89 struct fpm_nh_encap_info_t
{
90 enum fpm_nh_encap_type_t encap_type
;
92 struct vxlan_encap_info_t vxlan_encap
;
99 * Holds information about a single nexthop for netlink. These info
100 * structures are transient and may contain pointers into rib
101 * data structures for convenience.
103 struct netlink_nh_info
{
104 /* Weight of the nexthop ( for unequal cost ECMP ) */
107 union g_addr
*gateway
;
110 * Information from the struct nexthop from which this nh was
111 * derived. For debug purposes only.
114 enum nexthop_types_t type
;
115 struct fpm_nh_encap_info_t encap_info
;
121 * A structure for holding information for a netlink route message.
123 struct netlink_route_info
{
128 uint8_t rtm_protocol
;
130 struct prefix
*prefix
;
132 unsigned int num_nhs
;
137 struct netlink_nh_info nhs
[MULTIPATH_NUM
];
138 union g_addr
*pref_src
;
142 * netlink_route_info_add_nh
144 * Add information about the given nexthop to the given route info
147 * Returns true if a nexthop was added, false otherwise.
149 static int netlink_route_info_add_nh(struct netlink_route_info
*ri
,
150 struct nexthop
*nexthop
,
151 struct route_entry
*re
)
153 struct netlink_nh_info nhi
;
155 struct zebra_vrf
*zvrf
= NULL
;
156 struct interface
*ifp
= NULL
, *link_if
= NULL
;
157 struct zebra_if
*zif
= NULL
;
160 memset(&nhi
, 0, sizeof(nhi
));
163 if (ri
->num_nhs
>= (int)array_size(ri
->nhs
))
166 nhi
.recursive
= nexthop
->rparent
? 1 : 0;
167 nhi
.type
= nexthop
->type
;
168 nhi
.if_index
= nexthop
->ifindex
;
169 nhi
.weight
= nexthop
->weight
;
171 if (nexthop
->type
== NEXTHOP_TYPE_IPV4
172 || nexthop
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
) {
173 nhi
.gateway
= &nexthop
->gate
;
174 if (nexthop
->src
.ipv4
.s_addr
!= INADDR_ANY
)
178 if (nexthop
->type
== NEXTHOP_TYPE_IPV6
179 || nexthop
->type
== NEXTHOP_TYPE_IPV6_IFINDEX
) {
180 /* Special handling for IPv4 route with IPv6 Link Local next hop
182 if (ri
->af
== AF_INET
)
183 nhi
.gateway
= &ipv4ll_gateway
;
185 nhi
.gateway
= &nexthop
->gate
;
188 if (nexthop
->type
== NEXTHOP_TYPE_IFINDEX
) {
189 if (nexthop
->src
.ipv4
.s_addr
!= INADDR_ANY
)
193 if (!nhi
.gateway
&& nhi
.if_index
== 0)
196 if (CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_EVPN
)) {
197 nhi
.encap_info
.encap_type
= FPM_NH_ENCAP_VXLAN
;
199 /* Extract VNI id for the nexthop SVI interface */
200 zvrf
= zebra_vrf_lookup_by_id(nexthop
->vrf_id
);
202 ifp
= if_lookup_by_index_per_ns(zvrf
->zns
,
205 zif
= (struct zebra_if
*)ifp
->info
;
207 if (IS_ZEBRA_IF_BRIDGE(ifp
))
209 else if (IS_ZEBRA_IF_VLAN(ifp
))
211 if_lookup_by_index_per_ns(
215 vni
= vni_id_from_svi(ifp
,
221 nhi
.encap_info
.vxlan_encap
.vni
= vni
;
225 * We have a valid nhi. Copy the structure over to the route_info.
227 ri
->nhs
[ri
->num_nhs
] = nhi
;
230 if (src
&& !ri
->pref_src
)
237 * netlink_proto_from_route_type
239 static uint8_t netlink_proto_from_route_type(int type
)
241 return zebra2proto(type
);
245 * netlink_route_info_fill
247 * Fill out the route information object from the given route.
249 * Returns true on success and false on failure.
251 static int netlink_route_info_fill(struct netlink_route_info
*ri
, int cmd
,
252 rib_dest_t
*dest
, struct route_entry
*re
)
254 struct nexthop
*nexthop
;
256 memset(ri
, 0, sizeof(*ri
));
258 ri
->prefix
= rib_dest_prefix(dest
);
259 ri
->af
= rib_dest_af(dest
);
263 ri
->nlmsg_type
= cmd
;
264 ri
->rtm_protocol
= RTPROT_UNSPEC
;
267 * An RTM_DELROUTE need not be accompanied by any nexthops,
268 * particularly in our communication with the FPM.
270 if (cmd
== RTM_DELROUTE
&& !re
)
274 zfpm_debug("%s: Expected non-NULL re pointer", __func__
);
278 ri
->rtm_table
= re
->table
;
280 ri
->rtm_protocol
= netlink_proto_from_route_type(re
->type
);
281 ri
->rtm_type
= RTN_UNICAST
;
282 ri
->metric
= &re
->metric
;
284 for (ALL_NEXTHOPS(re
->nhe
->nhg
, nexthop
)) {
285 if (ri
->num_nhs
>= zrouter
.multipath_num
)
288 if (CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_RECURSIVE
))
291 if (nexthop
->type
== NEXTHOP_TYPE_BLACKHOLE
) {
292 switch (nexthop
->bh_type
) {
293 case BLACKHOLE_ADMINPROHIB
:
294 ri
->rtm_type
= RTN_PROHIBIT
;
296 case BLACKHOLE_REJECT
:
297 ri
->rtm_type
= RTN_UNREACHABLE
;
300 case BLACKHOLE_UNSPEC
:
301 ri
->rtm_type
= RTN_BLACKHOLE
;
306 if ((cmd
== RTM_NEWROUTE
307 && CHECK_FLAG(nexthop
->flags
, NEXTHOP_FLAG_ACTIVE
))
308 || (cmd
== RTM_DELROUTE
309 && CHECK_FLAG(re
->status
, ROUTE_ENTRY_INSTALLED
))) {
310 netlink_route_info_add_nh(ri
, nexthop
, re
);
314 if (ri
->num_nhs
== 0) {
315 switch (ri
->rtm_type
) {
317 case RTN_UNREACHABLE
:
321 /* If there is no useful nexthop then return. */
323 "netlink_encode_route(): No useful nexthop.");
332 * netlink_route_info_encode
334 * Returns the number of bytes written to the buffer. 0 or a negative
335 * value indicates an error.
337 static int netlink_route_info_encode(struct netlink_route_info
*ri
,
338 char *in_buf
, size_t in_buf_len
)
341 unsigned int nexthop_num
= 0;
343 struct netlink_nh_info
*nhi
;
344 enum fpm_nh_encap_type_t encap
;
345 struct rtattr
*nest
, *inner_nest
;
346 struct rtnexthop
*rtnh
;
347 struct vxlan_encap_info_t
*vxlan
;
348 struct in6_addr ipv6
;
356 req
= (void *)in_buf
;
358 buf_offset
= ((char *)req
->buf
) - ((char *)req
);
360 if (in_buf_len
< buf_offset
) {
365 memset(req
, 0, buf_offset
);
367 bytelen
= af_addr_size(ri
->af
);
369 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
370 req
->n
.nlmsg_flags
= NLM_F_CREATE
| NLM_F_REQUEST
;
371 req
->n
.nlmsg_pid
= ri
->nlmsg_pid
;
372 req
->n
.nlmsg_type
= ri
->nlmsg_type
;
373 req
->r
.rtm_family
= ri
->af
;
376 * rtm_table field is a uchar field which can accommodate table_id less
378 * To support table id greater than 255, if the table_id is greater than
379 * 255, set rtm_table to RT_TABLE_UNSPEC and add RTA_TABLE attribute
380 * with 32 bit value as the table_id.
382 if (ri
->rtm_table
< 256)
383 req
->r
.rtm_table
= ri
->rtm_table
;
385 req
->r
.rtm_table
= RT_TABLE_UNSPEC
;
386 nl_attr_put32(&req
->n
, in_buf_len
, RTA_TABLE
, ri
->rtm_table
);
389 req
->r
.rtm_dst_len
= ri
->prefix
->prefixlen
;
390 req
->r
.rtm_protocol
= ri
->rtm_protocol
;
391 req
->r
.rtm_scope
= RT_SCOPE_UNIVERSE
;
393 nl_attr_put(&req
->n
, in_buf_len
, RTA_DST
, &ri
->prefix
->u
.prefix
,
396 req
->r
.rtm_type
= ri
->rtm_type
;
400 nl_attr_put32(&req
->n
, in_buf_len
, RTA_PRIORITY
, *ri
->metric
);
402 if (ri
->num_nhs
== 0)
405 if (ri
->num_nhs
== 1) {
409 if (nhi
->type
== NEXTHOP_TYPE_IPV4_IFINDEX
410 && ri
->af
== AF_INET6
) {
411 ipv4_to_ipv4_mapped_ipv6(&ipv6
,
413 nl_attr_put(&req
->n
, in_buf_len
, RTA_GATEWAY
,
416 nl_attr_put(&req
->n
, in_buf_len
, RTA_GATEWAY
,
417 nhi
->gateway
, bytelen
);
421 nl_attr_put32(&req
->n
, in_buf_len
, RTA_OIF
,
425 encap
= nhi
->encap_info
.encap_type
;
427 case FPM_NH_ENCAP_NONE
:
428 case FPM_NH_ENCAP_MAX
:
430 case FPM_NH_ENCAP_VXLAN
:
431 nl_attr_put16(&req
->n
, in_buf_len
, RTA_ENCAP_TYPE
,
433 vxlan
= &nhi
->encap_info
.vxlan_encap
;
434 nest
= nl_attr_nest(&req
->n
, in_buf_len
, RTA_ENCAP
);
435 nl_attr_put32(&req
->n
, in_buf_len
, VXLAN_VNI
,
437 nl_attr_nest_end(&req
->n
, nest
);
447 nest
= nl_attr_nest(&req
->n
, in_buf_len
, RTA_MULTIPATH
);
449 for (nexthop_num
= 0; nexthop_num
< ri
->num_nhs
; nexthop_num
++) {
450 rtnh
= nl_attr_rtnh(&req
->n
, in_buf_len
);
451 nhi
= &ri
->nhs
[nexthop_num
];
454 nl_attr_put(&req
->n
, in_buf_len
, RTA_GATEWAY
,
455 nhi
->gateway
, bytelen
);
458 rtnh
->rtnh_ifindex
= nhi
->if_index
;
461 rtnh
->rtnh_hops
= nhi
->weight
;
463 encap
= nhi
->encap_info
.encap_type
;
465 case FPM_NH_ENCAP_NONE
:
466 case FPM_NH_ENCAP_MAX
:
468 case FPM_NH_ENCAP_VXLAN
:
469 nl_attr_put16(&req
->n
, in_buf_len
, RTA_ENCAP_TYPE
,
471 vxlan
= &nhi
->encap_info
.vxlan_encap
;
473 nl_attr_nest(&req
->n
, in_buf_len
, RTA_ENCAP
);
474 nl_attr_put32(&req
->n
, in_buf_len
, VXLAN_VNI
,
476 nl_attr_nest_end(&req
->n
, inner_nest
);
480 nl_attr_rtnh_end(&req
->n
, rtnh
);
483 nl_attr_nest_end(&req
->n
, nest
);
484 assert(nest
->rta_len
> RTA_LENGTH(0));
489 nl_attr_put(&req
->n
, in_buf_len
, RTA_PREFSRC
, ri
->pref_src
,
493 assert(req
->n
.nlmsg_len
< in_buf_len
);
494 return req
->n
.nlmsg_len
;
498 * zfpm_log_route_info
500 * Helper function to log the information in a route_info structure.
502 static void zfpm_log_route_info(struct netlink_route_info
*ri
,
505 struct netlink_nh_info
*nhi
;
507 char buf
[PREFIX_STRLEN
];
509 zfpm_debug("%s : %s %pFX, Proto: %s, Metric: %u", label
,
510 nl_msg_type_to_str(ri
->nlmsg_type
), ri
->prefix
,
511 nl_rtproto_to_str(ri
->rtm_protocol
),
512 ri
->metric
? *ri
->metric
: 0);
514 for (i
= 0; i
< ri
->num_nhs
; i
++) {
518 if (ri
->af
== AF_INET
)
519 inet_ntop(AF_INET
, nhi
->gateway
, buf
,
522 inet_ntop(AF_INET6
, nhi
->gateway
, buf
,
525 strlcpy(buf
, "none", sizeof(buf
));
527 zfpm_debug(" Intf: %u, Gateway: %s, Recursive: %s, Type: %s, Encap type: %s",
528 nhi
->if_index
, buf
, nhi
->recursive
? "yes" : "no",
529 nexthop_type_to_str(nhi
->type
),
530 fpm_nh_encap_type_to_str(nhi
->encap_info
.encap_type
)
536 * zfpm_netlink_encode_route
538 * Create a netlink message corresponding to the given route in the
539 * given buffer space.
541 * Returns the number of bytes written to the buffer. 0 or a negative
542 * value indicates an error.
544 int zfpm_netlink_encode_route(int cmd
, rib_dest_t
*dest
, struct route_entry
*re
,
545 char *in_buf
, size_t in_buf_len
)
547 struct netlink_route_info ri_space
, *ri
;
551 if (!netlink_route_info_fill(ri
, cmd
, dest
, re
))
554 zfpm_log_route_info(ri
, __func__
);
556 return netlink_route_info_encode(ri
, in_buf
, in_buf_len
);
560 * zfpm_netlink_encode_mac
562 * Create a netlink message corresponding to the given MAC.
564 * Returns the number of bytes written to the buffer. 0 or a negative
565 * value indicates an error.
567 int zfpm_netlink_encode_mac(struct fpm_mac_info_t
*mac
, char *in_buf
,
577 req
= (void *)in_buf
;
579 buf_offset
= offsetof(struct macmsg
, buf
);
580 if (in_buf_len
< buf_offset
)
582 memset(req
, 0, buf_offset
);
584 /* Construct nlmsg header */
585 req
->hdr
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct ndmsg
));
586 req
->hdr
.nlmsg_type
= CHECK_FLAG(mac
->fpm_flags
, ZEBRA_MAC_DELETE_FPM
) ?
587 RTM_DELNEIGH
: RTM_NEWNEIGH
;
588 req
->hdr
.nlmsg_flags
= NLM_F_REQUEST
;
589 if (req
->hdr
.nlmsg_type
== RTM_NEWNEIGH
)
590 req
->hdr
.nlmsg_flags
|= (NLM_F_CREATE
| NLM_F_REPLACE
);
592 /* Construct ndmsg */
593 req
->ndm
.ndm_family
= AF_BRIDGE
;
594 req
->ndm
.ndm_ifindex
= mac
->vxlan_if
;
596 req
->ndm
.ndm_state
= NUD_REACHABLE
;
597 req
->ndm
.ndm_flags
|= NTF_SELF
| NTF_MASTER
;
598 if (CHECK_FLAG(mac
->zebra_flags
,
599 (ZEBRA_MAC_STICKY
| ZEBRA_MAC_REMOTE_DEF_GW
)))
600 req
->ndm
.ndm_state
|= NUD_NOARP
;
602 req
->ndm
.ndm_flags
|= NTF_EXT_LEARNED
;
605 nl_attr_put(&req
->hdr
, in_buf_len
, NDA_LLADDR
, &mac
->macaddr
, 6);
606 nl_attr_put(&req
->hdr
, in_buf_len
, NDA_DST
, &mac
->r_vtep_ip
, 4);
607 nl_attr_put32(&req
->hdr
, in_buf_len
, NDA_MASTER
, mac
->svi_if
);
608 nl_attr_put32(&req
->hdr
, in_buf_len
, NDA_VNI
, mac
->vni
);
610 assert(req
->hdr
.nlmsg_len
< in_buf_len
);
612 zfpm_debug("Tx %s family %s ifindex %u MAC %pEA DEST %pI4",
613 nl_msg_type_to_str(req
->hdr
.nlmsg_type
),
614 nl_family_to_str(req
->ndm
.ndm_family
), req
->ndm
.ndm_ifindex
,
615 &mac
->macaddr
, &mac
->r_vtep_ip
);
617 return req
->hdr
.nlmsg_len
;
620 #endif /* HAVE_NETLINK */