]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_fpm_netlink.c
tools: config clang-format to allow aligned macros
[mirror_frr.git] / zebra / zebra_fpm_netlink.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Code for encoding/decoding FPM messages that are in netlink format.
4 *
5 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
6 * Copyright (C) 2012 by Open Source Routing.
7 * Copyright (C) 2012 by Internet Systems Consortium, Inc. ("ISC")
8 */
9
10 #include <zebra.h>
11
12 #ifdef HAVE_NETLINK
13
14 #include "log.h"
15 #include "rib.h"
16 #include "vty.h"
17 #include "prefix.h"
18
19 #include "zebra/zserv.h"
20 #include "zebra/zebra_router.h"
21 #include "zebra/zebra_dplane.h"
22 #include "zebra/zebra_ns.h"
23 #include "zebra/zebra_vrf.h"
24 #include "zebra/kernel_netlink.h"
25 #include "zebra/rt_netlink.h"
26 #include "nexthop.h"
27
28 #include "zebra/zebra_fpm_private.h"
29 #include "zebra/zebra_vxlan_private.h"
30 #include "zebra/interface.h"
31
/*
 * af_addr_size
 *
 * Returns the number of bytes occupied by a raw address of the given
 * address family: 4 for AF_INET, 16 for AF_INET6.
 *
 * Any other family is a programming error and trips an assert; if
 * asserts are compiled out, the IPv6 size (16) is returned.
 */
static size_t af_addr_size(uint8_t af)
{
	if (af == AF_INET)
		return 4;

	if (af != AF_INET6)
		assert(0);

	return 16;
}
50
/*
 * Nexthop encapsulation types sent to the FPM in the RTA_ENCAP_TYPE
 * attribute.
 *
 * RTA_ENCAP_TYPE is reused for VxLAN encap. Values 0 to 8 of that
 * attribute are currently used by lwtunnel_encap_types, so the VxLAN
 * type must not take one of those values.
 */
enum fpm_nh_encap_type_t {
	/* Nexthop carries no encapsulation. */
	FPM_NH_ENCAP_NONE = 0,

	/* Nexthop is reached through a VxLAN tunnel. */
	FPM_NH_ENCAP_VXLAN = 100,

	/* Upper bound marker; reported as "invalid" and never encoded. */
	FPM_NH_ENCAP_MAX,
};
61
62 /*
63 * fpm_nh_encap_type_to_str
64 */
65 static const char *fpm_nh_encap_type_to_str(enum fpm_nh_encap_type_t encap_type)
66 {
67 switch (encap_type) {
68 case FPM_NH_ENCAP_NONE:
69 return "none";
70
71 case FPM_NH_ENCAP_VXLAN:
72 return "VxLAN";
73
74 case FPM_NH_ENCAP_MAX:
75 return "invalid";
76 }
77
78 return "invalid";
79 }
80
81 struct vxlan_encap_info_t {
82 vni_t vni;
83 };
84
/*
 * Attribute types used inside the nested RTA_ENCAP attribute when the
 * encap type is VxLAN.
 */
enum vxlan_encap_info_type_t {
	/* 32 bit VNI value. */
	VXLAN_VNI = 0,
};
88
89 struct fpm_nh_encap_info_t {
90 enum fpm_nh_encap_type_t encap_type;
91 union {
92 struct vxlan_encap_info_t vxlan_encap;
93 };
94 };
95
96 /*
97 * netlink_nh_info
98 *
99 * Holds information about a single nexthop for netlink. These info
100 * structures are transient and may contain pointers into rib
101 * data structures for convenience.
102 */
103 struct netlink_nh_info {
104 /* Weight of the nexthop ( for unequal cost ECMP ) */
105 uint8_t weight;
106 uint32_t if_index;
107 union g_addr *gateway;
108
109 /*
110 * Information from the struct nexthop from which this nh was
111 * derived. For debug purposes only.
112 */
113 int recursive;
114 enum nexthop_types_t type;
115 struct fpm_nh_encap_info_t encap_info;
116 };
117
118 /*
119 * netlink_route_info
120 *
121 * A structure for holding information for a netlink route message.
122 */
123 struct netlink_route_info {
124 uint32_t nlmsg_pid;
125 uint16_t nlmsg_type;
126 uint8_t rtm_type;
127 uint32_t rtm_table;
128 uint8_t rtm_protocol;
129 uint8_t af;
130 struct prefix *prefix;
131 uint32_t *metric;
132 unsigned int num_nhs;
133
134 /*
135 * Nexthop structures
136 */
137 struct netlink_nh_info nhs[MULTIPATH_NUM];
138 union g_addr *pref_src;
139 };
140
141 /*
142 * netlink_route_info_add_nh
143 *
144 * Add information about the given nexthop to the given route info
145 * structure.
146 *
147 * Returns true if a nexthop was added, false otherwise.
148 */
149 static int netlink_route_info_add_nh(struct netlink_route_info *ri,
150 struct nexthop *nexthop,
151 struct route_entry *re)
152 {
153 struct netlink_nh_info nhi;
154 union g_addr *src;
155 struct zebra_vrf *zvrf = NULL;
156 struct interface *ifp = NULL, *link_if = NULL;
157 struct zebra_if *zif = NULL;
158 vni_t vni = 0;
159
160 memset(&nhi, 0, sizeof(nhi));
161 src = NULL;
162
163 if (ri->num_nhs >= (int)array_size(ri->nhs))
164 return 0;
165
166 nhi.recursive = nexthop->rparent ? 1 : 0;
167 nhi.type = nexthop->type;
168 nhi.if_index = nexthop->ifindex;
169 nhi.weight = nexthop->weight;
170
171 if (nexthop->type == NEXTHOP_TYPE_IPV4
172 || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
173 nhi.gateway = &nexthop->gate;
174 if (nexthop->src.ipv4.s_addr != INADDR_ANY)
175 src = &nexthop->src;
176 }
177
178 if (nexthop->type == NEXTHOP_TYPE_IPV6
179 || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
180 /* Special handling for IPv4 route with IPv6 Link Local next hop
181 */
182 if (ri->af == AF_INET)
183 nhi.gateway = &ipv4ll_gateway;
184 else
185 nhi.gateway = &nexthop->gate;
186 }
187
188 if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
189 if (nexthop->src.ipv4.s_addr != INADDR_ANY)
190 src = &nexthop->src;
191 }
192
193 if (!nhi.gateway && nhi.if_index == 0)
194 return 0;
195
196 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_EVPN)) {
197 nhi.encap_info.encap_type = FPM_NH_ENCAP_VXLAN;
198
199 /* Extract VNI id for the nexthop SVI interface */
200 zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
201 if (zvrf) {
202 ifp = if_lookup_by_index_per_ns(zvrf->zns,
203 nexthop->ifindex);
204 if (ifp) {
205 zif = (struct zebra_if *)ifp->info;
206 if (zif) {
207 if (IS_ZEBRA_IF_BRIDGE(ifp))
208 link_if = ifp;
209 else if (IS_ZEBRA_IF_VLAN(ifp))
210 link_if =
211 if_lookup_by_index_per_ns(
212 zvrf->zns,
213 zif->link_ifindex);
214 if (link_if)
215 vni = vni_id_from_svi(ifp,
216 link_if);
217 }
218 }
219 }
220
221 nhi.encap_info.vxlan_encap.vni = vni;
222 }
223
224 /*
225 * We have a valid nhi. Copy the structure over to the route_info.
226 */
227 ri->nhs[ri->num_nhs] = nhi;
228 ri->num_nhs++;
229
230 if (src && !ri->pref_src)
231 ri->pref_src = src;
232
233 return 1;
234 }
235
/*
 * netlink_proto_from_route_type
 *
 * Returns the result of zebra2proto() for the given zebra route type,
 * narrowed to the width of the rtm_protocol field.
 */
static uint8_t netlink_proto_from_route_type(int type)
{
	uint8_t proto = zebra2proto(type);

	return proto;
}
243
244 /*
245 * netlink_route_info_fill
246 *
247 * Fill out the route information object from the given route.
248 *
249 * Returns true on success and false on failure.
250 */
251 static int netlink_route_info_fill(struct netlink_route_info *ri, int cmd,
252 rib_dest_t *dest, struct route_entry *re)
253 {
254 struct nexthop *nexthop;
255 struct rib_table_info *table_info =
256 rib_table_info(rib_dest_table(dest));
257 struct zebra_vrf *zvrf = table_info->zvrf;
258
259 memset(ri, 0, sizeof(*ri));
260
261 ri->prefix = rib_dest_prefix(dest);
262 ri->af = rib_dest_af(dest);
263
264 if (zvrf && zvrf->zns)
265 ri->nlmsg_pid = zvrf->zns->netlink_dplane_out.snl.nl_pid;
266
267 ri->nlmsg_type = cmd;
268 ri->rtm_table = table_info->table_id;
269 ri->rtm_protocol = RTPROT_UNSPEC;
270
271 /*
272 * An RTM_DELROUTE need not be accompanied by any nexthops,
273 * particularly in our communication with the FPM.
274 */
275 if (cmd == RTM_DELROUTE && !re)
276 return 1;
277
278 if (!re) {
279 zfpm_debug("%s: Expected non-NULL re pointer", __func__);
280 return 0;
281 }
282
283 ri->rtm_protocol = netlink_proto_from_route_type(re->type);
284 ri->rtm_type = RTN_UNICAST;
285 ri->metric = &re->metric;
286
287 for (ALL_NEXTHOPS(re->nhe->nhg, nexthop)) {
288 if (ri->num_nhs >= zrouter.multipath_num)
289 break;
290
291 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
292 continue;
293
294 if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
295 switch (nexthop->bh_type) {
296 case BLACKHOLE_ADMINPROHIB:
297 ri->rtm_type = RTN_PROHIBIT;
298 break;
299 case BLACKHOLE_REJECT:
300 ri->rtm_type = RTN_UNREACHABLE;
301 break;
302 case BLACKHOLE_NULL:
303 case BLACKHOLE_UNSPEC:
304 ri->rtm_type = RTN_BLACKHOLE;
305 break;
306 }
307 }
308
309 if ((cmd == RTM_NEWROUTE
310 && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
311 || (cmd == RTM_DELROUTE
312 && CHECK_FLAG(re->status, ROUTE_ENTRY_INSTALLED))) {
313 netlink_route_info_add_nh(ri, nexthop, re);
314 }
315 }
316
317 if (ri->num_nhs == 0) {
318 switch (ri->rtm_type) {
319 case RTN_PROHIBIT:
320 case RTN_UNREACHABLE:
321 case RTN_BLACKHOLE:
322 break;
323 default:
324 /* If there is no useful nexthop then return. */
325 zfpm_debug(
326 "netlink_encode_route(): No useful nexthop.");
327 return 0;
328 }
329 }
330
331 return 1;
332 }
333
334 /*
335 * netlink_route_info_encode
336 *
337 * Returns the number of bytes written to the buffer. 0 or a negative
338 * value indicates an error.
339 */
340 static int netlink_route_info_encode(struct netlink_route_info *ri,
341 char *in_buf, size_t in_buf_len)
342 {
343 size_t bytelen;
344 unsigned int nexthop_num = 0;
345 size_t buf_offset;
346 struct netlink_nh_info *nhi;
347 enum fpm_nh_encap_type_t encap;
348 struct rtattr *nest, *inner_nest;
349 struct rtnexthop *rtnh;
350 struct vxlan_encap_info_t *vxlan;
351 struct in6_addr ipv6;
352
353 struct {
354 struct nlmsghdr n;
355 struct rtmsg r;
356 char buf[1];
357 } * req;
358
359 req = (void *)in_buf;
360
361 buf_offset = ((char *)req->buf) - ((char *)req);
362
363 if (in_buf_len < buf_offset) {
364 assert(0);
365 return 0;
366 }
367
368 memset(req, 0, buf_offset);
369
370 bytelen = af_addr_size(ri->af);
371
372 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
373 req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
374 req->n.nlmsg_pid = ri->nlmsg_pid;
375 req->n.nlmsg_type = ri->nlmsg_type;
376 req->r.rtm_family = ri->af;
377
378 /*
379 * rtm_table field is a uchar field which can accommodate table_id less
380 * than 256.
381 * To support table id greater than 255, if the table_id is greater than
382 * 255, set rtm_table to RT_TABLE_UNSPEC and add RTA_TABLE attribute
383 * with 32 bit value as the table_id.
384 */
385 if (ri->rtm_table < 256)
386 req->r.rtm_table = ri->rtm_table;
387 else {
388 req->r.rtm_table = RT_TABLE_UNSPEC;
389 nl_attr_put32(&req->n, in_buf_len, RTA_TABLE, ri->rtm_table);
390 }
391
392 req->r.rtm_dst_len = ri->prefix->prefixlen;
393 req->r.rtm_protocol = ri->rtm_protocol;
394 req->r.rtm_scope = RT_SCOPE_UNIVERSE;
395
396 nl_attr_put(&req->n, in_buf_len, RTA_DST, &ri->prefix->u.prefix,
397 bytelen);
398
399 req->r.rtm_type = ri->rtm_type;
400
401 /* Metric. */
402 if (ri->metric)
403 nl_attr_put32(&req->n, in_buf_len, RTA_PRIORITY, *ri->metric);
404
405 if (ri->num_nhs == 0)
406 goto done;
407
408 if (ri->num_nhs == 1) {
409 nhi = &ri->nhs[0];
410
411 if (nhi->gateway) {
412 if (nhi->type == NEXTHOP_TYPE_IPV4_IFINDEX
413 && ri->af == AF_INET6) {
414 ipv4_to_ipv4_mapped_ipv6(&ipv6,
415 nhi->gateway->ipv4);
416 nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
417 &ipv6, bytelen);
418 } else
419 nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
420 nhi->gateway, bytelen);
421 }
422
423 if (nhi->if_index) {
424 nl_attr_put32(&req->n, in_buf_len, RTA_OIF,
425 nhi->if_index);
426 }
427
428 encap = nhi->encap_info.encap_type;
429 switch (encap) {
430 case FPM_NH_ENCAP_NONE:
431 case FPM_NH_ENCAP_MAX:
432 break;
433 case FPM_NH_ENCAP_VXLAN:
434 nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE,
435 encap);
436 vxlan = &nhi->encap_info.vxlan_encap;
437 nest = nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP);
438 nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI,
439 vxlan->vni);
440 nl_attr_nest_end(&req->n, nest);
441 break;
442 }
443
444 goto done;
445 }
446
447 /*
448 * Multipath case.
449 */
450 nest = nl_attr_nest(&req->n, in_buf_len, RTA_MULTIPATH);
451
452 for (nexthop_num = 0; nexthop_num < ri->num_nhs; nexthop_num++) {
453 rtnh = nl_attr_rtnh(&req->n, in_buf_len);
454 nhi = &ri->nhs[nexthop_num];
455
456 if (nhi->gateway)
457 nl_attr_put(&req->n, in_buf_len, RTA_GATEWAY,
458 nhi->gateway, bytelen);
459
460 if (nhi->if_index) {
461 rtnh->rtnh_ifindex = nhi->if_index;
462 }
463
464 rtnh->rtnh_hops = nhi->weight;
465
466 encap = nhi->encap_info.encap_type;
467 switch (encap) {
468 case FPM_NH_ENCAP_NONE:
469 case FPM_NH_ENCAP_MAX:
470 break;
471 case FPM_NH_ENCAP_VXLAN:
472 nl_attr_put16(&req->n, in_buf_len, RTA_ENCAP_TYPE,
473 encap);
474 vxlan = &nhi->encap_info.vxlan_encap;
475 inner_nest =
476 nl_attr_nest(&req->n, in_buf_len, RTA_ENCAP);
477 nl_attr_put32(&req->n, in_buf_len, VXLAN_VNI,
478 vxlan->vni);
479 nl_attr_nest_end(&req->n, inner_nest);
480 break;
481 }
482
483 nl_attr_rtnh_end(&req->n, rtnh);
484 }
485
486 nl_attr_nest_end(&req->n, nest);
487 assert(nest->rta_len > RTA_LENGTH(0));
488
489 done:
490
491 if (ri->pref_src) {
492 nl_attr_put(&req->n, in_buf_len, RTA_PREFSRC, ri->pref_src,
493 bytelen);
494 }
495
496 assert(req->n.nlmsg_len < in_buf_len);
497 return req->n.nlmsg_len;
498 }
499
500 /*
501 * zfpm_log_route_info
502 *
503 * Helper function to log the information in a route_info structure.
504 */
505 static void zfpm_log_route_info(struct netlink_route_info *ri,
506 const char *label)
507 {
508 struct netlink_nh_info *nhi;
509 unsigned int i;
510 char buf[PREFIX_STRLEN];
511
512 zfpm_debug("%s : %s %pFX, Proto: %s, Metric: %u", label,
513 nl_msg_type_to_str(ri->nlmsg_type), ri->prefix,
514 nl_rtproto_to_str(ri->rtm_protocol),
515 ri->metric ? *ri->metric : 0);
516
517 for (i = 0; i < ri->num_nhs; i++) {
518 nhi = &ri->nhs[i];
519
520 if (nhi->gateway) {
521 if (ri->af == AF_INET)
522 inet_ntop(AF_INET, nhi->gateway, buf,
523 sizeof(buf));
524 else
525 inet_ntop(AF_INET6, nhi->gateway, buf,
526 sizeof(buf));
527 } else
528 strlcpy(buf, "none", sizeof(buf));
529
530 zfpm_debug(" Intf: %u, Gateway: %s, Recursive: %s, Type: %s, Encap type: %s",
531 nhi->if_index, buf, nhi->recursive ? "yes" : "no",
532 nexthop_type_to_str(nhi->type),
533 fpm_nh_encap_type_to_str(nhi->encap_info.encap_type)
534 );
535 }
536 }
537
538 /*
539 * zfpm_netlink_encode_route
540 *
541 * Create a netlink message corresponding to the given route in the
542 * given buffer space.
543 *
544 * Returns the number of bytes written to the buffer. 0 or a negative
545 * value indicates an error.
546 */
547 int zfpm_netlink_encode_route(int cmd, rib_dest_t *dest, struct route_entry *re,
548 char *in_buf, size_t in_buf_len)
549 {
550 struct netlink_route_info ri_space, *ri;
551
552 ri = &ri_space;
553
554 if (!netlink_route_info_fill(ri, cmd, dest, re))
555 return 0;
556
557 zfpm_log_route_info(ri, __func__);
558
559 return netlink_route_info_encode(ri, in_buf, in_buf_len);
560 }
561
562 /*
563 * zfpm_netlink_encode_mac
564 *
565 * Create a netlink message corresponding to the given MAC.
566 *
567 * Returns the number of bytes written to the buffer. 0 or a negative
568 * value indicates an error.
569 */
570 int zfpm_netlink_encode_mac(struct fpm_mac_info_t *mac, char *in_buf,
571 size_t in_buf_len)
572 {
573 size_t buf_offset;
574
575 struct macmsg {
576 struct nlmsghdr hdr;
577 struct ndmsg ndm;
578 char buf[0];
579 } *req;
580 req = (void *)in_buf;
581
582 buf_offset = offsetof(struct macmsg, buf);
583 if (in_buf_len < buf_offset)
584 return 0;
585 memset(req, 0, buf_offset);
586
587 /* Construct nlmsg header */
588 req->hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
589 req->hdr.nlmsg_type = CHECK_FLAG(mac->fpm_flags, ZEBRA_MAC_DELETE_FPM) ?
590 RTM_DELNEIGH : RTM_NEWNEIGH;
591 req->hdr.nlmsg_flags = NLM_F_REQUEST;
592 if (req->hdr.nlmsg_type == RTM_NEWNEIGH)
593 req->hdr.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
594
595 /* Construct ndmsg */
596 req->ndm.ndm_family = AF_BRIDGE;
597 req->ndm.ndm_ifindex = mac->vxlan_if;
598
599 req->ndm.ndm_state = NUD_REACHABLE;
600 req->ndm.ndm_flags |= NTF_SELF | NTF_MASTER;
601 if (CHECK_FLAG(mac->zebra_flags,
602 (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW)))
603 req->ndm.ndm_state |= NUD_NOARP;
604 else
605 req->ndm.ndm_flags |= NTF_EXT_LEARNED;
606
607 /* Add attributes */
608 nl_attr_put(&req->hdr, in_buf_len, NDA_LLADDR, &mac->macaddr, 6);
609 nl_attr_put(&req->hdr, in_buf_len, NDA_DST, &mac->r_vtep_ip, 4);
610 nl_attr_put32(&req->hdr, in_buf_len, NDA_MASTER, mac->svi_if);
611 nl_attr_put32(&req->hdr, in_buf_len, NDA_VNI, mac->vni);
612
613 assert(req->hdr.nlmsg_len < in_buf_len);
614
615 zfpm_debug("Tx %s family %s ifindex %u MAC %pEA DEST %pI4",
616 nl_msg_type_to_str(req->hdr.nlmsg_type),
617 nl_family_to_str(req->ndm.ndm_family), req->ndm.ndm_ifindex,
618 &mac->macaddr, &mac->r_vtep_ip);
619
620 return req->hdr.nlmsg_len;
621 }
622
623 #endif /* HAVE_NETLINK */