1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Zebra Policy Based Routing (PBR) interaction with the kernel using
5 * Copyright (C) 2018 Cumulus Networks, Inc.
16 #include <linux/fib_rules.h>
17 #include "zebra/zserv.h"
18 #include "zebra/zebra_ns.h"
19 #include "zebra/zebra_vrf.h"
21 #include "zebra/interface.h"
22 #include "zebra/debug.h"
23 #include "zebra/rtadv.h"
24 #include "zebra/kernel_netlink.h"
25 #include "zebra/rule_netlink.h"
26 #include "zebra/zebra_pbr.h"
27 #include "zebra/zebra_errors.h"
28 #include "zebra/zebra_dplane.h"
29 #include "zebra/zebra_trace.h"
33 /* static function declarations */
35 /* Private functions */
39 * netlink_rule_msg_encode
41 * Encodes netlink RTM_NEWRULE/RTM_DELRULE message to buffer buf of size buflen.
43 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
44 * or the number of bytes written to buf.
/*
 * Build one netlink rule request inside buf.
 *
 * What the visible statements do:
 *  - cmd becomes the netlink message type and the flags are NLM_F_REQUEST;
 *  - the address family is taken from src_ip, else from dst_ip;
 *  - the rule action is always FR_ACT_TO_TBL;
 *  - FRA_PROTOCOL (RTPROT_ZEBRA), FRA_PRIORITY and FRA_IFNAME are put
 *    unconditionally, the interface name coming from the dplane context;
 *  - FRA_SRC, FRA_DST, FRA_FWMARK, the header tos field and FRA_IP_PROTO
 *    are filled in only when the matching PBR_FILTER_* bit is set in
 *    filter_bm;
 *  - the final statement returns the NLMSG_ALIGN()ed message length.
 *
 * NOTE(review): this listing omits some lines (local declarations, braces,
 * the statements executed when a put fails); confirm those paths against
 * the complete file before relying on this description.
 */
46 static ssize_t
netlink_rule_msg_encode(
47 int cmd
, const struct zebra_dplane_ctx
*ctx
, uint32_t filter_bm
,
48 uint32_t priority
, uint32_t table
, const struct prefix
*src_ip
,
49 const struct prefix
*dst_ip
, uint32_t fwmark
, uint8_t dsfield
,
50 uint8_t ip_protocol
, void *buf
, size_t buflen
)
52 uint8_t protocol
= RTPROT_ZEBRA
;
57 struct fib_rule_hdr frh
;
61 const char *ifname
= dplane_ctx_rule_get_ifname(ctx
);
/* The buffer size is compared with the fixed part of the request first. */
63 if (buflen
< sizeof(*req
))
65 memset(req
, 0, sizeof(*req
));
67 /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */
68 if (PREFIX_FAMILY(src_ip
))
69 family
= PREFIX_FAMILY(src_ip
);
70 else if (PREFIX_FAMILY(dst_ip
))
71 family
= PREFIX_FAMILY(dst_ip
);
/* Address payload size: 4 bytes for AF_INET, 16 for anything else. */
75 bytelen
= (family
== AF_INET
? 4 : 16);
77 req
->n
.nlmsg_type
= cmd
;
/*
 * NOTE(review): the length is seeded with sizeof(struct rtmsg) although the
 * header member used below is a struct fib_rule_hdr; confirm both structures
 * have the same size, or switch to the fib_rule_hdr size.
 */
78 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
79 req
->n
.nlmsg_flags
= NLM_F_REQUEST
;
81 req
->frh
.family
= family
;
82 req
->frh
.action
= FR_ACT_TO_TBL
;
/* Originating protocol attribute; the value is RTPROT_ZEBRA (see above). */
84 if (!nl_attr_put(&req
->n
, buflen
, FRA_PROTOCOL
, &protocol
,
/* Rule priority attribute. */
89 if (!nl_attr_put32(&req
->n
, buflen
, FRA_PRIORITY
, priority
))
92 /* interface on which applied */
93 if (!nl_attr_put(&req
->n
, buflen
, FRA_IFNAME
, ifname
,
97 /* source IP, if specified */
98 if (filter_bm
& PBR_FILTER_SRC_IP
) {
99 req
->frh
.src_len
= src_ip
->prefixlen
;
100 if (!nl_attr_put(&req
->n
, buflen
, FRA_SRC
, &src_ip
->u
.prefix
,
105 /* destination IP, if specified */
106 if (filter_bm
& PBR_FILTER_DST_IP
) {
107 req
->frh
.dst_len
= dst_ip
->prefixlen
;
108 if (!nl_attr_put(&req
->n
, buflen
, FRA_DST
, &dst_ip
->u
.prefix
,
113 /* fwmark, if specified */
114 if (filter_bm
& PBR_FILTER_FWMARK
) {
115 if (!nl_attr_put32(&req
->n
, buflen
, FRA_FWMARK
, fwmark
))
119 /* dsfield, if specified */
120 if (filter_bm
& PBR_FILTER_DSFIELD
)
121 req
->frh
.tos
= dsfield
;
123 /* protocol to match on */
124 if (filter_bm
& PBR_FILTER_IP_PROTOCOL
)
/*
 * NOTE(review): unlike the other attribute puts shown in this function, the
 * result of this call is not tested.
 */
125 nl_attr_put8(&req
->n
, buflen
, FRA_IP_PROTO
, ip_protocol
);
127 /* Route table to use to forward, if filter criteria matches. */
/*
 * Two encodings of the table id follow: directly in the header table field,
 * or RT_TABLE_UNSPEC in the header plus an FRA_TABLE attribute. The condition
 * selecting between them is not visible in this listing.
 */
129 req
->frh
.table
= table
;
131 req
->frh
.table
= RT_TABLE_UNSPEC
;
132 if (!nl_attr_put32(&req
->n
, buflen
, FRA_TABLE
, table
))
/* Debug-gated one-line summary of the message being sent. */
136 if (IS_ZEBRA_DEBUG_KERNEL
)
138 "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u",
139 nl_msg_type_to_str(cmd
), nl_family_to_str(family
),
140 ifname
, priority
, fwmark
, src_ip
, dst_ip
, table
);
/* Report the aligned size of the finished message. */
142 return NLMSG_ALIGN(req
->n
.nlmsg_len
);
/*
 * Encoder for the current rule held in a dplane context.
 *
 * Reads the filter bitmap, priority, table, source and destination prefixes,
 * fwmark, dsfield and ip protocol from ctx and forwards them, together with
 * buf and buflen, to netlink_rule_msg_encode(), returning its result.
 *
 * cmd starts as RTM_NEWRULE. The context op is compared with
 * DPLANE_OP_RULE_DELETE, but the statement run in that case is not visible
 * in this listing (presumably it selects the delete command; confirm).
 */
145 static ssize_t
netlink_rule_msg_encoder(struct zebra_dplane_ctx
*ctx
, void *buf
,
148 int cmd
= RTM_NEWRULE
;
150 if (dplane_ctx_get_op(ctx
) == DPLANE_OP_RULE_DELETE
)
/* Hand every rule field from the context to the shared encoder. */
153 return netlink_rule_msg_encode(
154 cmd
, ctx
, dplane_ctx_rule_get_filter_bm(ctx
),
155 dplane_ctx_rule_get_priority(ctx
),
156 dplane_ctx_rule_get_table(ctx
), dplane_ctx_rule_get_src_ip(ctx
),
157 dplane_ctx_rule_get_dst_ip(ctx
),
158 dplane_ctx_rule_get_fwmark(ctx
),
159 dplane_ctx_rule_get_dsfield(ctx
),
160 dplane_ctx_rule_get_ipproto(ctx
), buf
, buflen
);
/*
 * Encoder for the previous ("old") version of a rule held in a dplane
 * context.
 *
 * Always encodes RTM_DELRULE, taking every field from the
 * dplane_ctx_rule_get_old_*() accessors, and returns whatever
 * netlink_rule_msg_encode() returns for buf and buflen.
 */
163 static ssize_t
netlink_oldrule_msg_encoder(struct zebra_dplane_ctx
*ctx
,
164 void *buf
, size_t buflen
)
166 return netlink_rule_msg_encode(
167 RTM_DELRULE
, ctx
, dplane_ctx_rule_get_old_filter_bm(ctx
),
168 dplane_ctx_rule_get_old_priority(ctx
),
169 dplane_ctx_rule_get_old_table(ctx
),
170 dplane_ctx_rule_get_old_src_ip(ctx
),
171 dplane_ctx_rule_get_old_dst_ip(ctx
),
172 dplane_ctx_rule_get_old_fwmark(ctx
),
173 dplane_ctx_rule_get_old_dsfield(ctx
),
174 dplane_ctx_rule_get_old_ipproto(ctx
), buf
, buflen
);
177 /* Public functions */
/*
 * Queue the netlink message(s) for a rule operation on a batch.
 *
 * The context op must be DPLANE_OP_RULE_ADD, DPLANE_OP_RULE_UPDATE or
 * DPLANE_OP_RULE_DELETE; any other value takes the branch that carries the
 * EC_ZEBRA_PBR_RULE_UPDATE message and returns FRR_NETLINK_ERROR.
 * The rule itself is queued through netlink_rule_msg_encoder and that
 * result is kept in ret.
 *
 * NOTE(review): the final return statement is not visible in this listing.
 */
179 enum netlink_msg_status
180 netlink_put_rule_update_msg(struct nl_batch
*bth
, struct zebra_dplane_ctx
*ctx
)
183 enum netlink_msg_status ret
;
185 op
= dplane_ctx_get_op(ctx
);
/* Reject contexts whose op is not one of the three rule operations. */
186 if (!(op
== DPLANE_OP_RULE_ADD
|| op
== DPLANE_OP_RULE_UPDATE
187 || op
== DPLANE_OP_RULE_DELETE
)) {
189 EC_ZEBRA_PBR_RULE_UPDATE
,
190 "Context received for kernel rule update with incorrect OP code (%u)",
192 return FRR_NETLINK_ERROR
;
/* Queue the message for the rule carried by ctx. */
195 ret
= netlink_batch_add_msg(bth
, ctx
, netlink_rule_msg_encoder
, false);
/*
 * For DPLANE_OP_RULE_UPDATE a second message, built by
 * netlink_oldrule_msg_encoder, is queued below; its return value is not
 * stored.
 */
198 * Delete the old one.
200 * Don't care about this result right?
202 if (op
== DPLANE_OP_RULE_UPDATE
)
203 netlink_batch_add_msg(bth
, ctx
, netlink_oldrule_msg_encoder
,
210 * Handle netlink notification informing a rule add or delete.
211 * Handling of an ADD is TBD.
212 * DELs are notified up, if other attributes indicate it may be a
213 * notification of interest. The expectation is that if this corresponds
214 * to a PBR rule added by FRR, it will be readded.
216 * If startup and we see a rule we created, delete it as it is a leftover
217 * from a previous instance and should have been removed on shutdown.
/*
 * Process one rule notification received over netlink.
 *
 * Visible flow:
 *  - the message type is compared with RTM_NEWRULE and RTM_DELRULE;
 *  - the attribute length is what remains after the fib_rule_hdr;
 *  - families other than AF_INET and AF_INET6 are singled out (debug text
 *    for the multicast-routing families, a warning text for the rest);
 *  - the action is compared with FR_ACT_TO_TBL;
 *  - FRA_* attributes are parsed into a zero-initialised zebra_pbr_rule;
 *  - for RTM_NEWRULE during startup with protocol RTPROT_ZEBRA, deletion of
 *    the rule is requested through dplane_pbr_rule_delete();
 *  - otherwise the interface name is looked up in the namespace and the
 *    last statement returns kernel_pbr_rule_del(&rule).
 *
 * NOTE(review): this listing omits some lines (declarations of len, ifname
 * and proto, the assignment of frh, early returns, guards around the
 * FRA_SRC/FRA_DST/FRA_TABLE handling); confirm against the complete file.
 */
220 int netlink_rule_change(struct nlmsghdr
*h
, ns_id_t ns_id
, int startup
)
222 struct zebra_ns
*zns
;
223 struct fib_rule_hdr
*frh
;
224 struct rtattr
*tb
[FRA_MAX
+ 1];
227 struct zebra_pbr_rule rule
= {};
229 uint8_t ip_proto
= 0;
231 frrtrace(3, frr_zebra
, netlink_rule_change
, h
, ns_id
, startup
);
233 /* Basic validation followed by extracting attributes. */
234 if (h
->nlmsg_type
!= RTM_NEWRULE
&& h
->nlmsg_type
!= RTM_DELRULE
)
/* Bytes left after the fixed rule header, i.e. the attribute area. */
237 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
240 "%s: Message received from netlink is of a broken size: %d %zu",
241 __func__
, h
->nlmsg_len
,
242 (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr
)));
/* Branch taken for every family other than AF_INET and AF_INET6. */
248 if (frh
->family
!= AF_INET
&& frh
->family
!= AF_INET6
) {
249 if (frh
->family
== RTNL_FAMILY_IPMR
250 || frh
->family
== RTNL_FAMILY_IP6MR
) {
251 if (IS_ZEBRA_DEBUG_KERNEL
)
253 "Received rule netlink that we are ignoring for family %u, rule change: %u",
254 frh
->family
, h
->nlmsg_type
);
258 EC_ZEBRA_NETLINK_INVALID_AF
,
259 "Invalid address family: %u received from kernel rule change: %u",
260 frh
->family
, h
->nlmsg_type
);
263 if (frh
->action
!= FR_ACT_TO_TBL
)
/* Clear the attribute table, then index the attributes by type. */
266 memset(tb
, 0, sizeof(tb
));
267 netlink_parse_rtattr(tb
, FRA_MAX
, RTM_RTA(frh
), len
);
269 if (tb
[FRA_PRIORITY
])
270 rule
.rule
.priority
= *(uint32_t *)RTA_DATA(tb
[FRA_PRIORITY
]);
/*
 * Source match: copy 4 or 16 address bytes depending on the family, then
 * record prefix length and family and flag the filter as present.
 * NOTE(review): no check of the attribute payload size is visible here.
 */
273 if (frh
->family
== AF_INET
)
274 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix4
,
275 RTA_DATA(tb
[FRA_SRC
]), 4);
277 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix6
,
278 RTA_DATA(tb
[FRA_SRC
]), 16);
279 rule
.rule
.filter
.src_ip
.prefixlen
= frh
->src_len
;
280 rule
.rule
.filter
.src_ip
.family
= frh
->family
;
281 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_SRC_IP
;
/* Destination match: same handling as the source match above. */
285 if (frh
->family
== AF_INET
)
286 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix4
,
287 RTA_DATA(tb
[FRA_DST
]), 4);
289 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix6
,
290 RTA_DATA(tb
[FRA_DST
]), 16);
291 rule
.rule
.filter
.dst_ip
.prefixlen
= frh
->dst_len
;
292 rule
.rule
.filter
.dst_ip
.family
= frh
->family
;
293 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_DST_IP
;
/*
 * Table id: read from the FRA_TABLE attribute, or taken from the header
 * table field; the condition choosing between the two is not visible here.
 */
297 rule
.rule
.action
.table
= *(uint32_t *)RTA_DATA(tb
[FRA_TABLE
]);
299 rule
.rule
.action
.table
= frh
->table
;
301 /* TBD: We don't care about rules not specifying an IIF. */
302 if (tb
[FRA_IFNAME
] == NULL
)
305 if (tb
[FRA_PROTOCOL
])
306 proto
= *(uint8_t *)RTA_DATA(tb
[FRA_PROTOCOL
]);
308 if (tb
[FRA_IP_PROTO
])
309 ip_proto
= *(uint8_t *)RTA_DATA(tb
[FRA_IP_PROTO
]);
/* ifname points at the attribute payload; a bounded copy goes into rule. */
311 ifname
= (char *)RTA_DATA(tb
[FRA_IFNAME
]);
312 strlcpy(rule
.ifname
, ifname
, sizeof(rule
.ifname
));
314 if (h
->nlmsg_type
== RTM_NEWRULE
) {
316 * If we see a rule at startup we created, delete it now.
317 * It should have been flushed on a previous shutdown.
319 if (startup
&& proto
== RTPROT_ZEBRA
) {
320 enum zebra_dplane_result ret
;
322 ret
= dplane_pbr_rule_delete(&rule
);
/* The message text depends on whether the delete request failed. */
325 "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
327 ((ret
== ZEBRA_DPLANE_REQUEST_FAILURE
)
330 nl_family_to_str(frh
->family
), rule
.ifname
,
331 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
332 &rule
.rule
.filter
.dst_ip
,
333 rule
.rule
.action
.table
, ip_proto
);
/* NOTE(review): zns is used below without a visible NULL check. */
340 zns
= zebra_ns_lookup(ns_id
);
342 /* If we don't know the interface, we don't care. */
343 if (!if_lookup_by_name_per_ns(zns
, ifname
))
/* Debug-gated one-line summary of the received rule. */
346 if (IS_ZEBRA_DEBUG_KERNEL
)
348 "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
349 nl_msg_type_to_str(h
->nlmsg_type
),
350 nl_family_to_str(frh
->family
), rule
.ifname
,
351 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
352 &rule
.rule
.filter
.dst_ip
, rule
.rule
.action
.table
,
/* Hand the parsed rule to the PBR layer as a kernel-side delete. */
355 return kernel_pbr_rule_del(&rule
);
359 * Request rules from the kernel
/*
 * Send a rule request of the given netlink message type for one address
 * family.
 *
 * A local request is zeroed, then given the message type, the flags
 * NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST, a length covering only a
 * struct fib_rule_hdr, and the requested family. The return value is that
 * of netlink_request() on the netlink_cmd member of zns.
 *
 * NOTE(review): the full declaration of req is not visible in this listing.
 */
361 static int netlink_request_rules(struct zebra_ns
*zns
, int family
, int type
)
365 struct fib_rule_hdr frh
;
366 char buf
[NL_PKT_BUF_SIZE
];
369 memset(&req
, 0, sizeof(req
));
370 req
.n
.nlmsg_type
= type
;
371 req
.n
.nlmsg_flags
= NLM_F_ROOT
| NLM_F_MATCH
| NLM_F_REQUEST
;
/* Header only: no attributes are appended to the request. */
372 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
373 req
.frh
.family
= family
;
375 return netlink_request(&zns
->netlink_cmd
, &req
);
379 * Get to know existing PBR rules in the kernel - typically called at startup.
/*
 * Read the rules for both address families.
 *
 * dp_info is initialised from zns. For AF_INET and then for AF_INET6 an
 * RTM_GETRULE request is sent with netlink_request_rules() and the replies
 * are parsed on the netlink_cmd member of zns, with netlink_rule_change as
 * the per-message callback.
 *
 * NOTE(review): the checks on ret between the steps, the remaining
 * netlink_parse_info() arguments and the final return are not visible in
 * this listing.
 */
381 int netlink_rules_read(struct zebra_ns
*zns
)
384 struct zebra_dplane_info dp_info
;
386 zebra_dplane_info_from_zns(&dp_info
, zns
, true);
/* IPv4 rules: request, then parse the replies. */
388 ret
= netlink_request_rules(zns
, AF_INET
, RTM_GETRULE
);
392 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
/* IPv6 rules: same two steps. */
397 ret
= netlink_request_rules(zns
, AF_INET6
, RTM_GETRULE
);
401 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
406 #endif /* HAVE_NETLINK */