2 * Zebra Policy Based Routing (PBR) interaction with the kernel using
4 * Copyright (C) 2018 Cumulus Networks, Inc.
6 * This file is part of FRR.
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
32 #include <linux/fib_rules.h>
33 #include "zebra/zserv.h"
34 #include "zebra/zebra_ns.h"
35 #include "zebra/zebra_vrf.h"
37 #include "zebra/interface.h"
38 #include "zebra/debug.h"
39 #include "zebra/rtadv.h"
40 #include "zebra/kernel_netlink.h"
41 #include "zebra/rule_netlink.h"
42 #include "zebra/zebra_pbr.h"
43 #include "zebra/zebra_errors.h"
44 #include "zebra/zebra_dplane.h"
48 /* static function declarations */
50 /* Private functions */
54 * netlink_rule_msg_encode
56 * Encodes netlink RTM_NEWRULE/RTM_DELRULE message to buffer buf of size buflen.
58 * Returns -1 on failure, 0 when the message doesn't fit entirely in the
59 * buffer, or otherwise the number of bytes written to buf.
62 netlink_rule_msg_encode(int cmd
, const struct zebra_dplane_ctx
*ctx
,
63 uint32_t filter_bm
, uint32_t priority
, uint32_t table
,
64 const struct prefix
*src_ip
,
65 const struct prefix
*dst_ip
, uint32_t fwmark
,
66 uint8_t dsfield
, void *buf
, size_t buflen
)
68 uint8_t protocol
= RTPROT_ZEBRA
;
73 struct fib_rule_hdr frh
;
77 const char *ifname
= dplane_ctx_rule_get_ifname(ctx
);
79 if (buflen
< sizeof(*req
))
81 memset(req
, 0, sizeof(*req
));
83 /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */
84 if (PREFIX_FAMILY(src_ip
))
85 family
= PREFIX_FAMILY(src_ip
);
86 else if (PREFIX_FAMILY(dst_ip
))
87 family
= PREFIX_FAMILY(dst_ip
);
91 bytelen
= (family
== AF_INET
? 4 : 16);
93 req
->n
.nlmsg_type
= cmd
;
94 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
95 req
->n
.nlmsg_flags
= NLM_F_REQUEST
;
97 req
->frh
.family
= family
;
98 req
->frh
.action
= FR_ACT_TO_TBL
;
100 if (!nl_attr_put(&req
->n
, buflen
, FRA_PROTOCOL
, &protocol
,
105 if (!nl_attr_put32(&req
->n
, buflen
, FRA_PRIORITY
, priority
))
108 /* interface on which applied */
109 if (!nl_attr_put(&req
->n
, buflen
, FRA_IFNAME
, ifname
,
113 /* source IP, if specified */
114 if (filter_bm
& PBR_FILTER_SRC_IP
) {
115 req
->frh
.src_len
= src_ip
->prefixlen
;
116 if (!nl_attr_put(&req
->n
, buflen
, FRA_SRC
, &src_ip
->u
.prefix
,
121 /* destination IP, if specified */
122 if (filter_bm
& PBR_FILTER_DST_IP
) {
123 req
->frh
.dst_len
= dst_ip
->prefixlen
;
124 if (!nl_attr_put(&req
->n
, buflen
, FRA_DST
, &dst_ip
->u
.prefix
,
129 /* fwmark, if specified */
130 if (filter_bm
& PBR_FILTER_FWMARK
) {
131 if (!nl_attr_put32(&req
->n
, buflen
, FRA_FWMARK
, fwmark
))
135 /* dsfield, if specified */
136 if (filter_bm
& PBR_FILTER_DSFIELD
)
137 req
->frh
.tos
= dsfield
;
139 /* Route table to use to forward, if filter criteria matches. */
141 req
->frh
.table
= table
;
143 req
->frh
.table
= RT_TABLE_UNSPEC
;
144 if (!nl_attr_put32(&req
->n
, buflen
, FRA_TABLE
, table
))
148 if (IS_ZEBRA_DEBUG_KERNEL
)
150 "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u",
151 nl_msg_type_to_str(cmd
), nl_family_to_str(family
),
152 ifname
, priority
, fwmark
, src_ip
, dst_ip
, table
);
154 return NLMSG_ALIGN(req
->n
.nlmsg_len
);
157 static ssize_t
netlink_rule_msg_encoder(struct zebra_dplane_ctx
*ctx
, void *buf
,
160 int cmd
= RTM_NEWRULE
;
162 if (dplane_ctx_get_op(ctx
) == DPLANE_OP_RULE_DELETE
)
165 return netlink_rule_msg_encode(
166 cmd
, ctx
, dplane_ctx_rule_get_filter_bm(ctx
),
167 dplane_ctx_rule_get_priority(ctx
),
168 dplane_ctx_rule_get_table(ctx
), dplane_ctx_rule_get_src_ip(ctx
),
169 dplane_ctx_rule_get_dst_ip(ctx
),
170 dplane_ctx_rule_get_fwmark(ctx
),
171 dplane_ctx_rule_get_dsfield(ctx
), buf
, buflen
);
174 static ssize_t
netlink_oldrule_msg_encoder(struct zebra_dplane_ctx
*ctx
,
175 void *buf
, size_t buflen
)
177 return netlink_rule_msg_encode(
178 RTM_DELRULE
, ctx
, dplane_ctx_rule_get_old_filter_bm(ctx
),
179 dplane_ctx_rule_get_old_priority(ctx
),
180 dplane_ctx_rule_get_old_table(ctx
),
181 dplane_ctx_rule_get_old_src_ip(ctx
),
182 dplane_ctx_rule_get_old_dst_ip(ctx
),
183 dplane_ctx_rule_get_old_fwmark(ctx
),
184 dplane_ctx_rule_get_old_dsfield(ctx
), buf
, buflen
);
187 /* Public functions */
189 enum netlink_msg_status
190 netlink_put_rule_update_msg(struct nl_batch
*bth
, struct zebra_dplane_ctx
*ctx
)
193 enum netlink_msg_status ret
;
195 op
= dplane_ctx_get_op(ctx
);
196 if (!(op
== DPLANE_OP_RULE_ADD
|| op
== DPLANE_OP_RULE_UPDATE
197 || op
== DPLANE_OP_RULE_DELETE
)) {
199 EC_ZEBRA_PBR_RULE_UPDATE
,
200 "Context received for kernel rule update with incorrect OP code (%u)",
202 return FRR_NETLINK_ERROR
;
205 ret
= netlink_batch_add_msg(bth
, ctx
, netlink_rule_msg_encoder
, false);
208 * Delete the old one.
210 * Don't care about this result right?
212 if (op
== DPLANE_OP_RULE_UPDATE
)
213 netlink_batch_add_msg(bth
, ctx
, netlink_oldrule_msg_encoder
,
220 * Handle netlink notification informing a rule add or delete.
221 * Handling of an ADD is TBD.
222 * DELs are notified up, if other attributes indicate it may be a
223 * notification of interest. The expectation is that if this corresponds
224 * to a PBR rule added by FRR, it will be readded.
226 * At startup, if we see a rule that we created, delete it: it is left over
227 * from a previous instance and should have been removed on shutdown.
230 int netlink_rule_change(struct nlmsghdr
*h
, ns_id_t ns_id
, int startup
)
232 struct zebra_ns
*zns
;
233 struct fib_rule_hdr
*frh
;
234 struct rtattr
*tb
[FRA_MAX
+ 1];
237 struct zebra_pbr_rule rule
= {};
240 /* Basic validation followed by extracting attributes. */
241 if (h
->nlmsg_type
!= RTM_NEWRULE
&& h
->nlmsg_type
!= RTM_DELRULE
)
244 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
247 "%s: Message received from netlink is of a broken size: %d %zu",
248 __func__
, h
->nlmsg_len
,
249 (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr
)));
255 if (frh
->family
!= AF_INET
&& frh
->family
!= AF_INET6
) {
256 if (frh
->family
== RTNL_FAMILY_IPMR
257 || frh
->family
== RTNL_FAMILY_IP6MR
) {
258 if (IS_ZEBRA_DEBUG_KERNEL
)
260 "Received rule netlink that we are ignoring for family %u, rule change: %u",
261 frh
->family
, h
->nlmsg_type
);
265 EC_ZEBRA_NETLINK_INVALID_AF
,
266 "Invalid address family: %u received from kernel rule change: %u",
267 frh
->family
, h
->nlmsg_type
);
270 if (frh
->action
!= FR_ACT_TO_TBL
)
273 memset(tb
, 0, sizeof(tb
));
274 netlink_parse_rtattr(tb
, FRA_MAX
, RTM_RTA(frh
), len
);
276 if (tb
[FRA_PRIORITY
])
277 rule
.rule
.priority
= *(uint32_t *)RTA_DATA(tb
[FRA_PRIORITY
]);
280 if (frh
->family
== AF_INET
)
281 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix4
,
282 RTA_DATA(tb
[FRA_SRC
]), 4);
284 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix6
,
285 RTA_DATA(tb
[FRA_SRC
]), 16);
286 rule
.rule
.filter
.src_ip
.prefixlen
= frh
->src_len
;
287 rule
.rule
.filter
.src_ip
.family
= frh
->family
;
288 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_SRC_IP
;
292 if (frh
->family
== AF_INET
)
293 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix4
,
294 RTA_DATA(tb
[FRA_DST
]), 4);
296 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix6
,
297 RTA_DATA(tb
[FRA_DST
]), 16);
298 rule
.rule
.filter
.dst_ip
.prefixlen
= frh
->dst_len
;
299 rule
.rule
.filter
.dst_ip
.family
= frh
->family
;
300 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_DST_IP
;
304 rule
.rule
.action
.table
= *(uint32_t *)RTA_DATA(tb
[FRA_TABLE
]);
306 rule
.rule
.action
.table
= frh
->table
;
308 /* TBD: We don't care about rules not specifying an IIF. */
309 if (tb
[FRA_IFNAME
] == NULL
)
312 if (tb
[FRA_PROTOCOL
])
313 proto
= *(uint8_t *)RTA_DATA(tb
[FRA_PROTOCOL
]);
315 ifname
= (char *)RTA_DATA(tb
[FRA_IFNAME
]);
316 strlcpy(rule
.ifname
, ifname
, sizeof(rule
.ifname
));
318 if (h
->nlmsg_type
== RTM_NEWRULE
) {
320 * If we see a rule at startup we created, delete it now.
321 * It should have been flushed on a previous shutdown.
323 if (startup
&& proto
== RTPROT_ZEBRA
) {
324 enum zebra_dplane_result ret
;
326 ret
= dplane_pbr_rule_delete(&rule
);
329 "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u",
331 ((ret
== ZEBRA_DPLANE_REQUEST_FAILURE
)
334 nl_family_to_str(frh
->family
), rule
.ifname
,
335 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
336 &rule
.rule
.filter
.dst_ip
,
337 rule
.rule
.action
.table
);
344 zns
= zebra_ns_lookup(ns_id
);
346 /* If we don't know the interface, we don't care. */
347 if (!if_lookup_by_name_per_ns(zns
, ifname
))
350 if (IS_ZEBRA_DEBUG_KERNEL
)
352 "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u",
353 nl_msg_type_to_str(h
->nlmsg_type
),
354 nl_family_to_str(frh
->family
), rule
.ifname
,
355 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
356 &rule
.rule
.filter
.dst_ip
, rule
.rule
.action
.table
);
358 return kernel_pbr_rule_del(&rule
);
362 * Request rules from the kernel
364 static int netlink_request_rules(struct zebra_ns
*zns
, int family
, int type
)
368 struct fib_rule_hdr frh
;
369 char buf
[NL_PKT_BUF_SIZE
];
372 memset(&req
, 0, sizeof(req
));
373 req
.n
.nlmsg_type
= type
;
374 req
.n
.nlmsg_flags
= NLM_F_ROOT
| NLM_F_MATCH
| NLM_F_REQUEST
;
375 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
376 req
.frh
.family
= family
;
378 return netlink_request(&zns
->netlink_cmd
, &req
);
382 * Get to know existing PBR rules in the kernel - typically called at startup.
384 int netlink_rules_read(struct zebra_ns
*zns
)
387 struct zebra_dplane_info dp_info
;
389 zebra_dplane_info_from_zns(&dp_info
, zns
, true);
391 ret
= netlink_request_rules(zns
, AF_INET
, RTM_GETRULE
);
395 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
400 ret
= netlink_request_rules(zns
, AF_INET6
, RTM_GETRULE
);
404 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
409 #endif /* HAVE_NETLINK */