2 * Zebra Policy Based Routing (PBR) interaction with the kernel using
4 * Copyright (C) 2018 Cumulus Networks, Inc.
6 * This file is part of FRR.
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
32 #include <linux/fib_rules.h>
33 #include "zebra/zserv.h"
34 #include "zebra/zebra_ns.h"
35 #include "zebra/zebra_vrf.h"
37 #include "zebra/interface.h"
38 #include "zebra/debug.h"
39 #include "zebra/rtadv.h"
40 #include "zebra/kernel_netlink.h"
41 #include "zebra/rule_netlink.h"
42 #include "zebra/zebra_pbr.h"
43 #include "zebra/zebra_errors.h"
44 #include "zebra/zebra_dplane.h"
48 /* static function declarations */
50 /* Private functions */
54 * netlink_rule_msg_encode
56 * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen.
58 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
59 * or the number of bytes written to buf.
61 static ssize_t
netlink_rule_msg_encode(
62 int cmd
, const struct zebra_dplane_ctx
*ctx
, uint32_t filter_bm
,
63 uint32_t priority
, uint32_t table
, const struct prefix
*src_ip
,
64 const struct prefix
*dst_ip
, uint32_t fwmark
, uint8_t dsfield
,
65 uint8_t ip_protocol
, void *buf
, size_t buflen
)
67 uint8_t protocol
= RTPROT_ZEBRA
;
72 struct fib_rule_hdr frh
;
76 const char *ifname
= dplane_ctx_rule_get_ifname(ctx
);
78 if (buflen
< sizeof(*req
))
80 memset(req
, 0, sizeof(*req
));
82 /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */
83 if (PREFIX_FAMILY(src_ip
))
84 family
= PREFIX_FAMILY(src_ip
);
85 else if (PREFIX_FAMILY(dst_ip
))
86 family
= PREFIX_FAMILY(dst_ip
);
90 bytelen
= (family
== AF_INET
? 4 : 16);
92 req
->n
.nlmsg_type
= cmd
;
93 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
94 req
->n
.nlmsg_flags
= NLM_F_REQUEST
;
96 req
->frh
.family
= family
;
97 req
->frh
.action
= FR_ACT_TO_TBL
;
99 if (!nl_attr_put(&req
->n
, buflen
, FRA_PROTOCOL
, &protocol
,
104 if (!nl_attr_put32(&req
->n
, buflen
, FRA_PRIORITY
, priority
))
107 /* interface on which applied */
108 if (!nl_attr_put(&req
->n
, buflen
, FRA_IFNAME
, ifname
,
112 /* source IP, if specified */
113 if (filter_bm
& PBR_FILTER_SRC_IP
) {
114 req
->frh
.src_len
= src_ip
->prefixlen
;
115 if (!nl_attr_put(&req
->n
, buflen
, FRA_SRC
, &src_ip
->u
.prefix
,
120 /* destination IP, if specified */
121 if (filter_bm
& PBR_FILTER_DST_IP
) {
122 req
->frh
.dst_len
= dst_ip
->prefixlen
;
123 if (!nl_attr_put(&req
->n
, buflen
, FRA_DST
, &dst_ip
->u
.prefix
,
128 /* fwmark, if specified */
129 if (filter_bm
& PBR_FILTER_FWMARK
) {
130 if (!nl_attr_put32(&req
->n
, buflen
, FRA_FWMARK
, fwmark
))
134 /* dsfield, if specified */
135 if (filter_bm
& PBR_FILTER_DSFIELD
)
136 req
->frh
.tos
= dsfield
;
138 /* protocol to match on */
139 if (filter_bm
& PBR_FILTER_IP_PROTOCOL
)
140 nl_attr_put8(&req
->n
, buflen
, FRA_IP_PROTO
, ip_protocol
);
142 /* Route table to use to forward, if filter criteria matches. */
144 req
->frh
.table
= table
;
146 req
->frh
.table
= RT_TABLE_UNSPEC
;
147 if (!nl_attr_put32(&req
->n
, buflen
, FRA_TABLE
, table
))
151 if (IS_ZEBRA_DEBUG_KERNEL
)
153 "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u",
154 nl_msg_type_to_str(cmd
), nl_family_to_str(family
),
155 ifname
, priority
, fwmark
, src_ip
, dst_ip
, table
);
157 return NLMSG_ALIGN(req
->n
.nlmsg_len
);
160 static ssize_t
netlink_rule_msg_encoder(struct zebra_dplane_ctx
*ctx
, void *buf
,
163 int cmd
= RTM_NEWRULE
;
165 if (dplane_ctx_get_op(ctx
) == DPLANE_OP_RULE_DELETE
)
168 return netlink_rule_msg_encode(
169 cmd
, ctx
, dplane_ctx_rule_get_filter_bm(ctx
),
170 dplane_ctx_rule_get_priority(ctx
),
171 dplane_ctx_rule_get_table(ctx
), dplane_ctx_rule_get_src_ip(ctx
),
172 dplane_ctx_rule_get_dst_ip(ctx
),
173 dplane_ctx_rule_get_fwmark(ctx
),
174 dplane_ctx_rule_get_dsfield(ctx
),
175 dplane_ctx_rule_get_ipproto(ctx
), buf
, buflen
);
/*
 * Encoder callback that always emits RTM_DELRULE, built from the "old"
 * rule attributes stored in the context. Returns whatever
 * netlink_rule_msg_encode() returns.
 */
static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx,
					   void *buf, size_t buflen)
{
	uint32_t old_filter_bm = dplane_ctx_rule_get_old_filter_bm(ctx);
	uint32_t old_priority = dplane_ctx_rule_get_old_priority(ctx);
	uint32_t old_table = dplane_ctx_rule_get_old_table(ctx);
	const struct prefix *old_src = dplane_ctx_rule_get_old_src_ip(ctx);
	const struct prefix *old_dst = dplane_ctx_rule_get_old_dst_ip(ctx);
	uint32_t old_fwmark = dplane_ctx_rule_get_old_fwmark(ctx);
	uint8_t old_dsfield = dplane_ctx_rule_get_old_dsfield(ctx);
	uint8_t old_ipproto = dplane_ctx_rule_get_old_ipproto(ctx);

	return netlink_rule_msg_encode(RTM_DELRULE, ctx, old_filter_bm,
				       old_priority, old_table, old_src,
				       old_dst, old_fwmark, old_dsfield,
				       old_ipproto, buf, buflen);
}
192 /* Public functions */
194 enum netlink_msg_status
195 netlink_put_rule_update_msg(struct nl_batch
*bth
, struct zebra_dplane_ctx
*ctx
)
198 enum netlink_msg_status ret
;
200 op
= dplane_ctx_get_op(ctx
);
201 if (!(op
== DPLANE_OP_RULE_ADD
|| op
== DPLANE_OP_RULE_UPDATE
202 || op
== DPLANE_OP_RULE_DELETE
)) {
204 EC_ZEBRA_PBR_RULE_UPDATE
,
205 "Context received for kernel rule update with incorrect OP code (%u)",
207 return FRR_NETLINK_ERROR
;
210 ret
= netlink_batch_add_msg(bth
, ctx
, netlink_rule_msg_encoder
, false);
213 * Delete the old one.
215 * Don't care about this result right?
217 if (op
== DPLANE_OP_RULE_UPDATE
)
218 netlink_batch_add_msg(bth
, ctx
, netlink_oldrule_msg_encoder
,
225 * Handle netlink notification informing a rule add or delete.
226 * Handling of an ADD is TBD.
227 * DELs are notified up, if other attributes indicate it may be a
228 * notification of interest. The expectation is that if this corresponds
229 * to a PBR rule added by FRR, it will be readded.
231 * If startup and we see a rule we created, delete it as its leftover
232 * from a previous instance and should have been removed on shutdown.
235 int netlink_rule_change(struct nlmsghdr
*h
, ns_id_t ns_id
, int startup
)
237 struct zebra_ns
*zns
;
238 struct fib_rule_hdr
*frh
;
239 struct rtattr
*tb
[FRA_MAX
+ 1];
242 struct zebra_pbr_rule rule
= {};
244 uint8_t ip_proto
= 0;
246 /* Basic validation followed by extracting attributes. */
247 if (h
->nlmsg_type
!= RTM_NEWRULE
&& h
->nlmsg_type
!= RTM_DELRULE
)
250 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
253 "%s: Message received from netlink is of a broken size: %d %zu",
254 __func__
, h
->nlmsg_len
,
255 (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr
)));
261 if (frh
->family
!= AF_INET
&& frh
->family
!= AF_INET6
) {
262 if (frh
->family
== RTNL_FAMILY_IPMR
263 || frh
->family
== RTNL_FAMILY_IP6MR
) {
264 if (IS_ZEBRA_DEBUG_KERNEL
)
266 "Received rule netlink that we are ignoring for family %u, rule change: %u",
267 frh
->family
, h
->nlmsg_type
);
271 EC_ZEBRA_NETLINK_INVALID_AF
,
272 "Invalid address family: %u received from kernel rule change: %u",
273 frh
->family
, h
->nlmsg_type
);
276 if (frh
->action
!= FR_ACT_TO_TBL
)
279 memset(tb
, 0, sizeof(tb
));
280 netlink_parse_rtattr(tb
, FRA_MAX
, RTM_RTA(frh
), len
);
282 if (tb
[FRA_PRIORITY
])
283 rule
.rule
.priority
= *(uint32_t *)RTA_DATA(tb
[FRA_PRIORITY
]);
286 if (frh
->family
== AF_INET
)
287 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix4
,
288 RTA_DATA(tb
[FRA_SRC
]), 4);
290 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix6
,
291 RTA_DATA(tb
[FRA_SRC
]), 16);
292 rule
.rule
.filter
.src_ip
.prefixlen
= frh
->src_len
;
293 rule
.rule
.filter
.src_ip
.family
= frh
->family
;
294 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_SRC_IP
;
298 if (frh
->family
== AF_INET
)
299 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix4
,
300 RTA_DATA(tb
[FRA_DST
]), 4);
302 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix6
,
303 RTA_DATA(tb
[FRA_DST
]), 16);
304 rule
.rule
.filter
.dst_ip
.prefixlen
= frh
->dst_len
;
305 rule
.rule
.filter
.dst_ip
.family
= frh
->family
;
306 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_DST_IP
;
310 rule
.rule
.action
.table
= *(uint32_t *)RTA_DATA(tb
[FRA_TABLE
]);
312 rule
.rule
.action
.table
= frh
->table
;
314 /* TBD: We don't care about rules not specifying an IIF. */
315 if (tb
[FRA_IFNAME
] == NULL
)
318 if (tb
[FRA_PROTOCOL
])
319 proto
= *(uint8_t *)RTA_DATA(tb
[FRA_PROTOCOL
]);
321 if (tb
[FRA_IP_PROTO
])
322 ip_proto
= *(uint8_t *)RTA_DATA(tb
[FRA_IP_PROTO
]);
324 ifname
= (char *)RTA_DATA(tb
[FRA_IFNAME
]);
325 strlcpy(rule
.ifname
, ifname
, sizeof(rule
.ifname
));
327 if (h
->nlmsg_type
== RTM_NEWRULE
) {
329 * If we see a rule at startup we created, delete it now.
330 * It should have been flushed on a previous shutdown.
332 if (startup
&& proto
== RTPROT_ZEBRA
) {
333 enum zebra_dplane_result ret
;
335 ret
= dplane_pbr_rule_delete(&rule
);
338 "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
340 ((ret
== ZEBRA_DPLANE_REQUEST_FAILURE
)
343 nl_family_to_str(frh
->family
), rule
.ifname
,
344 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
345 &rule
.rule
.filter
.dst_ip
,
346 rule
.rule
.action
.table
, ip_proto
);
353 zns
= zebra_ns_lookup(ns_id
);
355 /* If we don't know the interface, we don't care. */
356 if (!if_lookup_by_name_per_ns(zns
, ifname
))
359 if (IS_ZEBRA_DEBUG_KERNEL
)
361 "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
362 nl_msg_type_to_str(h
->nlmsg_type
),
363 nl_family_to_str(frh
->family
), rule
.ifname
,
364 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
365 &rule
.rule
.filter
.dst_ip
, rule
.rule
.action
.table
,
368 return kernel_pbr_rule_del(&rule
);
372 * Request rules from the kernel
374 static int netlink_request_rules(struct zebra_ns
*zns
, int family
, int type
)
378 struct fib_rule_hdr frh
;
379 char buf
[NL_PKT_BUF_SIZE
];
382 memset(&req
, 0, sizeof(req
));
383 req
.n
.nlmsg_type
= type
;
384 req
.n
.nlmsg_flags
= NLM_F_ROOT
| NLM_F_MATCH
| NLM_F_REQUEST
;
385 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
386 req
.frh
.family
= family
;
388 return netlink_request(&zns
->netlink_cmd
, &req
);
392 * Get to know existing PBR rules in the kernel - typically called at startup.
394 int netlink_rules_read(struct zebra_ns
*zns
)
397 struct zebra_dplane_info dp_info
;
399 zebra_dplane_info_from_zns(&dp_info
, zns
, true);
401 ret
= netlink_request_rules(zns
, AF_INET
, RTM_GETRULE
);
405 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
410 ret
= netlink_request_rules(zns
, AF_INET6
, RTM_GETRULE
);
414 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
419 #endif /* HAVE_NETLINK */