2 * Zebra Policy Based Routing (PBR) interaction with the kernel using
4 * Copyright (C) 2018 Cumulus Networks, Inc.
6 * This file is part of FRR.
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
32 #include <linux/fib_rules.h>
33 #include "zebra/zserv.h"
34 #include "zebra/zebra_ns.h"
35 #include "zebra/zebra_vrf.h"
37 #include "zebra/interface.h"
38 #include "zebra/debug.h"
39 #include "zebra/rtadv.h"
40 #include "zebra/kernel_netlink.h"
41 #include "zebra/rule_netlink.h"
42 #include "zebra/zebra_pbr.h"
43 #include "zebra/zebra_errors.h"
44 #include "zebra/zebra_dplane.h"
45 #include "zebra/zebra_trace.h"
49 /* static function declarations */
51 /* Private functions */
55 * netlink_rule_msg_encode
57 * Encodes netlink RTM_NEWRULE/RTM_DELRULE message to buffer buf of size buflen.
59 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
60 * or the number of bytes written to buf.
/*
 * Build one FIB rule netlink request in buf.
 *
 * cmd is stored as the netlink message type. ctx supplies the interface
 * name through dplane_ctx_rule_get_ifname(). filter_bm selects which of
 * src_ip, dst_ip, fwmark, dsfield and ip_protocol are written into the
 * request; priority and table are always encoded.
 *
 * On the success path the function returns NLMSG_ALIGN() of the finished
 * message length.
 *
 * NOTE(review): the statements executed when buflen is too small or when
 * an nl_attr_put*() call fails are not visible in this view of the file;
 * confirm them before relying on a particular failure value.
 */
62 static ssize_t
netlink_rule_msg_encode(
63 int cmd
, const struct zebra_dplane_ctx
*ctx
, uint32_t filter_bm
,
64 uint32_t priority
, uint32_t table
, const struct prefix
*src_ip
,
65 const struct prefix
*dst_ip
, uint32_t fwmark
, uint8_t dsfield
,
66 uint8_t ip_protocol
, void *buf
, size_t buflen
)
/* Value carried in the FRA_PROTOCOL attribute below. */
68 uint8_t protocol
= RTPROT_ZEBRA
;
73 struct fib_rule_hdr frh
;
77 const char *ifname
= dplane_ctx_rule_get_ifname(ctx
);
/* The buffer must be able to hold at least the fixed part of the request. */
79 if (buflen
< sizeof(*req
))
81 memset(req
, 0, sizeof(*req
));
83 /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */
84 if (PREFIX_FAMILY(src_ip
))
85 family
= PREFIX_FAMILY(src_ip
);
86 else if (PREFIX_FAMILY(dst_ip
))
87 family
= PREFIX_FAMILY(dst_ip
);
/* Address length in bytes: 4 for AF_INET, 16 for anything else. */
91 bytelen
= (family
== AF_INET
? 4 : 16);
93 req
->n
.nlmsg_type
= cmd
;
/*
 * NOTE(review): the initial length is computed from struct rtmsg although
 * the header member declared above is struct fib_rule_hdr, and
 * netlink_request_rules() uses sizeof(struct fib_rule_hdr). Confirm the two
 * structures have the same size.
 */
94 req
->n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
95 req
->n
.nlmsg_flags
= NLM_F_REQUEST
;
97 req
->frh
.family
= family
;
98 req
->frh
.action
= FR_ACT_TO_TBL
;
/* Attach FRA_PROTOCOL carrying RTPROT_ZEBRA. */
100 if (!nl_attr_put(&req
->n
, buflen
, FRA_PROTOCOL
, &protocol
,
/* Rule priority. */
105 if (!nl_attr_put32(&req
->n
, buflen
, FRA_PRIORITY
, priority
))
108 /* interface on which applied */
109 if (!nl_attr_put(&req
->n
, buflen
, FRA_IFNAME
, ifname
,
113 /* source IP, if specified */
114 if (filter_bm
& PBR_FILTER_SRC_IP
) {
115 req
->frh
.src_len
= src_ip
->prefixlen
;
116 if (!nl_attr_put(&req
->n
, buflen
, FRA_SRC
, &src_ip
->u
.prefix
,
121 /* destination IP, if specified */
122 if (filter_bm
& PBR_FILTER_DST_IP
) {
123 req
->frh
.dst_len
= dst_ip
->prefixlen
;
124 if (!nl_attr_put(&req
->n
, buflen
, FRA_DST
, &dst_ip
->u
.prefix
,
129 /* fwmark, if specified */
130 if (filter_bm
& PBR_FILTER_FWMARK
) {
131 if (!nl_attr_put32(&req
->n
, buflen
, FRA_FWMARK
, fwmark
))
/* The dsfield match goes into the header's tos field, not an attribute. */
135 /* dsfield, if specified */
136 if (filter_bm
& PBR_FILTER_DSFIELD
)
137 req
->frh
.tos
= dsfield
;
/*
 * NOTE(review): unlike the other attribute writes in this function, the
 * result of nl_attr_put8() is not tested here.
 */
139 /* protocol to match on */
140 if (filter_bm
& PBR_FILTER_IP_PROTOCOL
)
141 nl_attr_put8(&req
->n
, buflen
, FRA_IP_PROTO
, ip_protocol
);
/*
 * Two encodings of the table are visible: either the header's table field
 * holds it directly, or the header field is RT_TABLE_UNSPEC and the value
 * travels in an FRA_TABLE attribute. The test that chooses between them is
 * not visible in this view of the file.
 */
143 /* Route table to use to forward, if filter criteria matches. */
145 req
->frh
.table
= table
;
147 req
->frh
.table
= RT_TABLE_UNSPEC
;
148 if (!nl_attr_put32(&req
->n
, buflen
, FRA_TABLE
, table
))
152 if (IS_ZEBRA_DEBUG_KERNEL
)
154 "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u",
155 nl_msg_type_to_str(cmd
), nl_family_to_str(family
),
156 ifname
, priority
, fwmark
, src_ip
, dst_ip
, table
);
/* Success: report the aligned length of the message that was built. */
158 return NLMSG_ALIGN(req
->n
.nlmsg_len
);
/*
 * Encoder callback for the current version of the rule held in ctx; it is
 * the function handed to netlink_batch_add_msg() by
 * netlink_put_rule_update_msg().
 *
 * cmd starts out as RTM_NEWRULE and the context's operation is compared
 * with DPLANE_OP_RULE_DELETE. All current rule fields are then read from
 * ctx through the dplane_ctx_rule_get_*() accessors and passed, together
 * with buf and buflen, to netlink_rule_msg_encode(), whose result is
 * returned unchanged.
 *
 * NOTE(review): the statement executed when the operation is a delete is
 * not visible in this view of the file (presumably it switches cmd to
 * RTM_DELRULE - confirm).
 */
161 static ssize_t
netlink_rule_msg_encoder(struct zebra_dplane_ctx
*ctx
, void *buf
,
164 int cmd
= RTM_NEWRULE
;
166 if (dplane_ctx_get_op(ctx
) == DPLANE_OP_RULE_DELETE
)
169 return netlink_rule_msg_encode(
170 cmd
, ctx
, dplane_ctx_rule_get_filter_bm(ctx
),
171 dplane_ctx_rule_get_priority(ctx
),
172 dplane_ctx_rule_get_table(ctx
), dplane_ctx_rule_get_src_ip(ctx
),
173 dplane_ctx_rule_get_dst_ip(ctx
),
174 dplane_ctx_rule_get_fwmark(ctx
),
175 dplane_ctx_rule_get_dsfield(ctx
),
176 dplane_ctx_rule_get_ipproto(ctx
), buf
, buflen
);
179 static ssize_t
netlink_oldrule_msg_encoder(struct zebra_dplane_ctx
*ctx
,
180 void *buf
, size_t buflen
)
182 return netlink_rule_msg_encode(
183 RTM_DELRULE
, ctx
, dplane_ctx_rule_get_old_filter_bm(ctx
),
184 dplane_ctx_rule_get_old_priority(ctx
),
185 dplane_ctx_rule_get_old_table(ctx
),
186 dplane_ctx_rule_get_old_src_ip(ctx
),
187 dplane_ctx_rule_get_old_dst_ip(ctx
),
188 dplane_ctx_rule_get_old_fwmark(ctx
),
189 dplane_ctx_rule_get_old_dsfield(ctx
),
190 dplane_ctx_rule_get_old_ipproto(ctx
), buf
, buflen
);
193 /* Public functions */
/*
 * Queue the netlink message(s) for a PBR rule dataplane context on the
 * batch bth.
 *
 * Only DPLANE_OP_RULE_ADD, DPLANE_OP_RULE_UPDATE and DPLANE_OP_RULE_DELETE
 * are accepted; any other operation is reported under
 * EC_ZEBRA_PBR_RULE_UPDATE and FRR_NETLINK_ERROR is returned.
 *
 * The message for the current rule is added with netlink_rule_msg_encoder
 * and its status is kept in ret. For an update, a second message built by
 * netlink_oldrule_msg_encoder is added afterwards; the status of that
 * second call is not stored.
 *
 * NOTE(review): the final return statement is not visible in this view of
 * the file (presumably it returns ret - confirm).
 */
195 enum netlink_msg_status
196 netlink_put_rule_update_msg(struct nl_batch
*bth
, struct zebra_dplane_ctx
*ctx
)
199 enum netlink_msg_status ret
;
201 op
= dplane_ctx_get_op(ctx
);
/* Reject every operation that is not a rule add, update or delete. */
202 if (!(op
== DPLANE_OP_RULE_ADD
|| op
== DPLANE_OP_RULE_UPDATE
203 || op
== DPLANE_OP_RULE_DELETE
)) {
205 EC_ZEBRA_PBR_RULE_UPDATE
,
206 "Context received for kernel rule update with incorrect OP code (%u)",
208 return FRR_NETLINK_ERROR
;
/* Message for the current version of the rule. */
211 ret
= netlink_batch_add_msg(bth
, ctx
, netlink_rule_msg_encoder
, false);
214 * Delete the old one.
216 * Don't care about this result right?
/* On update, also queue the removal of the previous rule. */
218 if (op
== DPLANE_OP_RULE_UPDATE
)
219 netlink_batch_add_msg(bth
, ctx
, netlink_oldrule_msg_encoder
,
226 * Handle netlink notification informing a rule add or delete.
227 * Handling of an ADD is TBD.
228 * DELs are notified up, if other attributes indicate it may be a
229 * notification of interest. The expectation is that if this corresponds
230 * to a PBR rule added by FRR, it will be readded.
232 * If we are starting up and see a rule we created, delete it, as it is a
233 * leftover from a previous instance and should have been removed on shutdown.
/*
 * Handler for a rule notification received from the kernel.
 *
 * h is the netlink message, ns_id identifies the namespace used for the
 * interface lookup, and startup enables the cleanup of leftover rules.
 *
 * The handler checks the message type, size, address family and action,
 * then fills a local struct zebra_pbr_rule from the header and the
 * attributes (priority, source and destination prefix, table, interface
 * name). For RTM_NEWRULE at startup, a rule whose FRA_PROTOCOL value is
 * RTPROT_ZEBRA is passed to dplane_pbr_rule_delete(). On the last visible
 * path the parsed rule is passed to kernel_pbr_rule_del() and that call's
 * result is returned.
 *
 * NOTE(review): several early-exit statements and guards are not visible
 * in this view of the file; the comments below describe only the tests and
 * assignments that are.
 */
236 int netlink_rule_change(struct nlmsghdr
*h
, ns_id_t ns_id
, int startup
)
238 struct zebra_ns
*zns
;
239 struct fib_rule_hdr
*frh
;
240 struct rtattr
*tb
[FRA_MAX
+ 1];
243 struct zebra_pbr_rule rule
= {};
245 uint8_t ip_proto
= 0;
247 frrtrace(3, frr_zebra
, netlink_rule_change
, h
, ns_id
, startup
);
249 /* Basic validation followed by extracting attributes. */
250 if (h
->nlmsg_type
!= RTM_NEWRULE
&& h
->nlmsg_type
!= RTM_DELRULE
)
/* Bytes left for attributes once the fixed rule header is accounted for. */
253 len
= h
->nlmsg_len
- NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
256 "%s: Message received from netlink is of a broken size: %d %zu",
257 __func__
, h
->nlmsg_len
,
258 (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr
)));
/*
 * Families other than AF_INET/AF_INET6 are handled here. The two multicast
 * rule families get the "ignoring" debug message; the "Invalid address
 * family" message is tagged EC_ZEBRA_NETLINK_INVALID_AF.
 */
264 if (frh
->family
!= AF_INET
&& frh
->family
!= AF_INET6
) {
265 if (frh
->family
== RTNL_FAMILY_IPMR
266 || frh
->family
== RTNL_FAMILY_IP6MR
) {
267 if (IS_ZEBRA_DEBUG_KERNEL
)
269 "Received rule netlink that we are ignoring for family %u, rule change: %u",
270 frh
->family
, h
->nlmsg_type
);
274 EC_ZEBRA_NETLINK_INVALID_AF
,
275 "Invalid address family: %u received from kernel rule change: %u",
276 frh
->family
, h
->nlmsg_type
);
/*
 * Rules with an action other than FR_ACT_TO_TBL are singled out here; the
 * statement taken for them is not visible in this view of the file.
 */
279 if (frh
->action
!= FR_ACT_TO_TBL
)
/* tb[] is zeroed first so that absent attributes read as NULL below. */
282 memset(tb
, 0, sizeof(tb
));
283 netlink_parse_rtattr(tb
, FRA_MAX
, RTM_RTA(frh
), len
);
285 if (tb
[FRA_PRIORITY
])
286 rule
.rule
.priority
= *(uint32_t *)RTA_DATA(tb
[FRA_PRIORITY
]);
/*
 * Source match: copy 4 address bytes into prefix4 for AF_INET or 16 bytes
 * into prefix6, take the prefix length from the header, and flag
 * PBR_FILTER_SRC_IP in the filter bitmap.
 */
289 if (frh
->family
== AF_INET
)
290 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix4
,
291 RTA_DATA(tb
[FRA_SRC
]), 4);
293 memcpy(&rule
.rule
.filter
.src_ip
.u
.prefix6
,
294 RTA_DATA(tb
[FRA_SRC
]), 16);
295 rule
.rule
.filter
.src_ip
.prefixlen
= frh
->src_len
;
296 rule
.rule
.filter
.src_ip
.family
= frh
->family
;
297 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_SRC_IP
;
/* Destination match: same handling as the source, using FRA_DST. */
301 if (frh
->family
== AF_INET
)
302 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix4
,
303 RTA_DATA(tb
[FRA_DST
]), 4);
305 memcpy(&rule
.rule
.filter
.dst_ip
.u
.prefix6
,
306 RTA_DATA(tb
[FRA_DST
]), 16);
307 rule
.rule
.filter
.dst_ip
.prefixlen
= frh
->dst_len
;
308 rule
.rule
.filter
.dst_ip
.family
= frh
->family
;
309 rule
.rule
.filter
.filter_bm
|= PBR_FILTER_DST_IP
;
/*
 * Two sources for the table id are visible: the FRA_TABLE attribute and the
 * header's table field. The test that chooses between them is not visible
 * in this view of the file.
 */
313 rule
.rule
.action
.table
= *(uint32_t *)RTA_DATA(tb
[FRA_TABLE
]);
315 rule
.rule
.action
.table
= frh
->table
;
317 /* TBD: We don't care about rules not specifying an IIF. */
318 if (tb
[FRA_IFNAME
] == NULL
)
321 if (tb
[FRA_PROTOCOL
])
322 proto
= *(uint8_t *)RTA_DATA(tb
[FRA_PROTOCOL
]);
/* In the visible code ip_proto is only used in the log messages. */
324 if (tb
[FRA_IP_PROTO
])
325 ip_proto
= *(uint8_t *)RTA_DATA(tb
[FRA_IP_PROTO
]);
/* ifname points at the attribute payload; rule.ifname gets a bounded copy. */
327 ifname
= (char *)RTA_DATA(tb
[FRA_IFNAME
]);
328 strlcpy(rule
.ifname
, ifname
, sizeof(rule
.ifname
));
330 if (h
->nlmsg_type
== RTM_NEWRULE
) {
332 * If we see a rule at startup we created, delete it now.
333 * It should have been flushed on a previous shutdown.
335 if (startup
&& proto
== RTPROT_ZEBRA
) {
336 enum zebra_dplane_result ret
;
338 ret
= dplane_pbr_rule_delete(&rule
);
341 "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
343 ((ret
== ZEBRA_DPLANE_REQUEST_FAILURE
)
346 nl_family_to_str(frh
->family
), rule
.ifname
,
347 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
348 &rule
.rule
.filter
.dst_ip
,
349 rule
.rule
.action
.table
, ip_proto
);
/* Resolve the namespace used for the interface lookup below. */
356 zns
= zebra_ns_lookup(ns_id
);
358 /* If we don't know the interface, we don't care. */
359 if (!if_lookup_by_name_per_ns(zns
, ifname
))
362 if (IS_ZEBRA_DEBUG_KERNEL
)
364 "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
365 nl_msg_type_to_str(h
->nlmsg_type
),
366 nl_family_to_str(frh
->family
), rule
.ifname
,
367 rule
.rule
.priority
, &rule
.rule
.filter
.src_ip
,
368 &rule
.rule
.filter
.dst_ip
, rule
.rule
.action
.table
,
/* Hand the parsed rule to kernel_pbr_rule_del() and return its result. */
371 return kernel_pbr_rule_del(&rule
);
375 * Request rules from the kernel
/*
 * Send a rule request to the kernel on zns->netlink_cmd.
 *
 * The request is zeroed, given message type `type`, the flags
 * NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST, a length covering one
 * struct fib_rule_hdr, and address family `family`.
 *
 * Returns whatever netlink_request() returns.
 */
377 static int netlink_request_rules(struct zebra_ns
*zns
, int family
, int type
)
381 struct fib_rule_hdr frh
;
382 char buf
[NL_PKT_BUF_SIZE
];
385 memset(&req
, 0, sizeof(req
));
386 req
.n
.nlmsg_type
= type
;
387 req
.n
.nlmsg_flags
= NLM_F_ROOT
| NLM_F_MATCH
| NLM_F_REQUEST
;
388 req
.n
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct fib_rule_hdr
));
389 req
.frh
.family
= family
;
391 return netlink_request(&zns
->netlink_cmd
, &req
);
395 * Get to know existing PBR rules in the kernel - typically called at startup.
/*
 * Read the rules present in the kernel for namespace zns.
 *
 * dp_info is filled by zebra_dplane_info_from_zns(). An RTM_GETRULE request
 * is then issued for AF_INET and afterwards for AF_INET6; each request is
 * followed by netlink_parse_info() on zns->netlink_cmd with
 * netlink_rule_change as the per-message handler.
 *
 * NOTE(review): the checks on ret between the steps, the remaining
 * netlink_parse_info() arguments and the final return statement are not
 * visible in this view of the file.
 */
397 int netlink_rules_read(struct zebra_ns
*zns
)
400 struct zebra_dplane_info dp_info
;
402 zebra_dplane_info_from_zns(&dp_info
, zns
, true);
/* IPv4 rules first. */
404 ret
= netlink_request_rules(zns
, AF_INET
, RTM_GETRULE
);
408 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
/* Then IPv6 rules. */
413 ret
= netlink_request_rules(zns
, AF_INET6
, RTM_GETRULE
);
417 ret
= netlink_parse_info(netlink_rule_change
, &zns
->netlink_cmd
,
422 #endif /* HAVE_NETLINK */