]> git.proxmox.com Git - mirror_frr.git/blob - zebra/rule_netlink.c
Merge pull request #9007 from donaldsharp/pbr_stuff
[mirror_frr.git] / zebra / rule_netlink.c
1 /*
2 * Zebra Policy Based Routing (PBR) interaction with the kernel using
3 * netlink.
4 * Copyright (C) 2018 Cumulus Networks, Inc.
5 *
6 * This file is part of FRR.
7 *
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23
24 #include <zebra.h>
25
26 #ifdef HAVE_NETLINK
27
28 #include "if.h"
29 #include "prefix.h"
30 #include "vrf.h"
31
32 #include <linux/fib_rules.h>
33 #include "zebra/zserv.h"
34 #include "zebra/zebra_ns.h"
35 #include "zebra/zebra_vrf.h"
36 #include "zebra/rt.h"
37 #include "zebra/interface.h"
38 #include "zebra/debug.h"
39 #include "zebra/rtadv.h"
40 #include "zebra/kernel_netlink.h"
41 #include "zebra/rule_netlink.h"
42 #include "zebra/zebra_pbr.h"
43 #include "zebra/zebra_errors.h"
44 #include "zebra/zebra_dplane.h"
45
46 /* definitions */
47
48 /* static function declarations */
49
50 /* Private functions */
51
52
53 /*
54 * netlink_rule_msg_encode
55 *
56 * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen.
57 *
58 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
59 * or the number of bytes written to buf.
60 */
61 static ssize_t netlink_rule_msg_encode(
62 int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm,
63 uint32_t priority, uint32_t table, const struct prefix *src_ip,
64 const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield,
65 uint8_t ip_protocol, void *buf, size_t buflen)
66 {
67 uint8_t protocol = RTPROT_ZEBRA;
68 int family;
69 int bytelen;
70 struct {
71 struct nlmsghdr n;
72 struct fib_rule_hdr frh;
73 char buf[];
74 } *req = buf;
75
76 const char *ifname = dplane_ctx_rule_get_ifname(ctx);
77
78 if (buflen < sizeof(*req))
79 return 0;
80 memset(req, 0, sizeof(*req));
81
82 /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */
83 if (PREFIX_FAMILY(src_ip))
84 family = PREFIX_FAMILY(src_ip);
85 else if (PREFIX_FAMILY(dst_ip))
86 family = PREFIX_FAMILY(dst_ip);
87 else
88 family = AF_INET;
89
90 bytelen = (family == AF_INET ? 4 : 16);
91
92 req->n.nlmsg_type = cmd;
93 req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
94 req->n.nlmsg_flags = NLM_F_REQUEST;
95
96 req->frh.family = family;
97 req->frh.action = FR_ACT_TO_TBL;
98
99 if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol,
100 sizeof(protocol)))
101 return 0;
102
103 /* rule's pref # */
104 if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority))
105 return 0;
106
107 /* interface on which applied */
108 if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname,
109 strlen(ifname) + 1))
110 return 0;
111
112 /* source IP, if specified */
113 if (filter_bm & PBR_FILTER_SRC_IP) {
114 req->frh.src_len = src_ip->prefixlen;
115 if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix,
116 bytelen))
117 return 0;
118 }
119
120 /* destination IP, if specified */
121 if (filter_bm & PBR_FILTER_DST_IP) {
122 req->frh.dst_len = dst_ip->prefixlen;
123 if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix,
124 bytelen))
125 return 0;
126 }
127
128 /* fwmark, if specified */
129 if (filter_bm & PBR_FILTER_FWMARK) {
130 if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark))
131 return 0;
132 }
133
134 /* dsfield, if specified */
135 if (filter_bm & PBR_FILTER_DSFIELD)
136 req->frh.tos = dsfield;
137
138 /* protocol to match on */
139 if (filter_bm & PBR_FILTER_IP_PROTOCOL)
140 nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol);
141
142 /* Route table to use to forward, if filter criteria matches. */
143 if (table < 256)
144 req->frh.table = table;
145 else {
146 req->frh.table = RT_TABLE_UNSPEC;
147 if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table))
148 return 0;
149 }
150
151 if (IS_ZEBRA_DEBUG_KERNEL)
152 zlog_debug(
153 "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u",
154 nl_msg_type_to_str(cmd), nl_family_to_str(family),
155 ifname, priority, fwmark, src_ip, dst_ip, table);
156
157 return NLMSG_ALIGN(req->n.nlmsg_len);
158 }
159
160 static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
161 size_t buflen)
162 {
163 int cmd = RTM_NEWRULE;
164
165 if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE)
166 cmd = RTM_DELRULE;
167
168 return netlink_rule_msg_encode(
169 cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx),
170 dplane_ctx_rule_get_priority(ctx),
171 dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx),
172 dplane_ctx_rule_get_dst_ip(ctx),
173 dplane_ctx_rule_get_fwmark(ctx),
174 dplane_ctx_rule_get_dsfield(ctx),
175 dplane_ctx_rule_get_ipproto(ctx), buf, buflen);
176 }
177
178 static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx,
179 void *buf, size_t buflen)
180 {
181 return netlink_rule_msg_encode(
182 RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx),
183 dplane_ctx_rule_get_old_priority(ctx),
184 dplane_ctx_rule_get_old_table(ctx),
185 dplane_ctx_rule_get_old_src_ip(ctx),
186 dplane_ctx_rule_get_old_dst_ip(ctx),
187 dplane_ctx_rule_get_old_fwmark(ctx),
188 dplane_ctx_rule_get_old_dsfield(ctx),
189 dplane_ctx_rule_get_old_ipproto(ctx), buf, buflen);
190 }
191
192 /* Public functions */
193
194 enum netlink_msg_status
195 netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
196 {
197 enum dplane_op_e op;
198 enum netlink_msg_status ret;
199
200 op = dplane_ctx_get_op(ctx);
201 if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE
202 || op == DPLANE_OP_RULE_DELETE)) {
203 flog_err(
204 EC_ZEBRA_PBR_RULE_UPDATE,
205 "Context received for kernel rule update with incorrect OP code (%u)",
206 op);
207 return FRR_NETLINK_ERROR;
208 }
209
210 ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false);
211
212 /**
213 * Delete the old one.
214 *
215 * Don't care about this result right?
216 */
217 if (op == DPLANE_OP_RULE_UPDATE)
218 netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder,
219 true);
220
221 return ret;
222 }
223
224 /*
225 * Handle netlink notification informing a rule add or delete.
226 * Handling of an ADD is TBD.
227 * DELs are notified up, if other attributes indicate it may be a
228 * notification of interest. The expectation is that if this corresponds
229 * to a PBR rule added by FRR, it will be readded.
230 *
231 * If startup and we see a rule we created, delete it as its leftover
232 * from a previous instance and should have been removed on shutdown.
233 *
234 */
235 int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
236 {
237 struct zebra_ns *zns;
238 struct fib_rule_hdr *frh;
239 struct rtattr *tb[FRA_MAX + 1];
240 int len;
241 char *ifname;
242 struct zebra_pbr_rule rule = {};
243 uint8_t proto = 0;
244 uint8_t ip_proto = 0;
245
246 /* Basic validation followed by extracting attributes. */
247 if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE)
248 return 0;
249
250 len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
251 if (len < 0) {
252 zlog_err(
253 "%s: Message received from netlink is of a broken size: %d %zu",
254 __func__, h->nlmsg_len,
255 (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr)));
256 return -1;
257 }
258
259 frh = NLMSG_DATA(h);
260
261 if (frh->family != AF_INET && frh->family != AF_INET6) {
262 if (frh->family == RTNL_FAMILY_IPMR
263 || frh->family == RTNL_FAMILY_IP6MR) {
264 if (IS_ZEBRA_DEBUG_KERNEL)
265 zlog_debug(
266 "Received rule netlink that we are ignoring for family %u, rule change: %u",
267 frh->family, h->nlmsg_type);
268 return 0;
269 }
270 flog_warn(
271 EC_ZEBRA_NETLINK_INVALID_AF,
272 "Invalid address family: %u received from kernel rule change: %u",
273 frh->family, h->nlmsg_type);
274 return 0;
275 }
276 if (frh->action != FR_ACT_TO_TBL)
277 return 0;
278
279 memset(tb, 0, sizeof(tb));
280 netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
281
282 if (tb[FRA_PRIORITY])
283 rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]);
284
285 if (tb[FRA_SRC]) {
286 if (frh->family == AF_INET)
287 memcpy(&rule.rule.filter.src_ip.u.prefix4,
288 RTA_DATA(tb[FRA_SRC]), 4);
289 else
290 memcpy(&rule.rule.filter.src_ip.u.prefix6,
291 RTA_DATA(tb[FRA_SRC]), 16);
292 rule.rule.filter.src_ip.prefixlen = frh->src_len;
293 rule.rule.filter.src_ip.family = frh->family;
294 rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP;
295 }
296
297 if (tb[FRA_DST]) {
298 if (frh->family == AF_INET)
299 memcpy(&rule.rule.filter.dst_ip.u.prefix4,
300 RTA_DATA(tb[FRA_DST]), 4);
301 else
302 memcpy(&rule.rule.filter.dst_ip.u.prefix6,
303 RTA_DATA(tb[FRA_DST]), 16);
304 rule.rule.filter.dst_ip.prefixlen = frh->dst_len;
305 rule.rule.filter.dst_ip.family = frh->family;
306 rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP;
307 }
308
309 if (tb[FRA_TABLE])
310 rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]);
311 else
312 rule.rule.action.table = frh->table;
313
314 /* TBD: We don't care about rules not specifying an IIF. */
315 if (tb[FRA_IFNAME] == NULL)
316 return 0;
317
318 if (tb[FRA_PROTOCOL])
319 proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]);
320
321 if (tb[FRA_IP_PROTO])
322 ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]);
323
324 ifname = (char *)RTA_DATA(tb[FRA_IFNAME]);
325 strlcpy(rule.ifname, ifname, sizeof(rule.ifname));
326
327 if (h->nlmsg_type == RTM_NEWRULE) {
328 /*
329 * If we see a rule at startup we created, delete it now.
330 * It should have been flushed on a previous shutdown.
331 */
332 if (startup && proto == RTPROT_ZEBRA) {
333 enum zebra_dplane_result ret;
334
335 ret = dplane_pbr_rule_delete(&rule);
336
337 zlog_debug(
338 "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
339 __func__,
340 ((ret == ZEBRA_DPLANE_REQUEST_FAILURE)
341 ? "Failed to remove"
342 : "Removed"),
343 nl_family_to_str(frh->family), rule.ifname,
344 rule.rule.priority, &rule.rule.filter.src_ip,
345 &rule.rule.filter.dst_ip,
346 rule.rule.action.table, ip_proto);
347 }
348
349 /* TBD */
350 return 0;
351 }
352
353 zns = zebra_ns_lookup(ns_id);
354
355 /* If we don't know the interface, we don't care. */
356 if (!if_lookup_by_name_per_ns(zns, ifname))
357 return 0;
358
359 if (IS_ZEBRA_DEBUG_KERNEL)
360 zlog_debug(
361 "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
362 nl_msg_type_to_str(h->nlmsg_type),
363 nl_family_to_str(frh->family), rule.ifname,
364 rule.rule.priority, &rule.rule.filter.src_ip,
365 &rule.rule.filter.dst_ip, rule.rule.action.table,
366 ip_proto);
367
368 return kernel_pbr_rule_del(&rule);
369 }
370
371 /*
372 * Request rules from the kernel
373 */
374 static int netlink_request_rules(struct zebra_ns *zns, int family, int type)
375 {
376 struct {
377 struct nlmsghdr n;
378 struct fib_rule_hdr frh;
379 char buf[NL_PKT_BUF_SIZE];
380 } req;
381
382 memset(&req, 0, sizeof(req));
383 req.n.nlmsg_type = type;
384 req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
385 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
386 req.frh.family = family;
387
388 return netlink_request(&zns->netlink_cmd, &req);
389 }
390
391 /*
392 * Get to know existing PBR rules in the kernel - typically called at startup.
393 */
394 int netlink_rules_read(struct zebra_ns *zns)
395 {
396 int ret;
397 struct zebra_dplane_info dp_info;
398
399 zebra_dplane_info_from_zns(&dp_info, zns, true);
400
401 ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE);
402 if (ret < 0)
403 return ret;
404
405 ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
406 &dp_info, 0, 1);
407 if (ret < 0)
408 return ret;
409
410 ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE);
411 if (ret < 0)
412 return ret;
413
414 ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
415 &dp_info, 0, 1);
416 return ret;
417 }
418
419 #endif /* HAVE_NETLINK */