]>
Commit | Line | Data |
---|---|---|
acddc0ed | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
942bf97b | 2 | /* |
3 | * Zebra Policy Based Routing (PBR) interaction with the kernel using | |
4 | * netlink. | |
5 | * Copyright (C) 2018 Cumulus Networks, Inc. | |
942bf97b | 6 | */ |
7 | ||
8 | #include <zebra.h> | |
9 | ||
10 | #ifdef HAVE_NETLINK | |
11 | ||
12 | #include "if.h" | |
13 | #include "prefix.h" | |
14 | #include "vrf.h" | |
15 | ||
16 | #include <linux/fib_rules.h> | |
17 | #include "zebra/zserv.h" | |
18 | #include "zebra/zebra_ns.h" | |
19 | #include "zebra/zebra_vrf.h" | |
20 | #include "zebra/rt.h" | |
21 | #include "zebra/interface.h" | |
22 | #include "zebra/debug.h" | |
23 | #include "zebra/rtadv.h" | |
24 | #include "zebra/kernel_netlink.h" | |
25 | #include "zebra/rule_netlink.h" | |
26 | #include "zebra/zebra_pbr.h" | |
9df414fe | 27 | #include "zebra/zebra_errors.h" |
f62e5480 | 28 | #include "zebra/zebra_dplane.h" |
1239b60c | 29 | #include "zebra/zebra_trace.h" |
942bf97b | 30 | |
31 | /* definitions */ | |
32 | ||
33 | /* static function declarations */ | |
34 | ||
35 | /* Private functions */ | |
36 | ||
99e387d5 JU |
37 | |
38 | /* | |
39 | * netlink_rule_msg_encode | |
40 | * | |
41 | * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen. | |
42 | * | |
312a6bee JU |
43 | * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer |
44 | * or the number of bytes written to buf. | |
942bf97b | 45 | */ |
8096bd72 DS |
46 | static ssize_t netlink_rule_msg_encode( |
47 | int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm, | |
48 | uint32_t priority, uint32_t table, const struct prefix *src_ip, | |
49 | const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield, | |
50 | uint8_t ip_protocol, void *buf, size_t buflen) | |
942bf97b | 51 | { |
f3dbec60 | 52 | uint8_t protocol = RTPROT_ZEBRA; |
942bf97b | 53 | int family; |
54 | int bytelen; | |
55 | struct { | |
56 | struct nlmsghdr n; | |
57 | struct fib_rule_hdr frh; | |
99e387d5 JU |
58 | char buf[]; |
59 | } *req = buf; | |
60 | ||
58a1d249 | 61 | const char *ifname = dplane_ctx_rule_get_ifname(ctx); |
942bf97b | 62 | |
67e3369e JU |
63 | if (buflen < sizeof(*req)) |
64 | return 0; | |
99e387d5 | 65 | memset(req, 0, sizeof(*req)); |
e36ea40d SW |
66 | |
67 | /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */ | |
68 | if (PREFIX_FAMILY(src_ip)) | |
69 | family = PREFIX_FAMILY(src_ip); | |
70 | else if (PREFIX_FAMILY(dst_ip)) | |
71 | family = PREFIX_FAMILY(dst_ip); | |
72 | else | |
73 | family = AF_INET; | |
74 | ||
942bf97b | 75 | bytelen = (family == AF_INET ? 4 : 16); |
76 | ||
99e387d5 JU |
77 | req->n.nlmsg_type = cmd; |
78 | req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); | |
79 | req->n.nlmsg_flags = NLM_F_REQUEST; | |
942bf97b | 80 | |
99e387d5 JU |
81 | req->frh.family = family; |
82 | req->frh.action = FR_ACT_TO_TBL; | |
942bf97b | 83 | |
0be6e7d7 JU |
84 | if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol, |
85 | sizeof(protocol))) | |
86 | return 0; | |
f3dbec60 | 87 | |
942bf97b | 88 | /* rule's pref # */ |
0be6e7d7 JU |
89 | if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority)) |
90 | return 0; | |
942bf97b | 91 | |
92 | /* interface on which applied */ | |
0be6e7d7 JU |
93 | if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname, |
94 | strlen(ifname) + 1)) | |
95 | return 0; | |
942bf97b | 96 | |
97 | /* source IP, if specified */ | |
f62e5480 | 98 | if (filter_bm & PBR_FILTER_SRC_IP) { |
99e387d5 | 99 | req->frh.src_len = src_ip->prefixlen; |
0be6e7d7 JU |
100 | if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix, |
101 | bytelen)) | |
102 | return 0; | |
942bf97b | 103 | } |
99e387d5 | 104 | |
942bf97b | 105 | /* destination IP, if specified */ |
f62e5480 | 106 | if (filter_bm & PBR_FILTER_DST_IP) { |
99e387d5 | 107 | req->frh.dst_len = dst_ip->prefixlen; |
0be6e7d7 JU |
108 | if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix, |
109 | bytelen)) | |
110 | return 0; | |
942bf97b | 111 | } |
112 | ||
2bee7aae | 113 | /* fwmark, if specified */ |
0be6e7d7 JU |
114 | if (filter_bm & PBR_FILTER_FWMARK) { |
115 | if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark)) | |
116 | return 0; | |
117 | } | |
2bee7aae | 118 | |
01f23aff WC |
119 | /* dsfield, if specified */ |
120 | if (filter_bm & PBR_FILTER_DSFIELD) | |
121 | req->frh.tos = dsfield; | |
122 | ||
8096bd72 DS |
123 | /* protocol to match on */ |
124 | if (filter_bm & PBR_FILTER_IP_PROTOCOL) | |
125 | nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol); | |
126 | ||
942bf97b | 127 | /* Route table to use to forward, if filter criteria matches. */ |
f62e5480 | 128 | if (table < 256) |
99e387d5 | 129 | req->frh.table = table; |
942bf97b | 130 | else { |
99e387d5 | 131 | req->frh.table = RT_TABLE_UNSPEC; |
0be6e7d7 JU |
132 | if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table)) |
133 | return 0; | |
942bf97b | 134 | } |
135 | ||
136 | if (IS_ZEBRA_DEBUG_KERNEL) | |
fd71d73e | 137 | zlog_debug( |
2dbe669b | 138 | "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u", |
fd71d73e | 139 | nl_msg_type_to_str(cmd), nl_family_to_str(family), |
2dbe669b | 140 | ifname, priority, fwmark, src_ip, dst_ip, table); |
942bf97b | 141 | |
99e387d5 | 142 | return NLMSG_ALIGN(req->n.nlmsg_len); |
942bf97b | 143 | } |
144 | ||
67e3369e JU |
145 | static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, |
146 | size_t buflen) | |
99e387d5 | 147 | { |
67e3369e JU |
148 | int cmd = RTM_NEWRULE; |
149 | ||
150 | if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE) | |
151 | cmd = RTM_DELRULE; | |
152 | ||
153 | return netlink_rule_msg_encode( | |
154 | cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx), | |
155 | dplane_ctx_rule_get_priority(ctx), | |
156 | dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx), | |
157 | dplane_ctx_rule_get_dst_ip(ctx), | |
158 | dplane_ctx_rule_get_fwmark(ctx), | |
8ccbc778 DS |
159 | dplane_ctx_rule_get_dsfield(ctx), |
160 | dplane_ctx_rule_get_ipproto(ctx), buf, buflen); | |
67e3369e | 161 | } |
99e387d5 | 162 | |
67e3369e JU |
163 | static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx, |
164 | void *buf, size_t buflen) | |
165 | { | |
166 | return netlink_rule_msg_encode( | |
167 | RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx), | |
168 | dplane_ctx_rule_get_old_priority(ctx), | |
169 | dplane_ctx_rule_get_old_table(ctx), | |
170 | dplane_ctx_rule_get_old_src_ip(ctx), | |
171 | dplane_ctx_rule_get_old_dst_ip(ctx), | |
172 | dplane_ctx_rule_get_old_fwmark(ctx), | |
8ccbc778 DS |
173 | dplane_ctx_rule_get_old_dsfield(ctx), |
174 | dplane_ctx_rule_get_old_ipproto(ctx), buf, buflen); | |
99e387d5 | 175 | } |
67e3369e | 176 | |
942bf97b | 177 | /* Public functions */ |
942bf97b | 178 | |
67e3369e JU |
179 | enum netlink_msg_status |
180 | netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) | |
942bf97b | 181 | { |
f62e5480 | 182 | enum dplane_op_e op; |
67e3369e | 183 | enum netlink_msg_status ret; |
f62e5480 JU |
184 | |
185 | op = dplane_ctx_get_op(ctx); | |
67e3369e JU |
186 | if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE |
187 | || op == DPLANE_OP_RULE_DELETE)) { | |
f62e5480 JU |
188 | flog_err( |
189 | EC_ZEBRA_PBR_RULE_UPDATE, | |
190 | "Context received for kernel rule update with incorrect OP code (%u)", | |
191 | op); | |
67e3369e | 192 | return FRR_NETLINK_ERROR; |
f62e5480 | 193 | } |
3ae327cb | 194 | |
67e3369e | 195 | ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false); |
3ae327cb SW |
196 | |
197 | /** | |
198 | * Delete the old one. | |
199 | * | |
200 | * Don't care about this result right? | |
201 | */ | |
f62e5480 | 202 | if (op == DPLANE_OP_RULE_UPDATE) |
67e3369e JU |
203 | netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder, |
204 | true); | |
205 | ||
206 | return ret; | |
3ae327cb SW |
207 | } |
208 | ||
942bf97b | 209 | /* |
210 | * Handle netlink notification informing a rule add or delete. | |
211 | * Handling of an ADD is TBD. | |
212 | * DELs are notified up, if other attributes indicate it may be a | |
213 | * notification of interest. The expectation is that if this corresponds | |
214 | * to a PBR rule added by FRR, it will be readded. | |
ab35be75 SW |
215 | * |
216 | * If startup and we see a rule we created, delete it as its leftover | |
217 | * from a previous instance and should have been removed on shutdown. | |
218 | * | |
942bf97b | 219 | */ |
2414abd3 | 220 | int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) |
942bf97b | 221 | { |
222 | struct zebra_ns *zns; | |
223 | struct fib_rule_hdr *frh; | |
224 | struct rtattr *tb[FRA_MAX + 1]; | |
225 | int len; | |
226 | char *ifname; | |
cc42104c | 227 | struct zebra_pbr_rule rule = {}; |
ab35be75 | 228 | uint8_t proto = 0; |
8096bd72 | 229 | uint8_t ip_proto = 0; |
942bf97b | 230 | |
1239b60c DS |
231 | frrtrace(3, frr_zebra, netlink_rule_change, h, ns_id, startup); |
232 | ||
942bf97b | 233 | /* Basic validation followed by extracting attributes. */ |
234 | if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE) | |
235 | return 0; | |
236 | ||
942bf97b | 237 | len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); |
9bdf8618 | 238 | if (len < 0) { |
15569c58 DA |
239 | zlog_err( |
240 | "%s: Message received from netlink is of a broken size: %d %zu", | |
241 | __func__, h->nlmsg_len, | |
242 | (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr))); | |
942bf97b | 243 | return -1; |
9bdf8618 | 244 | } |
942bf97b | 245 | |
246 | frh = NLMSG_DATA(h); | |
dfbe3a2b | 247 | |
8a1b681c | 248 | if (frh->family != AF_INET && frh->family != AF_INET6) { |
dfbe3a2b DS |
249 | if (frh->family == RTNL_FAMILY_IPMR |
250 | || frh->family == RTNL_FAMILY_IP6MR) { | |
251 | if (IS_ZEBRA_DEBUG_KERNEL) | |
252 | zlog_debug( | |
253 | "Received rule netlink that we are ignoring for family %u, rule change: %u", | |
254 | frh->family, h->nlmsg_type); | |
255 | return 0; | |
256 | } | |
9df414fe | 257 | flog_warn( |
e914ccbe | 258 | EC_ZEBRA_NETLINK_INVALID_AF, |
81227874 | 259 | "Invalid address family: %u received from kernel rule change: %u", |
8a1b681c | 260 | frh->family, h->nlmsg_type); |
942bf97b | 261 | return 0; |
8a1b681c | 262 | } |
942bf97b | 263 | if (frh->action != FR_ACT_TO_TBL) |
264 | return 0; | |
265 | ||
266 | memset(tb, 0, sizeof(tb)); | |
267 | netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); | |
268 | ||
942bf97b | 269 | if (tb[FRA_PRIORITY]) |
5dd0722d | 270 | rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]); |
942bf97b | 271 | |
272 | if (tb[FRA_SRC]) { | |
273 | if (frh->family == AF_INET) | |
5dd0722d | 274 | memcpy(&rule.rule.filter.src_ip.u.prefix4, |
942bf97b | 275 | RTA_DATA(tb[FRA_SRC]), 4); |
276 | else | |
5dd0722d | 277 | memcpy(&rule.rule.filter.src_ip.u.prefix6, |
942bf97b | 278 | RTA_DATA(tb[FRA_SRC]), 16); |
5dd0722d | 279 | rule.rule.filter.src_ip.prefixlen = frh->src_len; |
b6d34c26 | 280 | rule.rule.filter.src_ip.family = frh->family; |
5dd0722d | 281 | rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP; |
942bf97b | 282 | } |
283 | ||
284 | if (tb[FRA_DST]) { | |
285 | if (frh->family == AF_INET) | |
5dd0722d | 286 | memcpy(&rule.rule.filter.dst_ip.u.prefix4, |
942bf97b | 287 | RTA_DATA(tb[FRA_DST]), 4); |
288 | else | |
5dd0722d | 289 | memcpy(&rule.rule.filter.dst_ip.u.prefix6, |
942bf97b | 290 | RTA_DATA(tb[FRA_DST]), 16); |
5dd0722d | 291 | rule.rule.filter.dst_ip.prefixlen = frh->dst_len; |
b6d34c26 | 292 | rule.rule.filter.dst_ip.family = frh->family; |
5dd0722d | 293 | rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP; |
942bf97b | 294 | } |
295 | ||
296 | if (tb[FRA_TABLE]) | |
5dd0722d | 297 | rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]); |
942bf97b | 298 | else |
5dd0722d | 299 | rule.rule.action.table = frh->table; |
942bf97b | 300 | |
ab35be75 SW |
301 | /* TBD: We don't care about rules not specifying an IIF. */ |
302 | if (tb[FRA_IFNAME] == NULL) | |
303 | return 0; | |
304 | ||
305 | if (tb[FRA_PROTOCOL]) | |
306 | proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]); | |
307 | ||
8096bd72 DS |
308 | if (tb[FRA_IP_PROTO]) |
309 | ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]); | |
310 | ||
ab35be75 SW |
311 | ifname = (char *)RTA_DATA(tb[FRA_IFNAME]); |
312 | strlcpy(rule.ifname, ifname, sizeof(rule.ifname)); | |
313 | ||
314 | if (h->nlmsg_type == RTM_NEWRULE) { | |
315 | /* | |
316 | * If we see a rule at startup we created, delete it now. | |
317 | * It should have been flushed on a previous shutdown. | |
318 | */ | |
319 | if (startup && proto == RTPROT_ZEBRA) { | |
f62e5480 | 320 | enum zebra_dplane_result ret; |
ab35be75 | 321 | |
f62e5480 | 322 | ret = dplane_pbr_rule_delete(&rule); |
ab35be75 SW |
323 | |
324 | zlog_debug( | |
8096bd72 | 325 | "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", |
ab35be75 | 326 | __func__, |
f62e5480 JU |
327 | ((ret == ZEBRA_DPLANE_REQUEST_FAILURE) |
328 | ? "Failed to remove" | |
329 | : "Removed"), | |
ab35be75 | 330 | nl_family_to_str(frh->family), rule.ifname, |
2dbe669b DA |
331 | rule.rule.priority, &rule.rule.filter.src_ip, |
332 | &rule.rule.filter.dst_ip, | |
8096bd72 | 333 | rule.rule.action.table, ip_proto); |
ab35be75 SW |
334 | } |
335 | ||
336 | /* TBD */ | |
337 | return 0; | |
338 | } | |
339 | ||
340 | zns = zebra_ns_lookup(ns_id); | |
341 | ||
342 | /* If we don't know the interface, we don't care. */ | |
343 | if (!if_lookup_by_name_per_ns(zns, ifname)) | |
344 | return 0; | |
345 | ||
942bf97b | 346 | if (IS_ZEBRA_DEBUG_KERNEL) |
fd71d73e | 347 | zlog_debug( |
8096bd72 | 348 | "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", |
fd71d73e | 349 | nl_msg_type_to_str(h->nlmsg_type), |
b19d55d0 | 350 | nl_family_to_str(frh->family), rule.ifname, |
2dbe669b | 351 | rule.rule.priority, &rule.rule.filter.src_ip, |
8096bd72 DS |
352 | &rule.rule.filter.dst_ip, rule.rule.action.table, |
353 | ip_proto); | |
fd71d73e | 354 | |
a0321978 | 355 | return kernel_pbr_rule_del(&rule); |
942bf97b | 356 | } |
357 | ||
ab35be75 SW |
358 | /* |
359 | * Request rules from the kernel | |
360 | */ | |
361 | static int netlink_request_rules(struct zebra_ns *zns, int family, int type) | |
362 | { | |
363 | struct { | |
364 | struct nlmsghdr n; | |
365 | struct fib_rule_hdr frh; | |
366 | char buf[NL_PKT_BUF_SIZE]; | |
367 | } req; | |
368 | ||
369 | memset(&req, 0, sizeof(req)); | |
370 | req.n.nlmsg_type = type; | |
371 | req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; | |
372 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); | |
373 | req.frh.family = family; | |
374 | ||
fd3f8e52 | 375 | return netlink_request(&zns->netlink_cmd, &req); |
ab35be75 SW |
376 | } |
377 | ||
942bf97b | 378 | /* |
379 | * Get to know existing PBR rules in the kernel - typically called at startup. | |
942bf97b | 380 | */ |
381 | int netlink_rules_read(struct zebra_ns *zns) | |
382 | { | |
ab35be75 SW |
383 | int ret; |
384 | struct zebra_dplane_info dp_info; | |
385 | ||
386 | zebra_dplane_info_from_zns(&dp_info, zns, true); | |
387 | ||
388 | ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE); | |
389 | if (ret < 0) | |
390 | return ret; | |
391 | ||
392 | ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, | |
9bfadae8 | 393 | &dp_info, 0, true); |
ab35be75 SW |
394 | if (ret < 0) |
395 | return ret; | |
396 | ||
397 | ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE); | |
398 | if (ret < 0) | |
399 | return ret; | |
400 | ||
401 | ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, | |
9bfadae8 | 402 | &dp_info, 0, true); |
ab35be75 | 403 | return ret; |
942bf97b | 404 | } |
405 | ||
406 | #endif /* HAVE_NETLINK */ |