]>
Commit | Line | Data |
---|---|---|
942bf97b | 1 | /* |
2 | * Zebra Policy Based Routing (PBR) interaction with the kernel using | |
3 | * netlink. | |
4 | * Copyright (C) 2018 Cumulus Networks, Inc. | |
5 | * | |
6 | * This file is part of FRR. | |
7 | * | |
8 | * FRR is free software; you can redistribute it and/or modify it | |
9 | * under the terms of the GNU General Public License as published by the | |
10 | * Free Software Foundation; either version 2, or (at your option) any | |
11 | * later version. | |
12 | * | |
13 | * FRR is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with FRR; see the file COPYING. If not, write to the Free | |
20 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
21 | * 02111-1307, USA. | |
22 | */ | |
23 | ||
24 | #include <zebra.h> | |
25 | ||
26 | #ifdef HAVE_NETLINK | |
27 | ||
28 | #include "if.h" | |
29 | #include "prefix.h" | |
30 | #include "vrf.h" | |
31 | ||
32 | #include <linux/fib_rules.h> | |
33 | #include "zebra/zserv.h" | |
34 | #include "zebra/zebra_ns.h" | |
35 | #include "zebra/zebra_vrf.h" | |
36 | #include "zebra/rt.h" | |
37 | #include "zebra/interface.h" | |
38 | #include "zebra/debug.h" | |
39 | #include "zebra/rtadv.h" | |
40 | #include "zebra/kernel_netlink.h" | |
41 | #include "zebra/rule_netlink.h" | |
42 | #include "zebra/zebra_pbr.h" | |
9df414fe | 43 | #include "zebra/zebra_errors.h" |
f62e5480 | 44 | #include "zebra/zebra_dplane.h" |
1239b60c | 45 | #include "zebra/zebra_trace.h" |
942bf97b | 46 | |
47 | /* definitions */ | |
48 | ||
49 | /* static function declarations */ | |
50 | ||
51 | /* Private functions */ | |
52 | ||
99e387d5 JU |
53 | |
54 | /* | |
55 | * netlink_rule_msg_encode | |
56 | * | |
57 | * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen. | |
58 | * | |
312a6bee JU |
59 | * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer |
60 | * or the number of bytes written to buf. | |
942bf97b | 61 | */ |
8096bd72 DS |
62 | static ssize_t netlink_rule_msg_encode( |
63 | int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm, | |
64 | uint32_t priority, uint32_t table, const struct prefix *src_ip, | |
65 | const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield, | |
66 | uint8_t ip_protocol, void *buf, size_t buflen) | |
942bf97b | 67 | { |
f3dbec60 | 68 | uint8_t protocol = RTPROT_ZEBRA; |
942bf97b | 69 | int family; |
70 | int bytelen; | |
71 | struct { | |
72 | struct nlmsghdr n; | |
73 | struct fib_rule_hdr frh; | |
99e387d5 JU |
74 | char buf[]; |
75 | } *req = buf; | |
76 | ||
58a1d249 | 77 | const char *ifname = dplane_ctx_rule_get_ifname(ctx); |
942bf97b | 78 | |
67e3369e JU |
79 | if (buflen < sizeof(*req)) |
80 | return 0; | |
99e387d5 | 81 | memset(req, 0, sizeof(*req)); |
e36ea40d SW |
82 | |
83 | /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */ | |
84 | if (PREFIX_FAMILY(src_ip)) | |
85 | family = PREFIX_FAMILY(src_ip); | |
86 | else if (PREFIX_FAMILY(dst_ip)) | |
87 | family = PREFIX_FAMILY(dst_ip); | |
88 | else | |
89 | family = AF_INET; | |
90 | ||
942bf97b | 91 | bytelen = (family == AF_INET ? 4 : 16); |
92 | ||
99e387d5 JU |
93 | req->n.nlmsg_type = cmd; |
94 | req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); | |
95 | req->n.nlmsg_flags = NLM_F_REQUEST; | |
942bf97b | 96 | |
99e387d5 JU |
97 | req->frh.family = family; |
98 | req->frh.action = FR_ACT_TO_TBL; | |
942bf97b | 99 | |
0be6e7d7 JU |
100 | if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol, |
101 | sizeof(protocol))) | |
102 | return 0; | |
f3dbec60 | 103 | |
942bf97b | 104 | /* rule's pref # */ |
0be6e7d7 JU |
105 | if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority)) |
106 | return 0; | |
942bf97b | 107 | |
108 | /* interface on which applied */ | |
0be6e7d7 JU |
109 | if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname, |
110 | strlen(ifname) + 1)) | |
111 | return 0; | |
942bf97b | 112 | |
113 | /* source IP, if specified */ | |
f62e5480 | 114 | if (filter_bm & PBR_FILTER_SRC_IP) { |
99e387d5 | 115 | req->frh.src_len = src_ip->prefixlen; |
0be6e7d7 JU |
116 | if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix, |
117 | bytelen)) | |
118 | return 0; | |
942bf97b | 119 | } |
99e387d5 | 120 | |
942bf97b | 121 | /* destination IP, if specified */ |
f62e5480 | 122 | if (filter_bm & PBR_FILTER_DST_IP) { |
99e387d5 | 123 | req->frh.dst_len = dst_ip->prefixlen; |
0be6e7d7 JU |
124 | if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix, |
125 | bytelen)) | |
126 | return 0; | |
942bf97b | 127 | } |
128 | ||
2bee7aae | 129 | /* fwmark, if specified */ |
0be6e7d7 JU |
130 | if (filter_bm & PBR_FILTER_FWMARK) { |
131 | if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark)) | |
132 | return 0; | |
133 | } | |
2bee7aae | 134 | |
01f23aff WC |
135 | /* dsfield, if specified */ |
136 | if (filter_bm & PBR_FILTER_DSFIELD) | |
137 | req->frh.tos = dsfield; | |
138 | ||
8096bd72 DS |
139 | /* protocol to match on */ |
140 | if (filter_bm & PBR_FILTER_IP_PROTOCOL) | |
141 | nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol); | |
142 | ||
942bf97b | 143 | /* Route table to use to forward, if filter criteria matches. */ |
f62e5480 | 144 | if (table < 256) |
99e387d5 | 145 | req->frh.table = table; |
942bf97b | 146 | else { |
99e387d5 | 147 | req->frh.table = RT_TABLE_UNSPEC; |
0be6e7d7 JU |
148 | if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table)) |
149 | return 0; | |
942bf97b | 150 | } |
151 | ||
152 | if (IS_ZEBRA_DEBUG_KERNEL) | |
fd71d73e | 153 | zlog_debug( |
2dbe669b | 154 | "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u", |
fd71d73e | 155 | nl_msg_type_to_str(cmd), nl_family_to_str(family), |
2dbe669b | 156 | ifname, priority, fwmark, src_ip, dst_ip, table); |
942bf97b | 157 | |
99e387d5 | 158 | return NLMSG_ALIGN(req->n.nlmsg_len); |
942bf97b | 159 | } |
160 | ||
67e3369e JU |
161 | static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, |
162 | size_t buflen) | |
99e387d5 | 163 | { |
67e3369e JU |
164 | int cmd = RTM_NEWRULE; |
165 | ||
166 | if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE) | |
167 | cmd = RTM_DELRULE; | |
168 | ||
169 | return netlink_rule_msg_encode( | |
170 | cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx), | |
171 | dplane_ctx_rule_get_priority(ctx), | |
172 | dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx), | |
173 | dplane_ctx_rule_get_dst_ip(ctx), | |
174 | dplane_ctx_rule_get_fwmark(ctx), | |
8ccbc778 DS |
175 | dplane_ctx_rule_get_dsfield(ctx), |
176 | dplane_ctx_rule_get_ipproto(ctx), buf, buflen); | |
67e3369e | 177 | } |
99e387d5 | 178 | |
67e3369e JU |
179 | static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx, |
180 | void *buf, size_t buflen) | |
181 | { | |
182 | return netlink_rule_msg_encode( | |
183 | RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx), | |
184 | dplane_ctx_rule_get_old_priority(ctx), | |
185 | dplane_ctx_rule_get_old_table(ctx), | |
186 | dplane_ctx_rule_get_old_src_ip(ctx), | |
187 | dplane_ctx_rule_get_old_dst_ip(ctx), | |
188 | dplane_ctx_rule_get_old_fwmark(ctx), | |
8ccbc778 DS |
189 | dplane_ctx_rule_get_old_dsfield(ctx), |
190 | dplane_ctx_rule_get_old_ipproto(ctx), buf, buflen); | |
99e387d5 | 191 | } |
67e3369e | 192 | |
942bf97b | 193 | /* Public functions */ |
942bf97b | 194 | |
67e3369e JU |
195 | enum netlink_msg_status |
196 | netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) | |
942bf97b | 197 | { |
f62e5480 | 198 | enum dplane_op_e op; |
67e3369e | 199 | enum netlink_msg_status ret; |
f62e5480 JU |
200 | |
201 | op = dplane_ctx_get_op(ctx); | |
67e3369e JU |
202 | if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE |
203 | || op == DPLANE_OP_RULE_DELETE)) { | |
f62e5480 JU |
204 | flog_err( |
205 | EC_ZEBRA_PBR_RULE_UPDATE, | |
206 | "Context received for kernel rule update with incorrect OP code (%u)", | |
207 | op); | |
67e3369e | 208 | return FRR_NETLINK_ERROR; |
f62e5480 | 209 | } |
3ae327cb | 210 | |
67e3369e | 211 | ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false); |
3ae327cb SW |
212 | |
213 | /** | |
214 | * Delete the old one. | |
215 | * | |
216 | * Don't care about this result right? | |
217 | */ | |
f62e5480 | 218 | if (op == DPLANE_OP_RULE_UPDATE) |
67e3369e JU |
219 | netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder, |
220 | true); | |
221 | ||
222 | return ret; | |
3ae327cb SW |
223 | } |
224 | ||
942bf97b | 225 | /* |
226 | * Handle netlink notification informing a rule add or delete. | |
227 | * Handling of an ADD is TBD. | |
228 | * DELs are notified up, if other attributes indicate it may be a | |
229 | * notification of interest. The expectation is that if this corresponds | |
230 | * to a PBR rule added by FRR, it will be readded. | |
ab35be75 SW |
231 | * |
232 | * If startup and we see a rule we created, delete it as its leftover | |
233 | * from a previous instance and should have been removed on shutdown. | |
234 | * | |
942bf97b | 235 | */ |
2414abd3 | 236 | int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) |
942bf97b | 237 | { |
238 | struct zebra_ns *zns; | |
239 | struct fib_rule_hdr *frh; | |
240 | struct rtattr *tb[FRA_MAX + 1]; | |
241 | int len; | |
242 | char *ifname; | |
cc42104c | 243 | struct zebra_pbr_rule rule = {}; |
ab35be75 | 244 | uint8_t proto = 0; |
8096bd72 | 245 | uint8_t ip_proto = 0; |
942bf97b | 246 | |
1239b60c DS |
247 | frrtrace(3, frr_zebra, netlink_rule_change, h, ns_id, startup); |
248 | ||
942bf97b | 249 | /* Basic validation followed by extracting attributes. */ |
250 | if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE) | |
251 | return 0; | |
252 | ||
942bf97b | 253 | len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); |
9bdf8618 | 254 | if (len < 0) { |
15569c58 DA |
255 | zlog_err( |
256 | "%s: Message received from netlink is of a broken size: %d %zu", | |
257 | __func__, h->nlmsg_len, | |
258 | (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr))); | |
942bf97b | 259 | return -1; |
9bdf8618 | 260 | } |
942bf97b | 261 | |
262 | frh = NLMSG_DATA(h); | |
dfbe3a2b | 263 | |
8a1b681c | 264 | if (frh->family != AF_INET && frh->family != AF_INET6) { |
dfbe3a2b DS |
265 | if (frh->family == RTNL_FAMILY_IPMR |
266 | || frh->family == RTNL_FAMILY_IP6MR) { | |
267 | if (IS_ZEBRA_DEBUG_KERNEL) | |
268 | zlog_debug( | |
269 | "Received rule netlink that we are ignoring for family %u, rule change: %u", | |
270 | frh->family, h->nlmsg_type); | |
271 | return 0; | |
272 | } | |
9df414fe | 273 | flog_warn( |
e914ccbe | 274 | EC_ZEBRA_NETLINK_INVALID_AF, |
81227874 | 275 | "Invalid address family: %u received from kernel rule change: %u", |
8a1b681c | 276 | frh->family, h->nlmsg_type); |
942bf97b | 277 | return 0; |
8a1b681c | 278 | } |
942bf97b | 279 | if (frh->action != FR_ACT_TO_TBL) |
280 | return 0; | |
281 | ||
282 | memset(tb, 0, sizeof(tb)); | |
283 | netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); | |
284 | ||
942bf97b | 285 | if (tb[FRA_PRIORITY]) |
5dd0722d | 286 | rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]); |
942bf97b | 287 | |
288 | if (tb[FRA_SRC]) { | |
289 | if (frh->family == AF_INET) | |
5dd0722d | 290 | memcpy(&rule.rule.filter.src_ip.u.prefix4, |
942bf97b | 291 | RTA_DATA(tb[FRA_SRC]), 4); |
292 | else | |
5dd0722d | 293 | memcpy(&rule.rule.filter.src_ip.u.prefix6, |
942bf97b | 294 | RTA_DATA(tb[FRA_SRC]), 16); |
5dd0722d | 295 | rule.rule.filter.src_ip.prefixlen = frh->src_len; |
b6d34c26 | 296 | rule.rule.filter.src_ip.family = frh->family; |
5dd0722d | 297 | rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP; |
942bf97b | 298 | } |
299 | ||
300 | if (tb[FRA_DST]) { | |
301 | if (frh->family == AF_INET) | |
5dd0722d | 302 | memcpy(&rule.rule.filter.dst_ip.u.prefix4, |
942bf97b | 303 | RTA_DATA(tb[FRA_DST]), 4); |
304 | else | |
5dd0722d | 305 | memcpy(&rule.rule.filter.dst_ip.u.prefix6, |
942bf97b | 306 | RTA_DATA(tb[FRA_DST]), 16); |
5dd0722d | 307 | rule.rule.filter.dst_ip.prefixlen = frh->dst_len; |
b6d34c26 | 308 | rule.rule.filter.dst_ip.family = frh->family; |
5dd0722d | 309 | rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP; |
942bf97b | 310 | } |
311 | ||
312 | if (tb[FRA_TABLE]) | |
5dd0722d | 313 | rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]); |
942bf97b | 314 | else |
5dd0722d | 315 | rule.rule.action.table = frh->table; |
942bf97b | 316 | |
ab35be75 SW |
317 | /* TBD: We don't care about rules not specifying an IIF. */ |
318 | if (tb[FRA_IFNAME] == NULL) | |
319 | return 0; | |
320 | ||
321 | if (tb[FRA_PROTOCOL]) | |
322 | proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]); | |
323 | ||
8096bd72 DS |
324 | if (tb[FRA_IP_PROTO]) |
325 | ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]); | |
326 | ||
ab35be75 SW |
327 | ifname = (char *)RTA_DATA(tb[FRA_IFNAME]); |
328 | strlcpy(rule.ifname, ifname, sizeof(rule.ifname)); | |
329 | ||
330 | if (h->nlmsg_type == RTM_NEWRULE) { | |
331 | /* | |
332 | * If we see a rule at startup we created, delete it now. | |
333 | * It should have been flushed on a previous shutdown. | |
334 | */ | |
335 | if (startup && proto == RTPROT_ZEBRA) { | |
f62e5480 | 336 | enum zebra_dplane_result ret; |
ab35be75 | 337 | |
f62e5480 | 338 | ret = dplane_pbr_rule_delete(&rule); |
ab35be75 SW |
339 | |
340 | zlog_debug( | |
8096bd72 | 341 | "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", |
ab35be75 | 342 | __func__, |
f62e5480 JU |
343 | ((ret == ZEBRA_DPLANE_REQUEST_FAILURE) |
344 | ? "Failed to remove" | |
345 | : "Removed"), | |
ab35be75 | 346 | nl_family_to_str(frh->family), rule.ifname, |
2dbe669b DA |
347 | rule.rule.priority, &rule.rule.filter.src_ip, |
348 | &rule.rule.filter.dst_ip, | |
8096bd72 | 349 | rule.rule.action.table, ip_proto); |
ab35be75 SW |
350 | } |
351 | ||
352 | /* TBD */ | |
353 | return 0; | |
354 | } | |
355 | ||
356 | zns = zebra_ns_lookup(ns_id); | |
357 | ||
358 | /* If we don't know the interface, we don't care. */ | |
359 | if (!if_lookup_by_name_per_ns(zns, ifname)) | |
360 | return 0; | |
361 | ||
942bf97b | 362 | if (IS_ZEBRA_DEBUG_KERNEL) |
fd71d73e | 363 | zlog_debug( |
8096bd72 | 364 | "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", |
fd71d73e | 365 | nl_msg_type_to_str(h->nlmsg_type), |
b19d55d0 | 366 | nl_family_to_str(frh->family), rule.ifname, |
2dbe669b | 367 | rule.rule.priority, &rule.rule.filter.src_ip, |
8096bd72 DS |
368 | &rule.rule.filter.dst_ip, rule.rule.action.table, |
369 | ip_proto); | |
fd71d73e | 370 | |
a0321978 | 371 | return kernel_pbr_rule_del(&rule); |
942bf97b | 372 | } |
373 | ||
ab35be75 SW |
374 | /* |
375 | * Request rules from the kernel | |
376 | */ | |
377 | static int netlink_request_rules(struct zebra_ns *zns, int family, int type) | |
378 | { | |
379 | struct { | |
380 | struct nlmsghdr n; | |
381 | struct fib_rule_hdr frh; | |
382 | char buf[NL_PKT_BUF_SIZE]; | |
383 | } req; | |
384 | ||
385 | memset(&req, 0, sizeof(req)); | |
386 | req.n.nlmsg_type = type; | |
387 | req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; | |
388 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); | |
389 | req.frh.family = family; | |
390 | ||
fd3f8e52 | 391 | return netlink_request(&zns->netlink_cmd, &req); |
ab35be75 SW |
392 | } |
393 | ||
942bf97b | 394 | /* |
395 | * Get to know existing PBR rules in the kernel - typically called at startup. | |
942bf97b | 396 | */ |
397 | int netlink_rules_read(struct zebra_ns *zns) | |
398 | { | |
ab35be75 SW |
399 | int ret; |
400 | struct zebra_dplane_info dp_info; | |
401 | ||
402 | zebra_dplane_info_from_zns(&dp_info, zns, true); | |
403 | ||
404 | ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE); | |
405 | if (ret < 0) | |
406 | return ret; | |
407 | ||
408 | ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, | |
9bfadae8 | 409 | &dp_info, 0, true); |
ab35be75 SW |
410 | if (ret < 0) |
411 | return ret; | |
412 | ||
413 | ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE); | |
414 | if (ret < 0) | |
415 | return ret; | |
416 | ||
417 | ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, | |
9bfadae8 | 418 | &dp_info, 0, true); |
ab35be75 | 419 | return ret; |
942bf97b | 420 | } |
421 | ||
422 | #endif /* HAVE_NETLINK */ |