]>
Commit | Line | Data |
---|---|---|
09c434b8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
a3c90f7a PNA |
2 | #include <linux/kernel.h> |
3 | #include <linux/module.h> | |
4 | #include <linux/init.h> | |
5 | #include <linux/netlink.h> | |
6 | #include <linux/netfilter.h> | |
7 | #include <linux/workqueue.h> | |
8 | #include <linux/spinlock.h> | |
40d102cd | 9 | #include <linux/netfilter/nf_conntrack_common.h> |
a3c90f7a PNA |
10 | #include <linux/netfilter/nf_tables.h> |
11 | #include <net/ip.h> /* for ipv4 options. */ | |
12 | #include <net/netfilter/nf_tables.h> | |
13 | #include <net/netfilter/nf_tables_core.h> | |
14 | #include <net/netfilter/nf_conntrack_core.h> | |
40d102cd | 15 | #include <net/netfilter/nf_conntrack_extend.h> |
a3c90f7a PNA |
16 | #include <net/netfilter/nf_flow_table.h> |
17 | ||
/* Per-expression private data: the flowtable that offloaded flows are added to. */
struct nft_flow_offload {
	struct nft_flowtable	*flowtable;
};
21 | ||
5139c0c0 PNA |
22 | static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst) |
23 | { | |
24 | if (dst_xfrm(dst)) | |
25 | return FLOW_OFFLOAD_XMIT_XFRM; | |
26 | ||
27 | return FLOW_OFFLOAD_XMIT_NEIGH; | |
28 | } | |
29 | ||
30 | static void nft_default_forward_path(struct nf_flow_route *route, | |
31 | struct dst_entry *dst_cache, | |
32 | enum ip_conntrack_dir dir) | |
33 | { | |
c63a7cc4 | 34 | route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex; |
5139c0c0 PNA |
35 | route->tuple[dir].dst = dst_cache; |
36 | route->tuple[dir].xmit_type = nft_xmit_type(dst_cache); | |
37 | } | |
38 | ||
c63a7cc4 PNA |
39 | static int nft_dev_fill_forward_path(const struct nf_flow_route *route, |
40 | const struct dst_entry *dst_cache, | |
41 | const struct nf_conn *ct, | |
7a27f6ab | 42 | enum ip_conntrack_dir dir, u8 *ha, |
c63a7cc4 PNA |
43 | struct net_device_path_stack *stack) |
44 | { | |
45 | const void *daddr = &ct->tuplehash[!dir].tuple.src.u3; | |
46 | struct net_device *dev = dst_cache->dev; | |
c63a7cc4 PNA |
47 | struct neighbour *n; |
48 | u8 nud_state; | |
49 | ||
50 | n = dst_neigh_lookup(dst_cache, daddr); | |
51 | if (!n) | |
52 | return -1; | |
53 | ||
54 | read_lock_bh(&n->lock); | |
55 | nud_state = n->nud_state; | |
56 | ether_addr_copy(ha, n->ha); | |
57 | read_unlock_bh(&n->lock); | |
58 | neigh_release(n); | |
59 | ||
60 | if (!(nud_state & NUD_VALID)) | |
61 | return -1; | |
62 | ||
63 | return dev_fill_forward_path(dev, ha, stack); | |
64 | } | |
65 | ||
66 | struct nft_forward_info { | |
67 | const struct net_device *indev; | |
7a27f6ab | 68 | const struct net_device *outdev; |
4cd91f7c PNA |
69 | struct id { |
70 | __u16 id; | |
71 | __be16 proto; | |
72 | } encap[NF_FLOW_TABLE_ENCAP_MAX]; | |
73 | u8 num_encaps; | |
7a27f6ab PNA |
74 | u8 h_source[ETH_ALEN]; |
75 | u8 h_dest[ETH_ALEN]; | |
76 | enum flow_offload_xmit_type xmit_type; | |
c63a7cc4 PNA |
77 | }; |
78 | ||
79 | static void nft_dev_path_info(const struct net_device_path_stack *stack, | |
7a27f6ab PNA |
80 | struct nft_forward_info *info, |
81 | unsigned char *ha) | |
c63a7cc4 PNA |
82 | { |
83 | const struct net_device_path *path; | |
84 | int i; | |
85 | ||
7a27f6ab PNA |
86 | memcpy(info->h_dest, ha, ETH_ALEN); |
87 | ||
c63a7cc4 PNA |
88 | for (i = 0; i < stack->num_paths; i++) { |
89 | path = &stack->path[i]; | |
90 | switch (path->type) { | |
91 | case DEV_PATH_ETHERNET: | |
4cd91f7c | 92 | case DEV_PATH_VLAN: |
c63a7cc4 | 93 | info->indev = path->dev; |
7a27f6ab PNA |
94 | if (is_zero_ether_addr(info->h_source)) |
95 | memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); | |
4cd91f7c PNA |
96 | |
97 | if (path->type == DEV_PATH_ETHERNET) | |
98 | break; | |
99 | ||
100 | /* DEV_PATH_VLAN */ | |
101 | if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) { | |
102 | info->indev = NULL; | |
103 | break; | |
104 | } | |
105 | info->outdev = path->dev; | |
106 | info->encap[info->num_encaps].id = path->encap.id; | |
107 | info->encap[info->num_encaps].proto = path->encap.proto; | |
108 | info->num_encaps++; | |
c63a7cc4 | 109 | break; |
c63a7cc4 | 110 | case DEV_PATH_BRIDGE: |
7a27f6ab PNA |
111 | if (is_zero_ether_addr(info->h_source)) |
112 | memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); | |
113 | ||
114 | info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; | |
115 | break; | |
c63a7cc4 PNA |
116 | default: |
117 | info->indev = NULL; | |
118 | break; | |
119 | } | |
120 | } | |
7a27f6ab PNA |
121 | if (!info->outdev) |
122 | info->outdev = info->indev; | |
c63a7cc4 PNA |
123 | } |
124 | ||
125 | static bool nft_flowtable_find_dev(const struct net_device *dev, | |
126 | struct nft_flowtable *ft) | |
127 | { | |
128 | struct nft_hook *hook; | |
129 | bool found = false; | |
130 | ||
131 | list_for_each_entry_rcu(hook, &ft->hook_list, list) { | |
132 | if (hook->ops.dev != dev) | |
133 | continue; | |
134 | ||
135 | found = true; | |
136 | break; | |
137 | } | |
138 | ||
139 | return found; | |
140 | } | |
141 | ||
142 | static void nft_dev_forward_path(struct nf_flow_route *route, | |
143 | const struct nf_conn *ct, | |
144 | enum ip_conntrack_dir dir, | |
145 | struct nft_flowtable *ft) | |
146 | { | |
147 | const struct dst_entry *dst = route->tuple[dir].dst; | |
148 | struct net_device_path_stack stack; | |
149 | struct nft_forward_info info = {}; | |
7a27f6ab | 150 | unsigned char ha[ETH_ALEN]; |
4cd91f7c | 151 | int i; |
c63a7cc4 | 152 | |
7a27f6ab PNA |
153 | if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0) |
154 | nft_dev_path_info(&stack, &info, ha); | |
c63a7cc4 PNA |
155 | |
156 | if (!info.indev || !nft_flowtable_find_dev(info.indev, ft)) | |
157 | return; | |
158 | ||
159 | route->tuple[!dir].in.ifindex = info.indev->ifindex; | |
4cd91f7c PNA |
160 | for (i = 0; i < info.num_encaps; i++) { |
161 | route->tuple[!dir].in.encap[i].id = info.encap[i].id; | |
162 | route->tuple[!dir].in.encap[i].proto = info.encap[i].proto; | |
163 | } | |
164 | route->tuple[!dir].in.num_encaps = info.num_encaps; | |
7a27f6ab PNA |
165 | |
166 | if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) { | |
167 | memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN); | |
168 | memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); | |
169 | route->tuple[dir].out.ifindex = info.outdev->ifindex; | |
170 | route->tuple[dir].xmit_type = info.xmit_type; | |
171 | } | |
c63a7cc4 PNA |
172 | } |
173 | ||
a3c90f7a PNA |
174 | static int nft_flow_route(const struct nft_pktinfo *pkt, |
175 | const struct nf_conn *ct, | |
176 | struct nf_flow_route *route, | |
c63a7cc4 PNA |
177 | enum ip_conntrack_dir dir, |
178 | struct nft_flowtable *ft) | |
a3c90f7a PNA |
179 | { |
180 | struct dst_entry *this_dst = skb_dst(pkt->skb); | |
181 | struct dst_entry *other_dst = NULL; | |
182 | struct flowi fl; | |
183 | ||
184 | memset(&fl, 0, sizeof(fl)); | |
185 | switch (nft_pf(pkt)) { | |
186 | case NFPROTO_IPV4: | |
a799aea0 | 187 | fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; |
10f4e765 | 188 | fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; |
a3c90f7a PNA |
189 | break; |
190 | case NFPROTO_IPV6: | |
a799aea0 | 191 | fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; |
10f4e765 | 192 | fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; |
a3c90f7a PNA |
193 | break; |
194 | } | |
195 | ||
196 | nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); | |
197 | if (!other_dst) | |
198 | return -ENOENT; | |
199 | ||
5139c0c0 PNA |
200 | nft_default_forward_path(route, this_dst, dir); |
201 | nft_default_forward_path(route, other_dst, !dir); | |
a3c90f7a | 202 | |
c63a7cc4 PNA |
203 | if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH && |
204 | route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) { | |
205 | nft_dev_forward_path(route, ct, dir, ft); | |
206 | nft_dev_forward_path(route, ct, !dir, ft); | |
207 | } | |
208 | ||
a3c90f7a PNA |
209 | return 0; |
210 | } | |
211 | ||
69aeb538 | 212 | static bool nft_flow_offload_skip(struct sk_buff *skb, int family) |
a3c90f7a | 213 | { |
a3c90f7a PNA |
214 | if (skb_sec_path(skb)) |
215 | return true; | |
216 | ||
69aeb538 FW |
217 | if (family == NFPROTO_IPV4) { |
218 | const struct ip_options *opt; | |
219 | ||
220 | opt = &(IPCB(skb)->opt); | |
221 | ||
222 | if (unlikely(opt->optlen)) | |
223 | return true; | |
224 | } | |
225 | ||
a3c90f7a PNA |
226 | return false; |
227 | } | |
228 | ||
229 | static void nft_flow_offload_eval(const struct nft_expr *expr, | |
230 | struct nft_regs *regs, | |
231 | const struct nft_pktinfo *pkt) | |
232 | { | |
233 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
234 | struct nf_flowtable *flowtable = &priv->flowtable->data; | |
dfe42be1 | 235 | struct tcphdr _tcph, *tcph = NULL; |
c63a7cc4 | 236 | struct nf_flow_route route = {}; |
a3c90f7a | 237 | enum ip_conntrack_info ctinfo; |
a3c90f7a PNA |
238 | struct flow_offload *flow; |
239 | enum ip_conntrack_dir dir; | |
240 | struct nf_conn *ct; | |
241 | int ret; | |
242 | ||
69aeb538 | 243 | if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt))) |
a3c90f7a PNA |
244 | goto out; |
245 | ||
246 | ct = nf_ct_get(pkt->skb, &ctinfo); | |
247 | if (!ct) | |
248 | goto out; | |
249 | ||
250 | switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { | |
251 | case IPPROTO_TCP: | |
dfe42be1 PNA |
252 | tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, |
253 | sizeof(_tcph), &_tcph); | |
254 | if (unlikely(!tcph || tcph->fin || tcph->rst)) | |
255 | goto out; | |
8437a620 | 256 | break; |
a3c90f7a PNA |
257 | case IPPROTO_UDP: |
258 | break; | |
259 | default: | |
260 | goto out; | |
261 | } | |
262 | ||
91a9048f | 263 | if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || |
c4617214 | 264 | ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH)) |
a3c90f7a PNA |
265 | goto out; |
266 | ||
270a8a29 | 267 | if (!nf_ct_is_confirmed(ct)) |
a3c90f7a PNA |
268 | goto out; |
269 | ||
270 | if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) | |
271 | goto out; | |
272 | ||
273 | dir = CTINFO2DIR(ctinfo); | |
c63a7cc4 | 274 | if (nft_flow_route(pkt, ct, &route, dir, priv->flowtable) < 0) |
a3c90f7a PNA |
275 | goto err_flow_route; |
276 | ||
f1363e05 | 277 | flow = flow_offload_alloc(ct); |
a3c90f7a PNA |
278 | if (!flow) |
279 | goto err_flow_alloc; | |
280 | ||
f1363e05 PNA |
281 | if (flow_offload_route_init(flow, &route) < 0) |
282 | goto err_flow_add; | |
283 | ||
dfe42be1 | 284 | if (tcph) { |
8437a620 FW |
285 | ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; |
286 | ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; | |
287 | } | |
288 | ||
a3c90f7a PNA |
289 | ret = flow_offload_add(flowtable, flow); |
290 | if (ret < 0) | |
291 | goto err_flow_add; | |
292 | ||
26a302af | 293 | dst_release(route.tuple[!dir].dst); |
a3c90f7a PNA |
294 | return; |
295 | ||
296 | err_flow_add: | |
297 | flow_offload_free(flow); | |
298 | err_flow_alloc: | |
299 | dst_release(route.tuple[!dir].dst); | |
300 | err_flow_route: | |
301 | clear_bit(IPS_OFFLOAD_BIT, &ct->status); | |
302 | out: | |
303 | regs->verdict.code = NFT_BREAK; | |
304 | } | |
305 | ||
306 | static int nft_flow_offload_validate(const struct nft_ctx *ctx, | |
307 | const struct nft_expr *expr, | |
308 | const struct nft_data **data) | |
309 | { | |
310 | unsigned int hook_mask = (1 << NF_INET_FORWARD); | |
311 | ||
312 | return nft_chain_validate_hooks(ctx->chain, hook_mask); | |
313 | } | |
314 | ||
14c41586 PNA |
315 | static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = { |
316 | [NFTA_FLOW_TABLE_NAME] = { .type = NLA_STRING, | |
317 | .len = NFT_NAME_MAXLEN - 1 }, | |
318 | }; | |
319 | ||
a3c90f7a PNA |
320 | static int nft_flow_offload_init(const struct nft_ctx *ctx, |
321 | const struct nft_expr *expr, | |
322 | const struct nlattr * const tb[]) | |
323 | { | |
324 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
325 | u8 genmask = nft_genmask_next(ctx->net); | |
326 | struct nft_flowtable *flowtable; | |
327 | ||
328 | if (!tb[NFTA_FLOW_TABLE_NAME]) | |
329 | return -EINVAL; | |
330 | ||
cac20fcd PNA |
331 | flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME], |
332 | genmask); | |
a3c90f7a PNA |
333 | if (IS_ERR(flowtable)) |
334 | return PTR_ERR(flowtable); | |
335 | ||
336 | priv->flowtable = flowtable; | |
337 | flowtable->use++; | |
338 | ||
36596dad | 339 | return nf_ct_netns_get(ctx->net, ctx->family); |
a3c90f7a PNA |
340 | } |
341 | ||
9b05b6e1 LGL |
342 | static void nft_flow_offload_deactivate(const struct nft_ctx *ctx, |
343 | const struct nft_expr *expr, | |
344 | enum nft_trans_phase phase) | |
345 | { | |
346 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
347 | ||
348 | nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase); | |
349 | } | |
350 | ||
351 | static void nft_flow_offload_activate(const struct nft_ctx *ctx, | |
352 | const struct nft_expr *expr) | |
353 | { | |
354 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
355 | ||
356 | priv->flowtable->use++; | |
357 | } | |
358 | ||
a3c90f7a PNA |
359 | static void nft_flow_offload_destroy(const struct nft_ctx *ctx, |
360 | const struct nft_expr *expr) | |
361 | { | |
36596dad | 362 | nf_ct_netns_put(ctx->net, ctx->family); |
a3c90f7a PNA |
363 | } |
364 | ||
365 | static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) | |
366 | { | |
367 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
368 | ||
369 | if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) | |
370 | goto nla_put_failure; | |
371 | ||
372 | return 0; | |
373 | ||
374 | nla_put_failure: | |
375 | return -1; | |
376 | } | |
377 | ||
378 | static struct nft_expr_type nft_flow_offload_type; | |
379 | static const struct nft_expr_ops nft_flow_offload_ops = { | |
380 | .type = &nft_flow_offload_type, | |
381 | .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), | |
382 | .eval = nft_flow_offload_eval, | |
383 | .init = nft_flow_offload_init, | |
9b05b6e1 LGL |
384 | .activate = nft_flow_offload_activate, |
385 | .deactivate = nft_flow_offload_deactivate, | |
a3c90f7a PNA |
386 | .destroy = nft_flow_offload_destroy, |
387 | .validate = nft_flow_offload_validate, | |
388 | .dump = nft_flow_offload_dump, | |
389 | }; | |
390 | ||
391 | static struct nft_expr_type nft_flow_offload_type __read_mostly = { | |
392 | .name = "flow_offload", | |
393 | .ops = &nft_flow_offload_ops, | |
14c41586 | 394 | .policy = nft_flow_offload_policy, |
a3c90f7a PNA |
395 | .maxattr = NFTA_FLOW_MAX, |
396 | .owner = THIS_MODULE, | |
397 | }; | |
398 | ||
a3c90f7a PNA |
399 | static int flow_offload_netdev_event(struct notifier_block *this, |
400 | unsigned long event, void *ptr) | |
401 | { | |
402 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
403 | ||
404 | if (event != NETDEV_DOWN) | |
405 | return NOTIFY_DONE; | |
406 | ||
5f1be84a | 407 | nf_flow_table_cleanup(dev); |
a3c90f7a PNA |
408 | |
409 | return NOTIFY_DONE; | |
410 | } | |
411 | ||
412 | static struct notifier_block flow_offload_netdev_notifier = { | |
413 | .notifier_call = flow_offload_netdev_event, | |
414 | }; | |
415 | ||
416 | static int __init nft_flow_offload_module_init(void) | |
417 | { | |
418 | int err; | |
419 | ||
584eab29 TY |
420 | err = register_netdevice_notifier(&flow_offload_netdev_notifier); |
421 | if (err) | |
422 | goto err; | |
a3c90f7a PNA |
423 | |
424 | err = nft_register_expr(&nft_flow_offload_type); | |
425 | if (err < 0) | |
426 | goto register_expr; | |
427 | ||
428 | return 0; | |
429 | ||
430 | register_expr: | |
431 | unregister_netdevice_notifier(&flow_offload_netdev_notifier); | |
584eab29 | 432 | err: |
a3c90f7a PNA |
433 | return err; |
434 | } | |
435 | ||
436 | static void __exit nft_flow_offload_module_exit(void) | |
437 | { | |
a3c90f7a PNA |
438 | nft_unregister_expr(&nft_flow_offload_type); |
439 | unregister_netdevice_notifier(&flow_offload_netdev_notifier); | |
a3c90f7a PNA |
440 | } |
441 | ||
442 | module_init(nft_flow_offload_module_init); | |
443 | module_exit(nft_flow_offload_module_exit); | |
444 | ||
445 | MODULE_LICENSE("GPL"); | |
446 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); | |
447 | MODULE_ALIAS_NFT_EXPR("flow_offload"); | |
4cacc395 | 448 | MODULE_DESCRIPTION("nftables hardware flow offload module"); |