1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kernel.h>
3 #include <linux/module.h>
4 #include <linux/init.h>
5 #include <linux/netlink.h>
6 #include <linux/netfilter.h>
7 #include <linux/workqueue.h>
8 #include <linux/spinlock.h>
9 #include <linux/netfilter/nf_conntrack_common.h>
10 #include <linux/netfilter/nf_tables.h>
11 #include <net/ip.h> /* for ipv4 options. */
12 #include <net/netfilter/nf_tables.h>
13 #include <net/netfilter/nf_tables_core.h>
14 #include <net/netfilter/nf_conntrack_core.h>
15 #include <net/netfilter/nf_conntrack_extend.h>
16 #include <net/netfilter/nf_flow_table.h>
/* Expression private data: the flowtable that matching flows are
 * offloaded into.
 */
struct nft_flow_offload {
	struct nft_flowtable	*flowtable;
};
22 static enum flow_offload_xmit_type
nft_xmit_type(struct dst_entry
*dst
)
25 return FLOW_OFFLOAD_XMIT_XFRM
;
27 return FLOW_OFFLOAD_XMIT_NEIGH
;
30 static void nft_default_forward_path(struct nf_flow_route
*route
,
31 struct dst_entry
*dst_cache
,
32 enum ip_conntrack_dir dir
)
34 route
->tuple
[!dir
].in
.ifindex
= dst_cache
->dev
->ifindex
;
35 route
->tuple
[dir
].dst
= dst_cache
;
36 route
->tuple
[dir
].xmit_type
= nft_xmit_type(dst_cache
);
39 static int nft_dev_fill_forward_path(const struct nf_flow_route
*route
,
40 const struct dst_entry
*dst_cache
,
41 const struct nf_conn
*ct
,
42 enum ip_conntrack_dir dir
, u8
*ha
,
43 struct net_device_path_stack
*stack
)
45 const void *daddr
= &ct
->tuplehash
[!dir
].tuple
.src
.u3
;
46 struct net_device
*dev
= dst_cache
->dev
;
50 n
= dst_neigh_lookup(dst_cache
, daddr
);
54 read_lock_bh(&n
->lock
);
55 nud_state
= n
->nud_state
;
56 ether_addr_copy(ha
, n
->ha
);
57 read_unlock_bh(&n
->lock
);
60 if (!(nud_state
& NUD_VALID
))
63 return dev_fill_forward_path(dev
, ha
, stack
);
66 struct nft_forward_info
{
67 const struct net_device
*indev
;
68 const struct net_device
*outdev
;
72 } encap
[NF_FLOW_TABLE_ENCAP_MAX
];
74 u8 h_source
[ETH_ALEN
];
76 enum flow_offload_xmit_type xmit_type
;
79 static void nft_dev_path_info(const struct net_device_path_stack
*stack
,
80 struct nft_forward_info
*info
,
83 const struct net_device_path
*path
;
86 memcpy(info
->h_dest
, ha
, ETH_ALEN
);
88 for (i
= 0; i
< stack
->num_paths
; i
++) {
89 path
= &stack
->path
[i
];
91 case DEV_PATH_ETHERNET
:
93 info
->indev
= path
->dev
;
94 if (is_zero_ether_addr(info
->h_source
))
95 memcpy(info
->h_source
, path
->dev
->dev_addr
, ETH_ALEN
);
97 if (path
->type
== DEV_PATH_ETHERNET
)
101 if (info
->num_encaps
>= NF_FLOW_TABLE_ENCAP_MAX
) {
105 info
->outdev
= path
->dev
;
106 info
->encap
[info
->num_encaps
].id
= path
->encap
.id
;
107 info
->encap
[info
->num_encaps
].proto
= path
->encap
.proto
;
110 case DEV_PATH_BRIDGE
:
111 if (is_zero_ether_addr(info
->h_source
))
112 memcpy(info
->h_source
, path
->dev
->dev_addr
, ETH_ALEN
);
114 switch (path
->bridge
.vlan_mode
) {
115 case DEV_PATH_BR_VLAN_TAG
:
116 info
->encap
[info
->num_encaps
].id
= path
->bridge
.vlan_id
;
117 info
->encap
[info
->num_encaps
].proto
= path
->bridge
.vlan_proto
;
120 case DEV_PATH_BR_VLAN_UNTAG
:
123 case DEV_PATH_BR_VLAN_KEEP
:
126 info
->xmit_type
= FLOW_OFFLOAD_XMIT_DIRECT
;
134 info
->outdev
= info
->indev
;
137 static bool nft_flowtable_find_dev(const struct net_device
*dev
,
138 struct nft_flowtable
*ft
)
140 struct nft_hook
*hook
;
143 list_for_each_entry_rcu(hook
, &ft
->hook_list
, list
) {
144 if (hook
->ops
.dev
!= dev
)
154 static void nft_dev_forward_path(struct nf_flow_route
*route
,
155 const struct nf_conn
*ct
,
156 enum ip_conntrack_dir dir
,
157 struct nft_flowtable
*ft
)
159 const struct dst_entry
*dst
= route
->tuple
[dir
].dst
;
160 struct net_device_path_stack stack
;
161 struct nft_forward_info info
= {};
162 unsigned char ha
[ETH_ALEN
];
165 if (nft_dev_fill_forward_path(route
, dst
, ct
, dir
, ha
, &stack
) >= 0)
166 nft_dev_path_info(&stack
, &info
, ha
);
168 if (!info
.indev
|| !nft_flowtable_find_dev(info
.indev
, ft
))
171 route
->tuple
[!dir
].in
.ifindex
= info
.indev
->ifindex
;
172 for (i
= 0; i
< info
.num_encaps
; i
++) {
173 route
->tuple
[!dir
].in
.encap
[i
].id
= info
.encap
[i
].id
;
174 route
->tuple
[!dir
].in
.encap
[i
].proto
= info
.encap
[i
].proto
;
176 route
->tuple
[!dir
].in
.num_encaps
= info
.num_encaps
;
178 if (info
.xmit_type
== FLOW_OFFLOAD_XMIT_DIRECT
) {
179 memcpy(route
->tuple
[dir
].out
.h_source
, info
.h_source
, ETH_ALEN
);
180 memcpy(route
->tuple
[dir
].out
.h_dest
, info
.h_dest
, ETH_ALEN
);
181 route
->tuple
[dir
].out
.ifindex
= info
.outdev
->ifindex
;
182 route
->tuple
[dir
].xmit_type
= info
.xmit_type
;
186 static int nft_flow_route(const struct nft_pktinfo
*pkt
,
187 const struct nf_conn
*ct
,
188 struct nf_flow_route
*route
,
189 enum ip_conntrack_dir dir
,
190 struct nft_flowtable
*ft
)
192 struct dst_entry
*this_dst
= skb_dst(pkt
->skb
);
193 struct dst_entry
*other_dst
= NULL
;
196 memset(&fl
, 0, sizeof(fl
));
197 switch (nft_pf(pkt
)) {
199 fl
.u
.ip4
.daddr
= ct
->tuplehash
[dir
].tuple
.src
.u3
.ip
;
200 fl
.u
.ip4
.flowi4_oif
= nft_in(pkt
)->ifindex
;
203 fl
.u
.ip6
.daddr
= ct
->tuplehash
[dir
].tuple
.src
.u3
.in6
;
204 fl
.u
.ip6
.flowi6_oif
= nft_in(pkt
)->ifindex
;
208 nf_route(nft_net(pkt
), &other_dst
, &fl
, false, nft_pf(pkt
));
212 nft_default_forward_path(route
, this_dst
, dir
);
213 nft_default_forward_path(route
, other_dst
, !dir
);
215 if (route
->tuple
[dir
].xmit_type
== FLOW_OFFLOAD_XMIT_NEIGH
&&
216 route
->tuple
[!dir
].xmit_type
== FLOW_OFFLOAD_XMIT_NEIGH
) {
217 nft_dev_forward_path(route
, ct
, dir
, ft
);
218 nft_dev_forward_path(route
, ct
, !dir
, ft
);
224 static bool nft_flow_offload_skip(struct sk_buff
*skb
, int family
)
226 if (skb_sec_path(skb
))
229 if (family
== NFPROTO_IPV4
) {
230 const struct ip_options
*opt
;
232 opt
= &(IPCB(skb
)->opt
);
234 if (unlikely(opt
->optlen
))
241 static void nft_flow_offload_eval(const struct nft_expr
*expr
,
242 struct nft_regs
*regs
,
243 const struct nft_pktinfo
*pkt
)
245 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
246 struct nf_flowtable
*flowtable
= &priv
->flowtable
->data
;
247 struct tcphdr _tcph
, *tcph
= NULL
;
248 struct nf_flow_route route
= {};
249 enum ip_conntrack_info ctinfo
;
250 struct flow_offload
*flow
;
251 enum ip_conntrack_dir dir
;
255 if (nft_flow_offload_skip(pkt
->skb
, nft_pf(pkt
)))
258 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
262 switch (ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.protonum
) {
264 tcph
= skb_header_pointer(pkt
->skb
, pkt
->xt
.thoff
,
265 sizeof(_tcph
), &_tcph
);
266 if (unlikely(!tcph
|| tcph
->fin
|| tcph
->rst
))
275 if (nf_ct_ext_exist(ct
, NF_CT_EXT_HELPER
) ||
276 ct
->status
& (IPS_SEQ_ADJUST
| IPS_NAT_CLASH
))
279 if (!nf_ct_is_confirmed(ct
))
282 if (test_and_set_bit(IPS_OFFLOAD_BIT
, &ct
->status
))
285 dir
= CTINFO2DIR(ctinfo
);
286 if (nft_flow_route(pkt
, ct
, &route
, dir
, priv
->flowtable
) < 0)
289 flow
= flow_offload_alloc(ct
);
293 if (flow_offload_route_init(flow
, &route
) < 0)
297 ct
->proto
.tcp
.seen
[0].flags
|= IP_CT_TCP_FLAG_BE_LIBERAL
;
298 ct
->proto
.tcp
.seen
[1].flags
|= IP_CT_TCP_FLAG_BE_LIBERAL
;
301 ret
= flow_offload_add(flowtable
, flow
);
305 dst_release(route
.tuple
[!dir
].dst
);
309 flow_offload_free(flow
);
311 dst_release(route
.tuple
[!dir
].dst
);
313 clear_bit(IPS_OFFLOAD_BIT
, &ct
->status
);
315 regs
->verdict
.code
= NFT_BREAK
;
318 static int nft_flow_offload_validate(const struct nft_ctx
*ctx
,
319 const struct nft_expr
*expr
,
320 const struct nft_data
**data
)
322 unsigned int hook_mask
= (1 << NF_INET_FORWARD
);
324 return nft_chain_validate_hooks(ctx
->chain
, hook_mask
);
327 static const struct nla_policy nft_flow_offload_policy
[NFTA_FLOW_MAX
+ 1] = {
328 [NFTA_FLOW_TABLE_NAME
] = { .type
= NLA_STRING
,
329 .len
= NFT_NAME_MAXLEN
- 1 },
332 static int nft_flow_offload_init(const struct nft_ctx
*ctx
,
333 const struct nft_expr
*expr
,
334 const struct nlattr
* const tb
[])
336 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
337 u8 genmask
= nft_genmask_next(ctx
->net
);
338 struct nft_flowtable
*flowtable
;
340 if (!tb
[NFTA_FLOW_TABLE_NAME
])
343 flowtable
= nft_flowtable_lookup(ctx
->table
, tb
[NFTA_FLOW_TABLE_NAME
],
345 if (IS_ERR(flowtable
))
346 return PTR_ERR(flowtable
);
348 priv
->flowtable
= flowtable
;
351 return nf_ct_netns_get(ctx
->net
, ctx
->family
);
354 static void nft_flow_offload_deactivate(const struct nft_ctx
*ctx
,
355 const struct nft_expr
*expr
,
356 enum nft_trans_phase phase
)
358 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
360 nf_tables_deactivate_flowtable(ctx
, priv
->flowtable
, phase
);
363 static void nft_flow_offload_activate(const struct nft_ctx
*ctx
,
364 const struct nft_expr
*expr
)
366 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
368 priv
->flowtable
->use
++;
371 static void nft_flow_offload_destroy(const struct nft_ctx
*ctx
,
372 const struct nft_expr
*expr
)
374 nf_ct_netns_put(ctx
->net
, ctx
->family
);
377 static int nft_flow_offload_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
379 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
381 if (nla_put_string(skb
, NFTA_FLOW_TABLE_NAME
, priv
->flowtable
->name
))
382 goto nla_put_failure
;
390 static struct nft_expr_type nft_flow_offload_type
;
391 static const struct nft_expr_ops nft_flow_offload_ops
= {
392 .type
= &nft_flow_offload_type
,
393 .size
= NFT_EXPR_SIZE(sizeof(struct nft_flow_offload
)),
394 .eval
= nft_flow_offload_eval
,
395 .init
= nft_flow_offload_init
,
396 .activate
= nft_flow_offload_activate
,
397 .deactivate
= nft_flow_offload_deactivate
,
398 .destroy
= nft_flow_offload_destroy
,
399 .validate
= nft_flow_offload_validate
,
400 .dump
= nft_flow_offload_dump
,
403 static struct nft_expr_type nft_flow_offload_type __read_mostly
= {
404 .name
= "flow_offload",
405 .ops
= &nft_flow_offload_ops
,
406 .policy
= nft_flow_offload_policy
,
407 .maxattr
= NFTA_FLOW_MAX
,
408 .owner
= THIS_MODULE
,
411 static int flow_offload_netdev_event(struct notifier_block
*this,
412 unsigned long event
, void *ptr
)
414 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
416 if (event
!= NETDEV_DOWN
)
419 nf_flow_table_cleanup(dev
);
424 static struct notifier_block flow_offload_netdev_notifier
= {
425 .notifier_call
= flow_offload_netdev_event
,
428 static int __init
nft_flow_offload_module_init(void)
432 err
= register_netdevice_notifier(&flow_offload_netdev_notifier
);
436 err
= nft_register_expr(&nft_flow_offload_type
);
443 unregister_netdevice_notifier(&flow_offload_netdev_notifier
);
448 static void __exit
nft_flow_offload_module_exit(void)
450 nft_unregister_expr(&nft_flow_offload_type
);
451 unregister_netdevice_notifier(&flow_offload_netdev_notifier
);
454 module_init(nft_flow_offload_module_init
);
455 module_exit(nft_flow_offload_module_exit
);
457 MODULE_LICENSE("GPL");
458 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
459 MODULE_ALIAS_NFT_EXPR("flow_offload");
460 MODULE_DESCRIPTION("nftables hardware flow offload module");