1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/kernel.h>
3 #include <linux/module.h>
4 #include <linux/init.h>
5 #include <linux/netlink.h>
6 #include <linux/netfilter.h>
7 #include <linux/workqueue.h>
8 #include <linux/spinlock.h>
9 #include <linux/netfilter/nf_conntrack_common.h>
10 #include <linux/netfilter/nf_tables.h>
11 #include <net/ip.h> /* for ipv4 options. */
12 #include <net/netfilter/nf_tables.h>
13 #include <net/netfilter/nf_tables_core.h>
14 #include <net/netfilter/nf_conntrack_core.h>
15 #include <net/netfilter/nf_conntrack_extend.h>
16 #include <net/netfilter/nf_flow_table.h>
/*
 * Private data of the flow offload expression (obtained through
 * nft_expr_priv() by the callbacks in this file): a pointer to the
 * nft_flowtable the expression refers to.
 * NOTE(review): the closing brace of this struct is not visible in this
 * extract.
 */
18 struct nft_flow_offload
{
19 struct nft_flowtable
*flowtable
;
/*
 * nft_xmit_type - choose the transmit type for a destination entry.
 * @dst: destination cache entry to classify
 *
 * Returns FLOW_OFFLOAD_XMIT_XFRM or FLOW_OFFLOAD_XMIT_NEIGH.
 * NOTE(review): the condition that selects the first return is not visible
 * in this extract -- presumably an xfrm test on @dst; confirm against the
 * full file.
 */
22 static enum flow_offload_xmit_type
nft_xmit_type(struct dst_entry
*dst
)
25 return FLOW_OFFLOAD_XMIT_XFRM
;
/* fallback when the (not visible) condition above does not hold */
27 return FLOW_OFFLOAD_XMIT_NEIGH
;
/*
 * nft_default_forward_path - fill the default forwarding state of a route.
 * @route: route being built
 * @dst_cache: destination entry used for direction @dir
 * @dir: conntrack direction being filled
 *
 * Stores @dst_cache and its transmit type (see nft_xmit_type()) in the
 * @dir tuple, and records the ifindex of the dst device as the input
 * interface of the opposite direction.
 */
30 static void nft_default_forward_path(struct nf_flow_route
*route
,
31 struct dst_entry
*dst_cache
,
32 enum ip_conntrack_dir dir
)
/* the device this direction leaves through is where the reply comes in */
34 route
->tuple
[!dir
].in
.ifindex
= dst_cache
->dev
->ifindex
;
35 route
->tuple
[dir
].dst
= dst_cache
;
36 route
->tuple
[dir
].xmit_type
= nft_xmit_type(dst_cache
);
/*
 * nft_dev_fill_forward_path - resolve the neighbour and collect the device
 * path stack for one direction.
 * @route: route being built
 * @dst_cache: destination entry whose device and neighbour are used
 * @ct: conntrack entry; the source address of the opposite-direction tuple
 *      is the address that gets resolved
 * @dir: conntrack direction
 * @ha: output buffer receiving the neighbour hardware address
 * @stack: output device path stack filled by dev_fill_forward_path()
 *
 * Returns the result of dev_fill_forward_path() on the path visible here.
 * NOTE(review): the declarations of 'n' and 'nud_state', the handling of a
 * failed neighbour lookup, any release of the neighbour reference and the
 * value returned when the neighbour is not NUD_VALID are not visible in
 * this extract.
 */
39 static int nft_dev_fill_forward_path(const struct nf_flow_route
*route
,
40 const struct dst_entry
*dst_cache
,
41 const struct nf_conn
*ct
,
42 enum ip_conntrack_dir dir
, u8
*ha
,
43 struct net_device_path_stack
*stack
)
45 const void *daddr
= &ct
->tuplehash
[!dir
].tuple
.src
.u3
;
46 struct net_device
*dev
= dst_cache
->dev
;
50 n
= dst_neigh_lookup(dst_cache
, daddr
);
/* read neighbour state and hardware address under the neighbour lock */
54 read_lock_bh(&n
->lock
);
55 nud_state
= n
->nud_state
;
56 ether_addr_copy(ha
, n
->ha
);
57 read_unlock_bh(&n
->lock
);
/* NOTE(review): body of this check is not visible in this extract */
60 if (!(nud_state
& NUD_VALID
))
63 return dev_fill_forward_path(dev
, ha
, stack
);
/*
 * Forwarding information gathered by nft_dev_path_info() while walking a
 * device path stack: input and output devices, up to
 * NF_FLOW_TABLE_ENCAP_MAX encapsulation entries, the source hardware
 * address and the resulting transmit type.
 * NOTE(review): the opening of the nested 'encap' element type and the
 * members 'num_encaps' and 'h_dest' (both used elsewhere in this file) are
 * not visible in this extract, nor is the closing brace.
 */
66 struct nft_forward_info
{
67 const struct net_device
*indev
;
68 const struct net_device
*outdev
;
72 } encap
[NF_FLOW_TABLE_ENCAP_MAX
];
74 u8 h_source
[ETH_ALEN
];
76 enum flow_offload_xmit_type xmit_type
;
/*
 * nft_dev_path_info - digest a device path stack into an nft_forward_info.
 * @stack: device path stack to walk (stack->num_paths entries)
 * @info: result; the caller in this file passes it zero-initialised
 *
 * Copies the hardware address 'ha' into info->h_dest, then visits every
 * path entry in order. In the DEV_PATH_ETHERNET case group the entry's
 * device becomes info->indev and info->h_source is taken from the device
 * address if it is still all-zero; the code after the DEV_PATH_ETHERNET
 * test checks info->num_encaps against NF_FLOW_TABLE_ENCAP_MAX, records
 * the device as info->outdev and stores the entry's encap id/proto. For
 * DEV_PATH_BRIDGE the source address is filled the same way and the
 * transmit type becomes FLOW_OFFLOAD_XMIT_DIRECT.
 *
 * NOTE(review): the third parameter ('ha', per the call site and the
 * memcpy below), the declaration of 'i', the switch statement header, any
 * additional case labels, the bodies of the two 'if' checks on path->type
 * and num_encaps, the num_encaps update, all break statements, a default
 * case and the condition guarding the final outdev assignment are not
 * visible in this extract.
 */
79 static void nft_dev_path_info(const struct net_device_path_stack
*stack
,
80 struct nft_forward_info
*info
,
83 const struct net_device_path
*path
;
86 memcpy(info
->h_dest
, ha
, ETH_ALEN
);
88 for (i
= 0; i
< stack
->num_paths
; i
++) {
89 path
= &stack
->path
[i
];
91 case DEV_PATH_ETHERNET
:
93 info
->indev
= path
->dev
;
/* keep the first source address found; later entries do not override it */
94 if (is_zero_ether_addr(info
->h_source
))
95 memcpy(info
->h_source
, path
->dev
->dev_addr
, ETH_ALEN
);
97 if (path
->type
== DEV_PATH_ETHERNET
)
/* bound check before writing info->encap[info->num_encaps] below */
101 if (info
->num_encaps
>= NF_FLOW_TABLE_ENCAP_MAX
) {
105 info
->outdev
= path
->dev
;
106 info
->encap
[info
->num_encaps
].id
= path
->encap
.id
;
107 info
->encap
[info
->num_encaps
].proto
= path
->encap
.proto
;
110 case DEV_PATH_BRIDGE
:
111 if (is_zero_ether_addr(info
->h_source
))
112 memcpy(info
->h_source
, path
->dev
->dev_addr
, ETH_ALEN
);
114 info
->xmit_type
= FLOW_OFFLOAD_XMIT_DIRECT
;
/* NOTE(review): presumably only when no outdev was recorded -- confirm */
122 info
->outdev
= info
->indev
;
/*
 * nft_flowtable_find_dev - look for a device among a flowtable's hooks.
 * @dev: device to look for
 * @ft: flowtable whose hook_list is walked
 *
 * Walks ft->hook_list with list_for_each_entry_rcu() and compares each
 * hook's ops.dev with @dev.
 * NOTE(review): what happens on a match or mismatch and the returned value
 * are not visible in this extract; presumably true means "found".
 */
125 static bool nft_flowtable_find_dev(const struct net_device
*dev
,
126 struct nft_flowtable
*ft
)
128 struct nft_hook
*hook
;
131 list_for_each_entry_rcu(hook
, &ft
->hook_list
, list
) {
132 if (hook
->ops
.dev
!= dev
)
/*
 * nft_dev_forward_path - refine one direction of a route from the device
 * path stack.
 * @route: route being built; tuple[dir].dst must already be set
 * @ct: conntrack entry of the flow
 * @dir: direction to refine
 * @ft: flowtable whose hooks the discovered input device is checked against
 *
 * When nft_dev_fill_forward_path() succeeds (>= 0) the stack is digested
 * by nft_dev_path_info() into a zero-initialised nft_forward_info. The
 * input ifindex and encapsulation entries of the opposite direction are
 * then taken from that info and, for FLOW_OFFLOAD_XMIT_DIRECT, the
 * hardware addresses, output ifindex and transmit type of @dir are stored.
 *
 * NOTE(review): the declaration of 'i' and the statement executed when the
 * indev / nft_flowtable_find_dev() check fails are not visible in this
 * extract (presumably an early return, since info.indev is dereferenced
 * right after).
 */
142 static void nft_dev_forward_path(struct nf_flow_route
*route
,
143 const struct nf_conn
*ct
,
144 enum ip_conntrack_dir dir
,
145 struct nft_flowtable
*ft
)
147 const struct dst_entry
*dst
= route
->tuple
[dir
].dst
;
148 struct net_device_path_stack stack
;
149 struct nft_forward_info info
= {};
150 unsigned char ha
[ETH_ALEN
];
153 if (nft_dev_fill_forward_path(route
, dst
, ct
, dir
, ha
, &stack
) >= 0)
154 nft_dev_path_info(&stack
, &info
, ha
);
/* info stays all-zero (indev == NULL) when the path lookup above failed */
156 if (!info
.indev
|| !nft_flowtable_find_dev(info
.indev
, ft
))
159 route
->tuple
[!dir
].in
.ifindex
= info
.indev
->ifindex
;
160 for (i
= 0; i
< info
.num_encaps
; i
++) {
161 route
->tuple
[!dir
].in
.encap
[i
].id
= info
.encap
[i
].id
;
162 route
->tuple
[!dir
].in
.encap
[i
].proto
= info
.encap
[i
].proto
;
164 route
->tuple
[!dir
].in
.num_encaps
= info
.num_encaps
;
166 if (info
.xmit_type
== FLOW_OFFLOAD_XMIT_DIRECT
) {
167 memcpy(route
->tuple
[dir
].out
.h_source
, info
.h_source
, ETH_ALEN
);
168 memcpy(route
->tuple
[dir
].out
.h_dest
, info
.h_dest
, ETH_ALEN
);
169 route
->tuple
[dir
].out
.ifindex
= info
.outdev
->ifindex
;
170 route
->tuple
[dir
].xmit_type
= info
.xmit_type
;
/*
 * nft_flow_route - build both directions of the route for a flow.
 * @pkt: packet being evaluated; its skb dst serves direction @dir
 * @ct: conntrack entry of the flow
 * @route: route to fill
 * @dir: direction of the current packet
 * @ft: flowtable passed on to nft_dev_forward_path()
 *
 * The reverse-direction dst is looked up with nf_route() using a flow key
 * whose destination is the source address of the @dir tuple and whose
 * output interface is the packet's input interface. Both directions then
 * get their default forwarding state; if both end up as
 * FLOW_OFFLOAD_XMIT_NEIGH, the device path of each direction is explored.
 *
 * The caller in this file treats a negative return value as failure.
 * NOTE(review): the declaration of 'fl', the case labels of the switch on
 * nft_pf(pkt), any check of other_dst after nf_route() and the return
 * statements are not visible in this extract.
 */
174 static int nft_flow_route(const struct nft_pktinfo
*pkt
,
175 const struct nf_conn
*ct
,
176 struct nf_flow_route
*route
,
177 enum ip_conntrack_dir dir
,
178 struct nft_flowtable
*ft
)
180 struct dst_entry
*this_dst
= skb_dst(pkt
->skb
);
181 struct dst_entry
*other_dst
= NULL
;
184 memset(&fl
, 0, sizeof(fl
));
185 switch (nft_pf(pkt
)) {
/* IPv4 flow key (uses the ip4 member of fl.u) */
187 fl
.u
.ip4
.daddr
= ct
->tuplehash
[dir
].tuple
.src
.u3
.ip
;
188 fl
.u
.ip4
.flowi4_oif
= nft_in(pkt
)->ifindex
;
/* IPv6 flow key (uses the ip6 member of fl.u) */
191 fl
.u
.ip6
.daddr
= ct
->tuplehash
[dir
].tuple
.src
.u3
.in6
;
192 fl
.u
.ip6
.flowi6_oif
= nft_in(pkt
)->ifindex
;
196 nf_route(nft_net(pkt
), &other_dst
, &fl
, false, nft_pf(pkt
));
200 nft_default_forward_path(route
, this_dst
, dir
);
201 nft_default_forward_path(route
, other_dst
, !dir
);
203 if (route
->tuple
[dir
].xmit_type
== FLOW_OFFLOAD_XMIT_NEIGH
&&
204 route
->tuple
[!dir
].xmit_type
== FLOW_OFFLOAD_XMIT_NEIGH
) {
205 nft_dev_forward_path(route
, ct
, dir
, ft
);
206 nft_dev_forward_path(route
, ct
, !dir
, ft
);
/*
 * nft_flow_offload_skip - test whether a packet must not be offloaded.
 * @skb: packet to inspect
 * @family: protocol family of the packet (NFPROTO_*)
 *
 * Checks skb_sec_path() and, for NFPROTO_IPV4, whether the packet carries
 * IP options (non-zero optlen in the skb's IP control block).
 * NOTE(review): the returned values are not visible in this extract;
 * presumably true is returned when either check triggers -- confirm.
 */
212 static bool nft_flow_offload_skip(struct sk_buff
*skb
, int family
)
214 if (skb_sec_path(skb
))
217 if (family
== NFPROTO_IPV4
) {
218 const struct ip_options
*opt
;
220 opt
= &(IPCB(skb
)->opt
);
222 if (unlikely(opt
->optlen
))
/*
 * nft_flow_offload_eval - expression evaluation: try to add the packet's
 * connection to the flowtable.
 * @expr: expression instance; its private data names the flowtable
 * @regs: register set; the verdict code is set to NFT_BREAK on the path
 *        visible at the end of this function
 * @pkt: packet being evaluated
 *
 * Visible sequence: bail-out tests (nft_flow_offload_skip(), protocol
 * switch with a TCP FIN/RST test, helper extension or
 * IPS_SEQ_ADJUST/IPS_NAT_CLASH status, unconfirmed conntrack), claim of
 * IPS_OFFLOAD_BIT via test_and_set_bit(), route construction, flow
 * allocation and route initialisation, marking both TCP directions
 * IP_CT_TCP_FLAG_BE_LIBERAL, and flow_offload_add().
 *
 * NOTE(review): the declarations of 'ct' and 'ret', the case labels, the
 * statements taken when each test fails, the error labels and the return
 * statements are not visible in this extract; the cleanup calls near the
 * end (dst_release, flow_offload_free, clear_bit) presumably belong to an
 * unwind ladder -- confirm against the full file.
 */
229 static void nft_flow_offload_eval(const struct nft_expr
*expr
,
230 struct nft_regs
*regs
,
231 const struct nft_pktinfo
*pkt
)
233 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
234 struct nf_flowtable
*flowtable
= &priv
->flowtable
->data
;
235 struct tcphdr _tcph
, *tcph
= NULL
;
236 struct nf_flow_route route
= {};
237 enum ip_conntrack_info ctinfo
;
238 struct flow_offload
*flow
;
239 enum ip_conntrack_dir dir
;
243 if (nft_flow_offload_skip(pkt
->skb
, nft_pf(pkt
)))
246 ct
= nf_ct_get(pkt
->skb
, &ctinfo
);
250 switch (ct
->tuplehash
[IP_CT_DIR_ORIGINAL
].tuple
.dst
.protonum
) {
/* _tcph is the on-stack copy skb_header_pointer() may fill */
252 tcph
= skb_header_pointer(pkt
->skb
, pkt
->xt
.thoff
,
253 sizeof(_tcph
), &_tcph
);
254 if (unlikely(!tcph
|| tcph
->fin
|| tcph
->rst
))
263 if (nf_ct_ext_exist(ct
, NF_CT_EXT_HELPER
) ||
264 ct
->status
& (IPS_SEQ_ADJUST
| IPS_NAT_CLASH
))
267 if (!nf_ct_is_confirmed(ct
))
/* atomically claim the offload bit; a set bit means it was already taken */
270 if (test_and_set_bit(IPS_OFFLOAD_BIT
, &ct
->status
))
273 dir
= CTINFO2DIR(ctinfo
);
274 if (nft_flow_route(pkt
, ct
, &route
, dir
, priv
->flowtable
) < 0)
277 flow
= flow_offload_alloc(ct
);
281 if (flow_offload_route_init(flow
, &route
) < 0)
/* tcph is only non-NULL for TCP; NOTE(review): the guard is not visible */
285 ct
->proto
.tcp
.seen
[0].flags
|= IP_CT_TCP_FLAG_BE_LIBERAL
;
286 ct
->proto
.tcp
.seen
[1].flags
|= IP_CT_TCP_FLAG_BE_LIBERAL
;
289 ret
= flow_offload_add(flowtable
, flow
);
/* release the reverse-direction dst obtained in nft_flow_route() */
293 dst_release(route
.tuple
[!dir
].dst
);
297 flow_offload_free(flow
);
299 dst_release(route
.tuple
[!dir
].dst
);
/* give the offload bit back so the connection can be retried */
301 clear_bit(IPS_OFFLOAD_BIT
, &ct
->status
);
303 regs
->verdict
.code
= NFT_BREAK
;
/*
 * nft_flow_offload_validate - restrict where the expression may be used.
 * @ctx: transaction context; ctx->chain is the chain being validated
 * @expr: expression instance (not used by the visible code)
 * @data: not used by the visible code
 *
 * Returns the result of nft_chain_validate_hooks() for a hook mask that
 * contains only NF_INET_FORWARD.
 */
306 static int nft_flow_offload_validate(const struct nft_ctx
*ctx
,
307 const struct nft_expr
*expr
,
308 const struct nft_data
**data
)
310 unsigned int hook_mask
= (1 << NF_INET_FORWARD
);
312 return nft_chain_validate_hooks(ctx
->chain
, hook_mask
);
/*
 * Netlink attribute policy of the expression: NFTA_FLOW_TABLE_NAME is a
 * string of at most NFT_NAME_MAXLEN - 1 bytes.
 * NOTE(review): the closing of the initializer is not visible in this
 * extract.
 */
315 static const struct nla_policy nft_flow_offload_policy
[NFTA_FLOW_MAX
+ 1] = {
316 [NFTA_FLOW_TABLE_NAME
] = { .type
= NLA_STRING
,
317 .len
= NFT_NAME_MAXLEN
- 1 },
/*
 * nft_flow_offload_init - bind the expression to a flowtable.
 * @ctx: transaction context (net, family and table are used)
 * @expr: expression instance being initialised
 * @tb: parsed netlink attributes
 *
 * Looks up the flowtable named by NFTA_FLOW_TABLE_NAME in ctx->table,
 * returns PTR_ERR() of the lookup result on failure, stores the flowtable
 * in the private data and finally returns nf_ct_netns_get().
 *
 * NOTE(review): the value returned when NFTA_FLOW_TABLE_NAME is missing,
 * the last argument(s) of nft_flowtable_lookup() (presumably 'genmask')
 * and any statements between storing the flowtable and the final return
 * are not visible in this extract.
 */
320 static int nft_flow_offload_init(const struct nft_ctx
*ctx
,
321 const struct nft_expr
*expr
,
322 const struct nlattr
* const tb
[])
324 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
325 u8 genmask
= nft_genmask_next(ctx
->net
);
326 struct nft_flowtable
*flowtable
;
328 if (!tb
[NFTA_FLOW_TABLE_NAME
])
331 flowtable
= nft_flowtable_lookup(ctx
->table
, tb
[NFTA_FLOW_TABLE_NAME
],
333 if (IS_ERR(flowtable
))
334 return PTR_ERR(flowtable
);
336 priv
->flowtable
= flowtable
;
339 return nf_ct_netns_get(ctx
->net
, ctx
->family
);
/*
 * nft_flow_offload_deactivate - deactivate callback of the expression.
 * @ctx: transaction context
 * @expr: expression instance
 * @phase: transaction phase, passed through unchanged
 *
 * Forwards to nf_tables_deactivate_flowtable() for the flowtable stored in
 * the expression's private data.
 */
342 static void nft_flow_offload_deactivate(const struct nft_ctx
*ctx
,
343 const struct nft_expr
*expr
,
344 enum nft_trans_phase phase
)
346 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
348 nf_tables_deactivate_flowtable(ctx
, priv
->flowtable
, phase
);
/*
 * nft_flow_offload_activate - activate callback of the expression.
 * @ctx: transaction context (not used by the visible code)
 * @expr: expression instance
 *
 * Increments the 'use' counter of the flowtable stored in the expression's
 * private data.
 */
351 static void nft_flow_offload_activate(const struct nft_ctx
*ctx
,
352 const struct nft_expr
*expr
)
354 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
356 priv
->flowtable
->use
++;
/*
 * nft_flow_offload_destroy - destroy callback of the expression.
 * @ctx: transaction context (net and family are used)
 * @expr: expression instance (not used by the visible code)
 *
 * Calls nf_ct_netns_put() with the same net/family pair that
 * nft_flow_offload_init() passes to nf_ct_netns_get().
 */
359 static void nft_flow_offload_destroy(const struct nft_ctx
*ctx
,
360 const struct nft_expr
*expr
)
362 nf_ct_netns_put(ctx
->net
, ctx
->family
);
/*
 * nft_flow_offload_dump - dump the expression to a netlink message.
 * @skb: message buffer being filled
 * @expr: expression instance
 *
 * Emits the name of the bound flowtable as the NFTA_FLOW_TABLE_NAME string
 * attribute and jumps to nla_put_failure if that fails.
 * NOTE(review): the nla_put_failure label and both return statements are
 * not visible in this extract.
 */
365 static int nft_flow_offload_dump(struct sk_buff
*skb
, const struct nft_expr
*expr
)
367 struct nft_flow_offload
*priv
= nft_expr_priv(expr
);
369 if (nla_put_string(skb
, NFTA_FLOW_TABLE_NAME
, priv
->flowtable
->name
))
370 goto nla_put_failure
;
/*
 * Forward declaration: the ops table below points at the expression type,
 * which is defined after it and in turn points back at the ops.
 */
378 static struct nft_expr_type nft_flow_offload_type
;
/*
 * Operations of the flow offload expression. The private area is sized for
 * struct nft_flow_offload; every callback is implemented in this file.
 * NOTE(review): the closing of the initializer is not visible in this
 * extract.
 */
379 static const struct nft_expr_ops nft_flow_offload_ops
= {
380 .type
= &nft_flow_offload_type
,
381 .size
= NFT_EXPR_SIZE(sizeof(struct nft_flow_offload
)),
382 .eval
= nft_flow_offload_eval
,
383 .init
= nft_flow_offload_init
,
384 .activate
= nft_flow_offload_activate
,
385 .deactivate
= nft_flow_offload_deactivate
,
386 .destroy
= nft_flow_offload_destroy
,
387 .validate
= nft_flow_offload_validate
,
388 .dump
= nft_flow_offload_dump
,
/*
 * Expression type registered by this module under the name "flow_offload",
 * with its ops table, netlink policy and highest attribute number
 * (NFTA_FLOW_MAX).
 * NOTE(review): the closing of the initializer is not visible in this
 * extract.
 */
391 static struct nft_expr_type nft_flow_offload_type __read_mostly
= {
392 .name
= "flow_offload",
393 .ops
= &nft_flow_offload_ops
,
394 .policy
= nft_flow_offload_policy
,
395 .maxattr
= NFTA_FLOW_MAX
,
396 .owner
= THIS_MODULE
,
/*
 * flow_offload_netdev_event - netdevice notifier callback.
 * @this: notifier block (not used by the visible code)
 * @event: netdevice event code
 * @ptr: notifier info, converted to the affected net_device
 *
 * Calls nf_flow_table_cleanup() for the device.
 * NOTE(review): the statement executed for events other than NETDEV_DOWN
 * and the return values are not visible in this extract; presumably such
 * events return early without cleanup -- confirm.
 */
399 static int flow_offload_netdev_event(struct notifier_block
*this,
400 unsigned long event
, void *ptr
)
402 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
404 if (event
!= NETDEV_DOWN
)
407 nf_flow_table_cleanup(dev
);
/*
 * Notifier block that routes netdevice events to
 * flow_offload_netdev_event(); registered in module init and unregistered
 * in module exit.
 */
412 static struct notifier_block flow_offload_netdev_notifier
= {
413 .notifier_call
= flow_offload_netdev_event
,
/*
 * nft_flow_offload_module_init - module entry point.
 *
 * Registers the netdevice notifier first, then the expression type.
 * NOTE(review): the declaration of 'err', the error checks, labels and
 * return statements are not visible in this extract; the trailing
 * unregister_netdevice_notifier() presumably undoes the first step when
 * registering the expression fails -- confirm.
 */
416 static int __init
nft_flow_offload_module_init(void)
420 err
= register_netdevice_notifier(&flow_offload_netdev_notifier
);
424 err
= nft_register_expr(&nft_flow_offload_type
);
431 unregister_netdevice_notifier(&flow_offload_netdev_notifier
);
/*
 * nft_flow_offload_module_exit - module exit point.
 *
 * Tears down in the reverse order of module init: the expression type is
 * unregistered first, then the netdevice notifier.
 */
436 static void __exit
nft_flow_offload_module_exit(void)
438 nft_unregister_expr(&nft_flow_offload_type
);
439 unregister_netdevice_notifier(&flow_offload_netdev_notifier
);
/* Hook the init/exit functions defined above into the module loader. */
442 module_init(nft_flow_offload_module_init
);
443 module_exit(nft_flow_offload_module_exit
);
/* Module metadata; the alias uses the expression name "flow_offload". */
445 MODULE_LICENSE("GPL");
446 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
447 MODULE_ALIAS_NFT_EXPR("flow_offload");
448 MODULE_DESCRIPTION("nftables hardware flow offload module");