#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

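/*
 * Offload requests are serviced asynchronously: add, del and stats
 * commands each run on their own dedicated workqueue, allocated in
 * nf_flow_table_offload_init().
 */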
static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;

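/*
 * One unit of offload work: a single add, del or stats command for one
 * flow, executed by flow_offload_work_handler().
 */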
struct flow_offload_work {
	struct list_head	list;
	enum flow_cls_command	cmd;
	int			priority;
	struct nf_flowtable	*flowtable;
	struct flow_offload	*flow;
	struct work_struct	work;
};

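/*
 * Record, inside the match's dissector, where each key lives within
 * struct nf_flow_key, so drivers can locate every field in the
 * key/mask blobs.
 */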
#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
				    struct flow_dissector_key_vlan *mask,
				    u16 vlan_id, __be16 proto)
{
	key->vlan_id = vlan_id;
	mask->vlan_id = VLAN_VID_MASK;
	key->vlan_tpid = proto;
	mask->vlan_tpid = 0xffff;
}

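/*
 * Fill in the flow_rule match (key, mask, dissector) from the flow
 * tuple: ingress interface, optional VLAN/Q-in-Q tags, L3 addresses
 * and L4 ports. Unsupported l3proto/l4proto values yield -EOPNOTSUPP.
 */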
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;
	bool vlan_encap = false;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	key->meta.ingress_ifindex = tuple->iifidx;
	mask->meta.ingress_ifindex = 0xffffffff;

	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
					tuple->encap[0].id,
					tuple->encap[0].proto);
		vlan_encap = true;
	}

	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
		if (vlan_encap) {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
					  cvlan);
			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		} else {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
					  vlan);
			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		}
	}

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}

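/* Append a mangle action that rewrites 32 bits at @offset in header @htype. */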
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

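/*
 * The Ethernet source address is rewritten with two 32-bit mangles:
 * the word at offset 4 carries the first two address octets, the word
 * at offset 8 the remaining four.
 */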
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	struct net_device *dev = NULL;
	const unsigned char *addr;
	u32 mask, val;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		addr = this_tuple->out.h_source;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		dev = dev_get_by_index(net, other_tuple->iifidx);
		if (!dev)
			return -ENOENT;

		addr = dev->dev_addr;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffff0000;
	memcpy(&val16, addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);

	dev_put(dev);

	return 0;
}

static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	const void *daddr;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ether_addr_copy(ha, this_tuple->out.h_dest);
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		daddr = &other_tuple->src_v4;
		dst_cache = this_tuple->dst_cache;
		n = dst_neigh_lookup(dst_cache, daddr);
		if (!n)
			return -ENOENT;

		read_lock_bh(&n->lock);
		nud_state = n->nud_state;
		ether_addr_copy(ha, n->ha);
		read_unlock_bh(&n->lock);
		neigh_release(n);

		if (!(nud_state & NUD_VALID))
			return -ENOENT;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	return 0;
}

static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i, j;

	for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i, &addr[j], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

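/* Final action: forward the packet to the egress (or peer ingress) device. */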
static void flow_offload_redirect(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple, *other_tuple;
	struct flow_action_entry *entry;
	struct net_device *dev;
	int ifindex;

	this_tuple = &flow->tuplehash[dir].tuple;
	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		this_tuple = &flow->tuplehash[dir].tuple;
		ifindex = this_tuple->out.hw_ifidx;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		ifindex = other_tuple->iifidx;
		break;
	default:
		return;
	}

	dev = dev_get_by_index(net, ifindex);
	if (!dev)
		return;

	entry = flow_action_entry_next(flow_rule);
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = dev;
}

static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	this_tuple = &flow->tuplehash[dir].tuple;
	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = this_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = other_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}

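/*
 * Actions shared by the IPv4 and IPv6 route types, in order: tunnel
 * decap/encap, Ethernet header rewrite, VLAN pop for tags seen on
 * ingress, then VLAN/PPPoE push for tags expected on egress.
 */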
static int
nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
			  enum flow_offload_tuple_dir dir,
			  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	const struct flow_offload_tuple *tuple;
	int i;

	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	tuple = &flow->tuplehash[dir].tuple;

	for (i = 0; i < tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (tuple->in_vlan_ingress & BIT(i))
			continue;

		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_VLAN_POP;
		}
	}

	other_tuple = &flow->tuplehash[!dir].tuple;

	for (i = 0; i < other_tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (other_tuple->in_vlan_ingress & BIT(i))
			continue;

		entry = flow_action_entry_next(flow_rule);

		switch (other_tuple->encap[i].proto) {
		case htons(ETH_P_PPP_SES):
			entry->id = FLOW_ACTION_PPPOE_PUSH;
			entry->pppoe.sid = other_tuple->encap[i].id;
			break;
		case htons(ETH_P_8021Q):
			entry->id = FLOW_ACTION_VLAN_PUSH;
			entry->vlan.vid = other_tuple->encap[i].id;
			entry->vlan.proto = other_tuple->encap[i].proto;
			break;
		}
	}

	return 0;
}

int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX	16

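/*
 * Build the flow_rule for one direction of the flow: allocate it, fill
 * in the match and let the flowtable type's ->action() callback append
 * the actions.
 */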
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload_tuple *tuple, *other_tuple;
	const struct flow_offload *flow = offload->flow;
	struct dst_entry *other_dst = NULL;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
		other_dst = other_tuple->dst_cache;

	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

*flow_rule
)
769 struct flow_action_entry
*entry
;
772 for (i
= 0; i
< flow_rule
->rule
->action
.num_entries
; i
++) {
773 entry
= &flow_rule
->rule
->action
.entries
[i
];
774 if (entry
->id
!= FLOW_ACTION_REDIRECT
)
779 kfree(flow_rule
->rule
);
static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

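/*
 * Run one classifier command (replace, destroy or stats) through every
 * registered block callback; the return value is the number of
 * callbacks that accepted it.
 */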
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		goto out;

	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

out:
	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + flow_offload_get_timeout(offload->flow));

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}

static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

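/* Dispatch the work item to the workqueue matching its command. */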
static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	if (offload->cmd == FLOW_CLS_REPLACE)
		queue_work(nf_flow_offload_add_wq, &offload->work);
	else if (offload->cmd == FLOW_CLS_DESTROY)
		queue_work(nf_flow_offload_del_wq, &offload->work);
	else
		queue_work(nf_flow_offload_stats_wq, &offload->work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

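/* Wait for all pending offload work on every workqueue to complete. */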
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable)) {
		flush_workqueue(nf_flow_offload_add_wq);
		flush_workqueue(nf_flow_offload_del_wq);
		flush_workqueue(nf_flow_offload_stats_wq);
	}
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net		= net;
	bo->block	= &flowtable->flow_block;
	bo->command	= cmd;
	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack	= extack;
	bo->cb_list_head = &flowtable->flow_block.cb_list;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

int nf_flow_table_offload_init(void)
{
	nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_add_wq)
		return -ENOMEM;

	nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_del_wq)
		goto err_del_wq;

	nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
						   WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_stats_wq)
		goto err_stats_wq;

	return 0;

err_stats_wq:
	destroy_workqueue(nf_flow_offload_del_wq);
err_del_wq:
	destroy_workqueue(nf_flow_offload_add_wq);
	return -ENOMEM;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_add_wq);
	destroy_workqueue(nf_flow_offload_del_wq);
	destroy_workqueue(nf_flow_offload_stats_wq);
}