/* net/netfilter/nf_flow_table_offload.c */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

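/*
 * Offload requests are not issued from the packet path: add, delete and
 * stats commands are queued as work items on the dedicated workqueues
 * below and pushed to the drivers from process context.
 */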
static struct workqueue_struct *nf_flow_offload_add_wq;
static struct workqueue_struct *nf_flow_offload_del_wq;
static struct workqueue_struct *nf_flow_offload_stats_wq;

struct flow_offload_work {
	struct list_head list;
	enum flow_cls_command cmd;
	int priority;
	struct nf_flowtable *flowtable;
	struct flow_offload *flow;
	struct work_struct work;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
	(__match)->dissector.offset[__type] =		\
		offsetof(struct nf_flow_key, __field)

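/*
 * Match on the encapsulation header when the reply direction carries
 * transmit tunnel metadata (lightweight tunnel state): tunnel key id plus
 * the outer IPv4/IPv6 addresses, swapped because the rule matches traffic
 * received through the tunnel.
 */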
static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
				   struct ip_tunnel_info *tun_info)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	unsigned int enc_keys;

	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
		return;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
	mask->enc_key_id.keyid = 0xffffffff;
	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);

	if (ip_tunnel_info_af(tun_info) == AF_INET) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
				  enc_ipv4);
		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
		if (key->enc_ipv4.src)
			mask->enc_ipv4.src = 0xffffffff;
		if (key->enc_ipv4.dst)
			mask->enc_ipv4.dst = 0xffffffff;
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
	} else {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
				  enc_ipv6);
		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
		       sizeof(struct in6_addr));
		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
		       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.src, 0xff,
			       sizeof(struct in6_addr));
		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
			   sizeof(struct in6_addr)))
			memset(&mask->enc_ipv6.dst, 0xff,
			       sizeof(struct in6_addr));
		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
	}

	match->dissector.used_keys |= enc_keys;
}

static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
				    struct flow_dissector_key_vlan *mask,
				    u16 vlan_id, __be16 proto)
{
	key->vlan_id = vlan_id;
	mask->vlan_id = VLAN_VID_MASK;
	key->vlan_tpid = proto;
	mask->vlan_tpid = 0xffff;
}

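/*
 * Translate a flow_offload_tuple into the flow dissector key/mask pair
 * used for hardware offload: ingress ifindex, optional VLAN / Q-in-Q
 * tags, L3 addresses, L4 protocol and ports. Unsupported protocols are
 * rejected with -EOPNOTSUPP.
 */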
static int nf_flow_rule_match(struct nf_flow_match *match,
			      const struct flow_offload_tuple *tuple,
			      struct dst_entry *other_dst)
{
	struct nf_flow_key *mask = &match->mask;
	struct nf_flow_key *key = &match->key;
	struct ip_tunnel_info *tun_info;
	bool vlan_encap = false;

	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

	if (other_dst && other_dst->lwtstate) {
		tun_info = lwt_tun_info(other_dst->lwtstate);
		nf_flow_rule_lwt_match(match, tun_info);
	}

	key->meta.ingress_ifindex = tuple->iifidx;
	mask->meta.ingress_ifindex = 0xffffffff;

	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
					tuple->encap[0].id,
					tuple->encap[0].proto);
		vlan_encap = true;
	}

	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
		if (vlan_encap) {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
					  cvlan);
			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		} else {
			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
					  vlan);
			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
						tuple->encap[1].id,
						tuple->encap[1].proto);
		}
	}

	switch (tuple->l3proto) {
	case AF_INET:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		key->basic.n_proto = htons(ETH_P_IP);
		key->ipv4.src = tuple->src_v4.s_addr;
		mask->ipv4.src = 0xffffffff;
		key->ipv4.dst = tuple->dst_v4.s_addr;
		mask->ipv4.dst = 0xffffffff;
		break;
	case AF_INET6:
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		key->basic.n_proto = htons(ETH_P_IPV6);
		key->ipv6.src = tuple->src_v6;
		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
		key->ipv6.dst = tuple->dst_v6;
		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
		break;
	default:
		return -EOPNOTSUPP;
	}
	mask->control.addr_type = 0xffff;
	match->dissector.used_keys |= BIT(key->control.addr_type);
	mask->basic.n_proto = 0xffff;

	switch (tuple->l4proto) {
	case IPPROTO_TCP:
		key->tcp.flags = 0;
		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
		break;
	case IPPROTO_UDP:
		break;
	default:
		return -EOPNOTSUPP;
	}

	key->basic.ip_proto = tuple->l4proto;
	mask->basic.ip_proto = 0xff;

	key->tp.src = tuple->src_port;
	mask->tp.src = 0xffff;
	key->tp.dst = tuple->dst_port;
	mask->tp.dst = 0xffff;

	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
				      BIT(FLOW_DISSECTOR_KEY_PORTS);
	return 0;
}

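/*
 * Action helpers: flow_offload_mangle() fills in a FLOW_ACTION_MANGLE
 * entry that rewrites one 32-bit word of a header at the given offset,
 * and flow_action_entry_next() hands out the next free slot in the
 * rule's action array.
 */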
static void flow_offload_mangle(struct flow_action_entry *entry,
				enum flow_action_mangle_base htype, u32 offset,
				const __be32 *value, const __be32 *mask)
{
	entry->id = FLOW_ACTION_MANGLE;
	entry->mangle.htype = htype;
	entry->mangle.offset = offset;
	memcpy(&entry->mangle.mask, mask, sizeof(u32));
	memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
	int i = flow_rule->rule->action.num_entries++;

	return &flow_rule->rule->action.entries[i];
}

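/*
 * Rewrite the Ethernet source address. For XMIT_DIRECT the address is
 * cached in the tuple; for XMIT_NEIGH it is taken from the device behind
 * the other direction's ingress interface, i.e. this direction's egress
 * device. The 6-byte MAC is written as two 32-bit mangle actions.
 */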
static int flow_offload_eth_src(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	struct net_device *dev = NULL;
	const unsigned char *addr;
	u32 mask, val;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		addr = this_tuple->out.h_source;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		dev = dev_get_by_index(net, other_tuple->iifidx);
		if (!dev)
			return -ENOENT;

		addr = dev->dev_addr;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffff0000;
	memcpy(&val16, addr, 2);
	val = val16 << 16;
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	mask = ~0xffffffff;
	memcpy(&val, addr + 2, 4);
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
			    &val, &mask);

	dev_put(dev);

	return 0;
}

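/*
 * Rewrite the Ethernet destination address. For XMIT_NEIGH the address
 * is resolved via the neighbour cache of the cached route and must be in
 * a valid NUD state, otherwise the flow cannot be offloaded yet.
 */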
static int flow_offload_eth_dst(struct net *net,
				const struct flow_offload *flow,
				enum flow_offload_tuple_dir dir,
				struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
	const struct flow_offload_tuple *other_tuple, *this_tuple;
	const struct dst_entry *dst_cache;
	unsigned char ha[ETH_ALEN];
	struct neighbour *n;
	const void *daddr;
	u32 mask, val;
	u8 nud_state;
	u16 val16;

	this_tuple = &flow->tuplehash[dir].tuple;

	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		ether_addr_copy(ha, this_tuple->out.h_dest);
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		daddr = &other_tuple->src_v4;
		dst_cache = this_tuple->dst_cache;
		n = dst_neigh_lookup(dst_cache, daddr);
		if (!n)
			return -ENOENT;

		read_lock_bh(&n->lock);
		nud_state = n->nud_state;
		ether_addr_copy(ha, n->ha);
		read_unlock_bh(&n->lock);
		neigh_release(n);

		if (!(nud_state & NUD_VALID))
			return -ENOENT;
		break;
	default:
		return -EOPNOTSUPP;
	}

	mask = ~0xffffffff;
	memcpy(&val, ha, 4);
	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
			    &val, &mask);

	mask = ~0x0000ffff;
	memcpy(&val16, ha + 4, 2);
	val = val16;
	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
			    &val, &mask);

	return 0;
}

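/*
 * IPv4 NAT: rewrite the source (SNAT) or destination (DNAT) address in
 * the IP header with a single 32-bit mangle, using the translated
 * address recorded in the opposite direction's tuple.
 */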
static void flow_offload_ipv4_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask = ~htonl(0xffffffff);
	__be32 addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
		offset = offsetof(struct iphdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
		offset = offsetof(struct iphdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
			    &addr, &mask);
}

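/*
 * IPv6 NAT: a 128-bit address rewrite is emitted as four consecutive
 * 32-bit mangle actions starting at the saddr/daddr offset in the IPv6
 * header.
 */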
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
				     unsigned int offset,
				     const __be32 *addr, const __be32 *mask)
{
	struct flow_action_entry *entry;
	int i, j;

	for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
		entry = flow_action_entry_next(flow_rule);
		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
				    offset + i, &addr[j], mask);
	}
}

static void flow_offload_ipv6_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	u32 mask = ~htonl(0xffffffff);
	const __be32 *addr;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, daddr);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
		offset = offsetof(struct ipv6hdr, saddr);
		break;
	default:
		return;
	}

	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	u8 type = 0;

	switch (protonum) {
	case IPPROTO_TCP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
		break;
	case IPPROTO_UDP:
		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
		break;
	default:
		break;
	}

	return type;
}

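/*
 * Port NAT: the mangle granularity is 32 bits, so the 16-bit port is
 * placed in the upper or lower half of the first word of the TCP/UDP
 * header and the mask protects the other half.
 */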
static void flow_offload_port_snat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
				   const struct flow_offload *flow,
				   enum flow_offload_tuple_dir dir,
				   struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
	u32 mask, port;
	u32 offset;

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
		offset = 0; /* offsetof(struct tcphdr, dest); */
		port = htonl(port);
		mask = ~htonl(0xffff);
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
		offset = 0; /* offsetof(struct tcphdr, source); */
		port = htonl(port << 16);
		mask = ~htonl(0xffff0000);
		break;
	default:
		return;
	}

	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
			    &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
				       const struct flow_offload *flow,
				       struct nf_flow_rule *flow_rule)
{
	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

	entry->id = FLOW_ACTION_CSUM;
	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

	switch (protonum) {
	case IPPROTO_TCP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
		break;
	case IPPROTO_UDP:
		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
		break;
	}
}

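/*
 * Final action: redirect the packet to the egress device. The device
 * reference taken here is dropped in __nf_flow_offload_destroy() once
 * the rule has been passed to the drivers.
 */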
static void flow_offload_redirect(struct net *net,
				  const struct flow_offload *flow,
				  enum flow_offload_tuple_dir dir,
				  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple, *other_tuple;
	struct flow_action_entry *entry;
	struct net_device *dev;
	int ifindex;

	this_tuple = &flow->tuplehash[dir].tuple;
	switch (this_tuple->xmit_type) {
	case FLOW_OFFLOAD_XMIT_DIRECT:
		this_tuple = &flow->tuplehash[dir].tuple;
		ifindex = this_tuple->out.hw_ifidx;
		break;
	case FLOW_OFFLOAD_XMIT_NEIGH:
		other_tuple = &flow->tuplehash[!dir].tuple;
		ifindex = other_tuple->iifidx;
		break;
	default:
		return;
	}

	dev = dev_get_by_index(net, ifindex);
	if (!dev)
		return;

	entry = flow_action_entry_next(flow_rule);
	entry->id = FLOW_ACTION_REDIRECT;
	entry->dev = dev;
}

static void flow_offload_encap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *this_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	this_tuple = &flow->tuplehash[dir].tuple;
	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = this_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
			entry->tunnel = tun_info;
		}
	}
}

static void flow_offload_decap_tunnel(const struct flow_offload *flow,
				      enum flow_offload_tuple_dir dir,
				      struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	struct flow_action_entry *entry;
	struct dst_entry *dst;

	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
		return;

	dst = other_tuple->dst_cache;
	if (dst && dst->lwtstate) {
		struct ip_tunnel_info *tun_info;

		tun_info = lwt_tun_info(dst->lwtstate);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_TUNNEL_DECAP;
		}
	}
}

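/*
 * Build the action chain shared by the IPv4 and IPv6 route types:
 * tunnel decap/encap, Ethernet header rewrite, VLAN pop for tags seen on
 * ingress and VLAN/PPPoE push for the encapsulation expected on egress.
 */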
static int
nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
			  enum flow_offload_tuple_dir dir,
			  struct nf_flow_rule *flow_rule)
{
	const struct flow_offload_tuple *other_tuple;
	const struct flow_offload_tuple *tuple;
	int i;

	flow_offload_decap_tunnel(flow, dir, flow_rule);
	flow_offload_encap_tunnel(flow, dir, flow_rule);

	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
		return -1;

	tuple = &flow->tuplehash[dir].tuple;

	for (i = 0; i < tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (tuple->in_vlan_ingress & BIT(i))
			continue;

		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
			entry = flow_action_entry_next(flow_rule);
			entry->id = FLOW_ACTION_VLAN_POP;
		}
	}

	other_tuple = &flow->tuplehash[!dir].tuple;

	for (i = 0; i < other_tuple->encap_num; i++) {
		struct flow_action_entry *entry;

		if (other_tuple->in_vlan_ingress & BIT(i))
			continue;

		entry = flow_action_entry_next(flow_rule);

		switch (other_tuple->encap[i].proto) {
		case htons(ETH_P_PPP_SES):
			entry->id = FLOW_ACTION_PPPOE_PUSH;
			entry->pppoe.sid = other_tuple->encap[i].id;
			break;
		case htons(ETH_P_8021Q):
			entry->id = FLOW_ACTION_VLAN_PUSH;
			entry->vlan.vid = other_tuple->encap[i].id;
			entry->vlan.proto = other_tuple->encap[i].proto;
			break;
		}
	}

	return 0;
}

int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
	    test_bit(NF_FLOW_DNAT, &flow->flags))
		flow_offload_ipv4_checksum(net, flow, flow_rule);

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
			    enum flow_offload_tuple_dir dir,
			    struct nf_flow_rule *flow_rule)
{
	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
		return -1;

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
		flow_offload_port_snat(net, flow, dir, flow_rule);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
		flow_offload_port_dnat(net, flow, dir, flow_rule);
	}

	flow_offload_redirect(net, flow, dir, flow_rule);

	return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX	16

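/*
 * Allocate and populate the offload rule for one direction of the flow:
 * set up the dissector match and let the flowtable type's ->action()
 * callback fill in the action list.
 */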
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
			   const struct flow_offload_work *offload,
			   enum flow_offload_tuple_dir dir)
{
	const struct nf_flowtable *flowtable = offload->flowtable;
	const struct flow_offload_tuple *tuple, *other_tuple;
	const struct flow_offload *flow = offload->flow;
	struct dst_entry *other_dst = NULL;
	struct nf_flow_rule *flow_rule;
	int err = -ENOMEM;

	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
	if (!flow_rule)
		goto err_flow;

	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
	if (!flow_rule->rule)
		goto err_flow_rule;

	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
	flow_rule->rule->match.mask = &flow_rule->match.mask;
	flow_rule->rule->match.key = &flow_rule->match.key;

	tuple = &flow->tuplehash[dir].tuple;
	other_tuple = &flow->tuplehash[!dir].tuple;
	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
		other_dst = other_tuple->dst_cache;

	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
	if (err < 0)
		goto err_flow_match;

	flow_rule->rule->action.num_entries = 0;
	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
		goto err_flow_match;

	return flow_rule;

err_flow_match:
	kfree(flow_rule->rule);
err_flow_rule:
	kfree(flow_rule);
err_flow:
	return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
	struct flow_action_entry *entry;
	int i;

	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
		entry = &flow_rule->rule->action.entries[i];
		if (entry->id != FLOW_ACTION_REDIRECT)
			continue;

		dev_put(entry->dev);
	}
	kfree(flow_rule->rule);
	kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
	int i;

	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
		__nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	struct net *net = read_pnet(&offload->flowtable->net);

	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_ORIGINAL);
	if (!flow_rule[0])
		return -ENOMEM;

	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
						  FLOW_OFFLOAD_DIR_REPLY);
	if (!flow_rule[1]) {
		__nf_flow_offload_destroy(flow_rule[0]);
		return -ENOMEM;
	}

	return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
				 __be16 proto, int priority,
				 enum flow_cls_command cmd,
				 const struct flow_offload_tuple *tuple,
				 struct netlink_ext_ack *extack)
{
	cls_flow->common.protocol = proto;
	cls_flow->common.prio = priority;
	cls_flow->common.extack = extack;
	cls_flow->command = cmd;
	cls_flow->cookie = (unsigned long)tuple;
}

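/*
 * Send one flow_cls_offload command (replace, destroy or stats) for a
 * single direction to every callback registered on the flowtable block.
 * Returns the number of callbacks that accepted the command.
 */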
static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
				 struct flow_offload *flow,
				 struct nf_flow_rule *flow_rule,
				 enum flow_offload_tuple_dir dir,
				 int priority, int cmd,
				 struct flow_stats *stats,
				 struct list_head *block_cb_list)
{
	struct flow_cls_offload cls_flow = {};
	struct flow_block_cb *block_cb;
	struct netlink_ext_ack extack;
	__be16 proto = ETH_P_ALL;
	int err, i = 0;

	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
			     &flow->tuplehash[dir].tuple, &extack);
	if (cmd == FLOW_CLS_REPLACE)
		cls_flow.rule = flow_rule->rule;

	down_read(&flowtable->flow_block_lock);
	list_for_each_entry(block_cb, block_cb_list, list) {
		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
				   block_cb->cb_priv);
		if (err < 0)
			continue;

		i++;
	}
	up_read(&flowtable->flow_block_lock);

	if (cmd == FLOW_CLS_STATS)
		memcpy(stats, &cls_flow.stats, sizeof(*stats));

	return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
				  struct nf_flow_rule *flow_rule,
				  enum flow_offload_tuple_dir dir)
{
	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
				     flow_rule, dir, offload->priority,
				     FLOW_CLS_REPLACE, NULL,
				     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
				   enum flow_offload_tuple_dir dir)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_DESTROY, NULL,
			      &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
				 struct nf_flow_rule *flow_rule[])
{
	int ok_count = 0;

	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
					   FLOW_OFFLOAD_DIR_ORIGINAL);
	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
					   FLOW_OFFLOAD_DIR_REPLY);
	if (ok_count == 0)
		return -ENOENT;

	return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
	int err;

	err = nf_flow_offload_alloc(offload, flow_rule);
	if (err < 0)
		return;

	err = flow_offload_rule_add(offload, flow_rule);
	if (err < 0)
		goto out;

	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);

out:
	nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
				     enum flow_offload_tuple_dir dir,
				     struct flow_stats *stats)
{
	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
			      offload->priority, FLOW_CLS_STATS, stats,
			      &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
	u64 lastused;

	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
	offload->flow->timeout = max_t(u64, offload->flow->timeout,
				       lastused + flow_offload_get_timeout(offload->flow));

	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
		if (stats[0].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_ORIGINAL,
				       stats[0].pkts, stats[0].bytes);
		if (stats[1].pkts)
			nf_ct_acct_add(offload->flow->ct,
				       FLOW_OFFLOAD_DIR_REPLY,
				       stats[1].pkts, stats[1].bytes);
	}
}

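/*
 * Workqueue handler: dispatch the queued command, then clear
 * NF_FLOW_HW_PENDING so the next request for this flow can be scheduled.
 */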
static void flow_offload_work_handler(struct work_struct *work)
{
	struct flow_offload_work *offload;

	offload = container_of(work, struct flow_offload_work, work);
	switch (offload->cmd) {
	case FLOW_CLS_REPLACE:
		flow_offload_work_add(offload);
		break;
	case FLOW_CLS_DESTROY:
		flow_offload_work_del(offload);
		break;
	case FLOW_CLS_STATS:
		flow_offload_work_stats(offload);
		break;
	default:
		WARN_ON_ONCE(1);
	}

	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
	kfree(offload);
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
	if (offload->cmd == FLOW_CLS_REPLACE)
		queue_work(nf_flow_offload_add_wq, &offload->work);
	else if (offload->cmd == FLOW_CLS_DESTROY)
		queue_work(nf_flow_offload_del_wq, &offload->work);
	else
		queue_work(nf_flow_offload_stats_wq, &offload->work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
			   struct flow_offload *flow, unsigned int cmd)
{
	struct flow_offload_work *offload;

	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
		return NULL;

	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
	if (!offload) {
		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
		return NULL;
	}

	offload->cmd = cmd;
	offload->flow = flow;
	offload->priority = flowtable->priority;
	offload->flowtable = flowtable;
	INIT_WORK(&offload->work, flow_offload_work_handler);

	return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
			 struct flow_offload *flow)
{
	struct flow_offload_work *offload;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
	if (!offload)
		return;

	set_bit(NF_FLOW_HW_DYING, &flow->flags);
	flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
			   struct flow_offload *flow)
{
	struct flow_offload_work *offload;
	__s32 delta;

	delta = nf_flow_timeout_delta(flow->timeout);
	if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
		return;

	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
	if (!offload)
		return;

	flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
	if (nf_flowtable_hw_offload(flowtable)) {
		flush_workqueue(nf_flow_offload_add_wq);
		flush_workqueue(nf_flow_offload_del_wq);
		flush_workqueue(nf_flow_offload_stats_wq);
	}
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
				     struct flow_block_offload *bo,
				     enum flow_block_command cmd)
{
	struct flow_block_cb *block_cb, *next;
	int err = 0;

	switch (cmd) {
	case FLOW_BLOCK_BIND:
		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
		break;
	case FLOW_BLOCK_UNBIND:
		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
			list_del(&block_cb->list);
			flow_block_cb_free(block_cb);
		}
		break;
	default:
		WARN_ON_ONCE(1);
		err = -EOPNOTSUPP;
	}

	return err;
}

static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
					     struct net *net,
					     enum flow_block_command cmd,
					     struct nf_flowtable *flowtable,
					     struct netlink_ext_ack *extack)
{
	memset(bo, 0, sizeof(*bo));
	bo->net = net;
	bo->block = &flowtable->flow_block;
	bo->command = cmd;
	bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
	bo->extack = extack;
	bo->cb_list_head = &flowtable->flow_block.cb_list;
	INIT_LIST_HEAD(&bo->cb_list);
}

static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
{
	struct nf_flowtable *flowtable = block_cb->indr.data;
	struct net_device *dev = block_cb->indr.dev;

	nf_flow_table_gc_cleanup(flowtable, dev);
	down_write(&flowtable->flow_block_lock);
	list_del(&block_cb->list);
	list_del(&block_cb->driver_list);
	flow_block_cb_free(block_cb);
	up_write(&flowtable->flow_block_lock);
}

static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
					  struct nf_flowtable *flowtable,
					  struct net_device *dev,
					  enum flow_block_command cmd,
					  struct netlink_ext_ack *extack)
{
	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);

	return flow_indr_dev_setup_offload(dev, NULL, TC_SETUP_FT, flowtable, bo,
					   nf_flow_table_indr_cleanup);
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
				     struct nf_flowtable *flowtable,
				     struct net_device *dev,
				     enum flow_block_command cmd,
				     struct netlink_ext_ack *extack)
{
	int err;

	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
					 extack);
	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
	if (err < 0)
		return err;

	return 0;
}

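/*
 * Bind or unbind the flowtable block on a device: use ndo_setup_tc()
 * when the driver implements it, otherwise go through the indirect flow
 * block infrastructure.
 */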
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
				struct net_device *dev,
				enum flow_block_command cmd)
{
	struct netlink_ext_ack extack = {};
	struct flow_block_offload bo;
	int err;

	if (!nf_flowtable_hw_offload(flowtable))
		return 0;

	if (dev->netdev_ops->ndo_setup_tc)
		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
						&extack);
	else
		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
						     &extack);
	if (err < 0)
		return err;

	return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

int nf_flow_table_offload_init(void)
{
	nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_add_wq)
		return -ENOMEM;

	nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
						 WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_del_wq)
		goto err_del_wq;

	nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
						   WQ_UNBOUND | WQ_SYSFS, 0);
	if (!nf_flow_offload_stats_wq)
		goto err_stats_wq;

	return 0;

err_stats_wq:
	destroy_workqueue(nf_flow_offload_del_wq);
err_del_wq:
	destroy_workqueue(nf_flow_offload_add_wq);
	return -ENOMEM;
}

void nf_flow_table_offload_exit(void)
{
	destroy_workqueue(nf_flow_offload_add_wq);
	destroy_workqueue(nf_flow_offload_del_wq);
	destroy_workqueue(nf_flow_offload_stats_wq);