/* net/netfilter/nf_flow_table.c - generic flow table infrastructure */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

/* Container tying a flow entry to its conntrack entry; the rcu_head lets
 * the entry be freed after a grace period once it has been removed from
 * the flow table.
 */
struct flow_offload_entry {
	struct flow_offload	flow;
	struct nf_conn		*ct;
	struct rcu_head		rcu_head;
};

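/*
 * flow_offload_alloc - map a confirmed conntrack entry to a flow entry
 *
 * Takes a reference on @ct and on both cached routes; on failure every
 * reference acquired so far is dropped again and NULL is returned. A
 * minimal caller sketch (hypothetical names; in-tree callers such as the
 * flow offload expression provide @ct and fill in @route):
 *
 *	flow = flow_offload_alloc(ct, &route);
 *	if (!flow)
 *		return -ENOMEM;
 *	flow_offload_add(flowtable, flow);
 */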
struct flow_offload *
flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
{
	struct flow_offload_entry *entry;
	struct flow_offload *flow;

	if (unlikely(nf_ct_is_dying(ct) ||
	    !atomic_inc_not_zero(&ct->ct_general.use)))
		return NULL;

	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
	if (!entry)
		goto err_ct_refcnt;

	flow = &entry->flow;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
		goto err_dst_cache_original;

	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
		goto err_dst_cache_reply;

	entry->ct = ct;

	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num) {
	case NFPROTO_IPV4:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in;
		break;
	case NFPROTO_IPV6:
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.in6;
		flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6 =
			ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.in6;
		break;
	}

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l3proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.l4proto =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.tcp.port;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port =
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dir =
		FLOW_OFFLOAD_DIR_ORIGINAL;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dir =
		FLOW_OFFLOAD_DIR_REPLY;

	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
		route->tuple[FLOW_OFFLOAD_DIR_REPLY].ifindex;
	flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.oifidx =
		route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].ifindex;

	/* A connection may be both source and destination NATed, so check
	 * the two status bits independently rather than as alternatives.
	 */
	if (ct->status & IPS_SRC_NAT)
		flow->flags |= FLOW_OFFLOAD_SNAT;
	if (ct->status & IPS_DST_NAT)
		flow->flags |= FLOW_OFFLOAD_DNAT;

	return flow;

err_dst_cache_reply:
	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
err_dst_cache_original:
	kfree(entry);
err_ct_refcnt:
	nf_ct_put(ct);

	return NULL;
}
EXPORT_SYMBOL_GPL(flow_offload_alloc);

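/*
 * flow_offload_free - release the cached routes and free the entry
 *
 * This does not drop the conntrack reference taken in
 * flow_offload_alloc(); within this file that reference is dropped by
 * nf_flow_release_ct() when the garbage collector tears a flow down.
 */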
void flow_offload_free(struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
	e = container_of(flow, struct flow_offload_entry, flow);
	kfree(e);
}
EXPORT_SYMBOL_GPL(flow_offload_free);

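/*
 * flow_offload_dead - mark a flow as dying
 *
 * The flow remains in the table until the next garbage collection run,
 * which reaps every entry flagged FLOW_OFFLOAD_DYING.
 */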
void flow_offload_dead(struct flow_offload *flow)
{
	flow->flags |= FLOW_OFFLOAD_DYING;
}
EXPORT_SYMBOL_GPL(flow_offload_dead);

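/*
 * flow_offload_add - insert a flow into the flow table
 *
 * Both tuplehash nodes are inserted, one per direction, so a lookup on
 * either the original or the reply tuple finds the flow. The timeout is
 * armed here; the packet path (outside this file) is expected to refresh
 * it while traffic keeps flowing, otherwise the flow expires at the next
 * gc run.
 */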
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
	flow->timeout = (u32)jiffies;

	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_insert_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);
	return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);

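/*
 * flow_offload_del - remove a flow from the flow table
 *
 * Both direction nodes are removed and the entry is freed after an RCU
 * grace period, since lookups run locklessly under RCU protection.
 */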
void flow_offload_del(struct nf_flowtable *flow_table,
		      struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
			       *flow_table->type->params);
	rhashtable_remove_fast(&flow_table->rhashtable,
			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
			       *flow_table->type->params);

	e = container_of(flow, struct flow_offload_entry, flow);
	kfree_rcu(e, rcu_head);
}
EXPORT_SYMBOL_GPL(flow_offload_del);

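/*
 * flow_offload_lookup - look up a flow by tuple
 *
 * The lookup key consists of the tuple fields that precede the 'dir'
 * member; see the hash and compare callbacks further down.
 */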
struct flow_offload_tuple_rhash *
flow_offload_lookup(struct nf_flowtable *flow_table,
		    struct flow_offload_tuple *tuple)
{
	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
				      *flow_table->type->params);
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);

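/* Hand the connection back to conntrack for removal and drop the
 * reference taken in flow_offload_alloc().
 */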
static void nf_flow_release_ct(const struct flow_offload *flow)
{
	struct flow_offload_entry *e;

	e = container_of(flow, struct flow_offload_entry, flow);
	nf_ct_delete(e->ct, 0, 0);
	nf_ct_put(e->ct);
}

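/*
 * nf_flow_table_iterate - invoke @iter once per flow in the table
 *
 * Every flow is hashed twice, once per direction, so reply-direction
 * entries are skipped to visit each flow exactly once. -EAGAIN from the
 * rhashtable walker signals a concurrent resize, not an error, and the
 * walk simply continues.
 */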
int nf_flow_table_iterate(struct nf_flowtable *flow_table,
			  void (*iter)(struct flow_offload *flow, void *data),
			  void *data)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		return err;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		iter(flow, data);
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);

	return err;
}
EXPORT_SYMBOL_GPL(nf_flow_table_iterate);

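/* Signed arithmetic on the difference handles jiffies wraparound: the
 * flow has expired once its timeout lies at or before the current time.
 */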
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}

static inline bool nf_flow_is_dying(const struct flow_offload *flow)
{
	return flow->flags & FLOW_OFFLOAD_DYING;
}

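/*
 * nf_flow_offload_work_gc - periodic flow table garbage collection
 *
 * Walks the table, tears down flows that have expired or were marked as
 * dying, and unconditionally re-arms itself to run again in about one
 * second (HZ), even if setting up the walk failed.
 */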
void nf_flow_offload_work_gc(struct work_struct *work)
{
	struct flow_offload_tuple_rhash *tuplehash;
	struct nf_flowtable *flow_table;
	struct rhashtable_iter hti;
	struct flow_offload *flow;
	int err;

	flow_table = container_of(work, struct nf_flowtable, gc_work.work);

	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
	if (err)
		goto schedule;

	rhashtable_walk_start(&hti);

	while ((tuplehash = rhashtable_walk_next(&hti))) {
		if (IS_ERR(tuplehash)) {
			err = PTR_ERR(tuplehash);
			if (err != -EAGAIN)
				goto out;

			continue;
		}
		if (tuplehash->tuple.dir)
			continue;

		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);

		if (nf_flow_has_expired(flow) ||
		    nf_flow_is_dying(flow)) {
			flow_offload_del(flow_table, flow);
			nf_flow_release_ct(flow);
		}
	}
out:
	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);
schedule:
	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);

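/* The hash and compare callbacks deliberately cover only the bytes of
 * struct flow_offload_tuple that precede the 'dir' member, i.e. the
 * addressing fields forming the lookup key. This relies on those key
 * fields being laid out first in the structure.
 */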
static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple *tuple = data;

	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
{
	const struct flow_offload_tuple_rhash *tuplehash = data;

	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
}

static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
				 const void *ptr)
{
	const struct flow_offload_tuple *tuple = arg->key;
	const struct flow_offload_tuple_rhash *x = ptr;

	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
		return 1;

	return 0;
}

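/* Table parameters: the hash nodes live embedded in each tuplehash entry
 * (head_offset), and the table shrinks automatically as flows go away.
 */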
const struct rhashtable_params nf_flow_offload_rhash_params = {
	.head_offset		= offsetof(struct flow_offload_tuple_rhash, node),
	.hashfn			= flow_offload_hash,
	.obj_hashfn		= flow_offload_hash_obj,
	.obj_cmpfn		= flow_offload_hash_cmp,
	.automatic_shrinking	= true,
};
EXPORT_SYMBOL_GPL(nf_flow_offload_rhash_params);

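/*
 * Port rewriting for offloaded NAT flows. Only the transport checksum is
 * adjusted here, incrementally via inet_proto_csum_replace2(); the port
 * bytes themselves are rewritten by the callers below. UDP needs a
 * special case: a zero checksum means "no checksum" on IPv4, so a
 * computed zero is folded to CSUM_MANGLED_0.
 */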
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
		return -1;

	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);

	return 0;
}

static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
				__be16 port, __be16 new_port)
{
	struct udphdr *udph;

	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
		return -1;

	udph = (void *)(skb_network_header(skb) + thoff);
	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
		inet_proto_csum_replace2(&udph->check, skb, port,
					 new_port, true);
		if (!udph->check)
			udph->check = CSUM_MANGLED_0;
	}

	return 0;
}

static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
			    u8 protocol, __be16 port, __be16 new_port)
{
	switch (protocol) {
	case IPPROTO_TCP:
		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
			return NF_DROP;
		break;
	}

	return 0;
}

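/*
 * nf_flow_snat_port - apply source port NAT to a packet on an offloaded flow
 *
 * In the original direction the source port becomes the reply tuple's
 * destination port; in the reply direction the destination port is
 * rewritten back to the original tuple's source port.
 */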
int nf_flow_snat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
		hdr->source = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
		hdr->dest = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_snat_port);

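/*
 * nf_flow_dnat_port - destination port NAT, the mirror image of
 * nf_flow_snat_port: the destination port is mapped in the original
 * direction and the source port in the reply direction.
 */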
int nf_flow_dnat_port(const struct flow_offload *flow,
		      struct sk_buff *skb, unsigned int thoff,
		      u8 protocol, enum flow_offload_tuple_dir dir)
{
	struct flow_ports *hdr;
	__be16 port, new_port;

	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
		return -1;

	hdr = (void *)(skb_network_header(skb) + thoff);

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		port = hdr->dest;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port;
		hdr->dest = new_port;
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		port = hdr->source;
		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
		hdr->source = new_port;
		break;
	default:
		return -1;
	}

	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
}
EXPORT_SYMBOL_GPL(nf_flow_dnat_port);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");