]>
Commit | Line | Data |
---|---|---|
5b497af4 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
3a0af8fd | 2 | /* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch> |
3a0af8fd TG |
3 | */ |
4 | ||
5 | #include <linux/kernel.h> | |
6 | #include <linux/module.h> | |
7 | #include <linux/skbuff.h> | |
8 | #include <linux/types.h> | |
9 | #include <linux/bpf.h> | |
10 | #include <net/lwtunnel.h> | |
ca78801a | 11 | #include <net/gre.h> |
3bd0b152 | 12 | #include <net/ip6_route.h> |
3616d08b | 13 | #include <net/ipv6_stubs.h> |
3a0af8fd TG |
14 | |
/* One BPF program attached to an LWT hook, plus the name it was given. */
struct bpf_lwt_prog {
	struct bpf_prog	*prog;	/* tested for NULL before a hook is run */
	char		*name;	/* duplicated from netlink, kfree()d on destroy */
};
19 | ||
20 | struct bpf_lwt { | |
21 | struct bpf_lwt_prog in; | |
22 | struct bpf_lwt_prog out; | |
23 | struct bpf_lwt_prog xmit; | |
24 | int family; | |
25 | }; | |
26 | ||
/* Upper bound on the LWT_BPF_PROG_NAME attribute (policy and size estimate). */
#define MAX_PROG_NAME	256
28 | ||
29 | static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt) | |
30 | { | |
31 | return (struct bpf_lwt *)lwt->data; | |
32 | } | |
33 | ||
/* Readable values for the can_redirect argument of run_lwt_bpf(). */
#define NO_REDIRECT	false
#define CAN_REDIRECT	true
36 | ||
37 | static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, | |
38 | struct dst_entry *dst, bool can_redirect) | |
39 | { | |
40 | int ret; | |
41 | ||
e3366884 | 42 | /* Migration disable and BH disable are needed to protect per-cpu |
d9054a1f | 43 | * redirect_info between BPF prog and skb_do_redirect(). |
3a0af8fd | 44 | */ |
e3366884 | 45 | migrate_disable(); |
d9054a1f | 46 | local_bh_disable(); |
6aaae2b6 | 47 | bpf_compute_data_pointers(skb); |
3a0af8fd | 48 | ret = bpf_prog_run_save_cb(lwt->prog, skb); |
3a0af8fd TG |
49 | |
50 | switch (ret) { | |
51 | case BPF_OK: | |
3bd0b152 | 52 | case BPF_LWT_REROUTE: |
3a0af8fd TG |
53 | break; |
54 | ||
55 | case BPF_REDIRECT: | |
56 | if (unlikely(!can_redirect)) { | |
57 | pr_warn_once("Illegal redirect return code in prog %s\n", | |
58 | lwt->name ? : "<unknown>"); | |
59 | ret = BPF_OK; | |
60 | } else { | |
e7c87bd6 | 61 | skb_reset_mac_header(skb); |
3a0af8fd TG |
62 | ret = skb_do_redirect(skb); |
63 | if (ret == 0) | |
64 | ret = BPF_REDIRECT; | |
65 | } | |
66 | break; | |
67 | ||
68 | case BPF_DROP: | |
69 | kfree_skb(skb); | |
70 | ret = -EPERM; | |
71 | break; | |
72 | ||
73 | default: | |
74 | pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret); | |
75 | kfree_skb(skb); | |
76 | ret = -EINVAL; | |
77 | break; | |
78 | } | |
79 | ||
d9054a1f | 80 | local_bh_enable(); |
e3366884 | 81 | migrate_enable(); |
3a0af8fd TG |
82 | |
83 | return ret; | |
84 | } | |
85 | ||
3bd0b152 PO |
86 | static int bpf_lwt_input_reroute(struct sk_buff *skb) |
87 | { | |
88 | int err = -EINVAL; | |
89 | ||
90 | if (skb->protocol == htons(ETH_P_IP)) { | |
9e8acd9c | 91 | struct net_device *dev = skb_dst(skb)->dev; |
3bd0b152 PO |
92 | struct iphdr *iph = ip_hdr(skb); |
93 | ||
9e8acd9c JB |
94 | dev_hold(dev); |
95 | skb_dst_drop(skb); | |
3bd0b152 | 96 | err = ip_route_input_noref(skb, iph->daddr, iph->saddr, |
9e8acd9c JB |
97 | iph->tos, dev); |
98 | dev_put(dev); | |
3bd0b152 | 99 | } else if (skb->protocol == htons(ETH_P_IPV6)) { |
9e8acd9c | 100 | skb_dst_drop(skb); |
3bd0b152 PO |
101 | err = ipv6_stub->ipv6_route_input(skb); |
102 | } else { | |
103 | err = -EAFNOSUPPORT; | |
104 | } | |
105 | ||
106 | if (err) | |
107 | goto err; | |
108 | return dst_input(skb); | |
109 | ||
110 | err: | |
111 | kfree_skb(skb); | |
112 | return err; | |
113 | } | |
114 | ||
3a0af8fd TG |
115 | static int bpf_input(struct sk_buff *skb) |
116 | { | |
117 | struct dst_entry *dst = skb_dst(skb); | |
118 | struct bpf_lwt *bpf; | |
119 | int ret; | |
120 | ||
121 | bpf = bpf_lwt_lwtunnel(dst->lwtstate); | |
122 | if (bpf->in.prog) { | |
123 | ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT); | |
124 | if (ret < 0) | |
125 | return ret; | |
3bd0b152 PO |
126 | if (ret == BPF_LWT_REROUTE) |
127 | return bpf_lwt_input_reroute(skb); | |
3a0af8fd TG |
128 | } |
129 | ||
130 | if (unlikely(!dst->lwtstate->orig_input)) { | |
3a0af8fd TG |
131 | kfree_skb(skb); |
132 | return -EINVAL; | |
133 | } | |
134 | ||
135 | return dst->lwtstate->orig_input(skb); | |
136 | } | |
137 | ||
138 | static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb) | |
139 | { | |
140 | struct dst_entry *dst = skb_dst(skb); | |
141 | struct bpf_lwt *bpf; | |
142 | int ret; | |
143 | ||
144 | bpf = bpf_lwt_lwtunnel(dst->lwtstate); | |
145 | if (bpf->out.prog) { | |
146 | ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT); | |
147 | if (ret < 0) | |
148 | return ret; | |
149 | } | |
150 | ||
151 | if (unlikely(!dst->lwtstate->orig_output)) { | |
152 | pr_warn_once("orig_output not set on dst for prog %s\n", | |
153 | bpf->out.name); | |
154 | kfree_skb(skb); | |
155 | return -EINVAL; | |
156 | } | |
157 | ||
158 | return dst->lwtstate->orig_output(net, sk, skb); | |
159 | } | |
160 | ||
161 | static int xmit_check_hhlen(struct sk_buff *skb) | |
162 | { | |
163 | int hh_len = skb_dst(skb)->dev->hard_header_len; | |
164 | ||
165 | if (skb_headroom(skb) < hh_len) { | |
166 | int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); | |
167 | ||
168 | if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC)) | |
169 | return -ENOMEM; | |
170 | } | |
171 | ||
172 | return 0; | |
173 | } | |
174 | ||
3bd0b152 PO |
175 | static int bpf_lwt_xmit_reroute(struct sk_buff *skb) |
176 | { | |
177 | struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev); | |
178 | int oif = l3mdev ? l3mdev->ifindex : 0; | |
179 | struct dst_entry *dst = NULL; | |
fb405883 | 180 | int err = -EAFNOSUPPORT; |
3bd0b152 PO |
181 | struct sock *sk; |
182 | struct net *net; | |
183 | bool ipv4; | |
3bd0b152 PO |
184 | |
185 | if (skb->protocol == htons(ETH_P_IP)) | |
186 | ipv4 = true; | |
187 | else if (skb->protocol == htons(ETH_P_IPV6)) | |
188 | ipv4 = false; | |
189 | else | |
fb405883 | 190 | goto err; |
3bd0b152 PO |
191 | |
192 | sk = sk_to_full_sk(skb->sk); | |
193 | if (sk) { | |
194 | if (sk->sk_bound_dev_if) | |
195 | oif = sk->sk_bound_dev_if; | |
196 | net = sock_net(sk); | |
197 | } else { | |
198 | net = dev_net(skb_dst(skb)->dev); | |
199 | } | |
200 | ||
201 | if (ipv4) { | |
202 | struct iphdr *iph = ip_hdr(skb); | |
203 | struct flowi4 fl4 = {}; | |
204 | struct rtable *rt; | |
205 | ||
206 | fl4.flowi4_oif = oif; | |
207 | fl4.flowi4_mark = skb->mark; | |
208 | fl4.flowi4_uid = sock_net_uid(net, sk); | |
209 | fl4.flowi4_tos = RT_TOS(iph->tos); | |
210 | fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; | |
211 | fl4.flowi4_proto = iph->protocol; | |
212 | fl4.daddr = iph->daddr; | |
213 | fl4.saddr = iph->saddr; | |
214 | ||
215 | rt = ip_route_output_key(net, &fl4); | |
fb405883 PO |
216 | if (IS_ERR(rt)) { |
217 | err = PTR_ERR(rt); | |
218 | goto err; | |
219 | } | |
3bd0b152 PO |
220 | dst = &rt->dst; |
221 | } else { | |
222 | struct ipv6hdr *iph6 = ipv6_hdr(skb); | |
223 | struct flowi6 fl6 = {}; | |
224 | ||
225 | fl6.flowi6_oif = oif; | |
226 | fl6.flowi6_mark = skb->mark; | |
227 | fl6.flowi6_uid = sock_net_uid(net, sk); | |
228 | fl6.flowlabel = ip6_flowinfo(iph6); | |
229 | fl6.flowi6_proto = iph6->nexthdr; | |
230 | fl6.daddr = iph6->daddr; | |
231 | fl6.saddr = iph6->saddr; | |
232 | ||
6c8991f4 | 233 | dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL); |
fb405883 PO |
234 | if (IS_ERR(dst)) { |
235 | err = PTR_ERR(dst); | |
236 | goto err; | |
237 | } | |
3bd0b152 PO |
238 | } |
239 | if (unlikely(dst->error)) { | |
fb405883 | 240 | err = dst->error; |
3bd0b152 | 241 | dst_release(dst); |
fb405883 | 242 | goto err; |
3bd0b152 PO |
243 | } |
244 | ||
245 | /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it | |
246 | * was done for the previous dst, so we are doing it here again, in | |
247 | * case the new dst needs much more space. The call below is a noop | |
248 | * if there is enough header space in skb. | |
249 | */ | |
250 | err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); | |
251 | if (unlikely(err)) | |
fb405883 | 252 | goto err; |
3bd0b152 PO |
253 | |
254 | skb_dst_drop(skb); | |
255 | skb_dst_set(skb, dst); | |
256 | ||
257 | err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); | |
258 | if (unlikely(err)) | |
bd16693f | 259 | return err; |
3bd0b152 PO |
260 | |
261 | /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ | |
262 | return LWTUNNEL_XMIT_DONE; | |
fb405883 PO |
263 | |
264 | err: | |
265 | kfree_skb(skb); | |
266 | return err; | |
3bd0b152 PO |
267 | } |
268 | ||
3a0af8fd TG |
269 | static int bpf_xmit(struct sk_buff *skb) |
270 | { | |
271 | struct dst_entry *dst = skb_dst(skb); | |
272 | struct bpf_lwt *bpf; | |
273 | ||
274 | bpf = bpf_lwt_lwtunnel(dst->lwtstate); | |
275 | if (bpf->xmit.prog) { | |
3bd0b152 | 276 | __be16 proto = skb->protocol; |
3a0af8fd TG |
277 | int ret; |
278 | ||
279 | ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT); | |
280 | switch (ret) { | |
281 | case BPF_OK: | |
3bd0b152 PO |
282 | /* If the header changed, e.g. via bpf_lwt_push_encap, |
283 | * BPF_LWT_REROUTE below should have been used if the | |
284 | * protocol was also changed. | |
285 | */ | |
286 | if (skb->protocol != proto) { | |
287 | kfree_skb(skb); | |
288 | return -EINVAL; | |
289 | } | |
3a0af8fd TG |
290 | /* If the header was expanded, headroom might be too |
291 | * small for L2 header to come, expand as needed. | |
292 | */ | |
293 | ret = xmit_check_hhlen(skb); | |
294 | if (unlikely(ret)) | |
295 | return ret; | |
296 | ||
297 | return LWTUNNEL_XMIT_CONTINUE; | |
298 | case BPF_REDIRECT: | |
299 | return LWTUNNEL_XMIT_DONE; | |
3bd0b152 PO |
300 | case BPF_LWT_REROUTE: |
301 | return bpf_lwt_xmit_reroute(skb); | |
3a0af8fd TG |
302 | default: |
303 | return ret; | |
304 | } | |
305 | } | |
306 | ||
307 | return LWTUNNEL_XMIT_CONTINUE; | |
308 | } | |
309 | ||
310 | static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog) | |
311 | { | |
312 | if (prog->prog) | |
313 | bpf_prog_put(prog->prog); | |
314 | ||
315 | kfree(prog->name); | |
316 | } | |
317 | ||
318 | static void bpf_destroy_state(struct lwtunnel_state *lwt) | |
319 | { | |
320 | struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt); | |
321 | ||
322 | bpf_lwt_prog_destroy(&bpf->in); | |
323 | bpf_lwt_prog_destroy(&bpf->out); | |
324 | bpf_lwt_prog_destroy(&bpf->xmit); | |
325 | } | |
326 | ||
327 | static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = { | |
328 | [LWT_BPF_PROG_FD] = { .type = NLA_U32, }, | |
329 | [LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING, | |
330 | .len = MAX_PROG_NAME }, | |
331 | }; | |
332 | ||
333 | static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog, | |
334 | enum bpf_prog_type type) | |
335 | { | |
336 | struct nlattr *tb[LWT_BPF_PROG_MAX + 1]; | |
337 | struct bpf_prog *p; | |
338 | int ret; | |
339 | u32 fd; | |
340 | ||
8cb08174 JB |
341 | ret = nla_parse_nested_deprecated(tb, LWT_BPF_PROG_MAX, attr, |
342 | bpf_prog_policy, NULL); | |
3a0af8fd TG |
343 | if (ret < 0) |
344 | return ret; | |
345 | ||
346 | if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME]) | |
347 | return -EINVAL; | |
348 | ||
71eb5255 | 349 | prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC); |
3a0af8fd TG |
350 | if (!prog->name) |
351 | return -ENOMEM; | |
352 | ||
353 | fd = nla_get_u32(tb[LWT_BPF_PROG_FD]); | |
354 | p = bpf_prog_get_type(fd, type); | |
355 | if (IS_ERR(p)) | |
356 | return PTR_ERR(p); | |
357 | ||
358 | prog->prog = p; | |
359 | ||
360 | return 0; | |
361 | } | |
362 | ||
363 | static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = { | |
364 | [LWT_BPF_IN] = { .type = NLA_NESTED, }, | |
365 | [LWT_BPF_OUT] = { .type = NLA_NESTED, }, | |
366 | [LWT_BPF_XMIT] = { .type = NLA_NESTED, }, | |
367 | [LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 }, | |
368 | }; | |
369 | ||
faee6769 | 370 | static int bpf_build_state(struct net *net, struct nlattr *nla, |
3a0af8fd | 371 | unsigned int family, const void *cfg, |
9ae28727 DA |
372 | struct lwtunnel_state **ts, |
373 | struct netlink_ext_ack *extack) | |
3a0af8fd TG |
374 | { |
375 | struct nlattr *tb[LWT_BPF_MAX + 1]; | |
376 | struct lwtunnel_state *newts; | |
377 | struct bpf_lwt *bpf; | |
378 | int ret; | |
379 | ||
380 | if (family != AF_INET && family != AF_INET6) | |
381 | return -EAFNOSUPPORT; | |
382 | ||
8cb08174 JB |
383 | ret = nla_parse_nested_deprecated(tb, LWT_BPF_MAX, nla, bpf_nl_policy, |
384 | extack); | |
3a0af8fd TG |
385 | if (ret < 0) |
386 | return ret; | |
387 | ||
388 | if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT]) | |
389 | return -EINVAL; | |
390 | ||
391 | newts = lwtunnel_state_alloc(sizeof(*bpf)); | |
392 | if (!newts) | |
393 | return -ENOMEM; | |
394 | ||
395 | newts->type = LWTUNNEL_ENCAP_BPF; | |
396 | bpf = bpf_lwt_lwtunnel(newts); | |
397 | ||
398 | if (tb[LWT_BPF_IN]) { | |
399 | newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; | |
400 | ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in, | |
401 | BPF_PROG_TYPE_LWT_IN); | |
402 | if (ret < 0) | |
403 | goto errout; | |
404 | } | |
405 | ||
406 | if (tb[LWT_BPF_OUT]) { | |
407 | newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; | |
408 | ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out, | |
409 | BPF_PROG_TYPE_LWT_OUT); | |
410 | if (ret < 0) | |
411 | goto errout; | |
412 | } | |
413 | ||
414 | if (tb[LWT_BPF_XMIT]) { | |
415 | newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; | |
416 | ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit, | |
417 | BPF_PROG_TYPE_LWT_XMIT); | |
418 | if (ret < 0) | |
419 | goto errout; | |
420 | } | |
421 | ||
422 | if (tb[LWT_BPF_XMIT_HEADROOM]) { | |
423 | u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]); | |
424 | ||
425 | if (headroom > LWT_BPF_MAX_HEADROOM) { | |
426 | ret = -ERANGE; | |
427 | goto errout; | |
428 | } | |
429 | ||
430 | newts->headroom = headroom; | |
431 | } | |
432 | ||
433 | bpf->family = family; | |
434 | *ts = newts; | |
435 | ||
436 | return 0; | |
437 | ||
438 | errout: | |
439 | bpf_destroy_state(newts); | |
440 | kfree(newts); | |
441 | return ret; | |
442 | } | |
443 | ||
444 | static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr, | |
445 | struct bpf_lwt_prog *prog) | |
446 | { | |
447 | struct nlattr *nest; | |
448 | ||
449 | if (!prog->prog) | |
450 | return 0; | |
451 | ||
ae0be8de | 452 | nest = nla_nest_start_noflag(skb, attr); |
3a0af8fd TG |
453 | if (!nest) |
454 | return -EMSGSIZE; | |
455 | ||
456 | if (prog->name && | |
457 | nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name)) | |
458 | return -EMSGSIZE; | |
459 | ||
460 | return nla_nest_end(skb, nest); | |
461 | } | |
462 | ||
463 | static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt) | |
464 | { | |
465 | struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt); | |
466 | ||
467 | if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 || | |
468 | bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 || | |
469 | bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0) | |
470 | return -EMSGSIZE; | |
471 | ||
472 | return 0; | |
473 | } | |
474 | ||
475 | static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate) | |
476 | { | |
477 | int nest_len = nla_total_size(sizeof(struct nlattr)) + | |
478 | nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */ | |
479 | 0; | |
480 | ||
481 | return nest_len + /* LWT_BPF_IN */ | |
482 | nest_len + /* LWT_BPF_OUT */ | |
483 | nest_len + /* LWT_BPF_XMIT */ | |
484 | 0; | |
485 | } | |
486 | ||
79471b10 | 487 | static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b) |
3a0af8fd TG |
488 | { |
489 | /* FIXME: | |
490 | * The LWT state is currently rebuilt for delete requests which | |
491 | * results in a new bpf_prog instance. Comparing names for now. | |
492 | */ | |
493 | if (!a->name && !b->name) | |
494 | return 0; | |
495 | ||
496 | if (!a->name || !b->name) | |
497 | return 1; | |
498 | ||
499 | return strcmp(a->name, b->name); | |
500 | } | |
501 | ||
502 | static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) | |
503 | { | |
504 | struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a); | |
505 | struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b); | |
506 | ||
507 | return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) || | |
508 | bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) || | |
509 | bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit); | |
510 | } | |
511 | ||
512 | static const struct lwtunnel_encap_ops bpf_encap_ops = { | |
513 | .build_state = bpf_build_state, | |
514 | .destroy_state = bpf_destroy_state, | |
515 | .input = bpf_input, | |
516 | .output = bpf_output, | |
517 | .xmit = bpf_xmit, | |
518 | .fill_encap = bpf_fill_encap_info, | |
519 | .get_encap_size = bpf_encap_nlsize, | |
520 | .cmp_encap = bpf_encap_cmp, | |
88ff7334 | 521 | .owner = THIS_MODULE, |
3a0af8fd TG |
522 | }; |
523 | ||
ca78801a PO |
524 | static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type, |
525 | int encap_len) | |
526 | { | |
527 | struct skb_shared_info *shinfo = skb_shinfo(skb); | |
528 | ||
529 | gso_type |= SKB_GSO_DODGY; | |
530 | shinfo->gso_type |= gso_type; | |
531 | skb_decrease_gso_size(shinfo, encap_len); | |
532 | shinfo->gso_segs = 0; | |
533 | return 0; | |
534 | } | |
535 | ||
52f27877 PO |
536 | static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len) |
537 | { | |
ca78801a PO |
538 | int next_hdr_offset; |
539 | void *next_hdr; | |
540 | __u8 protocol; | |
541 | ||
542 | /* SCTP and UDP_L4 gso need more nuanced handling than what | |
543 | * handle_gso_type() does above: skb_decrease_gso_size() is not enough. | |
544 | * So at the moment only TCP GSO packets are let through. | |
545 | */ | |
546 | if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) | |
547 | return -ENOTSUPP; | |
548 | ||
549 | if (ipv4) { | |
550 | protocol = ip_hdr(skb)->protocol; | |
551 | next_hdr_offset = sizeof(struct iphdr); | |
552 | next_hdr = skb_network_header(skb) + next_hdr_offset; | |
553 | } else { | |
554 | protocol = ipv6_hdr(skb)->nexthdr; | |
555 | next_hdr_offset = sizeof(struct ipv6hdr); | |
556 | next_hdr = skb_network_header(skb) + next_hdr_offset; | |
557 | } | |
558 | ||
559 | switch (protocol) { | |
560 | case IPPROTO_GRE: | |
561 | next_hdr_offset += sizeof(struct gre_base_hdr); | |
562 | if (next_hdr_offset > encap_len) | |
563 | return -EINVAL; | |
564 | ||
565 | if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM) | |
566 | return handle_gso_type(skb, SKB_GSO_GRE_CSUM, | |
567 | encap_len); | |
568 | return handle_gso_type(skb, SKB_GSO_GRE, encap_len); | |
569 | ||
570 | case IPPROTO_UDP: | |
571 | next_hdr_offset += sizeof(struct udphdr); | |
572 | if (next_hdr_offset > encap_len) | |
573 | return -EINVAL; | |
574 | ||
575 | if (((struct udphdr *)next_hdr)->check) | |
576 | return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM, | |
577 | encap_len); | |
578 | return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len); | |
579 | ||
580 | case IPPROTO_IP: | |
581 | case IPPROTO_IPV6: | |
582 | if (ipv4) | |
583 | return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len); | |
584 | else | |
585 | return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len); | |
586 | ||
587 | default: | |
588 | return -EPROTONOSUPPORT; | |
589 | } | |
52f27877 PO |
590 | } |
591 | ||
592 | int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) | |
593 | { | |
594 | struct iphdr *iph; | |
595 | bool ipv4; | |
596 | int err; | |
597 | ||
598 | if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM)) | |
599 | return -EINVAL; | |
600 | ||
601 | /* validate protocol and length */ | |
602 | iph = (struct iphdr *)hdr; | |
603 | if (iph->version == 4) { | |
604 | ipv4 = true; | |
605 | if (unlikely(len < iph->ihl * 4)) | |
606 | return -EINVAL; | |
607 | } else if (iph->version == 6) { | |
608 | ipv4 = false; | |
609 | if (unlikely(len < sizeof(struct ipv6hdr))) | |
610 | return -EINVAL; | |
611 | } else { | |
612 | return -EINVAL; | |
613 | } | |
614 | ||
615 | if (ingress) | |
616 | err = skb_cow_head(skb, len + skb->mac_len); | |
617 | else | |
618 | err = skb_cow_head(skb, | |
619 | len + LL_RESERVED_SPACE(skb_dst(skb)->dev)); | |
620 | if (unlikely(err)) | |
621 | return err; | |
622 | ||
623 | /* push the encap headers and fix pointers */ | |
624 | skb_reset_inner_headers(skb); | |
ea0371f7 PO |
625 | skb_reset_inner_mac_header(skb); /* mac header is not yet set */ |
626 | skb_set_inner_protocol(skb, skb->protocol); | |
52f27877 PO |
627 | skb->encapsulation = 1; |
628 | skb_push(skb, len); | |
629 | if (ingress) | |
630 | skb_postpush_rcsum(skb, iph, len); | |
631 | skb_reset_network_header(skb); | |
632 | memcpy(skb_network_header(skb), hdr, len); | |
633 | bpf_compute_data_pointers(skb); | |
634 | skb_clear_hash(skb); | |
635 | ||
636 | if (ipv4) { | |
637 | skb->protocol = htons(ETH_P_IP); | |
638 | iph = ip_hdr(skb); | |
639 | ||
640 | if (!iph->check) | |
641 | iph->check = ip_fast_csum((unsigned char *)iph, | |
642 | iph->ihl); | |
643 | } else { | |
644 | skb->protocol = htons(ETH_P_IPV6); | |
645 | } | |
646 | ||
647 | if (skb_is_gso(skb)) | |
648 | return handle_gso_encap(skb, ipv4, len); | |
649 | ||
650 | return 0; | |
651 | } | |
652 | ||
3a0af8fd TG |
653 | static int __init bpf_lwt_init(void) |
654 | { | |
655 | return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF); | |
656 | } | |
657 | ||
658 | subsys_initcall(bpf_lwt_init) |