/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <net/gre.h>
#include <net/ip6_route.h>

struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};

struct bpf_lwt {
	struct bpf_lwt_prog in;
	struct bpf_lwt_prog out;
	struct bpf_lwt_prog xmit;
	int family;
};

#define MAX_PROG_NAME 256

static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
{
	return (struct bpf_lwt *)lwt->data;
}

#define NO_REDIRECT false
#define CAN_REDIRECT true

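/* One BPF program can be attached per hook: "in" runs from bpf_input() on
 * the receive path, "out" from bpf_output() on the locally generated output
 * path, and "xmit" from bpf_xmit() right before the packet is handed to the
 * lower device (see bpf_encap_ops at the bottom of this file).
 */
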
static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
		       struct dst_entry *dst, bool can_redirect)
{
	int ret;

	/* Preempt disable is needed to protect per-cpu redirect_info between
	 * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and
	 * access to maps strictly require a rcu_read_lock() for protection,
	 * mixing with BH RCU lock doesn't work.
	 */
	preempt_disable();
	bpf_compute_data_pointers(skb);
	ret = bpf_prog_run_save_cb(lwt->prog, skb);

	switch (ret) {
	case BPF_OK:
	case BPF_LWT_REROUTE:
		break;

	case BPF_REDIRECT:
		if (unlikely(!can_redirect)) {
			pr_warn_once("Illegal redirect return code in prog %s\n",
				     lwt->name ? : "<unknown>");
			ret = BPF_OK;
		} else {
			skb_reset_mac_header(skb);
			ret = skb_do_redirect(skb);
			if (ret == 0)
				ret = BPF_REDIRECT;
		}
		break;

	case BPF_DROP:
		kfree_skb(skb);
		ret = -EPERM;
		break;

	default:
		pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret);
		kfree_skb(skb);
		ret = -EINVAL;
		break;
	}

	preempt_enable();

	return ret;
}

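/* A minimal sketch (not part of this file) of an LWT BPF program exercising
 * the return codes handled by run_lwt_bpf() above; it assumes a
 * clang -target bpf build against libbpf headers, and the program/section
 * names are illustrative:
 *
 *	#include <linux/bpf.h>
 *	#include <linux/if_ether.h>
 *	#include <bpf/bpf_endian.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("lwt_in")
 *	int drop_non_ipv6(struct __sk_buff *skb)
 *	{
 *		if (skb->protocol != bpf_htons(ETH_P_IPV6))
 *			return BPF_DROP;	// skb is freed, bpf_input() returns -EPERM
 *		return BPF_OK;			// continue to orig_input()
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
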
static int bpf_lwt_input_reroute(struct sk_buff *skb)
{
	int err = -EINVAL;

	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = ip_hdr(skb);

		err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
					   iph->tos, skb_dst(skb)->dev);
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		err = ipv6_stub->ipv6_route_input(skb);
	} else {
		err = -EAFNOSUPPORT;
	}

	if (err)
		goto err;
	return dst_input(skb);

err:
	kfree_skb(skb);
	return err;
}

static int bpf_input(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;
	int ret;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->in.prog) {
		ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
		if (ret < 0)
			return ret;
		if (ret == BPF_LWT_REROUTE)
			return bpf_lwt_input_reroute(skb);
	}

	if (unlikely(!dst->lwtstate->orig_input)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return dst->lwtstate->orig_input(skb);
}

static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;
	int ret;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->out.prog) {
		ret = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT);
		if (ret < 0)
			return ret;
	}

	if (unlikely(!dst->lwtstate->orig_output)) {
		pr_warn_once("orig_output not set on dst for prog %s\n",
			     bpf->out.name);
		kfree_skb(skb);
		return -EINVAL;
	}

	return dst->lwtstate->orig_output(net, sk, skb);
}

static int xmit_check_hhlen(struct sk_buff *skb)
{
	int hh_len = skb_dst(skb)->dev->hard_header_len;

	if (skb_headroom(skb) < hh_len) {
		int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));

		if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
			return -ENOMEM;
	}

	return 0;
}

static int bpf_lwt_xmit_reroute(struct sk_buff *skb)
{
	struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev);
	int oif = l3mdev ? l3mdev->ifindex : 0;
	struct dst_entry *dst = NULL;
	int err = -EAFNOSUPPORT;
	struct sock *sk;
	struct net *net;
	bool ipv4;

	if (skb->protocol == htons(ETH_P_IP))
		ipv4 = true;
	else if (skb->protocol == htons(ETH_P_IPV6))
		ipv4 = false;
	else
		goto err;

	sk = sk_to_full_sk(skb->sk);
	if (sk) {
		if (sk->sk_bound_dev_if)
			oif = sk->sk_bound_dev_if;
		net = sock_net(sk);
	} else {
		net = dev_net(skb_dst(skb)->dev);
	}

	if (ipv4) {
		struct iphdr *iph = ip_hdr(skb);
		struct flowi4 fl4 = {};
		struct rtable *rt;

		fl4.flowi4_oif = oif;
		fl4.flowi4_mark = skb->mark;
		fl4.flowi4_uid = sock_net_uid(net, sk);
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_flags = FLOWI_FLAG_ANYSRC;
		fl4.flowi4_proto = iph->protocol;
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			goto err;
		}
		dst = &rt->dst;
	} else {
		struct ipv6hdr *iph6 = ipv6_hdr(skb);
		struct flowi6 fl6 = {};

		fl6.flowi6_oif = oif;
		fl6.flowi6_mark = skb->mark;
		fl6.flowi6_uid = sock_net_uid(net, sk);
		fl6.flowlabel = ip6_flowinfo(iph6);
		fl6.flowi6_proto = iph6->nexthdr;
		fl6.daddr = iph6->daddr;
		fl6.saddr = iph6->saddr;

		err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6);
		if (unlikely(err))
			goto err;
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err;
		}
	}
	if (unlikely(dst->error)) {
		err = dst->error;
		dst_release(dst);
		goto err;
	}

	/* Although skb header was reserved in bpf_lwt_push_ip_encap(), it
	 * was done for the previous dst, so we are doing it here again, in
	 * case the new dst needs much more space. The call below is a noop
	 * if there is enough header space in skb.
	 */
	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
	if (unlikely(err))
		goto err;

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);

	err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(err))
		return err;

	/* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */
	return LWTUNNEL_XMIT_DONE;

err:
	kfree_skb(skb);
	return err;
}

static int bpf_xmit(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct bpf_lwt *bpf;

	bpf = bpf_lwt_lwtunnel(dst->lwtstate);
	if (bpf->xmit.prog) {
		__be16 proto = skb->protocol;
		int ret;

		ret = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
		switch (ret) {
		case BPF_OK:
			/* If the header changed, e.g. via bpf_lwt_push_encap,
			 * BPF_LWT_REROUTE below should have been used if the
			 * protocol was also changed.
			 */
			if (skb->protocol != proto) {
				kfree_skb(skb);
				return -EINVAL;
			}
			/* If the header was expanded, headroom might be too
			 * small for L2 header to come, expand as needed.
			 */
			ret = xmit_check_hhlen(skb);
			if (unlikely(ret))
				return ret;

			return LWTUNNEL_XMIT_CONTINUE;
		case BPF_REDIRECT:
			return LWTUNNEL_XMIT_DONE;
		case BPF_LWT_REROUTE:
			return bpf_lwt_xmit_reroute(skb);
		default:
			return ret;
		}
	}

	return LWTUNNEL_XMIT_CONTINUE;
}

static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog)
{
	if (prog->prog)
		bpf_prog_put(prog->prog);

	kfree(prog->name);
}

static void bpf_destroy_state(struct lwtunnel_state *lwt)
{
	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);

	bpf_lwt_prog_destroy(&bpf->in);
	bpf_lwt_prog_destroy(&bpf->out);
	bpf_lwt_prog_destroy(&bpf->xmit);
}

static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
	[LWT_BPF_PROG_FD]   = { .type = NLA_U32, },
	[LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
				.len = MAX_PROG_NAME },
};

static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
			  enum bpf_prog_type type)
{
	struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
	struct bpf_prog *p;
	int ret;
	u32 fd;

	ret = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy,
			       NULL);
	if (ret < 0)
		return ret;

	if (!tb[LWT_BPF_PROG_FD] || !tb[LWT_BPF_PROG_NAME])
		return -EINVAL;

	prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_ATOMIC);
	if (!prog->name)
		return -ENOMEM;

	fd = nla_get_u32(tb[LWT_BPF_PROG_FD]);
	p = bpf_prog_get_type(fd, type);
	if (IS_ERR(p))
		return PTR_ERR(p);

	prog->prog = p;

	return 0;
}

static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
	[LWT_BPF_IN]		= { .type = NLA_NESTED, },
	[LWT_BPF_OUT]		= { .type = NLA_NESTED, },
	[LWT_BPF_XMIT]		= { .type = NLA_NESTED, },
	[LWT_BPF_XMIT_HEADROOM]	= { .type = NLA_U32 },
};

static int bpf_build_state(struct nlattr *nla,
			   unsigned int family, const void *cfg,
			   struct lwtunnel_state **ts,
			   struct netlink_ext_ack *extack)
{
	struct nlattr *tb[LWT_BPF_MAX + 1];
	struct lwtunnel_state *newts;
	struct bpf_lwt *bpf;
	int ret;

	if (family != AF_INET && family != AF_INET6)
		return -EAFNOSUPPORT;

	ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy, extack);
	if (ret < 0)
		return ret;

	if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT])
		return -EINVAL;

	newts = lwtunnel_state_alloc(sizeof(*bpf));
	if (!newts)
		return -ENOMEM;

	newts->type = LWTUNNEL_ENCAP_BPF;
	bpf = bpf_lwt_lwtunnel(newts);

	if (tb[LWT_BPF_IN]) {
		newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in,
				     BPF_PROG_TYPE_LWT_IN);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_OUT]) {
		newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out,
				     BPF_PROG_TYPE_LWT_OUT);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_XMIT]) {
		newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
		ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit,
				     BPF_PROG_TYPE_LWT_XMIT);
		if (ret < 0)
			goto errout;
	}

	if (tb[LWT_BPF_XMIT_HEADROOM]) {
		u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]);

		if (headroom > LWT_BPF_MAX_HEADROOM) {
			ret = -ERANGE;
			goto errout;
		}

		newts->headroom = headroom;
	}

	bpf->family = family;
	*ts = newts;

	return 0;

errout:
	bpf_destroy_state(newts);
	kfree(newts);
	return ret;
}

static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr,
			     struct bpf_lwt_prog *prog)
{
	struct nlattr *nest;

	if (!prog->prog)
		return 0;

	nest = nla_nest_start(skb, attr);
	if (!nest)
		return -EMSGSIZE;

	if (prog->name &&
	    nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name))
		return -EMSGSIZE;

	return nla_nest_end(skb, nest);
}

static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt)
{
	struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);

	if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0 ||
	    bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0 ||
	    bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0)
		return -EMSGSIZE;

	return 0;
}

static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
{
	int nest_len = nla_total_size(sizeof(struct nlattr)) +
		       nla_total_size(MAX_PROG_NAME) + /* LWT_BPF_PROG_NAME */
		       0;

	return nest_len + /* LWT_BPF_IN */
	       nest_len + /* LWT_BPF_OUT */
	       nest_len + /* LWT_BPF_XMIT */
	       0;
}

static int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
{
	/* FIXME:
	 * The LWT state is currently rebuilt for delete requests which
	 * results in a new bpf_prog instance. Comparing names for now.
	 */
	if (!a->name && !b->name)
		return 0;

	if (!a->name || !b->name)
		return 1;

	return strcmp(a->name, b->name);
}

static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	struct bpf_lwt *a_bpf = bpf_lwt_lwtunnel(a);
	struct bpf_lwt *b_bpf = bpf_lwt_lwtunnel(b);

	return bpf_lwt_prog_cmp(&a_bpf->in, &b_bpf->in) ||
	       bpf_lwt_prog_cmp(&a_bpf->out, &b_bpf->out) ||
	       bpf_lwt_prog_cmp(&a_bpf->xmit, &b_bpf->xmit);
}

static const struct lwtunnel_encap_ops bpf_encap_ops = {
	.build_state	= bpf_build_state,
	.destroy_state	= bpf_destroy_state,
	.input		= bpf_input,
	.output		= bpf_output,
	.xmit		= bpf_xmit,
	.fill_encap	= bpf_fill_encap_info,
	.get_encap_size	= bpf_encap_nlsize,
	.cmp_encap	= bpf_encap_cmp,
	.owner		= THIS_MODULE,
};

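/* A usage sketch, assuming an iproute2 build with LWT BPF support: programs
 * are attached per route, one object/section per hook (prefix, device and
 * file/section names below are illustrative):
 *
 *	# ip route add 192.168.253.0/24 dev veth0 \
 *		encap bpf xmit obj lwt_prog.o section lwt_xmit
 *
 * In iproute2 versions that support it, an optional "headroom" keyword maps
 * to LWT_BPF_XMIT_HEADROOM and reserves extra skb headroom for headers
 * pushed by the xmit program.
 */
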
static int handle_gso_type(struct sk_buff *skb, unsigned int gso_type,
			   int encap_len)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	gso_type |= SKB_GSO_DODGY;
	shinfo->gso_type |= gso_type;
	skb_decrease_gso_size(shinfo, encap_len);
	shinfo->gso_segs = 0;
	return 0;
}

static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
{
	int next_hdr_offset;
	void *next_hdr;
	__u8 protocol;

	/* SCTP and UDP_L4 gso need more nuanced handling than what
	 * handle_gso_type() does above: skb_decrease_gso_size() is not enough.
	 * So at the moment only TCP GSO packets are let through.
	 */
	if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
		return -ENOTSUPP;

	if (ipv4) {
		protocol = ip_hdr(skb)->protocol;
		next_hdr_offset = sizeof(struct iphdr);
		next_hdr = skb_network_header(skb) + next_hdr_offset;
	} else {
		protocol = ipv6_hdr(skb)->nexthdr;
		next_hdr_offset = sizeof(struct ipv6hdr);
		next_hdr = skb_network_header(skb) + next_hdr_offset;
	}

	switch (protocol) {
	case IPPROTO_GRE:
		next_hdr_offset += sizeof(struct gre_base_hdr);
		if (next_hdr_offset > encap_len)
			return -EINVAL;

		if (((struct gre_base_hdr *)next_hdr)->flags & GRE_CSUM)
			return handle_gso_type(skb, SKB_GSO_GRE_CSUM,
					       encap_len);
		return handle_gso_type(skb, SKB_GSO_GRE, encap_len);

	case IPPROTO_UDP:
		next_hdr_offset += sizeof(struct udphdr);
		if (next_hdr_offset > encap_len)
			return -EINVAL;

		if (((struct udphdr *)next_hdr)->check)
			return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL_CSUM,
					       encap_len);
		return handle_gso_type(skb, SKB_GSO_UDP_TUNNEL, encap_len);

	case IPPROTO_IP:
	case IPPROTO_IPV6:
		if (ipv4)
			return handle_gso_type(skb, SKB_GSO_IPXIP4, encap_len);
		else
			return handle_gso_type(skb, SKB_GSO_IPXIP6, encap_len);

	default:
		return -EPROTONOSUPPORT;
	}
}

int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
{
	struct iphdr *iph;
	bool ipv4;
	int err;

	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
		return -EINVAL;

	/* validate protocol and length */
	iph = (struct iphdr *)hdr;
	if (iph->version == 4) {
		ipv4 = true;
		if (unlikely(len < iph->ihl * 4))
			return -EINVAL;
	} else if (iph->version == 6) {
		ipv4 = false;
		if (unlikely(len < sizeof(struct ipv6hdr)))
			return -EINVAL;
	} else {
		return -EINVAL;
	}

	if (ingress)
		err = skb_cow_head(skb, len + skb->mac_len);
	else
		err = skb_cow_head(skb,
				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
	if (unlikely(err))
		return err;

	/* push the encap headers and fix pointers */
	skb_reset_inner_headers(skb);
	skb_reset_inner_mac_header(skb);  /* mac header is not yet set */
	skb_set_inner_protocol(skb, skb->protocol);
	skb->encapsulation = 1;
	skb_push(skb, len);
	if (ingress)
		skb_postpush_rcsum(skb, iph, len);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), hdr, len);
	bpf_compute_data_pointers(skb);
	skb_clear_hash(skb);

	if (ipv4) {
		skb->protocol = htons(ETH_P_IP);
		iph = ip_hdr(skb);

		if (!iph->check)
			iph->check = ip_fast_csum((unsigned char *)iph,
						  iph->ihl);
	} else {
		skb->protocol = htons(ETH_P_IPV6);
	}

	if (skb_is_gso(skb))
		return handle_gso_encap(skb, ipv4, len);

	return 0;
}

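/* bpf_lwt_push_ip_encap() above backs the bpf_lwt_push_encap() helper for
 * BPF_LWT_ENCAP_IP. A minimal sketch of an lwt_xmit program using it
 * (addresses and the IPIP outer header are assumptions for illustration;
 * includes as in the earlier sketch, plus <linux/ip.h> and <linux/in.h>):
 *
 *	SEC("lwt_xmit")
 *	int encap_ipip(struct __sk_buff *skb)
 *	{
 *		struct iphdr hdr = {
 *			.version  = 4,
 *			.ihl      = 5,
 *			.ttl      = 64,
 *			.protocol = IPPROTO_IPIP,
 *			.saddr    = bpf_htonl(0x0a000001),	// 10.0.0.1
 *			.daddr    = bpf_htonl(0x0a000002),	// 10.0.0.2
 *			.tot_len  = bpf_htons(skb->len + sizeof(hdr)),
 *		};
 *
 *		// check is left 0 so bpf_lwt_push_ip_encap() fills it in
 *		if (bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, sizeof(hdr)))
 *			return BPF_DROP;
 *		return BPF_LWT_REROUTE;	// re-route based on the new outer header
 *	}
 */
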
static int __init bpf_lwt_init(void)
{
	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
}

subsys_initcall(bpf_lwt_init)