#include <linux/module.h>
#include <linux/errno.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/genetlink.h>
#include <net/protocol.h>
#include <net/udp_tunnel.h>
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
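/* Foo-over-UDP (FOU) and Generic UDP Encapsulation (GUE).
 *
 * On receive, packets arriving on a configured UDP port are decapsulated
 * and resubmitted to the inner IP protocol; on transmit,
 * fou_build_header()/gue_build_header() add the outer UDP (and GUE)
 * headers for IP tunnels. Ports are managed over generic netlink.
 */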
static DEFINE_SPINLOCK(fou_lock);
static LIST_HEAD(fou_list);
struct fou {
	struct socket *sock;
	u8 protocol;
	u16 port;
	struct udp_offload udp_offloads;
	struct list_head list;
};

struct fou_cfg {
	u16 type;
	u8 protocol;
	struct udp_port_cfg udp_config;
};
static inline struct fou *fou_from_sock(struct sock *sk)
{
	return sk->sk_user_data;
}
static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
	struct iphdr *iph = ip_hdr(skb);

	/* Remove 'len' bytes from the packet (UDP header and
	 * FOU header if present).
	 */
	iph->tot_len = htons(ntohs(iph->tot_len) - len);
	skb_postpull_rcsum(skb, udp_hdr(skb), len);
	skb_reset_transport_header(skb);
}
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);

	fou_recv_pull(skb, sizeof(struct udphdr));

	return -fou->protocol;
}
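/* Remote checksum offload: the GUE option carries the checksum start and
 * offset of the inner checksum field, and the receiver fills that field
 * in from the full packet checksum (skb->csum) via remcsum_adjust()
 * instead of the sender computing it on transmit.
 */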
static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
				  void *data, size_t hdrlen, u8 ipproto)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
	__wsum delta;

	if (skb->remcsum_offload) {
		/* Already processed in GRO path */
		skb->remcsum_offload = 0;
		return guehdr;
	}

	if (!pskb_may_pull(skb, plen))
		return NULL;
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
		__skb_checksum_complete(skb);

	delta = remcsum_adjust((void *)guehdr + hdrlen,
			       skb->csum, start, offset);

	/* Adjust skb->csum since we changed the packet */
	skb->csum = csum_add(skb->csum, delta);

	return guehdr;
}
static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
{
	/* No support yet */
	kfree_skb(skb);
	return 0;
}
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
	struct fou *fou = fou_from_sock(sk);
	size_t len, optlen, hdrlen;
	struct guehdr *guehdr;
	void *data;
	u16 doffset = 0;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);
	if (!pskb_may_pull(skb, len))
		goto drop;

	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	optlen = guehdr->hlen << 2;
	len += optlen;

	if (!pskb_may_pull(skb, len))
		goto drop;

	/* guehdr may change after pull */
	guehdr = (struct guehdr *)&udp_hdr(skb)[1];

	hdrlen = sizeof(struct guehdr) + optlen;

	if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
		goto drop;

	ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);

	/* Pull csum through the guehdr now. This can be used if
	 * there is a remote checksum offload.
	 */
	skb_postpull_rcsum(skb, udp_hdr(skb), len);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_remcsum(skb, guehdr, data + doffset,
					     hdrlen, guehdr->proto_ctype);
			if (!guehdr)
				goto drop;

			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	if (unlikely(guehdr->control))
		return gue_control_message(skb, guehdr);

	__skb_pull(skb, sizeof(struct udphdr) + hdrlen);
	skb_reset_transport_header(skb);

	return -guehdr->proto_ctype;

drop:
	kfree_skb(skb);
	return 0;
}
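/* GRO receive for plain FOU: look up the inner protocol (recorded in
 * NAPI_GRO_CB(skb)->proto) in the IPv4 or IPv6 offload table and let
 * that protocol's gro_receive handler do the aggregation.
 */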
static struct sk_buff **fou_gro_receive(struct sk_buff **head,
					struct sk_buff *skb,
					struct udp_offload *uoff)
{
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	u8 proto = NAPI_GRO_CB(skb)->proto;
	const struct net_offload **offloads;

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive)
		goto out_unlock;

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();

	return pp;
}
static int fou_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
{
	const struct net_offload *ops;
	u8 proto = NAPI_GRO_CB(skb)->proto;
	int err = -ENOSYS;
	const struct net_offload **offloads;

	udp_tunnel_gro_complete(skb, nhoff);

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff);

out_unlock:
	rcu_read_unlock();

	return err;
}
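/* GRO variant of the remote checksum offload fixup: it works on the GRO
 * header area and the checksum tracked in NAPI_GRO_CB(skb)->csum, and
 * sets skb->remcsum_offload so the normal receive path skips the same
 * work later.
 */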
static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
				      struct guehdr *guehdr, void *data,
				      size_t hdrlen, u8 ipproto)
{
	__be16 *pd = data;
	size_t start = ntohs(pd[0]);
	size_t offset = ntohs(pd[1]);
	size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start);
	__wsum delta;

	if (skb->remcsum_offload)
		return guehdr;

	if (!NAPI_GRO_CB(skb)->csum_valid)
		return NULL;

	/* Pull checksum that will be written */
	if (skb_gro_header_hard(skb, off + plen)) {
		guehdr = skb_gro_header_slow(skb, off + plen, off);
		if (!guehdr)
			return NULL;
	}

	delta = remcsum_adjust((void *)guehdr + hdrlen,
			       NAPI_GRO_CB(skb)->csum, start, offset);

	/* Adjust skb->csum since we changed the packet */
	skb->csum = csum_add(skb->csum, delta);
	NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);

	skb->remcsum_offload = 1;

	return guehdr;
}
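/* GRO receive for GUE: parse and validate the GUE header, apply any
 * remote checksum offload option, pull the header, mark packets whose
 * GUE headers differ as belonging to different flows, then hand off to
 * the inner protocol's gro_receive handler.
 */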
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
					struct sk_buff *skb,
					struct udp_offload *uoff)
{
	const struct net_offload **offloads;
	const struct net_offload *ops;
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	struct guehdr *guehdr;
	size_t len, optlen, hdrlen, off;
	void *data;
	u16 doffset = 0;
	int flush = 1;

	off = skb_gro_offset(skb);
	len = off + sizeof(*guehdr);

	guehdr = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	optlen = guehdr->hlen << 2;
	len += optlen;

	if (skb_gro_header_hard(skb, len)) {
		guehdr = skb_gro_header_slow(skb, len, off);
		if (unlikely(!guehdr))
			goto out;
	}

	if (unlikely(guehdr->control) || guehdr->version != 0 ||
	    validate_gue_flags(guehdr, optlen))
		goto out;

	hdrlen = sizeof(*guehdr) + optlen;

	/* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr;
	 * this is needed if there is a remote checksum offload.
	 */
	skb_gro_postpull_rcsum(skb, guehdr, hdrlen);

	data = &guehdr[1];

	if (guehdr->flags & GUE_FLAG_PRIV) {
		__be32 flags = *(__be32 *)(data + doffset);

		doffset += GUE_LEN_PRIV;

		if (flags & GUE_PFLAG_REMCSUM) {
			guehdr = gue_gro_remcsum(skb, off, guehdr,
						 data + doffset, hdrlen,
						 guehdr->proto_ctype);
			if (!guehdr)
				goto out;

			data = &guehdr[1];

			doffset += GUE_PLEN_REMCSUM;
		}
	}

	skb_gro_pull(skb, hdrlen);

	flush = 0;

	for (p = *head; p; p = p->next) {
		const struct guehdr *guehdr2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		guehdr2 = (struct guehdr *)(p->data + off);

		/* Compare base GUE header to be equal (covers
		 * hlen, version, proto_ctype, and flags).
		 */
		if (guehdr->word != guehdr2->word) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* Check that the optional fields are the same. */
		if (guehdr->hlen && memcmp(&guehdr[1], &guehdr2[1],
					   guehdr->hlen << 2)) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}
	}

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[guehdr->proto_ctype]);
	if (WARN_ON(!ops || !ops->callbacks.gro_receive))
		goto out_unlock;

	pp = ops->callbacks.gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();
out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}
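/* gro_complete counterpart: read the inner protocol and header length
 * from the GUE header at nhoff and let the inner protocol's
 * gro_complete handler finish the merged packet.
 */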
static int gue_gro_complete(struct sk_buff *skb, int nhoff,
			    struct udp_offload *uoff)
{
	const struct net_offload **offloads;
	struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff);
	const struct net_offload *ops;
	unsigned int guehlen;
	u8 proto;
	int err = -ENOSYS;

	proto = guehdr->proto_ctype;

	guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);

	rcu_read_lock();
	offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
	ops = rcu_dereference(offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
		goto out_unlock;

	err = ops->callbacks.gro_complete(skb, nhoff + guehlen);

out_unlock:
	rcu_read_unlock();

	return err;
}
static int fou_add_to_port_list(struct fou *fou)
{
	struct fou *fout;

	spin_lock(&fou_lock);
	list_for_each_entry(fout, &fou_list, list) {
		if (fou->port == fout->port) {
			spin_unlock(&fou_lock);
			return -EALREADY;
		}
	}

	list_add(&fou->list, &fou_list);
	spin_unlock(&fou_lock);

	return 0;
}
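/* Tear down a FOU port: drop the GRO offload registration, unlink the
 * entry from fou_list and detach it from its tunnel socket.
 */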
static void fou_release(struct fou *fou)
{
	struct socket *sock = fou->sock;
	struct sock *sk = sock->sk;

	udp_del_offload(&fou->udp_offloads);

	list_del(&fou->list);

	/* Remove hooks into tunnel socket */
	sk->sk_user_data = NULL;

	sock_release(sock);

	kfree(fou);
}
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	udp_sk(sk)->encap_rcv = fou_udp_recv;
	fou->protocol = cfg->protocol;
	fou->udp_offloads.callbacks.gro_receive = fou_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = fou_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;
	fou->udp_offloads.ipproto = cfg->protocol;

	return 0;
}
static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
{
	udp_sk(sk)->encap_rcv = gue_udp_recv;
	fou->udp_offloads.callbacks.gro_receive = gue_gro_receive;
	fou->udp_offloads.callbacks.gro_complete = gue_gro_complete;
	fou->udp_offloads.port = cfg->udp_config.local_udp_port;

	return 0;
}
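/* Create a FOU port: open a kernel UDP socket for the configured local
 * port, install the encap_rcv hook for the requested encapsulation type
 * (direct FOU or GUE), register the GRO offload and add the port to the
 * global list.
 */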
static int fou_create(struct net *net, struct fou_cfg *cfg,
		      struct socket **sockp)
{
	struct fou *fou = NULL;
	int err;
	struct socket *sock = NULL;
	struct sock *sk;

	/* Open UDP socket */
	err = udp_sock_create(net, &cfg->udp_config, &sock);
	if (err < 0)
		goto error;

	/* Allocate FOU port structure */
	fou = kzalloc(sizeof(*fou), GFP_KERNEL);
	if (!fou) {
		err = -ENOMEM;
		goto error;
	}

	sk = sock->sk;

	fou->port = cfg->udp_config.local_udp_port;

	/* Initialize for the configured encapsulation type */
	switch (cfg->type) {
	case FOU_ENCAP_DIRECT:
		err = fou_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	case FOU_ENCAP_GUE:
		err = gue_encap_init(sk, fou, cfg);
		if (err)
			goto error;
		break;
	default:
		err = -EINVAL;
		goto error;
	}

	udp_sk(sk)->encap_type = 1;

	sk->sk_user_data = fou;
	fou->sock = sock;

	inet_inc_convert_csum(sk);

	sk->sk_allocation = GFP_ATOMIC;

	if (cfg->udp_config.family == AF_INET) {
		err = udp_add_offload(&fou->udp_offloads);
		if (err)
			goto error;
	}

	err = fou_add_to_port_list(fou);
	if (err)
		goto error;

	if (sockp)
		*sockp = sock;

	return 0;

error:
	kfree(fou);
	if (sock)
		sock_release(sock);

	return err;
}
static int fou_destroy(struct net *net, struct fou_cfg *cfg)
{
	struct fou *fou;
	u16 port = cfg->udp_config.local_udp_port;
	int err = -EINVAL;

	spin_lock(&fou_lock);
	list_for_each_entry(fou, &fou_list, list) {
		if (fou->port == port) {
			udp_del_offload(&fou->udp_offloads);
			fou_release(fou);
			err = 0;
			break;
		}
	}
	spin_unlock(&fou_lock);

	return err;
}
static struct genl_family fou_nl_family = {
	.id		= GENL_ID_GENERATE,
	.name		= FOU_GENL_NAME,
	.version	= FOU_GENL_VERSION,
	.maxattr	= FOU_ATTR_MAX,
};
static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
	[FOU_ATTR_PORT] = { .type = NLA_U16, },
	[FOU_ATTR_AF] = { .type = NLA_U8, },
	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
	[FOU_ATTR_TYPE] = { .type = NLA_U8, },
};
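/* Translate FOU netlink attributes into a struct fou_cfg. The address
 * family defaults to AF_INET when no FOU_ATTR_AF attribute is given.
 */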
static int parse_nl_config(struct genl_info *info,
			   struct fou_cfg *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->udp_config.family = AF_INET;

	if (info->attrs[FOU_ATTR_AF]) {
		u8 family = nla_get_u8(info->attrs[FOU_ATTR_AF]);

		if (family != AF_INET && family != AF_INET6)
			return -EINVAL;

		cfg->udp_config.family = family;
	}

	if (info->attrs[FOU_ATTR_PORT]) {
		u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]);

		cfg->udp_config.local_udp_port = port;
	}

	if (info->attrs[FOU_ATTR_IPPROTO])
		cfg->protocol = nla_get_u8(info->attrs[FOU_ATTR_IPPROTO]);

	if (info->attrs[FOU_ATTR_TYPE])
		cfg->type = nla_get_u8(info->attrs[FOU_ATTR_TYPE]);

	return 0;
}
static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
{
	struct fou_cfg cfg;
	int err;

	err = parse_nl_config(info, &cfg);
	if (err)
		return err;

	return fou_create(&init_net, &cfg, NULL);
}

static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
{
	struct fou_cfg cfg;

	parse_nl_config(info, &cfg);

	return fou_destroy(&init_net, &cfg);
}
static const struct genl_ops fou_nl_ops[] = {
	{
		.cmd = FOU_CMD_ADD,
		.doit = fou_nl_cmd_add_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = FOU_CMD_DEL,
		.doit = fou_nl_cmd_rm_port,
		.policy = fou_nl_policy,
		.flags = GENL_ADMIN_PERM,
	},
};
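/* Transmit-side helpers used by the IP tunnel code: report how much
 * header room the encapsulation needs and build the outer UDP (and GUE)
 * headers.
 */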
size_t fou_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct udphdr);
}
EXPORT_SYMBOL(fou_encap_hlen);
size_t gue_encap_hlen(struct ip_tunnel_encap *e)
{
	size_t len;
	bool need_priv = false;

	len = sizeof(struct udphdr) + sizeof(struct guehdr);

	if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
		len += GUE_PLEN_REMCSUM;
		need_priv = true;
	}

	len += need_priv ? GUE_LEN_PRIV : 0;

	return len;
}
EXPORT_SYMBOL(gue_encap_hlen);
static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
			  struct flowi4 *fl4, u8 *protocol, __be16 sport)
{
	struct udphdr *uh;

	skb_push(skb, sizeof(struct udphdr));
	skb_reset_transport_header(skb);

	uh = udp_hdr(skb);

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;
}
int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	__be16 sport;

	skb = iptunnel_handle_offloads(skb, csum, type);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);
	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(fou_build_header);
int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
		     u8 *protocol, struct flowi4 *fl4)
{
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
	struct guehdr *guehdr;
	size_t hdrlen, optlen = 0;
	__be16 sport;
	void *data;
	bool need_priv = false;

	if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
	    skb->ip_summed == CHECKSUM_PARTIAL) {
		optlen += GUE_PLEN_REMCSUM;
		type |= SKB_GSO_TUNNEL_REMCSUM;
		need_priv = true;
	}

	optlen += need_priv ? GUE_LEN_PRIV : 0;

	skb = iptunnel_handle_offloads(skb, csum, type);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get source port (based on flow hash) before skb_push */
	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	hdrlen = sizeof(struct guehdr) + optlen;

	skb_push(skb, hdrlen);

	guehdr = (struct guehdr *)skb->data;

	guehdr->control = 0;
	guehdr->version = 0;
	guehdr->hlen = optlen >> 2;
	guehdr->flags = 0;
	guehdr->proto_ctype = *protocol;

	data = &guehdr[1];

	if (need_priv) {
		__be32 *flags = data;

		guehdr->flags |= GUE_FLAG_PRIV;
		*flags = 0;
		data += GUE_LEN_PRIV;

		if (type & SKB_GSO_TUNNEL_REMCSUM) {
			u16 csum_start = skb_checksum_start_offset(skb);
			__be16 *pd = data;

			if (csum_start < hdrlen)
				return -EINVAL;

			csum_start -= hdrlen;
			pd[0] = htons(csum_start);
			pd[1] = htons(csum_start + skb->csum_offset);

			if (!skb_is_gso(skb)) {
				skb->ip_summed = CHECKSUM_NONE;
				skb->encapsulation = 0;
			}

			*flags |= GUE_PFLAG_REMCSUM;
			data += GUE_PLEN_REMCSUM;
		}
	}

	fou_build_udp(skb, e, fl4, protocol, sport);

	return 0;
}
EXPORT_SYMBOL(gue_build_header);
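/* Register the FOU and GUE encapsulation ops with the IP tunnel
 * infrastructure so tunnels configured with TUNNEL_ENCAP_FOU or
 * TUNNEL_ENCAP_GUE can use the callbacks above.
 */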
#ifdef CONFIG_NET_FOU_IP_TUNNELS

static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
	.encap_hlen = fou_encap_hlen,
	.build_header = fou_build_header,
};

static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
	.encap_hlen = gue_encap_hlen,
	.build_header = gue_build_header,
};
static int ip_tunnel_encap_add_fou_ops(void)
{
	int ret;

	ret = ip_tunnel_encap_add_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	if (ret < 0) {
		pr_err("can't add fou ops\n");
		return ret;
	}

	ret = ip_tunnel_encap_add_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
	if (ret < 0) {
		pr_err("can't add gue ops\n");
		ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
		return ret;
	}

	return 0;
}
static void ip_tunnel_encap_del_fou_ops(void)
{
	ip_tunnel_encap_del_ops(&fou_iptun_ops, TUNNEL_ENCAP_FOU);
	ip_tunnel_encap_del_ops(&gue_iptun_ops, TUNNEL_ENCAP_GUE);
}
#else

static int ip_tunnel_encap_add_fou_ops(void)
{
	return 0;
}

static void ip_tunnel_encap_del_fou_ops(void)
{
}

#endif
static int __init fou_init(void)
{
	int ret;

	ret = genl_register_family_with_ops(&fou_nl_family,
					    fou_nl_ops);
	if (ret < 0)
		goto exit;

	ret = ip_tunnel_encap_add_fou_ops();
	if (ret < 0)
		genl_unregister_family(&fou_nl_family);

exit:
	return ret;
}
static void __exit fou_fini(void)
{
	struct fou *fou, *next;

	ip_tunnel_encap_del_fou_ops();

	genl_unregister_family(&fou_nl_family);

	/* Close all the FOU sockets */

	spin_lock(&fou_lock);
	list_for_each_entry_safe(fou, next, &fou_list, list)
		fou_release(fou);
	spin_unlock(&fou_lock);
}
module_init(fou_init);
module_exit(fou_fini);
MODULE_AUTHOR("Tom Herbert <therbert@google.com>");
MODULE_LICENSE("GPL");