2 * Copyright (c) 2014 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/version.h>
25 #include <linux/net.h>
26 #include <linux/rculist.h>
27 #include <linux/udp.h>
29 #include <net/geneve.h>
32 #include <net/route.h>
42 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
43 * |Ver| Opt Len |O|C| Rsvd. | Protocol Type |
44 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
45 * | Virtual Network Identifier (VNI) | Reserved |
46 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
47 * | Variable Length Options |
48 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
51 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
52 * | Option Class | Type |R|R|R| Length |
53 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
54 * | Variable Option Data |
55 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
59 #ifdef __LITTLE_ENDIAN_BITFIELD
75 struct geneve_opt options
[];
80 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
83 * struct geneve_port - Keeps track of open UDP ports
84 * @sock: The socket created for this port number.
92 static LIST_HEAD(geneve_ports
);
/*
 * Return the struct geneve_port private data that belongs to @vport,
 * as handed back by vport_priv().
 */
94 static inline struct geneve_port
*geneve_vport(const struct vport
*vport
)
96 return vport_priv(vport
);
/*
 * Return a pointer to the Geneve header of @skb.  The header is taken to
 * start directly behind the UDP header returned by udp_hdr(), i.e. at
 * udp_hdr(skb) + 1.
 */
99 static inline struct genevehdr
*geneve_hdr(const struct sk_buff
*skb
)
101 return (struct genevehdr
*)(udp_hdr(skb
) + 1);
104 /* Convert 64 bit tunnel ID to 24 bit VNI. */
/*
 * @tun_id: tunnel ID, typed __be64.
 * @vni:    output buffer; bytes vni[0], vni[1] and vni[2] are written.
 *
 * Two groups of assignments are visible below.
 * NOTE(review): presumably they are the two arms of an endianness
 * conditional whose #ifdef/#else/#endif lines are missing from this
 * listing -- confirm against the full file.
 */
105 static void tunnel_id_to_vni(__be64 tun_id
, __u8
*vni
)
/* Variant 1: take the bytes at bit offsets 16, 8 and 0 of tun_id. */
108 vni
[0] = (__force __u8
)(tun_id
>> 16);
109 vni
[1] = (__force __u8
)(tun_id
>> 8);
110 vni
[2] = (__force __u8
)tun_id
;
/* Variant 2: treat tun_id as a u64 and take the bytes at offsets 40, 48, 56. */
112 vni
[0] = (__force __u8
)((__force u64
)tun_id
>> 40);
113 vni
[1] = (__force __u8
)((__force u64
)tun_id
>> 48);
114 vni
[2] = (__force __u8
)((__force u64
)tun_id
>> 56);
118 /* Convert 24 bit VNI to 64 bit tunnel ID. */
/*
 * @vni: input buffer; bytes vni[0], vni[1] and vni[2] are read.
 *
 * Returns the tunnel ID as a __be64.  Two return expressions are visible:
 * one places the bytes at bit offsets 16/8/0, the other at 40/48/56,
 * mirroring the shifts used by tunnel_id_to_vni().
 * NOTE(review): presumably these are alternative arms of an endianness
 * conditional that is missing from this listing -- confirm.
 */
119 static __be64
vni_to_tunnel_id(const __u8
*vni
)
122 return (vni
[0] << 16) | (vni
[1] << 8) | vni
[2];
124 return (__force __be64
)(((__force u64
)vni
[0] << 40) |
125 ((__force u64
)vni
[1] << 48) |
126 ((__force u64
)vni
[2] << 56));
/*
 * Fill in the UDP header and the Geneve header (including options) of an
 * outgoing packet.
 *
 * @vport: Geneve vport the packet is sent on; supplies the socket and the
 *         network namespace.
 * skb:    the packet being built (used throughout the body; its parameter
 *         declaration is not visible in this listing).  The transport
 *         header of skb must already point at the UDP header, because the
 *         Geneve header is located at udp_hdr(skb) + 1.
 *
 * Tunnel metadata is read from OVS_CB(skb)->egress_tun_info.
 * NOTE(review): some original lines are absent from this listing; only
 * the statements shown here are described.
 */
130 static void geneve_build_header(const struct vport
*vport
,
133 struct geneve_port
*geneve_port
= geneve_vport(vport
);
134 struct net
*net
= ovs_dp_get_net(vport
->dp
);
135 struct udphdr
*udph
= udp_hdr(skb
);
136 struct genevehdr
*geneveh
= (struct genevehdr
*)(udph
+ 1);
137 const struct ovs_tunnel_info
*tun_info
= OVS_CB(skb
)->egress_tun_info
;
/* UDP destination port is the port the vport's own socket is bound to. */
139 udph
->dest
= inet_sport(geneve_port
->sock
->sk
);
/* UDP source port comes from udp_flow_src_port(). */
140 udph
->source
= udp_flow_src_port(net
, skb
, 0, 0, true);
/* UDP length covers everything from the transport header to the end. */
142 udph
->len
= htons(skb
->len
- skb_transport_offset(skb
));
144 geneveh
->ver
= GENEVE_VER
;
/* opt_len is stored as options_len divided by 4. */
145 geneveh
->opt_len
= tun_info
->options_len
/ 4;
/* The OAM and critical bits mirror the corresponding tunnel flags. */
146 geneveh
->oam
= !!(tun_info
->tunnel
.tun_flags
& TUNNEL_OAM
);
147 geneveh
->critical
= !!(tun_info
->tunnel
.tun_flags
& TUNNEL_CRIT_OPT
);
149 geneveh
->proto_type
= htons(ETH_P_TEB
);
150 tunnel_id_to_vni(tun_info
->tunnel
.tun_id
, geneveh
->vni
);
/* Copy the caller-supplied options verbatim behind the fixed header. */
153 memcpy(geneveh
->options
, tun_info
->options
, tun_info
->options_len
);
/*
 * Receive handler for Geneve packets; geneve_socket_init() installs it as
 * the socket's encap_rcv callback.
 *
 * @sk:  socket the packet arrived on; its sk_user_data is read to find the
 *       owning geneve_port.
 * @skb: received packet.
 *
 * Visible steps: optional UDP checksum validation, a pull check for the
 * base headers, version and protocol-type checks, lookup of the port,
 * removal of the outer headers, construction of the tunnel metadata and
 * hand-off to ovs_vport_receive().
 * NOTE(review): the statements executed when a check fails, and the return
 * values, are not visible in this listing.
 */
156 static int geneve_rcv(struct sock
*sk
, struct sk_buff
*skb
)
158 struct geneve_port
*geneve_port
;
159 struct genevehdr
*geneveh
;
161 struct ovs_tunnel_info tun_info
;
/* The checksum test below sits under a "kernel older than 3.16" guard. */
165 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
166 if (unlikely(udp_lib_checksum_complete(skb
)))
/* Check that the UDP and fixed Geneve headers can be pulled. */
170 if (unlikely(!pskb_may_pull(skb
, GENEVE_BASE_HLEN
)))
173 geneveh
= geneve_hdr(skb
);
/* Only GENEVE_VER is accepted. */
175 if (unlikely(geneveh
->ver
!= GENEVE_VER
))
/* Only an ETH_P_TEB protocol type is accepted. */
178 if (unlikely(geneveh
->proto_type
!= htons(ETH_P_TEB
)))
181 geneve_port
= rcu_dereference_sk_user_data(sk
);
182 if (unlikely(!geneve_port
))
/* Option length in bytes: the header field multiplied by 4. */
185 opts_len
= geneveh
->opt_len
* 4;
186 if (iptunnel_pull_header(skb
, GENEVE_BASE_HLEN
+ opts_len
,
/*
 * The header pointer is fetched again after iptunnel_pull_header().
 * NOTE(review): presumably because the pull may move the packet data --
 * confirm.
 */
190 geneveh
= geneve_hdr(skb
);
/* TUNNEL_CSUM is reported when the UDP checksum field is non-zero. */
192 flags
= TUNNEL_KEY
| TUNNEL_OPTIONS_PRESENT
|
193 (udp_hdr(skb
)->check
!= 0 ? TUNNEL_CSUM
: 0) |
194 (geneveh
->oam
? TUNNEL_OAM
: 0) |
195 (geneveh
->critical
? TUNNEL_CRIT_OPT
: 0);
197 key
= vni_to_tunnel_id(geneveh
->vni
);
198 ovs_flow_tun_info_init(&tun_info
, ip_hdr(skb
),
199 udp_hdr(skb
)->source
, udp_hdr(skb
)->dest
,
201 geneveh
->options
, opts_len
);
203 ovs_vport_receive(vport_from_priv(geneve_port
), skb
, &tun_info
);
212 /* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */
213 #define UDP_ENCAP_GENEVE 1
/*
 * Create and configure the kernel UDP socket used by a Geneve port.
 *
 * @geneve_port: port whose ->sock is created and configured.
 * @net:         network namespace the socket is moved into.
 * dst_port:     port stored unchanged in sin_port, so it is expected in
 *               network byte order (its parameter declaration is not
 *               visible in this listing).
 *
 * Visible steps: create an AF_INET/SOCK_DGRAM socket, switch it to @net,
 * bind it to INADDR_ANY:dst_port, store @geneve_port as the socket's
 * sk_user_data and register geneve_rcv() as the UDP encapsulation
 * receive hook.
 * NOTE(review): the trailing sk_release_kernel()/pr_warn() statements are
 * presumably the error path; the labels and return statements are not
 * visible in this listing.
 */
214 static int geneve_socket_init(struct geneve_port
*geneve_port
, struct net
*net
,
217 struct sockaddr_in sin
;
220 err
= sock_create_kern(AF_INET
, SOCK_DGRAM
, 0,
225 /* release net ref. */
226 sk_change_net(geneve_port
->sock
->sk
, net
);
228 sin
.sin_family
= AF_INET
;
229 sin
.sin_addr
.s_addr
= htonl(INADDR_ANY
);
230 sin
.sin_port
= dst_port
;
232 err
= kernel_bind(geneve_port
->sock
,
233 (struct sockaddr
*)&sin
, sizeof(struct sockaddr_in
));
/* geneve_rcv() reads this pointer back from sk_user_data. */
237 rcu_assign_sk_user_data(geneve_port
->sock
->sk
, geneve_port
);
238 udp_sk(geneve_port
->sock
->sk
)->encap_type
= UDP_ENCAP_GENEVE
;
239 udp_sk(geneve_port
->sock
->sk
)->encap_rcv
= geneve_rcv
;
246 sk_release_kernel(geneve_port
->sock
->sk
);
248 pr_warn("cannot register geneve protocol handler: %d\n", err
);
/*
 * Report the port's tunnel options through netlink.
 *
 * @vport: Geneve vport being queried.
 * skb:    netlink message being filled (its parameter declaration is not
 *         visible in this listing).
 *
 * Adds an OVS_TUNNEL_ATTR_DST_PORT u16 attribute holding the port the
 * vport's socket is bound to, converted to host byte order with ntohs().
 * NOTE(review): the return statements are not visible in this listing.
 */
252 static int geneve_get_options(const struct vport
*vport
,
255 struct geneve_port
*geneve_port
= geneve_vport(vport
);
257 if (nla_put_u16(skb
, OVS_TUNNEL_ATTR_DST_PORT
,
258 ntohs(inet_sport(geneve_port
->sock
->sk
))))
/*
 * Tear down a Geneve vport.
 *
 * @vport: vport to destroy.
 *
 * Visible steps: clear the socket's sk_user_data (the pointer geneve_rcv()
 * looks up), release the kernel socket, then free the vport through
 * ovs_vport_deferred_free().
 * NOTE(review): some original lines are absent from this listing.
 */
263 static void geneve_tnl_destroy(struct vport
*vport
)
265 struct geneve_port
*geneve_port
= geneve_vport(vport
);
268 rcu_assign_sk_user_data(geneve_port
->sock
->sk
, NULL
);
269 sk_release_kernel(geneve_port
->sock
->sk
);
271 ovs_vport_deferred_free(vport
);
/*
 * Create a Geneve vport from the given parameters.
 *
 * @parms: creation parameters; ->dp, ->options and ->name are read.
 *
 * Visible steps: look up the nested OVS_TUNNEL_ATTR_DST_PORT attribute
 * (it must be exactly a u16), allocate a vport with room for a
 * struct geneve_port, copy the name, and open the UDP socket on the
 * requested port via geneve_socket_init().
 * NOTE(review): error handling and return statements are mostly absent
 * from this listing; ovs_vport_free() below is presumably the cleanup
 * after a failed socket setup -- confirm.
 */
274 static struct vport
*geneve_tnl_create(const struct vport_parms
*parms
)
276 struct net
*net
= ovs_dp_get_net(parms
->dp
);
277 struct nlattr
*options
= parms
->options
;
278 struct geneve_port
*geneve_port
;
289 a
= nla_find_nested(options
, OVS_TUNNEL_ATTR_DST_PORT
);
290 if (a
&& nla_len(a
) == sizeof(u16
)) {
291 dst_port
= nla_get_u16(a
);
293 /* Require destination port from userspace. */
298 vport
= ovs_vport_alloc(sizeof(struct geneve_port
),
299 &ovs_geneve_vport_ops
, parms
);
303 geneve_port
= geneve_vport(vport
);
/*
 * NOTE(review): strncpy() does not NUL-terminate the destination when the
 * source is IFNAMSIZ characters or longer.  The size and prior contents
 * of ->name are not visible here -- verify that the name is always
 * terminated.
 */
304 strncpy(geneve_port
->name
, parms
->name
, IFNAMSIZ
);
/* dst_port is converted to network byte order before binding. */
306 err
= geneve_socket_init(geneve_port
, net
, htons(dst_port
));
313 ovs_vport_free(vport
);
318 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
/*
 * Rewrite the UDP length field of @skb so that it covers the data from
 * the transport header to the end of the packet.  One variant of
 * handle_offloads() installs this function as the per-segment fix-up
 * callback (OVS_GSO_CB(skb)->fix_segment).
 */
320 static void geneve_fix_segment(struct sk_buff
*skb
)
322 struct udphdr
*udph
= udp_hdr(skb
);
324 udph
->len
= htons(skb
->len
- skb_transport_offset(skb
));
/*
 * Prepare the offload state of @skb before transmission (first of two
 * definitions of this function in the file).
 *
 * For GSO packets it tests skb_is_encapsulated() and installs
 * geneve_fix_segment() as the segment fix-up callback.  For other
 * packets, ip_summed is reset to CHECKSUM_NONE unless it is
 * CHECKSUM_PARTIAL.
 * NOTE(review): presumably this definition is the one compiled under the
 * preceding "LINUX_VERSION_CODE < 3.12" guard; the #else/#endif lines,
 * the action taken for already-encapsulated packets and the return
 * statements are not visible in this listing.
 */
327 static int handle_offloads(struct sk_buff
*skb
)
329 if (skb_is_gso(skb
)) {
330 if (skb_is_encapsulated(skb
))
332 OVS_GSO_CB(skb
)->fix_segment
= geneve_fix_segment
;
333 } else if (skb
->ip_summed
!= CHECKSUM_PARTIAL
) {
334 skb
->ip_summed
= CHECKSUM_NONE
;
/*
 * Prepare the offload state of @skb before transmission (second of two
 * definitions of this function in the file).
 *
 * For GSO packets it tests skb_is_encapsulated(), unclones the skb with
 * GFP_ATOMIC and adds SKB_GSO_UDP_TUNNEL to the GSO type.  For other
 * packets, ip_summed is reset to CHECKSUM_NONE unless it is
 * CHECKSUM_PARTIAL.  skb->encapsulation is set to 1.
 * NOTE(review): the action taken for already-encapsulated packets, the
 * handling of the skb_unclone() result and the return statements are not
 * visible in this listing.
 */
339 static int handle_offloads(struct sk_buff
*skb
)
341 if (skb_is_gso(skb
)) {
344 if (skb_is_encapsulated(skb
))
347 err
= skb_unclone(skb
, GFP_ATOMIC
);
351 skb_shinfo(skb
)->gso_type
|= SKB_GSO_UDP_TUNNEL
;
352 } else if (skb
->ip_summed
!= CHECKSUM_PARTIAL
)
353 skb
->ip_summed
= CHECKSUM_NONE
;
355 skb
->encapsulation
= 1;
/*
 * Encapsulate @skb in Geneve/UDP/IPv4 and transmit it.
 *
 * @vport: Geneve vport the packet leaves through.
 * @skb:   packet to send; OVS_CB(skb)->egress_tun_info must be set.
 *
 * Returns the value of iptunnel_xmit() plus the original network offset
 * when that value is positive, otherwise the iptunnel_xmit() value
 * unchanged.
 *
 * Visible steps: require egress tunnel info, look up a route, make sure
 * there is enough headroom, push any pending VLAN tag into the packet
 * data, reserve and build the UDP + Geneve headers, set up offloads and
 * hand the packet to iptunnel_xmit().
 * NOTE(review): error paths and several declarations are absent from this
 * listing; only the statements shown here are described.
 */
360 static int geneve_send(struct vport
*vport
, struct sk_buff
*skb
)
362 struct ovs_key_ipv4_tunnel
*tun_key
;
/* Remembered up front; it is added to the sent length on return. */
363 int network_offset
= skb_network_offset(skb
);
371 if (unlikely(!OVS_CB(skb
)->egress_tun_info
))
374 tun_key
= &OVS_CB(skb
)->egress_tun_info
->tunnel
;
/* saddr is passed by address to find_route() and reused for transmit. */
377 saddr
= tun_key
->ipv4_src
;
378 rt
= find_route(ovs_dp_get_net(vport
->dp
),
379 &saddr
, tun_key
->ipv4_dst
,
380 IPPROTO_UDP
, tun_key
->ipv4_tos
,
/*
 * Headroom needed in front of the packet; the visible terms are the
 * link-layer reserve, the route's header_len, the Geneve options, the
 * IPv4 header and an optional VLAN header.
 */
387 min_headroom
= LL_RESERVED_SPACE(rt_dst(rt
).dev
) + rt_dst(rt
).header_len
389 + OVS_CB(skb
)->egress_tun_info
->options_len
390 + sizeof(struct iphdr
)
391 + (vlan_tx_tag_present(skb
) ? VLAN_HLEN
: 0);
/* Reallocate the head when it is too small or the header is cloned. */
393 if (skb_headroom(skb
) < min_headroom
|| skb_header_cloned(skb
)) {
394 int head_delta
= SKB_DATA_ALIGN(min_headroom
-
398 err
= pskb_expand_head(skb
, max_t(int, head_delta
, 0),
/* Move a pending VLAN tag into the packet data, then clear the TCI. */
404 if (vlan_tx_tag_present(skb
)) {
405 if (unlikely(!__vlan_put_tag(skb
,
407 vlan_tx_tag_get(skb
)))) {
411 vlan_set_tci(skb
, 0);
414 skb_reset_inner_headers(skb
);
/* Reserve room for UDP + Geneve base header + options, then point the
 * transport header at the start of that space. */
416 __skb_push(skb
, GENEVE_BASE_HLEN
+
417 OVS_CB(skb
)->egress_tun_info
->options_len
);
418 skb_reset_transport_header(skb
);
420 geneve_build_header(vport
, skb
);
423 err
= handle_offloads(skb
);
/* Set the IP don't-fragment bit when the tunnel flags request it. */
427 df
= tun_key
->tun_flags
& TUNNEL_DONT_FRAGMENT
? htons(IP_DF
) : 0;
429 sent_len
= iptunnel_xmit(skb
->sk
, rt
, skb
,
430 saddr
, tun_key
->ipv4_dst
,
431 IPPROTO_UDP
, tun_key
->ipv4_tos
,
435 return sent_len
> 0 ? sent_len
+ network_offset
: sent_len
;
/*
 * Return the name stored in the geneve_port private data of @vport.
 * The returned pointer refers to storage inside the port itself.
 */
443 static const char *geneve_get_name(const struct vport
*vport
)
445 struct geneve_port
*geneve_port
= geneve_vport(vport
);
446 return geneve_port
->name
;
449 static int geneve_get_egress_tun_info(struct vport
*vport
, struct sk_buff
*skb
,
450 struct ovs_tunnel_info
*egress_tun_info
)
452 struct geneve_port
*geneve_port
= geneve_vport(vport
);
453 struct net
*net
= ovs_dp_get_net(vport
->dp
);
456 * Get tp_src and tp_dst, refer to geneve_build_header().
458 return ovs_tunnel_get_egress_info(egress_tun_info
,
459 ovs_dp_get_net(vport
->dp
),
460 OVS_CB(skb
)->egress_tun_info
,
461 IPPROTO_UDP
, skb
->mark
,
462 udp_flow_src_port(net
, skb
, 0, 0, true),
463 inet_sport(geneve_port
->sock
->sk
));
467 const struct vport_ops ovs_geneve_vport_ops
= {
468 .type
= OVS_VPORT_TYPE_GENEVE
,
469 .create
= geneve_tnl_create
,
470 .destroy
= geneve_tnl_destroy
,
471 .get_name
= geneve_get_name
,
472 .get_options
= geneve_get_options
,
474 .get_egress_tun_info
= geneve_get_egress_tun_info
,