]> git.proxmox.com Git - mirror_ovs.git/blame - datapath/vport-vxlan.c
datapath: Add support for VXLAN tunnels to Open vSwitch
[mirror_ovs.git] / datapath / vport-vxlan.c
CommitLineData
79f827fa
KM
1/*
2 * Copyright (c) 2011 Nicira, Inc.
3 * Copyright (c) 2012 Cisco Systems, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/version.h>
23#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
24
25#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/list.h>
28#include <linux/net.h>
29#include <linux/udp.h>
30
31#include <net/icmp.h>
32#include <net/ip.h>
33#include <net/udp.h>
34
35#include "datapath.h"
36#include "tunnel.h"
37#include "vport.h"
38#include "vport-generic.h"
39
40#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
41
42/**
43 * struct vxlanhdr - VXLAN header
44 * @vx_flags: Must have the exact value %VXLAN_FLAGS.
45 * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
46 */
47struct vxlanhdr {
48 __be32 vx_flags;
49 __be32 vx_vni;
50};
51
52#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
53
54static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
55 const struct ovs_key_ipv4_tunnel *tun_key)
56{
57 return VXLAN_HLEN;
58}
59
60/**
61 * struct vxlan_port - Keeps track of open UDP ports
62 * @list: list element.
63 * @port: The UDP port number in network byte order.
64 * @socket: The socket created for this port number.
65 * @count: How many ports are using this socket/port.
66 */
67struct vxlan_port {
68 struct list_head list;
69 __be16 port;
70 struct socket *vxlan_rcv_socket;
71 int count;
72};
73
74static LIST_HEAD(vxlan_ports);
75
76static struct vxlan_port *vxlan_port_exists(struct net *net, __be16 port)
77{
78 struct vxlan_port *vxlan_port;
79
80 list_for_each_entry(vxlan_port, &vxlan_ports, list) {
81 if (vxlan_port->port == port &&
82 net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net))
83 return vxlan_port;
84 }
85
86 return NULL;
87}
88
89static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
90{
91 return (struct vxlanhdr *)(udp_hdr(skb) + 1);
92}
93
94/* Compute source port for outgoing packet.
95 * Currently we use the flow hash.
96 */
97static u16 get_src_port(struct sk_buff *skb)
98{
99 int low;
100 int high;
101 unsigned int range;
102 u32 hash = OVS_CB(skb)->flow->hash;
103
104 inet_get_local_port_range(&low, &high);
105 range = (high - low) + 1;
106 return (((u64) hash * range) >> 32) + low;
107}
108
109static struct sk_buff *vxlan_build_header(const struct vport *vport,
110 const struct tnl_mutable_config *mutable,
111 struct dst_entry *dst,
112 struct sk_buff *skb,
113 int tunnel_hlen)
114{
115 struct udphdr *udph = udp_hdr(skb);
116 struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
117 const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
118 __be64 out_key;
119 u32 flags;
120
121 tnl_get_param(mutable, tun_key, &flags, &out_key);
122
123 udph->dest = mutable->dst_port;
124 udph->source = htons(get_src_port(skb));
125 udph->check = 0;
126 udph->len = htons(skb->len - skb_transport_offset(skb));
127
128 vxh->vx_flags = htonl(VXLAN_FLAGS);
129 vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
130
131 /*
132 * Allow our local IP stack to fragment the outer packet even if the
133 * DF bit is set as a last resort. We also need to force selection of
134 * an IP ID here because Linux will otherwise leave it at 0 if the
135 * packet originally had DF set.
136 */
137 skb->local_df = 1;
138 __ip_select_ident(ip_hdr(skb), dst, 0);
139
140 return skb;
141}
142
143/* Called with rcu_read_lock and BH disabled. */
144static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
145{
146 struct vport *vport;
147 struct vxlanhdr *vxh;
148 const struct tnl_mutable_config *mutable;
149 struct iphdr *iph;
150 struct ovs_key_ipv4_tunnel tun_key;
151 __be64 key;
152 u32 tunnel_flags = 0;
153
154 if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
155 goto error;
156
157 vxh = vxlan_hdr(skb);
158 if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
159 vxh->vx_vni & htonl(0xff)))
160 goto error;
161
162 __skb_pull(skb, VXLAN_HLEN);
163 skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
164
165 key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
166
167 iph = ip_hdr(skb);
168 vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
169 key, TNL_T_PROTO_VXLAN, &mutable);
170 if (unlikely(!vport)) {
171 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
172 goto error;
173 }
174
175 if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
176 tunnel_flags = OVS_TNL_F_KEY;
177 else
178 key = 0;
179
180 /* Save outer tunnel values */
181 tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
182 OVS_CB(skb)->tun_key = &tun_key;
183
184 ovs_tnl_rcv(vport, skb);
185 goto out;
186
187error:
188 kfree_skb(skb);
189out:
190 return 0;
191}
192
193/* Random value. Irrelevant as long as it's not 0 since we set the handler. */
194#define UDP_ENCAP_VXLAN 1
195static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net)
196{
197 int err;
198 struct sockaddr_in sin;
199
200 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
201 &vxlan_port->vxlan_rcv_socket);
202 if (err)
203 goto error;
204
205 /* release net ref. */
206 sk_change_net(vxlan_port->vxlan_rcv_socket->sk, net);
207
208 sin.sin_family = AF_INET;
209 sin.sin_addr.s_addr = htonl(INADDR_ANY);
210 sin.sin_port = vxlan_port->port;
211
212 err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
213 sizeof(struct sockaddr_in));
214 if (err)
215 goto error_sock;
216
217 udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
218 udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
219
220 udp_encap_enable();
221
222 return 0;
223
224error_sock:
225 sock_release(vxlan_port->vxlan_rcv_socket);
226error:
227 pr_warn("cannot register vxlan protocol handler\n");
228 return err;
229}
230
231static void vxlan_tunnel_release(struct vxlan_port *vxlan_port)
232{
233 vxlan_port->count--;
234
235 if (vxlan_port->count == 0) {
236 /* Release old socket */
237 sock_release(vxlan_port->vxlan_rcv_socket);
238 list_del(&vxlan_port->list);
239 kfree(vxlan_port);
240 }
241}
242static int vxlan_tunnel_setup(struct net *net, struct nlattr *options,
243 struct vxlan_port **vxport)
244{
245 struct nlattr *a;
246 int err;
247 u16 dst_port;
248 struct vxlan_port *vxlan_port = NULL;
249
250 *vxport = NULL;
251
252 if (!options) {
253 err = -EINVAL;
254 goto out;
255 }
256
257 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
258 if (a && nla_len(a) == sizeof(u16)) {
259 dst_port = nla_get_u16(a);
260 } else {
261 /* Require destination port from userspace. */
262 err = -EINVAL;
263 goto out;
264 }
265
266 /* Verify if we already have a socket created for this port */
267 vxlan_port = vxlan_port_exists(net, htons(dst_port));
268 if (vxlan_port) {
269 vxlan_port->count++;
270 err = 0;
271 goto out;
272 }
273
274 /* Add a new socket for this port */
275 vxlan_port = kzalloc(sizeof(struct vxlan_port), GFP_KERNEL);
276 if (!vxlan_port) {
277 err = -ENOMEM;
278 goto out;
279 }
280
281 vxlan_port->port = htons(dst_port);
282 vxlan_port->count = 1;
283 list_add_tail(&vxlan_port->list, &vxlan_ports);
284
285 err = vxlan_socket_init(vxlan_port, net);
286 if (err)
287 goto error;
288
289 *vxport = vxlan_port;
290 goto out;
291
292error:
293 list_del(&vxlan_port->list);
294 kfree(vxlan_port);
295out:
296 return err;
297}
298
299static int vxlan_set_options(struct vport *vport, struct nlattr *options)
300{
301 int err;
302 struct net *net = ovs_dp_get_net(vport->dp);
303 struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
304 struct tnl_mutable_config *config;
305 struct vxlan_port *old_port = NULL;
306 struct vxlan_port *vxlan_port = NULL;
307
308 config = rtnl_dereference(tnl_vport->mutable);
309
310 old_port = vxlan_port_exists(net, config->dst_port);
311
312 err = vxlan_tunnel_setup(net, options, &vxlan_port);
313 if (err)
314 goto out;
315
316 err = ovs_tnl_set_options(vport, options);
317
318 if (err)
319 vxlan_tunnel_release(vxlan_port);
320 else {
321 /* Release old socket */
322 vxlan_tunnel_release(old_port);
323 }
324out:
325 return err;
326}
327
328static const struct tnl_ops ovs_vxlan_tnl_ops = {
329 .tunnel_type = TNL_T_PROTO_VXLAN,
330 .ipproto = IPPROTO_UDP,
331 .hdr_len = vxlan_hdr_len,
332 .build_header = vxlan_build_header,
333};
334
335static void vxlan_tnl_destroy(struct vport *vport)
336{
337 struct vxlan_port *vxlan_port;
338 struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
339 struct tnl_mutable_config *config;
340
341 config = rtnl_dereference(tnl_vport->mutable);
342
343 vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
344 config->dst_port);
345
346 vxlan_tunnel_release(vxlan_port);
347
348 ovs_tnl_destroy(vport);
349}
350
351static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
352{
353 int err;
354 struct vport *vport;
355 struct vxlan_port *vxlan_port = NULL;
356
357 err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options,
358 &vxlan_port);
359 if (err)
360 return ERR_PTR(err);
361
362 vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
363
364 if (IS_ERR(vport))
365 vxlan_tunnel_release(vxlan_port);
366
367 return vport;
368}
369
370const struct vport_ops ovs_vxlan_vport_ops = {
371 .type = OVS_VPORT_TYPE_VXLAN,
372 .flags = VPORT_F_TUN_ID,
373 .create = vxlan_tnl_create,
374 .destroy = vxlan_tnl_destroy,
375 .set_addr = ovs_tnl_set_addr,
376 .get_name = ovs_tnl_get_name,
377 .get_addr = ovs_tnl_get_addr,
378 .get_options = ovs_tnl_get_options,
379 .set_options = vxlan_set_options,
380 .get_dev_flags = ovs_vport_gen_get_dev_flags,
381 .is_running = ovs_vport_gen_is_running,
382 .get_operstate = ovs_vport_gen_get_operstate,
383 .send = ovs_tnl_send,
384};
385#else
386#warning VXLAN tunneling will not be available on kernels before 2.6.26
387#endif /* Linux kernel < 2.6.26 */