git.proxmox.com Git - mirror_ovs.git/blame - datapath/vport-vxlan.c
tunneling: Don't send ICMP messages if no tunnel port is found.
[mirror_ovs.git] / datapath / vport-vxlan.c
CommitLineData
79f827fa
KM
1/*
2 * Copyright (c) 2011 Nicira, Inc.
3 * Copyright (c) 2012 Cisco Systems, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/version.h>
23#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
24
25#include <linux/in.h>
26#include <linux/ip.h>
27#include <linux/list.h>
28#include <linux/net.h>
29#include <linux/udp.h>
30
31#include <net/icmp.h>
32#include <net/ip.h>
33#include <net/udp.h>
34
35#include "datapath.h"
36#include "tunnel.h"
37#include "vport.h"
79f827fa
KM
38
39#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
40
/**
 * struct vxlanhdr - VXLAN header
 * @vx_flags: Must have the exact value %VXLAN_FLAGS.
 * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed.
 *
 * Both fields are stored in network byte order.  The header is located
 * immediately after the UDP header of the encapsulating packet.
 */
struct vxlanhdr {
	__be32 vx_flags;	/* compared against htonl(VXLAN_FLAGS) on receive */
	__be32 vx_vni;		/* VNI << 8; the low byte must be zero on receive */
};
50
51#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
52
53static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable,
54 const struct ovs_key_ipv4_tunnel *tun_key)
55{
56 return VXLAN_HLEN;
57}
58
/**
 * struct vxlan_port - Keeps track of open UDP ports
 * @list: list element (links the entry into the file-scope vxlan_ports list).
 * @port: The UDP port number in network byte order.
 * @vxlan_rcv_socket: The socket created for this port number.
 * @count: How many ports are using this socket/port.
 */
struct vxlan_port {
	struct list_head list;
	__be16 port;
	struct socket *vxlan_rcv_socket;
	int count;
};
72
73static LIST_HEAD(vxlan_ports);
74
75static struct vxlan_port *vxlan_port_exists(struct net *net, __be16 port)
76{
77 struct vxlan_port *vxlan_port;
78
79 list_for_each_entry(vxlan_port, &vxlan_ports, list) {
80 if (vxlan_port->port == port &&
81 net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net))
82 return vxlan_port;
83 }
84
85 return NULL;
86}
87
88static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb)
89{
90 return (struct vxlanhdr *)(udp_hdr(skb) + 1);
91}
92
93/* Compute source port for outgoing packet.
94 * Currently we use the flow hash.
95 */
96static u16 get_src_port(struct sk_buff *skb)
97{
98 int low;
99 int high;
100 unsigned int range;
101 u32 hash = OVS_CB(skb)->flow->hash;
102
103 inet_get_local_port_range(&low, &high);
104 range = (high - low) + 1;
105 return (((u64) hash * range) >> 32) + low;
106}
107
108static struct sk_buff *vxlan_build_header(const struct vport *vport,
109 const struct tnl_mutable_config *mutable,
110 struct dst_entry *dst,
111 struct sk_buff *skb,
112 int tunnel_hlen)
113{
114 struct udphdr *udph = udp_hdr(skb);
115 struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1);
116 const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key;
117 __be64 out_key;
118 u32 flags;
119
120 tnl_get_param(mutable, tun_key, &flags, &out_key);
121
122 udph->dest = mutable->dst_port;
123 udph->source = htons(get_src_port(skb));
124 udph->check = 0;
125 udph->len = htons(skb->len - skb_transport_offset(skb));
126
127 vxh->vx_flags = htonl(VXLAN_FLAGS);
128 vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8);
129
130 /*
131 * Allow our local IP stack to fragment the outer packet even if the
132 * DF bit is set as a last resort. We also need to force selection of
133 * an IP ID here because Linux will otherwise leave it at 0 if the
134 * packet originally had DF set.
135 */
136 skb->local_df = 1;
137 __ip_select_ident(ip_hdr(skb), dst, 0);
138
139 return skb;
140}
141
142/* Called with rcu_read_lock and BH disabled. */
143static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
144{
145 struct vport *vport;
146 struct vxlanhdr *vxh;
147 const struct tnl_mutable_config *mutable;
148 struct iphdr *iph;
149 struct ovs_key_ipv4_tunnel tun_key;
150 __be64 key;
151 u32 tunnel_flags = 0;
152
153 if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN)))
154 goto error;
155
156 vxh = vxlan_hdr(skb);
157 if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) ||
158 vxh->vx_vni & htonl(0xff)))
159 goto error;
160
161 __skb_pull(skb, VXLAN_HLEN);
162 skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN);
163
164 key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8);
165
166 iph = ip_hdr(skb);
167 vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr,
168 key, TNL_T_PROTO_VXLAN, &mutable);
498e1285 169 if (unlikely(!vport))
79f827fa 170 goto error;
79f827fa
KM
171
172 if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr)
173 tunnel_flags = OVS_TNL_F_KEY;
174 else
175 key = 0;
176
177 /* Save outer tunnel values */
178 tnl_tun_key_init(&tun_key, iph, key, tunnel_flags);
179 OVS_CB(skb)->tun_key = &tun_key;
180
181 ovs_tnl_rcv(vport, skb);
182 goto out;
183
184error:
185 kfree_skb(skb);
186out:
187 return 0;
188}
189
190/* Random value. Irrelevant as long as it's not 0 since we set the handler. */
191#define UDP_ENCAP_VXLAN 1
192static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net)
193{
194 int err;
195 struct sockaddr_in sin;
196
197 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
198 &vxlan_port->vxlan_rcv_socket);
199 if (err)
200 goto error;
201
202 /* release net ref. */
203 sk_change_net(vxlan_port->vxlan_rcv_socket->sk, net);
204
205 sin.sin_family = AF_INET;
206 sin.sin_addr.s_addr = htonl(INADDR_ANY);
207 sin.sin_port = vxlan_port->port;
208
209 err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin,
210 sizeof(struct sockaddr_in));
211 if (err)
212 goto error_sock;
213
214 udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN;
215 udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv;
216
217 udp_encap_enable();
218
219 return 0;
220
221error_sock:
df80d44d 222 sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
79f827fa
KM
223error:
224 pr_warn("cannot register vxlan protocol handler\n");
225 return err;
226}
227
228static void vxlan_tunnel_release(struct vxlan_port *vxlan_port)
229{
230 vxlan_port->count--;
231
232 if (vxlan_port->count == 0) {
233 /* Release old socket */
df80d44d 234 sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk);
79f827fa
KM
235 list_del(&vxlan_port->list);
236 kfree(vxlan_port);
237 }
238}
239static int vxlan_tunnel_setup(struct net *net, struct nlattr *options,
240 struct vxlan_port **vxport)
241{
242 struct nlattr *a;
243 int err;
244 u16 dst_port;
245 struct vxlan_port *vxlan_port = NULL;
246
247 *vxport = NULL;
248
249 if (!options) {
250 err = -EINVAL;
251 goto out;
252 }
253
254 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
255 if (a && nla_len(a) == sizeof(u16)) {
256 dst_port = nla_get_u16(a);
257 } else {
258 /* Require destination port from userspace. */
259 err = -EINVAL;
260 goto out;
261 }
262
263 /* Verify if we already have a socket created for this port */
264 vxlan_port = vxlan_port_exists(net, htons(dst_port));
265 if (vxlan_port) {
266 vxlan_port->count++;
267 err = 0;
3b6f2889 268 *vxport = vxlan_port;
79f827fa
KM
269 goto out;
270 }
271
272 /* Add a new socket for this port */
273 vxlan_port = kzalloc(sizeof(struct vxlan_port), GFP_KERNEL);
274 if (!vxlan_port) {
275 err = -ENOMEM;
276 goto out;
277 }
278
279 vxlan_port->port = htons(dst_port);
280 vxlan_port->count = 1;
281 list_add_tail(&vxlan_port->list, &vxlan_ports);
282
283 err = vxlan_socket_init(vxlan_port, net);
284 if (err)
285 goto error;
286
287 *vxport = vxlan_port;
288 goto out;
289
290error:
291 list_del(&vxlan_port->list);
292 kfree(vxlan_port);
293out:
294 return err;
295}
296
297static int vxlan_set_options(struct vport *vport, struct nlattr *options)
298{
299 int err;
300 struct net *net = ovs_dp_get_net(vport->dp);
301 struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
302 struct tnl_mutable_config *config;
303 struct vxlan_port *old_port = NULL;
304 struct vxlan_port *vxlan_port = NULL;
305
306 config = rtnl_dereference(tnl_vport->mutable);
307
308 old_port = vxlan_port_exists(net, config->dst_port);
309
310 err = vxlan_tunnel_setup(net, options, &vxlan_port);
311 if (err)
312 goto out;
313
314 err = ovs_tnl_set_options(vport, options);
315
316 if (err)
317 vxlan_tunnel_release(vxlan_port);
318 else {
319 /* Release old socket */
320 vxlan_tunnel_release(old_port);
321 }
322out:
323 return err;
324}
325
326static const struct tnl_ops ovs_vxlan_tnl_ops = {
327 .tunnel_type = TNL_T_PROTO_VXLAN,
328 .ipproto = IPPROTO_UDP,
329 .hdr_len = vxlan_hdr_len,
330 .build_header = vxlan_build_header,
331};
332
333static void vxlan_tnl_destroy(struct vport *vport)
334{
335 struct vxlan_port *vxlan_port;
336 struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
337 struct tnl_mutable_config *config;
338
339 config = rtnl_dereference(tnl_vport->mutable);
340
341 vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp),
342 config->dst_port);
343
344 vxlan_tunnel_release(vxlan_port);
345
346 ovs_tnl_destroy(vport);
347}
348
349static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
350{
351 int err;
352 struct vport *vport;
353 struct vxlan_port *vxlan_port = NULL;
354
355 err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options,
356 &vxlan_port);
357 if (err)
358 return ERR_PTR(err);
359
360 vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops);
361
362 if (IS_ERR(vport))
363 vxlan_tunnel_release(vxlan_port);
364
365 return vport;
366}
367
368const struct vport_ops ovs_vxlan_vport_ops = {
369 .type = OVS_VPORT_TYPE_VXLAN,
370 .flags = VPORT_F_TUN_ID,
371 .create = vxlan_tnl_create,
372 .destroy = vxlan_tnl_destroy,
79f827fa 373 .get_name = ovs_tnl_get_name,
79f827fa
KM
374 .get_options = ovs_tnl_get_options,
375 .set_options = vxlan_set_options,
79f827fa
KM
376 .send = ovs_tnl_send,
377};
378#else
379#warning VXLAN tunneling will not be available on kernels before 2.6.26
380#endif /* Linux kernel < 2.6.26 */