]>
Commit | Line | Data |
---|---|---|
79f827fa KM |
1 | /* |
2 | * Copyright (c) 2011 Nicira, Inc. | |
3 | * Copyright (c) 2012 Cisco Systems, Inc. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but | |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
17 | * 02110-1301, USA | |
18 | */ | |
19 | ||
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
21 | ||
22 | #include <linux/version.h> | |
23 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) | |
24 | ||
25 | #include <linux/in.h> | |
26 | #include <linux/ip.h> | |
27 | #include <linux/list.h> | |
28 | #include <linux/net.h> | |
29 | #include <linux/udp.h> | |
30 | ||
31 | #include <net/icmp.h> | |
32 | #include <net/ip.h> | |
33 | #include <net/udp.h> | |
34 | ||
35 | #include "datapath.h" | |
36 | #include "tunnel.h" | |
37 | #include "vport.h" | |
79f827fa KM |
38 | |
39 | #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ | |
40 | ||
41 | /** | |
42 | * struct vxlanhdr - VXLAN header | |
43 | * @vx_flags: Must have the exact value %VXLAN_FLAGS. | |
44 | * @vx_vni: VXLAN Network Identifier (VNI) in top 24 bits, low 8 bits zeroed. | |
45 | */ | |
46 | struct vxlanhdr { | |
47 | __be32 vx_flags; | |
48 | __be32 vx_vni; | |
49 | }; | |
50 | ||
51 | #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) | |
52 | ||
53 | static inline int vxlan_hdr_len(const struct tnl_mutable_config *mutable, | |
54 | const struct ovs_key_ipv4_tunnel *tun_key) | |
55 | { | |
56 | return VXLAN_HLEN; | |
57 | } | |
58 | ||
59 | /** | |
60 | * struct vxlan_port - Keeps track of open UDP ports | |
61 | * @list: list element. | |
62 | * @port: The UDP port number in network byte order. | |
63 | * @socket: The socket created for this port number. | |
64 | * @count: How many ports are using this socket/port. | |
65 | */ | |
66 | struct vxlan_port { | |
67 | struct list_head list; | |
68 | __be16 port; | |
69 | struct socket *vxlan_rcv_socket; | |
70 | int count; | |
71 | }; | |
72 | ||
73 | static LIST_HEAD(vxlan_ports); | |
74 | ||
75 | static struct vxlan_port *vxlan_port_exists(struct net *net, __be16 port) | |
76 | { | |
77 | struct vxlan_port *vxlan_port; | |
78 | ||
79 | list_for_each_entry(vxlan_port, &vxlan_ports, list) { | |
80 | if (vxlan_port->port == port && | |
81 | net_eq(sock_net(vxlan_port->vxlan_rcv_socket->sk), net)) | |
82 | return vxlan_port; | |
83 | } | |
84 | ||
85 | return NULL; | |
86 | } | |
87 | ||
88 | static inline struct vxlanhdr *vxlan_hdr(const struct sk_buff *skb) | |
89 | { | |
90 | return (struct vxlanhdr *)(udp_hdr(skb) + 1); | |
91 | } | |
92 | ||
93 | /* Compute source port for outgoing packet. | |
94 | * Currently we use the flow hash. | |
95 | */ | |
96 | static u16 get_src_port(struct sk_buff *skb) | |
97 | { | |
98 | int low; | |
99 | int high; | |
100 | unsigned int range; | |
101 | u32 hash = OVS_CB(skb)->flow->hash; | |
102 | ||
103 | inet_get_local_port_range(&low, &high); | |
104 | range = (high - low) + 1; | |
105 | return (((u64) hash * range) >> 32) + low; | |
106 | } | |
107 | ||
108 | static struct sk_buff *vxlan_build_header(const struct vport *vport, | |
109 | const struct tnl_mutable_config *mutable, | |
110 | struct dst_entry *dst, | |
111 | struct sk_buff *skb, | |
112 | int tunnel_hlen) | |
113 | { | |
114 | struct udphdr *udph = udp_hdr(skb); | |
115 | struct vxlanhdr *vxh = (struct vxlanhdr *)(udph + 1); | |
116 | const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; | |
117 | __be64 out_key; | |
118 | u32 flags; | |
119 | ||
120 | tnl_get_param(mutable, tun_key, &flags, &out_key); | |
121 | ||
122 | udph->dest = mutable->dst_port; | |
123 | udph->source = htons(get_src_port(skb)); | |
124 | udph->check = 0; | |
125 | udph->len = htons(skb->len - skb_transport_offset(skb)); | |
126 | ||
127 | vxh->vx_flags = htonl(VXLAN_FLAGS); | |
128 | vxh->vx_vni = htonl(be64_to_cpu(out_key) << 8); | |
129 | ||
130 | /* | |
131 | * Allow our local IP stack to fragment the outer packet even if the | |
132 | * DF bit is set as a last resort. We also need to force selection of | |
133 | * an IP ID here because Linux will otherwise leave it at 0 if the | |
134 | * packet originally had DF set. | |
135 | */ | |
136 | skb->local_df = 1; | |
137 | __ip_select_ident(ip_hdr(skb), dst, 0); | |
138 | ||
139 | return skb; | |
140 | } | |
141 | ||
142 | /* Called with rcu_read_lock and BH disabled. */ | |
143 | static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) | |
144 | { | |
145 | struct vport *vport; | |
146 | struct vxlanhdr *vxh; | |
147 | const struct tnl_mutable_config *mutable; | |
148 | struct iphdr *iph; | |
149 | struct ovs_key_ipv4_tunnel tun_key; | |
150 | __be64 key; | |
151 | u32 tunnel_flags = 0; | |
152 | ||
153 | if (unlikely(!pskb_may_pull(skb, VXLAN_HLEN + ETH_HLEN))) | |
154 | goto error; | |
155 | ||
156 | vxh = vxlan_hdr(skb); | |
157 | if (unlikely(vxh->vx_flags != htonl(VXLAN_FLAGS) || | |
158 | vxh->vx_vni & htonl(0xff))) | |
159 | goto error; | |
160 | ||
161 | __skb_pull(skb, VXLAN_HLEN); | |
162 | skb_postpull_rcsum(skb, skb_transport_header(skb), VXLAN_HLEN + ETH_HLEN); | |
163 | ||
164 | key = cpu_to_be64(ntohl(vxh->vx_vni) >> 8); | |
165 | ||
166 | iph = ip_hdr(skb); | |
167 | vport = ovs_tnl_find_port(dev_net(skb->dev), iph->daddr, iph->saddr, | |
168 | key, TNL_T_PROTO_VXLAN, &mutable); | |
498e1285 | 169 | if (unlikely(!vport)) |
79f827fa | 170 | goto error; |
79f827fa KM |
171 | |
172 | if (mutable->flags & TNL_F_IN_KEY_MATCH || !mutable->key.daddr) | |
173 | tunnel_flags = OVS_TNL_F_KEY; | |
174 | else | |
175 | key = 0; | |
176 | ||
177 | /* Save outer tunnel values */ | |
178 | tnl_tun_key_init(&tun_key, iph, key, tunnel_flags); | |
179 | OVS_CB(skb)->tun_key = &tun_key; | |
180 | ||
181 | ovs_tnl_rcv(vport, skb); | |
182 | goto out; | |
183 | ||
184 | error: | |
185 | kfree_skb(skb); | |
186 | out: | |
187 | return 0; | |
188 | } | |
189 | ||
190 | /* Random value. Irrelevant as long as it's not 0 since we set the handler. */ | |
191 | #define UDP_ENCAP_VXLAN 1 | |
192 | static int vxlan_socket_init(struct vxlan_port *vxlan_port, struct net *net) | |
193 | { | |
194 | int err; | |
195 | struct sockaddr_in sin; | |
196 | ||
197 | err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, | |
198 | &vxlan_port->vxlan_rcv_socket); | |
199 | if (err) | |
200 | goto error; | |
201 | ||
202 | /* release net ref. */ | |
203 | sk_change_net(vxlan_port->vxlan_rcv_socket->sk, net); | |
204 | ||
205 | sin.sin_family = AF_INET; | |
206 | sin.sin_addr.s_addr = htonl(INADDR_ANY); | |
207 | sin.sin_port = vxlan_port->port; | |
208 | ||
209 | err = kernel_bind(vxlan_port->vxlan_rcv_socket, (struct sockaddr *)&sin, | |
210 | sizeof(struct sockaddr_in)); | |
211 | if (err) | |
212 | goto error_sock; | |
213 | ||
214 | udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_type = UDP_ENCAP_VXLAN; | |
215 | udp_sk(vxlan_port->vxlan_rcv_socket->sk)->encap_rcv = vxlan_rcv; | |
216 | ||
217 | udp_encap_enable(); | |
218 | ||
219 | return 0; | |
220 | ||
221 | error_sock: | |
df80d44d | 222 | sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk); |
79f827fa KM |
223 | error: |
224 | pr_warn("cannot register vxlan protocol handler\n"); | |
225 | return err; | |
226 | } | |
227 | ||
228 | static void vxlan_tunnel_release(struct vxlan_port *vxlan_port) | |
229 | { | |
230 | vxlan_port->count--; | |
231 | ||
232 | if (vxlan_port->count == 0) { | |
233 | /* Release old socket */ | |
df80d44d | 234 | sk_release_kernel(vxlan_port->vxlan_rcv_socket->sk); |
79f827fa KM |
235 | list_del(&vxlan_port->list); |
236 | kfree(vxlan_port); | |
237 | } | |
238 | } | |
239 | static int vxlan_tunnel_setup(struct net *net, struct nlattr *options, | |
240 | struct vxlan_port **vxport) | |
241 | { | |
242 | struct nlattr *a; | |
243 | int err; | |
244 | u16 dst_port; | |
245 | struct vxlan_port *vxlan_port = NULL; | |
246 | ||
247 | *vxport = NULL; | |
248 | ||
249 | if (!options) { | |
250 | err = -EINVAL; | |
251 | goto out; | |
252 | } | |
253 | ||
254 | a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); | |
255 | if (a && nla_len(a) == sizeof(u16)) { | |
256 | dst_port = nla_get_u16(a); | |
257 | } else { | |
258 | /* Require destination port from userspace. */ | |
259 | err = -EINVAL; | |
260 | goto out; | |
261 | } | |
262 | ||
263 | /* Verify if we already have a socket created for this port */ | |
264 | vxlan_port = vxlan_port_exists(net, htons(dst_port)); | |
265 | if (vxlan_port) { | |
266 | vxlan_port->count++; | |
267 | err = 0; | |
3b6f2889 | 268 | *vxport = vxlan_port; |
79f827fa KM |
269 | goto out; |
270 | } | |
271 | ||
272 | /* Add a new socket for this port */ | |
273 | vxlan_port = kzalloc(sizeof(struct vxlan_port), GFP_KERNEL); | |
274 | if (!vxlan_port) { | |
275 | err = -ENOMEM; | |
276 | goto out; | |
277 | } | |
278 | ||
279 | vxlan_port->port = htons(dst_port); | |
280 | vxlan_port->count = 1; | |
281 | list_add_tail(&vxlan_port->list, &vxlan_ports); | |
282 | ||
283 | err = vxlan_socket_init(vxlan_port, net); | |
284 | if (err) | |
285 | goto error; | |
286 | ||
287 | *vxport = vxlan_port; | |
288 | goto out; | |
289 | ||
290 | error: | |
291 | list_del(&vxlan_port->list); | |
292 | kfree(vxlan_port); | |
293 | out: | |
294 | return err; | |
295 | } | |
296 | ||
297 | static int vxlan_set_options(struct vport *vport, struct nlattr *options) | |
298 | { | |
299 | int err; | |
300 | struct net *net = ovs_dp_get_net(vport->dp); | |
301 | struct tnl_vport *tnl_vport = tnl_vport_priv(vport); | |
302 | struct tnl_mutable_config *config; | |
303 | struct vxlan_port *old_port = NULL; | |
304 | struct vxlan_port *vxlan_port = NULL; | |
305 | ||
306 | config = rtnl_dereference(tnl_vport->mutable); | |
307 | ||
308 | old_port = vxlan_port_exists(net, config->dst_port); | |
309 | ||
310 | err = vxlan_tunnel_setup(net, options, &vxlan_port); | |
311 | if (err) | |
312 | goto out; | |
313 | ||
314 | err = ovs_tnl_set_options(vport, options); | |
315 | ||
316 | if (err) | |
317 | vxlan_tunnel_release(vxlan_port); | |
318 | else { | |
319 | /* Release old socket */ | |
320 | vxlan_tunnel_release(old_port); | |
321 | } | |
322 | out: | |
323 | return err; | |
324 | } | |
325 | ||
326 | static const struct tnl_ops ovs_vxlan_tnl_ops = { | |
327 | .tunnel_type = TNL_T_PROTO_VXLAN, | |
328 | .ipproto = IPPROTO_UDP, | |
329 | .hdr_len = vxlan_hdr_len, | |
330 | .build_header = vxlan_build_header, | |
331 | }; | |
332 | ||
333 | static void vxlan_tnl_destroy(struct vport *vport) | |
334 | { | |
335 | struct vxlan_port *vxlan_port; | |
336 | struct tnl_vport *tnl_vport = tnl_vport_priv(vport); | |
337 | struct tnl_mutable_config *config; | |
338 | ||
339 | config = rtnl_dereference(tnl_vport->mutable); | |
340 | ||
341 | vxlan_port = vxlan_port_exists(ovs_dp_get_net(vport->dp), | |
342 | config->dst_port); | |
343 | ||
344 | vxlan_tunnel_release(vxlan_port); | |
345 | ||
346 | ovs_tnl_destroy(vport); | |
347 | } | |
348 | ||
349 | static struct vport *vxlan_tnl_create(const struct vport_parms *parms) | |
350 | { | |
351 | int err; | |
352 | struct vport *vport; | |
353 | struct vxlan_port *vxlan_port = NULL; | |
354 | ||
355 | err = vxlan_tunnel_setup(ovs_dp_get_net(parms->dp), parms->options, | |
356 | &vxlan_port); | |
357 | if (err) | |
358 | return ERR_PTR(err); | |
359 | ||
360 | vport = ovs_tnl_create(parms, &ovs_vxlan_vport_ops, &ovs_vxlan_tnl_ops); | |
361 | ||
362 | if (IS_ERR(vport)) | |
363 | vxlan_tunnel_release(vxlan_port); | |
364 | ||
365 | return vport; | |
366 | } | |
367 | ||
368 | const struct vport_ops ovs_vxlan_vport_ops = { | |
369 | .type = OVS_VPORT_TYPE_VXLAN, | |
370 | .flags = VPORT_F_TUN_ID, | |
371 | .create = vxlan_tnl_create, | |
372 | .destroy = vxlan_tnl_destroy, | |
79f827fa | 373 | .get_name = ovs_tnl_get_name, |
79f827fa KM |
374 | .get_options = ovs_tnl_get_options, |
375 | .set_options = vxlan_set_options, | |
79f827fa KM |
376 | .send = ovs_tnl_send, |
377 | }; | |
378 | #else | |
379 | #warning VXLAN tunneling will not be available on kernels before 2.6.26 | |
380 | #endif /* Linux kernel < 2.6.26 */ |