]>
Commit | Line | Data |
---|---|---|
a6ae068b LJ |
1 | /* |
2 | * Copyright (c) 2011 Nicira, Inc. | |
3 | * Copyright (c) 2013 Cisco Systems, Inc. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but | |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
17 | * 02110-1301, USA | |
18 | */ | |
19 | ||
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
21 | ||
22 | #include <linux/version.h> | |
23 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) | |
24 | ||
25 | #include <linux/in.h> | |
26 | #include <linux/ip.h> | |
a6ae068b | 27 | #include <linux/net.h> |
85c9de19 | 28 | #include <linux/rculist.h> |
a6ae068b LJ |
29 | #include <linux/udp.h> |
30 | ||
31 | #include <net/icmp.h> | |
32 | #include <net/ip.h> | |
33 | #include <net/udp.h> | |
34 | ||
35 | #include "datapath.h" | |
36 | #include "tunnel.h" | |
37 | #include "vport.h" | |
38 | ||
39 | ||
40 | /* | |
41 | * LISP encapsulation header: | |
42 | * | |
43 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
44 | * |N|L|E|V|I|flags| Nonce/Map-Version | | |
45 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
46 | * | Instance ID/Locator Status Bits | | |
47 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
48 | * | |
49 | */ | |
50 | ||
51 | /** | |
52 | * struct lisphdr - LISP header | |
53 | * @nonce_present: Flag indicating the presence of a 24 bit nonce value. | |
54 | * @locator_status_bits_present: Flag indicating the presence of Locator Status | |
55 | * Bits (LSB). | |
56 | * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism. | |
57 | * @map_version_present: Flag indicating the use of mapping versioning. | |
58 | * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID. | |
59 | * @reserved_flags: 3 bits reserved for future flags. | |
60 | * @nonce: 24 bit nonce value. | |
61 | * @map_version: 24 bit mapping version. | |
62 | * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present | |
63 | * is not set, 8 bits when it is. | |
64 | * @instance_id: 24 bit Instance ID | |
65 | */ | |
66 | struct lisphdr { | |
67 | #ifdef __LITTLE_ENDIAN_BITFIELD | |
68 | __u8 reserved_flags:3; | |
69 | __u8 instance_id_present:1; | |
70 | __u8 map_version_present:1; | |
71 | __u8 solicit_echo_nonce:1; | |
72 | __u8 locator_status_bits_present:1; | |
73 | __u8 nonce_present:1; | |
74 | #else | |
75 | __u8 nonce_present:1; | |
76 | __u8 locator_status_bits_present:1; | |
77 | __u8 solicit_echo_nonce:1; | |
78 | __u8 map_version_present:1; | |
79 | __u8 instance_id_present:1; | |
80 | __u8 reserved_flags:3; | |
81 | #endif | |
82 | union { | |
83 | __u8 nonce[3]; | |
84 | __u8 map_version[3]; | |
85 | } u1; | |
86 | union { | |
87 | __be32 locator_status_bits; | |
88 | struct { | |
89 | __u8 instance_id[3]; | |
90 | __u8 locator_status_bits; | |
91 | } word2; | |
92 | } u2; | |
93 | }; | |
94 | ||
95 | #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr)) | |
96 | ||
a6ae068b LJ |
97 | /** |
98 | * struct lisp_port - Keeps track of open UDP ports | |
c405d282 PS |
99 | * @dst_port: lisp UDP port no. |
100 | * @list: list element in @lisp_ports. | |
101 | * @lisp_rcv_socket: The socket created for this port number. | |
102 | * @name: vport name. | |
a6ae068b LJ |
103 | */ |
104 | struct lisp_port { | |
c405d282 | 105 | __be16 dst_port; |
a6ae068b | 106 | struct list_head list; |
a6ae068b | 107 | struct socket *lisp_rcv_socket; |
c405d282 | 108 | char name[IFNAMSIZ]; |
a6ae068b LJ |
109 | }; |
110 | ||
111 | static LIST_HEAD(lisp_ports); | |
112 | ||
c405d282 PS |
/* Return the LISP state stored in the private area of @vport. */
static inline struct lisp_port *lisp_vport(const struct vport *vport)
{
	struct lisp_port *priv = vport_priv(vport);

	return priv;
}
117 | ||
85c9de19 | 118 | static struct lisp_port *lisp_find_port(struct net *net, __be16 port) |
a6ae068b LJ |
119 | { |
120 | struct lisp_port *lisp_port; | |
121 | ||
85c9de19 | 122 | list_for_each_entry_rcu(lisp_port, &lisp_ports, list) { |
c405d282 | 123 | if (lisp_port->dst_port == port && |
a6ae068b LJ |
124 | net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net)) |
125 | return lisp_port; | |
126 | } | |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb) | |
132 | { | |
133 | return (struct lisphdr *)(udp_hdr(skb) + 1); | |
134 | } | |
135 | ||
a6ae068b LJ |
136 | /* Convert 64 bit tunnel ID to 24 bit Instance ID. */ |
137 | static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid) | |
138 | { | |
139 | ||
140 | #ifdef __BIG_ENDIAN | |
141 | iid[0] = (__force __u8)(tun_id >> 16); | |
142 | iid[1] = (__force __u8)(tun_id >> 8); | |
143 | iid[2] = (__force __u8)tun_id; | |
144 | #else | |
145 | iid[0] = (__force __u8)((__force u64)tun_id >> 40); | |
146 | iid[1] = (__force __u8)((__force u64)tun_id >> 48); | |
147 | iid[2] = (__force __u8)((__force u64)tun_id >> 56); | |
148 | #endif | |
149 | } | |
150 | ||
151 | /* Convert 24 bit Instance ID to 64 bit tunnel ID. */ | |
152 | static __be64 instance_id_to_tunnel_id(__u8 *iid) | |
153 | { | |
154 | #ifdef __BIG_ENDIAN | |
155 | return (iid[0] << 16) | (iid[1] << 8) | iid[2]; | |
156 | #else | |
157 | return (__force __be64)(((__force u64)iid[0] << 40) | | |
158 | ((__force u64)iid[1] << 48) | | |
159 | ((__force u64)iid[2] << 56)); | |
160 | #endif | |
161 | } | |
162 | ||
85c9de19 PS |
163 | static void lisp_build_header(const struct vport *vport, |
164 | struct sk_buff *skb, | |
165 | int tunnel_hlen) | |
a6ae068b | 166 | { |
c405d282 | 167 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b LJ |
168 | struct udphdr *udph = udp_hdr(skb); |
169 | struct lisphdr *lisph = (struct lisphdr *)(udph + 1); | |
170 | const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; | |
a6ae068b | 171 | |
c405d282 | 172 | udph->dest = lisp_port->dst_port; |
a6ae068b LJ |
173 | udph->source = htons(ovs_tnl_get_src_port(skb)); |
174 | udph->check = 0; | |
175 | udph->len = htons(skb->len - skb_transport_offset(skb)); | |
176 | ||
177 | lisph->nonce_present = 0; /* We don't support echo nonce algorithm */ | |
178 | lisph->locator_status_bits_present = 1; /* Set LSB */ | |
179 | lisph->solicit_echo_nonce = 0; /* No echo noncing */ | |
180 | lisph->map_version_present = 0; /* No mapping versioning, nonce instead */ | |
181 | lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */ | |
182 | lisph->reserved_flags = 0; /* Reserved flags, set to 0 */ | |
183 | ||
184 | lisph->u1.nonce[0] = 0; | |
185 | lisph->u1.nonce[1] = 0; | |
186 | lisph->u1.nonce[2] = 0; | |
187 | ||
85c9de19 | 188 | tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]); |
a6ae068b | 189 | lisph->u2.word2.locator_status_bits = 1; |
a6ae068b LJ |
190 | } |
191 | ||
192 | /* Called with rcu_read_lock and BH disabled. */ | |
193 | static int lisp_rcv(struct sock *sk, struct sk_buff *skb) | |
194 | { | |
85c9de19 | 195 | struct lisp_port *lisp_port; |
a6ae068b | 196 | struct lisphdr *lisph; |
a6ae068b LJ |
197 | struct iphdr *iph, *inner_iph; |
198 | struct ovs_key_ipv4_tunnel tun_key; | |
199 | __be64 key; | |
a6ae068b LJ |
200 | struct ethhdr *ethh; |
201 | __be16 protocol; | |
202 | ||
85c9de19 PS |
203 | lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); |
204 | if (unlikely(!lisp_port)) | |
205 | goto error; | |
206 | ||
a6ae068b LJ |
207 | if (unlikely(!pskb_may_pull(skb, LISP_HLEN))) |
208 | goto error; | |
209 | ||
210 | lisph = lisp_hdr(skb); | |
211 | ||
212 | skb_pull_rcsum(skb, LISP_HLEN); | |
213 | ||
214 | if (lisph->instance_id_present != 1) | |
215 | key = 0; | |
216 | else | |
217 | key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]); | |
218 | ||
a6ae068b | 219 | /* Save outer tunnel values */ |
85c9de19 | 220 | iph = ip_hdr(skb); |
adda018c | 221 | ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); |
a6ae068b LJ |
222 | |
223 | /* Drop non-IP inner packets */ | |
224 | inner_iph = (struct iphdr *)(lisph + 1); | |
225 | switch (inner_iph->version) { | |
226 | case 4: | |
227 | protocol = htons(ETH_P_IP); | |
228 | break; | |
229 | case 6: | |
230 | protocol = htons(ETH_P_IPV6); | |
231 | break; | |
232 | default: | |
233 | goto error; | |
234 | } | |
235 | ||
236 | /* Add Ethernet header */ | |
237 | ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); | |
238 | memset(ethh, 0, ETH_HLEN); | |
239 | ethh->h_dest[0] = 0x02; | |
240 | ethh->h_source[0] = 0x02; | |
241 | ethh->h_proto = protocol; | |
242 | ||
3cfede14 PS |
243 | ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); |
244 | ||
db0c3134 | 245 | ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key); |
a6ae068b LJ |
246 | goto out; |
247 | ||
248 | error: | |
249 | kfree_skb(skb); | |
250 | out: | |
251 | return 0; | |
252 | } | |
253 | ||
254 | /* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */ | |
255 | #define UDP_ENCAP_LISP 1 | |
256 | static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net) | |
257 | { | |
a6ae068b | 258 | struct sockaddr_in sin; |
c405d282 | 259 | int err; |
a6ae068b LJ |
260 | |
261 | err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, | |
262 | &lisp_port->lisp_rcv_socket); | |
263 | if (err) | |
264 | goto error; | |
265 | ||
266 | /* release net ref. */ | |
267 | sk_change_net(lisp_port->lisp_rcv_socket->sk, net); | |
268 | ||
269 | sin.sin_family = AF_INET; | |
270 | sin.sin_addr.s_addr = htonl(INADDR_ANY); | |
c405d282 | 271 | sin.sin_port = lisp_port->dst_port; |
a6ae068b LJ |
272 | |
273 | err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin, | |
274 | sizeof(struct sockaddr_in)); | |
275 | if (err) | |
276 | goto error_sock; | |
277 | ||
278 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_type = UDP_ENCAP_LISP; | |
279 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_rcv = lisp_rcv; | |
280 | ||
281 | udp_encap_enable(); | |
282 | ||
283 | return 0; | |
284 | ||
285 | error_sock: | |
286 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
287 | error: | |
288 | pr_warn("cannot register lisp protocol handler: %d\n", err); | |
289 | return err; | |
290 | } | |
291 | ||
c405d282 | 292 | static int lisp_get_options(const struct vport *vport, struct sk_buff *skb) |
a6ae068b | 293 | { |
c405d282 | 294 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b | 295 | |
c405d282 PS |
296 | if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(lisp_port->dst_port))) |
297 | return -EMSGSIZE; | |
298 | return 0; | |
a6ae068b LJ |
299 | } |
300 | ||
c405d282 | 301 | static void lisp_tnl_destroy(struct vport *vport) |
a6ae068b | 302 | { |
c405d282 PS |
303 | struct lisp_port *lisp_port = lisp_vport(vport); |
304 | ||
85c9de19 PS |
305 | list_del_rcu(&lisp_port->list); |
306 | /* Release socket */ | |
307 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
c405d282 PS |
308 | |
309 | ovs_vport_deferred_free(vport); | |
85c9de19 PS |
310 | } |
311 | ||
c405d282 | 312 | static struct vport *lisp_tnl_create(const struct vport_parms *parms) |
85c9de19 | 313 | { |
c405d282 PS |
314 | struct net *net = ovs_dp_get_net(parms->dp); |
315 | struct nlattr *options = parms->options; | |
85c9de19 | 316 | struct lisp_port *lisp_port; |
c405d282 | 317 | struct vport *vport; |
a6ae068b LJ |
318 | struct nlattr *a; |
319 | int err; | |
320 | u16 dst_port; | |
a6ae068b LJ |
321 | |
322 | if (!options) { | |
323 | err = -EINVAL; | |
c405d282 | 324 | goto error; |
a6ae068b LJ |
325 | } |
326 | ||
327 | a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); | |
328 | if (a && nla_len(a) == sizeof(u16)) { | |
329 | dst_port = nla_get_u16(a); | |
330 | } else { | |
331 | /* Require destination port from userspace. */ | |
332 | err = -EINVAL; | |
c405d282 | 333 | goto error; |
a6ae068b LJ |
334 | } |
335 | ||
336 | /* Verify if we already have a socket created for this port */ | |
c405d282 | 337 | if (lisp_find_port(net, htons(dst_port))) { |
85c9de19 | 338 | err = -EEXIST; |
c405d282 | 339 | goto error; |
a6ae068b LJ |
340 | } |
341 | ||
c405d282 PS |
342 | vport = ovs_vport_alloc(sizeof(struct lisp_port), |
343 | &ovs_lisp_vport_ops, parms); | |
344 | if (IS_ERR(vport)) | |
345 | return vport; | |
a6ae068b | 346 | |
c405d282 PS |
347 | lisp_port = lisp_vport(vport); |
348 | lisp_port->dst_port = htons(dst_port); | |
349 | strncpy(lisp_port->name, parms->name, IFNAMSIZ); | |
a6ae068b LJ |
350 | |
351 | err = lisp_socket_init(lisp_port, net); | |
352 | if (err) | |
c405d282 | 353 | goto error_free; |
a6ae068b | 354 | |
c405d282 PS |
355 | list_add_tail_rcu(&lisp_port->list, &lisp_ports); |
356 | return vport; | |
a6ae068b | 357 | |
c405d282 PS |
358 | error_free: |
359 | ovs_vport_free(vport); | |
a6ae068b | 360 | error: |
c405d282 | 361 | return ERR_PTR(err); |
a6ae068b LJ |
362 | } |
363 | ||
c405d282 | 364 | static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb) |
a6ae068b | 365 | { |
c405d282 PS |
366 | int tnl_len; |
367 | int network_offset = skb_network_offset(skb); | |
a6ae068b | 368 | |
be7cd27e PS |
369 | if (unlikely(!OVS_CB(skb)->tun_key)) |
370 | return -EINVAL; | |
a6ae068b | 371 | |
c405d282 PS |
372 | /* We only encapsulate IPv4 and IPv6 packets */ |
373 | switch (skb->protocol) { | |
374 | case htons(ETH_P_IP): | |
375 | case htons(ETH_P_IPV6): | |
376 | /* Pop off "inner" Ethernet header */ | |
377 | skb_pull(skb, network_offset); | |
378 | tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP, | |
379 | LISP_HLEN, lisp_build_header); | |
380 | return tnl_len > 0 ? tnl_len + network_offset : tnl_len; | |
381 | default: | |
fe7744e8 | 382 | kfree_skb(skb); |
be7cd27e | 383 | return 0; |
c405d282 | 384 | } |
a6ae068b LJ |
385 | } |
386 | ||
c405d282 | 387 | static const char *lisp_get_name(const struct vport *vport) |
a6ae068b | 388 | { |
c405d282 PS |
389 | struct lisp_port *lisp_port = lisp_vport(vport); |
390 | return lisp_port->name; | |
a6ae068b LJ |
391 | } |
392 | ||
393 | const struct vport_ops ovs_lisp_vport_ops = { | |
394 | .type = OVS_VPORT_TYPE_LISP, | |
a6ae068b LJ |
395 | .create = lisp_tnl_create, |
396 | .destroy = lisp_tnl_destroy, | |
c405d282 | 397 | .get_name = lisp_get_name, |
85c9de19 | 398 | .get_options = lisp_get_options, |
a6ae068b LJ |
399 | .send = lisp_tnl_send, |
400 | }; | |
401 | #else | |
402 | #warning LISP tunneling will not be available on kernels before 2.6.26 | |
403 | #endif /* Linux kernel < 2.6.26 */ |