]>
Commit | Line | Data |
---|---|---|
a6ae068b LJ |
1 | /* |
2 | * Copyright (c) 2011 Nicira, Inc. | |
3 | * Copyright (c) 2013 Cisco Systems, Inc. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but | |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
17 | * 02110-1301, USA | |
18 | */ | |
19 | ||
20 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
21 | ||
22 | #include <linux/version.h> | |
a6ae068b LJ |
23 | |
24 | #include <linux/in.h> | |
25 | #include <linux/ip.h> | |
a6ae068b | 26 | #include <linux/net.h> |
85c9de19 | 27 | #include <linux/rculist.h> |
a6ae068b LJ |
28 | #include <linux/udp.h> |
29 | ||
30 | #include <net/icmp.h> | |
31 | #include <net/ip.h> | |
11aa8dff | 32 | #include <net/route.h> |
a6ae068b | 33 | #include <net/udp.h> |
11aa8dff | 34 | #include <net/xfrm.h> |
a6ae068b LJ |
35 | |
36 | #include "datapath.h" | |
3cf54a56 | 37 | #include "gso.h" |
a6ae068b LJ |
38 | #include "vport.h" |
39 | ||
a6ae068b LJ |
40 | /* |
41 | * LISP encapsulation header: | |
42 | * | |
43 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
44 | * |N|L|E|V|I|flags| Nonce/Map-Version | | |
45 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
46 | * | Instance ID/Locator Status Bits | | |
47 | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | |
48 | * | |
49 | */ | |
50 | ||
51 | /** | |
52 | * struct lisphdr - LISP header | |
53 | * @nonce_present: Flag indicating the presence of a 24 bit nonce value. | |
54 | * @locator_status_bits_present: Flag indicating the presence of Locator Status | |
55 | * Bits (LSB). | |
56 | * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism. | |
57 | * @map_version_present: Flag indicating the use of mapping versioning. | |
58 | * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID. | |
59 | * @reserved_flags: 3 bits reserved for future flags. | |
60 | * @nonce: 24 bit nonce value. | |
61 | * @map_version: 24 bit mapping version. | |
62 | * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present | |
63 | * is not set, 8 bits when it is. | |
64 | * @instance_id: 24 bit Instance ID | |
65 | */ | |
66 | struct lisphdr { | |
67 | #ifdef __LITTLE_ENDIAN_BITFIELD | |
68 | __u8 reserved_flags:3; | |
69 | __u8 instance_id_present:1; | |
70 | __u8 map_version_present:1; | |
71 | __u8 solicit_echo_nonce:1; | |
72 | __u8 locator_status_bits_present:1; | |
73 | __u8 nonce_present:1; | |
74 | #else | |
75 | __u8 nonce_present:1; | |
76 | __u8 locator_status_bits_present:1; | |
77 | __u8 solicit_echo_nonce:1; | |
78 | __u8 map_version_present:1; | |
79 | __u8 instance_id_present:1; | |
80 | __u8 reserved_flags:3; | |
81 | #endif | |
82 | union { | |
83 | __u8 nonce[3]; | |
84 | __u8 map_version[3]; | |
85 | } u1; | |
86 | union { | |
87 | __be32 locator_status_bits; | |
88 | struct { | |
89 | __u8 instance_id[3]; | |
90 | __u8 locator_status_bits; | |
91 | } word2; | |
92 | } u2; | |
93 | }; | |
94 | ||
95 | #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr)) | |
96 | ||
a6ae068b LJ |
97 | /** |
98 | * struct lisp_port - Keeps track of open UDP ports | |
c405d282 PS |
99 | * @dst_port: lisp UDP port no. |
100 | * @list: list element in @lisp_ports. | |
101 | * @lisp_rcv_socket: The socket created for this port number. | |
102 | * @name: vport name. | |
a6ae068b LJ |
103 | */ |
104 | struct lisp_port { | |
c405d282 | 105 | __be16 dst_port; |
a6ae068b | 106 | struct list_head list; |
a6ae068b | 107 | struct socket *lisp_rcv_socket; |
c405d282 | 108 | char name[IFNAMSIZ]; |
a6ae068b LJ |
109 | }; |
110 | ||
111 | static LIST_HEAD(lisp_ports); | |
112 | ||
c405d282 PS |
/* Return the LISP private area that is attached to @vport. */
static inline struct lisp_port *lisp_vport(const struct vport *vport)
{
	struct lisp_port *priv = vport_priv(vport);

	return priv;
}
117 | ||
85c9de19 | 118 | static struct lisp_port *lisp_find_port(struct net *net, __be16 port) |
a6ae068b LJ |
119 | { |
120 | struct lisp_port *lisp_port; | |
121 | ||
85c9de19 | 122 | list_for_each_entry_rcu(lisp_port, &lisp_ports, list) { |
c405d282 | 123 | if (lisp_port->dst_port == port && |
a6ae068b LJ |
124 | net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net)) |
125 | return lisp_port; | |
126 | } | |
127 | ||
128 | return NULL; | |
129 | } | |
130 | ||
131 | static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb) | |
132 | { | |
133 | return (struct lisphdr *)(udp_hdr(skb) + 1); | |
134 | } | |
135 | ||
a6ae068b LJ |
136 | /* Convert 64 bit tunnel ID to 24 bit Instance ID. */ |
137 | static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid) | |
138 | { | |
139 | ||
140 | #ifdef __BIG_ENDIAN | |
141 | iid[0] = (__force __u8)(tun_id >> 16); | |
142 | iid[1] = (__force __u8)(tun_id >> 8); | |
143 | iid[2] = (__force __u8)tun_id; | |
144 | #else | |
145 | iid[0] = (__force __u8)((__force u64)tun_id >> 40); | |
146 | iid[1] = (__force __u8)((__force u64)tun_id >> 48); | |
147 | iid[2] = (__force __u8)((__force u64)tun_id >> 56); | |
148 | #endif | |
149 | } | |
150 | ||
151 | /* Convert 24 bit Instance ID to 64 bit tunnel ID. */ | |
152 | static __be64 instance_id_to_tunnel_id(__u8 *iid) | |
153 | { | |
154 | #ifdef __BIG_ENDIAN | |
155 | return (iid[0] << 16) | (iid[1] << 8) | iid[2]; | |
156 | #else | |
157 | return (__force __be64)(((__force u64)iid[0] << 40) | | |
158 | ((__force u64)iid[1] << 48) | | |
159 | ((__force u64)iid[2] << 56)); | |
160 | #endif | |
161 | } | |
162 | ||
11aa8dff PS |
163 | /* Compute source UDP port for outgoing packet. |
164 | * Currently we use the flow hash. | |
165 | */ | |
3cf54a56 | 166 | static u16 get_src_port(struct sk_buff *skb) |
11aa8dff PS |
167 | { |
168 | int low; | |
169 | int high; | |
170 | unsigned int range; | |
171 | struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; | |
172 | u32 hash = jhash2((const u32 *)pkt_key, | |
173 | sizeof(*pkt_key) / sizeof(u32), 0); | |
174 | ||
175 | inet_get_local_port_range(&low, &high); | |
176 | range = (high - low) + 1; | |
177 | return (((u64) hash * range) >> 32) + low; | |
178 | } | |
179 | ||
85c9de19 | 180 | static void lisp_build_header(const struct vport *vport, |
3cf54a56 | 181 | struct sk_buff *skb) |
a6ae068b | 182 | { |
c405d282 | 183 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b LJ |
184 | struct udphdr *udph = udp_hdr(skb); |
185 | struct lisphdr *lisph = (struct lisphdr *)(udph + 1); | |
186 | const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; | |
a6ae068b | 187 | |
c405d282 | 188 | udph->dest = lisp_port->dst_port; |
3cf54a56 | 189 | udph->source = htons(get_src_port(skb)); |
a6ae068b LJ |
190 | udph->check = 0; |
191 | udph->len = htons(skb->len - skb_transport_offset(skb)); | |
192 | ||
193 | lisph->nonce_present = 0; /* We don't support echo nonce algorithm */ | |
194 | lisph->locator_status_bits_present = 1; /* Set LSB */ | |
195 | lisph->solicit_echo_nonce = 0; /* No echo noncing */ | |
196 | lisph->map_version_present = 0; /* No mapping versioning, nonce instead */ | |
197 | lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */ | |
198 | lisph->reserved_flags = 0; /* Reserved flags, set to 0 */ | |
199 | ||
200 | lisph->u1.nonce[0] = 0; | |
201 | lisph->u1.nonce[1] = 0; | |
202 | lisph->u1.nonce[2] = 0; | |
203 | ||
85c9de19 | 204 | tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]); |
a6ae068b | 205 | lisph->u2.word2.locator_status_bits = 1; |
a6ae068b LJ |
206 | } |
207 | ||
208 | /* Called with rcu_read_lock and BH disabled. */ | |
209 | static int lisp_rcv(struct sock *sk, struct sk_buff *skb) | |
210 | { | |
85c9de19 | 211 | struct lisp_port *lisp_port; |
a6ae068b | 212 | struct lisphdr *lisph; |
a6ae068b LJ |
213 | struct iphdr *iph, *inner_iph; |
214 | struct ovs_key_ipv4_tunnel tun_key; | |
215 | __be64 key; | |
a6ae068b LJ |
216 | struct ethhdr *ethh; |
217 | __be16 protocol; | |
218 | ||
85c9de19 PS |
219 | lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); |
220 | if (unlikely(!lisp_port)) | |
221 | goto error; | |
222 | ||
971fd4ab | 223 | if (iptunnel_pull_header(skb, LISP_HLEN, 0)) |
a6ae068b LJ |
224 | goto error; |
225 | ||
226 | lisph = lisp_hdr(skb); | |
227 | ||
a6ae068b LJ |
228 | if (lisph->instance_id_present != 1) |
229 | key = 0; | |
230 | else | |
231 | key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]); | |
232 | ||
a6ae068b | 233 | /* Save outer tunnel values */ |
85c9de19 | 234 | iph = ip_hdr(skb); |
adda018c | 235 | ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); |
a6ae068b LJ |
236 | |
237 | /* Drop non-IP inner packets */ | |
238 | inner_iph = (struct iphdr *)(lisph + 1); | |
239 | switch (inner_iph->version) { | |
240 | case 4: | |
241 | protocol = htons(ETH_P_IP); | |
242 | break; | |
243 | case 6: | |
244 | protocol = htons(ETH_P_IPV6); | |
245 | break; | |
246 | default: | |
247 | goto error; | |
248 | } | |
971fd4ab | 249 | skb->protocol = protocol; |
a6ae068b LJ |
250 | |
251 | /* Add Ethernet header */ | |
252 | ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); | |
253 | memset(ethh, 0, ETH_HLEN); | |
254 | ethh->h_dest[0] = 0x02; | |
255 | ethh->h_source[0] = 0x02; | |
256 | ethh->h_proto = protocol; | |
257 | ||
3cfede14 PS |
258 | ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); |
259 | ||
971fd4ab | 260 | ovs_vport_receive(vport_from_priv(lisp_port), skb, &tun_key); |
a6ae068b LJ |
261 | goto out; |
262 | ||
263 | error: | |
264 | kfree_skb(skb); | |
265 | out: | |
266 | return 0; | |
267 | } | |
268 | ||
269 | /* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */ | |
270 | #define UDP_ENCAP_LISP 1 | |
271 | static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net) | |
272 | { | |
a6ae068b | 273 | struct sockaddr_in sin; |
c405d282 | 274 | int err; |
a6ae068b LJ |
275 | |
276 | err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, | |
277 | &lisp_port->lisp_rcv_socket); | |
278 | if (err) | |
279 | goto error; | |
280 | ||
281 | /* release net ref. */ | |
282 | sk_change_net(lisp_port->lisp_rcv_socket->sk, net); | |
283 | ||
284 | sin.sin_family = AF_INET; | |
285 | sin.sin_addr.s_addr = htonl(INADDR_ANY); | |
c405d282 | 286 | sin.sin_port = lisp_port->dst_port; |
a6ae068b LJ |
287 | |
288 | err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin, | |
289 | sizeof(struct sockaddr_in)); | |
290 | if (err) | |
291 | goto error_sock; | |
292 | ||
293 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_type = UDP_ENCAP_LISP; | |
294 | udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_rcv = lisp_rcv; | |
295 | ||
296 | udp_encap_enable(); | |
297 | ||
298 | return 0; | |
299 | ||
300 | error_sock: | |
301 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
302 | error: | |
303 | pr_warn("cannot register lisp protocol handler: %d\n", err); | |
304 | return err; | |
305 | } | |
306 | ||
c405d282 | 307 | static int lisp_get_options(const struct vport *vport, struct sk_buff *skb) |
a6ae068b | 308 | { |
c405d282 | 309 | struct lisp_port *lisp_port = lisp_vport(vport); |
a6ae068b | 310 | |
c405d282 PS |
311 | if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(lisp_port->dst_port))) |
312 | return -EMSGSIZE; | |
313 | return 0; | |
a6ae068b LJ |
314 | } |
315 | ||
c405d282 | 316 | static void lisp_tnl_destroy(struct vport *vport) |
a6ae068b | 317 | { |
c405d282 PS |
318 | struct lisp_port *lisp_port = lisp_vport(vport); |
319 | ||
85c9de19 PS |
320 | list_del_rcu(&lisp_port->list); |
321 | /* Release socket */ | |
322 | sk_release_kernel(lisp_port->lisp_rcv_socket->sk); | |
c405d282 PS |
323 | |
324 | ovs_vport_deferred_free(vport); | |
85c9de19 PS |
325 | } |
326 | ||
c405d282 | 327 | static struct vport *lisp_tnl_create(const struct vport_parms *parms) |
85c9de19 | 328 | { |
c405d282 PS |
329 | struct net *net = ovs_dp_get_net(parms->dp); |
330 | struct nlattr *options = parms->options; | |
85c9de19 | 331 | struct lisp_port *lisp_port; |
c405d282 | 332 | struct vport *vport; |
a6ae068b LJ |
333 | struct nlattr *a; |
334 | int err; | |
335 | u16 dst_port; | |
a6ae068b LJ |
336 | |
337 | if (!options) { | |
338 | err = -EINVAL; | |
c405d282 | 339 | goto error; |
a6ae068b LJ |
340 | } |
341 | ||
342 | a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); | |
343 | if (a && nla_len(a) == sizeof(u16)) { | |
344 | dst_port = nla_get_u16(a); | |
345 | } else { | |
346 | /* Require destination port from userspace. */ | |
347 | err = -EINVAL; | |
c405d282 | 348 | goto error; |
a6ae068b LJ |
349 | } |
350 | ||
351 | /* Verify if we already have a socket created for this port */ | |
c405d282 | 352 | if (lisp_find_port(net, htons(dst_port))) { |
85c9de19 | 353 | err = -EEXIST; |
c405d282 | 354 | goto error; |
a6ae068b LJ |
355 | } |
356 | ||
c405d282 PS |
357 | vport = ovs_vport_alloc(sizeof(struct lisp_port), |
358 | &ovs_lisp_vport_ops, parms); | |
359 | if (IS_ERR(vport)) | |
360 | return vport; | |
a6ae068b | 361 | |
c405d282 PS |
362 | lisp_port = lisp_vport(vport); |
363 | lisp_port->dst_port = htons(dst_port); | |
364 | strncpy(lisp_port->name, parms->name, IFNAMSIZ); | |
a6ae068b LJ |
365 | |
366 | err = lisp_socket_init(lisp_port, net); | |
367 | if (err) | |
c405d282 | 368 | goto error_free; |
a6ae068b | 369 | |
c405d282 PS |
370 | list_add_tail_rcu(&lisp_port->list, &lisp_ports); |
371 | return vport; | |
a6ae068b | 372 | |
c405d282 PS |
373 | error_free: |
374 | ovs_vport_free(vport); | |
a6ae068b | 375 | error: |
c405d282 | 376 | return ERR_PTR(err); |
a6ae068b LJ |
377 | } |
378 | ||
3cf54a56 | 379 | static void lisp_fix_segment(struct sk_buff *skb) |
11aa8dff | 380 | { |
3cf54a56 | 381 | struct udphdr *udph = udp_hdr(skb); |
11aa8dff | 382 | |
3cf54a56 | 383 | udph->len = htons(skb->len - skb_transport_offset(skb)); |
11aa8dff PS |
384 | } |
385 | ||
3cf54a56 | 386 | static void handle_offloads(struct sk_buff *skb) |
11aa8dff | 387 | { |
3cf54a56 PS |
388 | if (skb_is_gso(skb)) |
389 | OVS_GSO_CB(skb)->fix_segment = lisp_fix_segment; | |
390 | else if (skb->ip_summed != CHECKSUM_PARTIAL) | |
391 | skb->ip_summed = CHECKSUM_NONE; | |
11aa8dff PS |
392 | } |
393 | ||
3cf54a56 | 394 | static int lisp_send(struct vport *vport, struct sk_buff *skb) |
11aa8dff | 395 | { |
3cf54a56 | 396 | int network_offset = skb_network_offset(skb); |
11aa8dff | 397 | struct rtable *rt; |
3cf54a56 | 398 | int min_headroom; |
11aa8dff | 399 | __be32 saddr; |
3cf54a56 PS |
400 | __be16 df; |
401 | int sent_len; | |
11aa8dff | 402 | int err; |
3cf54a56 PS |
403 | |
404 | if (unlikely(!OVS_CB(skb)->tun_key)) | |
405 | return -EINVAL; | |
406 | ||
407 | if (skb->protocol != htons(ETH_P_IP) && | |
408 | skb->protocol != htons(ETH_P_IPV6)) { | |
409 | kfree_skb(skb); | |
410 | return 0; | |
411 | } | |
11aa8dff PS |
412 | |
413 | /* Route lookup */ | |
414 | saddr = OVS_CB(skb)->tun_key->ipv4_src; | |
415 | rt = find_route(ovs_dp_get_net(vport->dp), | |
416 | &saddr, | |
417 | OVS_CB(skb)->tun_key->ipv4_dst, | |
3cf54a56 | 418 | IPPROTO_UDP, |
11aa8dff | 419 | OVS_CB(skb)->tun_key->ipv4_tos, |
3025a772 | 420 | skb->mark); |
11aa8dff PS |
421 | if (IS_ERR(rt)) { |
422 | err = PTR_ERR(rt); | |
423 | goto error; | |
424 | } | |
425 | ||
11aa8dff | 426 | min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len |
3cf54a56 | 427 | + sizeof(struct iphdr) + LISP_HLEN; |
11aa8dff PS |
428 | |
429 | if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { | |
430 | int head_delta = SKB_DATA_ALIGN(min_headroom - | |
431 | skb_headroom(skb) + | |
432 | 16); | |
433 | ||
434 | err = pskb_expand_head(skb, max_t(int, head_delta, 0), | |
435 | 0, GFP_ATOMIC); | |
436 | if (unlikely(err)) | |
437 | goto err_free_rt; | |
438 | } | |
439 | ||
df7d2c59 | 440 | /* Reset l2 headers. */ |
3cf54a56 PS |
441 | skb_pull(skb, network_offset); |
442 | skb_reset_mac_header(skb); | |
df7d2c59 PS |
443 | vlan_set_tci(skb, 0); |
444 | ||
3cf54a56 PS |
445 | skb_reset_inner_headers(skb); |
446 | ||
447 | __skb_push(skb, LISP_HLEN); | |
448 | skb_reset_transport_header(skb); | |
449 | ||
450 | lisp_build_header(vport, skb); | |
451 | ||
11aa8dff | 452 | /* Offloading */ |
3cf54a56 PS |
453 | handle_offloads(skb); |
454 | skb->local_df = 1; | |
455 | ||
456 | df = OVS_CB(skb)->tun_key->tun_flags & | |
11aa8dff | 457 | TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; |
2b897011 | 458 | sent_len = iptunnel_xmit(rt, skb, |
3cf54a56 PS |
459 | saddr, OVS_CB(skb)->tun_key->ipv4_dst, |
460 | IPPROTO_UDP, OVS_CB(skb)->tun_key->ipv4_tos, | |
461 | OVS_CB(skb)->tun_key->ipv4_ttl, df); | |
11aa8dff | 462 | |
3cf54a56 | 463 | return sent_len > 0 ? sent_len + network_offset : sent_len; |
11aa8dff PS |
464 | |
465 | err_free_rt: | |
466 | ip_rt_put(rt); | |
467 | error: | |
468 | return err; | |
469 | } | |
470 | ||
c405d282 | 471 | static const char *lisp_get_name(const struct vport *vport) |
a6ae068b | 472 | { |
c405d282 PS |
473 | struct lisp_port *lisp_port = lisp_vport(vport); |
474 | return lisp_port->name; | |
a6ae068b LJ |
475 | } |
476 | ||
477 | const struct vport_ops ovs_lisp_vport_ops = { | |
478 | .type = OVS_VPORT_TYPE_LISP, | |
a6ae068b LJ |
479 | .create = lisp_tnl_create, |
480 | .destroy = lisp_tnl_destroy, | |
c405d282 | 481 | .get_name = lisp_get_name, |
85c9de19 | 482 | .get_options = lisp_get_options, |
3cf54a56 | 483 | .send = lisp_send, |
a6ae068b | 484 | }; |